xref: /freebsd/contrib/processor-trace/libipt/src/pt_block_decoder.c (revision 6683132d54bd6d589889e43dabdc53d35e38a028)
1 /*
2  * Copyright (c) 2016-2018, Intel Corporation
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  *  * Redistributions of source code must retain the above copyright notice,
8  *    this list of conditions and the following disclaimer.
9  *  * Redistributions in binary form must reproduce the above copyright notice,
10  *    this list of conditions and the following disclaimer in the documentation
11  *    and/or other materials provided with the distribution.
12  *  * Neither the name of Intel Corporation nor the names of its contributors
13  *    may be used to endorse or promote products derived from this software
14  *    without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include "pt_block_decoder.h"
30 #include "pt_block_cache.h"
31 #include "pt_section.h"
32 #include "pt_image.h"
33 #include "pt_insn.h"
34 #include "pt_config.h"
35 #include "pt_asid.h"
36 #include "pt_compiler.h"
37 
38 #include "intel-pt.h"
39 
40 #include <string.h>
41 #include <stdlib.h>
42 
43 
/* Forward declaration; pt_blk_start() below calls this before its definition
 * is visible.
 */
static int pt_blk_proceed_trailing_event(struct pt_block_decoder *decoder,
					 struct pt_block *block);
46 
47 
48 static int pt_blk_status(const struct pt_block_decoder *decoder, int flags)
49 {
50 	int status;
51 
52 	if (!decoder)
53 		return -pte_internal;
54 
55 	status = decoder->status;
56 
57 	/* Indicate whether tracing is disabled or enabled.
58 	 *
59 	 * This duplicates the indication in struct pt_insn and covers the case
60 	 * where we indicate the status after synchronizing.
61 	 */
62 	if (!decoder->enabled)
63 		flags |= pts_ip_suppressed;
64 
65 	/* Forward end-of-trace indications.
66 	 *
67 	 * Postpone it as long as we're still processing events, though.
68 	 */
69 	if ((status & pts_eos) && !decoder->process_event)
70 		flags |= pts_eos;
71 
72 	return flags;
73 }
74 
75 static void pt_blk_reset(struct pt_block_decoder *decoder)
76 {
77 	if (!decoder)
78 		return;
79 
80 	decoder->mode = ptem_unknown;
81 	decoder->ip = 0ull;
82 	decoder->status = 0;
83 	decoder->enabled = 0;
84 	decoder->process_event = 0;
85 	decoder->speculative = 0;
86 	decoder->process_insn = 0;
87 	decoder->bound_paging = 0;
88 	decoder->bound_vmcs = 0;
89 	decoder->bound_ptwrite = 0;
90 
91 	memset(&decoder->event, 0, sizeof(decoder->event));
92 	pt_retstack_init(&decoder->retstack);
93 	pt_asid_init(&decoder->asid);
94 }
95 
96 /* Initialize the query decoder flags based on our flags. */
97 
98 static int pt_blk_init_qry_flags(struct pt_conf_flags *qflags,
99 				 const struct pt_conf_flags *flags)
100 {
101 	if (!qflags || !flags)
102 		return -pte_internal;
103 
104 	memset(qflags, 0, sizeof(*qflags));
105 
106 	return 0;
107 }
108 
109 int pt_blk_decoder_init(struct pt_block_decoder *decoder,
110 			const struct pt_config *uconfig)
111 {
112 	struct pt_config config;
113 	int errcode;
114 
115 	if (!decoder)
116 		return -pte_internal;
117 
118 	errcode = pt_config_from_user(&config, uconfig);
119 	if (errcode < 0)
120 		return errcode;
121 
122 	/* The user supplied decoder flags. */
123 	decoder->flags = config.flags;
124 
125 	/* Set the flags we need for the query decoder we use. */
126 	errcode = pt_blk_init_qry_flags(&config.flags, &decoder->flags);
127 	if (errcode < 0)
128 		return errcode;
129 
130 	errcode = pt_qry_decoder_init(&decoder->query, &config);
131 	if (errcode < 0)
132 		return errcode;
133 
134 	pt_image_init(&decoder->default_image, NULL);
135 	decoder->image = &decoder->default_image;
136 
137 	errcode = pt_msec_cache_init(&decoder->scache);
138 	if (errcode < 0)
139 		return errcode;
140 
141 	pt_blk_reset(decoder);
142 
143 	return 0;
144 }
145 
146 void pt_blk_decoder_fini(struct pt_block_decoder *decoder)
147 {
148 	if (!decoder)
149 		return;
150 
151 	pt_msec_cache_fini(&decoder->scache);
152 	pt_image_fini(&decoder->default_image);
153 	pt_qry_decoder_fini(&decoder->query);
154 }
155 
156 struct pt_block_decoder *
157 pt_blk_alloc_decoder(const struct pt_config *config)
158 {
159 	struct pt_block_decoder *decoder;
160 	int errcode;
161 
162 	decoder = malloc(sizeof(*decoder));
163 	if (!decoder)
164 		return NULL;
165 
166 	errcode = pt_blk_decoder_init(decoder, config);
167 	if (errcode < 0) {
168 		free(decoder);
169 		return NULL;
170 	}
171 
172 	return decoder;
173 }
174 
/* Finalize and free a block decoder.
 *
 * Does nothing if @decoder is NULL.
 */
void pt_blk_free_decoder(struct pt_block_decoder *decoder)
{
	if (decoder) {
		pt_blk_decoder_fini(decoder);
		free(decoder);
	}
}
183 
184 /* Maybe synthesize a tick event.
185  *
186  * If we're not already processing events, check the current time against the
187  * last event's time.  If it changed, synthesize a tick event with the new time.
188  *
189  * Returns zero if no tick event has been created.
190  * Returns a positive integer if a tick event has been created.
191  * Returns a negative error code otherwise.
192  */
193 static int pt_blk_tick(struct pt_block_decoder *decoder, uint64_t ip)
194 {
195 	struct pt_event *ev;
196 	uint64_t tsc;
197 	uint32_t lost_mtc, lost_cyc;
198 	int errcode;
199 
200 	if (!decoder)
201 		return -pte_internal;
202 
203 	/* We're not generating tick events if tracing is disabled. */
204 	if (!decoder->enabled)
205 		return -pte_internal;
206 
207 	/* Events already provide a timestamp so there is no need to synthesize
208 	 * an artificial tick event.  There's no room, either, since this would
209 	 * overwrite the in-progress event.
210 	 *
211 	 * In rare cases where we need to proceed to an event location using
212 	 * trace this may cause us to miss a timing update if the event is not
213 	 * forwarded to the user.
214 	 *
215 	 * The only case I can come up with at the moment is a MODE.EXEC binding
216 	 * to the TIP IP of a far branch.
217 	 */
218 	if (decoder->process_event)
219 		return 0;
220 
221 	errcode = pt_qry_time(&decoder->query, &tsc, &lost_mtc, &lost_cyc);
222 	if (errcode < 0) {
223 		/* If we don't have wall-clock time, we use relative time. */
224 		if (errcode != -pte_no_time)
225 			return errcode;
226 	}
227 
228 	ev = &decoder->event;
229 
230 	/* We're done if time has not changed since the last event. */
231 	if (tsc == ev->tsc)
232 		return 0;
233 
234 	/* Time has changed so we create a new tick event. */
235 	memset(ev, 0, sizeof(*ev));
236 	ev->type = ptev_tick;
237 	ev->variant.tick.ip = ip;
238 
239 	/* Indicate if we have wall-clock time or only relative time. */
240 	if (errcode != -pte_no_time)
241 		ev->has_tsc = 1;
242 	ev->tsc = tsc;
243 	ev->lost_mtc = lost_mtc;
244 	ev->lost_cyc = lost_cyc;
245 
246 	/* We now have an event to process. */
247 	decoder->process_event = 1;
248 
249 	return 1;
250 }
251 
252 /* Query an indirect branch.
253  *
254  * Returns zero on success, a negative error code otherwise.
255  */
256 static int pt_blk_indirect_branch(struct pt_block_decoder *decoder,
257 				  uint64_t *ip)
258 {
259 	uint64_t evip;
260 	int status, errcode;
261 
262 	if (!decoder)
263 		return -pte_internal;
264 
265 	evip = decoder->ip;
266 
267 	status = pt_qry_indirect_branch(&decoder->query, ip);
268 	if (status < 0)
269 		return status;
270 
271 	if (decoder->flags.variant.block.enable_tick_events) {
272 		errcode = pt_blk_tick(decoder, evip);
273 		if (errcode < 0)
274 			return errcode;
275 	}
276 
277 	return status;
278 }
279 
280 /* Query a conditional branch.
281  *
282  * Returns zero on success, a negative error code otherwise.
283  */
284 static int pt_blk_cond_branch(struct pt_block_decoder *decoder, int *taken)
285 {
286 	int status, errcode;
287 
288 	if (!decoder)
289 		return -pte_internal;
290 
291 	status = pt_qry_cond_branch(&decoder->query, taken);
292 	if (status < 0)
293 		return status;
294 
295 	if (decoder->flags.variant.block.enable_tick_events) {
296 		errcode = pt_blk_tick(decoder, decoder->ip);
297 		if (errcode < 0)
298 			return errcode;
299 	}
300 
301 	return status;
302 }
303 
304 static int pt_blk_start(struct pt_block_decoder *decoder, int status)
305 {
306 	if (!decoder)
307 		return -pte_internal;
308 
309 	if (status < 0)
310 		return status;
311 
312 	decoder->status = status;
313 	if (!(status & pts_ip_suppressed))
314 		decoder->enabled = 1;
315 
316 	/* We will always have an event.
317 	 *
318 	 * If we synchronized onto an empty PSB+, tracing is disabled and we'll
319 	 * process events until the enabled event.
320 	 *
321 	 * If tracing is enabled, PSB+ must at least provide the execution mode,
322 	 * which we're going to forward to the user.
323 	 */
324 	return pt_blk_proceed_trailing_event(decoder, NULL);
325 }
326 
327 static int pt_blk_sync_reset(struct pt_block_decoder *decoder)
328 {
329 	if (!decoder)
330 		return -pte_internal;
331 
332 	pt_blk_reset(decoder);
333 
334 	return 0;
335 }
336 
337 int pt_blk_sync_forward(struct pt_block_decoder *decoder)
338 {
339 	int errcode, status;
340 
341 	if (!decoder)
342 		return -pte_invalid;
343 
344 	errcode = pt_blk_sync_reset(decoder);
345 	if (errcode < 0)
346 		return errcode;
347 
348 	status = pt_qry_sync_forward(&decoder->query, &decoder->ip);
349 
350 	return pt_blk_start(decoder, status);
351 }
352 
353 int pt_blk_sync_backward(struct pt_block_decoder *decoder)
354 {
355 	int errcode, status;
356 
357 	if (!decoder)
358 		return -pte_invalid;
359 
360 	errcode = pt_blk_sync_reset(decoder);
361 	if (errcode < 0)
362 		return errcode;
363 
364 	status = pt_qry_sync_backward(&decoder->query, &decoder->ip);
365 
366 	return pt_blk_start(decoder, status);
367 }
368 
369 int pt_blk_sync_set(struct pt_block_decoder *decoder, uint64_t offset)
370 {
371 	int errcode, status;
372 
373 	if (!decoder)
374 		return -pte_invalid;
375 
376 	errcode = pt_blk_sync_reset(decoder);
377 	if (errcode < 0)
378 		return errcode;
379 
380 	status = pt_qry_sync_set(&decoder->query, &decoder->ip, offset);
381 
382 	return pt_blk_start(decoder, status);
383 }
384 
385 int pt_blk_get_offset(const struct pt_block_decoder *decoder, uint64_t *offset)
386 {
387 	if (!decoder)
388 		return -pte_invalid;
389 
390 	return pt_qry_get_offset(&decoder->query, offset);
391 }
392 
393 int pt_blk_get_sync_offset(const struct pt_block_decoder *decoder,
394 			   uint64_t *offset)
395 {
396 	if (!decoder)
397 		return -pte_invalid;
398 
399 	return pt_qry_get_sync_offset(&decoder->query, offset);
400 }
401 
402 struct pt_image *pt_blk_get_image(struct pt_block_decoder *decoder)
403 {
404 	if (!decoder)
405 		return NULL;
406 
407 	return decoder->image;
408 }
409 
410 int pt_blk_set_image(struct pt_block_decoder *decoder, struct pt_image *image)
411 {
412 	if (!decoder)
413 		return -pte_invalid;
414 
415 	if (!image)
416 		image = &decoder->default_image;
417 
418 	decoder->image = image;
419 	return 0;
420 }
421 
422 const struct pt_config *
423 pt_blk_get_config(const struct pt_block_decoder *decoder)
424 {
425 	if (!decoder)
426 		return NULL;
427 
428 	return pt_qry_get_config(&decoder->query);
429 }
430 
431 int pt_blk_time(struct pt_block_decoder *decoder, uint64_t *time,
432 		uint32_t *lost_mtc, uint32_t *lost_cyc)
433 {
434 	if (!decoder || !time)
435 		return -pte_invalid;
436 
437 	return pt_qry_time(&decoder->query, time, lost_mtc, lost_cyc);
438 }
439 
440 int pt_blk_core_bus_ratio(struct pt_block_decoder *decoder, uint32_t *cbr)
441 {
442 	if (!decoder || !cbr)
443 		return -pte_invalid;
444 
445 	return pt_qry_core_bus_ratio(&decoder->query, cbr);
446 }
447 
448 int pt_blk_asid(const struct pt_block_decoder *decoder, struct pt_asid *asid,
449 		size_t size)
450 {
451 	if (!decoder || !asid)
452 		return -pte_invalid;
453 
454 	return pt_asid_to_user(asid, &decoder->asid, size);
455 }
456 
457 /* Fetch the next pending event.
458  *
459  * Checks for pending events.  If an event is pending, fetches it (if not
460  * already in process).
461  *
462  * Returns zero if no event is pending.
463  * Returns a positive integer if an event is pending or in process.
464  * Returns a negative error code otherwise.
465  */
466 static inline int pt_blk_fetch_event(struct pt_block_decoder *decoder)
467 {
468 	int status;
469 
470 	if (!decoder)
471 		return -pte_internal;
472 
473 	if (decoder->process_event)
474 		return 1;
475 
476 	if (!(decoder->status & pts_event_pending))
477 		return 0;
478 
479 	status = pt_qry_event(&decoder->query, &decoder->event,
480 			      sizeof(decoder->event));
481 	if (status < 0)
482 		return status;
483 
484 	decoder->process_event = 1;
485 	decoder->status = status;
486 
487 	return 1;
488 }
489 
490 static inline int pt_blk_block_is_empty(const struct pt_block *block)
491 {
492 	if (!block)
493 		return 1;
494 
495 	return !block->ninsn;
496 }
497 
498 static inline int block_to_user(struct pt_block *ublock, size_t size,
499 				const struct pt_block *block)
500 {
501 	if (!ublock || !block)
502 		return -pte_internal;
503 
504 	if (ublock == block)
505 		return 0;
506 
507 	/* Zero out any unknown bytes. */
508 	if (sizeof(*block) < size) {
509 		memset(ublock + sizeof(*block), 0, size - sizeof(*block));
510 
511 		size = sizeof(*block);
512 	}
513 
514 	memcpy(ublock, block, size);
515 
516 	return 0;
517 }
518 
/* An instruction predicate that never matches.
 *
 * Has the signature pt_blk_proceed_to_insn() expects of its predicate.  Both
 * arguments are ignored.
 *
 * Returns zero, always.
 */
static int pt_insn_false(const struct pt_insn *insn,
			 const struct pt_insn_ext *iext)
{
	/* Silence unused-parameter warnings. */
	(void) iext;
	(void) insn;

	return 0;
}
527 
528 /* Determine the next IP using trace.
529  *
530  * Tries to determine the IP of the next instruction using trace and provides it
531  * in @pip.
532  *
533  * Not requiring trace to determine the IP is treated as an internal error.
534  *
535  * Does not update the return compression stack for indirect calls.  This is
536  * expected to have been done, already, when trying to determine the next IP
537  * without using trace.
538  *
539  * Does not update @decoder->status.  The caller is expected to do that.
540  *
541  * Returns a non-negative pt_status_flag bit-vector on success, a negative error
542  * code otherwise.
543  * Returns -pte_internal if @pip, @decoder, @insn, or @iext are NULL.
544  * Returns -pte_internal if no trace is required.
545  */
546 static int pt_blk_next_ip(uint64_t *pip, struct pt_block_decoder *decoder,
547 			  const struct pt_insn *insn,
548 			  const struct pt_insn_ext *iext)
549 {
550 	int status, errcode;
551 
552 	if (!pip || !decoder || !insn || !iext)
553 		return -pte_internal;
554 
555 	/* We handle non-taken conditional branches, and compressed returns
556 	 * directly in the switch.
557 	 *
558 	 * All kinds of branches are handled below the switch.
559 	 */
560 	switch (insn->iclass) {
561 	case ptic_cond_jump: {
562 		uint64_t ip;
563 		int taken;
564 
565 		status = pt_blk_cond_branch(decoder, &taken);
566 		if (status < 0)
567 			return status;
568 
569 		ip = insn->ip + insn->size;
570 		if (taken)
571 			ip += iext->variant.branch.displacement;
572 
573 		*pip = ip;
574 		return status;
575 	}
576 
577 	case ptic_return: {
578 		int taken;
579 
580 		/* Check for a compressed return. */
581 		status = pt_blk_cond_branch(decoder, &taken);
582 		if (status < 0) {
583 			if (status != -pte_bad_query)
584 				return status;
585 
586 			break;
587 		}
588 
589 		/* A compressed return is indicated by a taken conditional
590 		 * branch.
591 		 */
592 		if (!taken)
593 			return -pte_bad_retcomp;
594 
595 		errcode = pt_retstack_pop(&decoder->retstack, pip);
596 		if (errcode < 0)
597 			return errcode;
598 
599 		return status;
600 	}
601 
602 	case ptic_jump:
603 	case ptic_call:
604 		/* A direct jump or call wouldn't require trace. */
605 		if (iext->variant.branch.is_direct)
606 			return -pte_internal;
607 
608 		break;
609 
610 	case ptic_far_call:
611 	case ptic_far_return:
612 	case ptic_far_jump:
613 		break;
614 
615 	case ptic_ptwrite:
616 	case ptic_other:
617 		return -pte_internal;
618 
619 	case ptic_error:
620 		return -pte_bad_insn;
621 	}
622 
623 	/* Process an indirect branch.
624 	 *
625 	 * This covers indirect jumps and calls, non-compressed returns, and all
626 	 * flavors of far transfers.
627 	 */
628 	return pt_blk_indirect_branch(decoder, pip);
629 }
630 
631 /* Proceed to the next IP using trace.
632  *
633  * We failed to proceed without trace.  This ends the current block.  Now use
634  * trace to do one final step to determine the start IP of the next block.
635  *
636  * Returns zero on success, a negative error code otherwise.
637  */
638 static int pt_blk_proceed_with_trace(struct pt_block_decoder *decoder,
639 				     const struct pt_insn *insn,
640 				     const struct pt_insn_ext *iext)
641 {
642 	int status;
643 
644 	if (!decoder)
645 		return -pte_internal;
646 
647 	status = pt_blk_next_ip(&decoder->ip, decoder, insn, iext);
648 	if (status < 0)
649 		return status;
650 
651 	/* Preserve the query decoder's response which indicates upcoming
652 	 * events.
653 	 */
654 	decoder->status = status;
655 
656 	/* We do need an IP in order to proceed. */
657 	if (status & pts_ip_suppressed)
658 		return -pte_noip;
659 
660 	return 0;
661 }
662 
663 /* Decode one instruction in a known section.
664  *
665  * Decode the instruction at @insn->ip in @msec assuming execution mode
666  * @insn->mode.
667  *
668  * Returns zero on success, a negative error code otherwise.
669  */
670 static int pt_blk_decode_in_section(struct pt_insn *insn,
671 				    struct pt_insn_ext *iext,
672 				    const struct pt_mapped_section *msec)
673 {
674 	int status;
675 
676 	if (!insn || !iext)
677 		return -pte_internal;
678 
679 	/* We know that @ip is contained in @section.
680 	 *
681 	 * Note that we need to translate @ip into a section offset.
682 	 */
683 	status = pt_msec_read(msec, insn->raw, sizeof(insn->raw), insn->ip);
684 	if (status < 0)
685 		return status;
686 
687 	/* We initialize @insn->size to the maximal possible size.  It will be
688 	 * set to the actual size during instruction decode.
689 	 */
690 	insn->size = (uint8_t) status;
691 
692 	return pt_ild_decode(insn, iext);
693 }
694 
695 /* Update the return-address stack if @insn is a near call.
696  *
697  * Returns zero on success, a negative error code otherwise.
698  */
699 static inline int pt_blk_log_call(struct pt_block_decoder *decoder,
700 				  const struct pt_insn *insn,
701 				  const struct pt_insn_ext *iext)
702 {
703 	if (!decoder || !insn || !iext)
704 		return -pte_internal;
705 
706 	if (insn->iclass != ptic_call)
707 		return 0;
708 
709 	/* Ignore direct calls to the next instruction that are used for
710 	 * position independent code.
711 	 */
712 	if (iext->variant.branch.is_direct &&
713 	    !iext->variant.branch.displacement)
714 		return 0;
715 
716 	return pt_retstack_push(&decoder->retstack, insn->ip + insn->size);
717 }
718 
719 /* Proceed by one instruction.
720  *
721  * Tries to decode the instruction at @decoder->ip and, on success, adds it to
722  * @block and provides it in @pinsn and @piext.
723  *
724  * The instruction will not be added if:
725  *
726  *   - the memory could not be read:  return error
727  *   - it could not be decoded:       return error
728  *   - @block is already full:        return zero
729  *   - @block would switch sections:  return zero
730  *
731  * Returns a positive integer if the instruction was added.
732  * Returns zero if the instruction didn't fit into @block.
733  * Returns a negative error code otherwise.
734  */
735 static int pt_blk_proceed_one_insn(struct pt_block_decoder *decoder,
736 				   struct pt_block *block,
737 				   struct pt_insn *pinsn,
738 				   struct pt_insn_ext *piext)
739 {
740 	struct pt_insn_ext iext;
741 	struct pt_insn insn;
742 	uint16_t ninsn;
743 	int status;
744 
745 	if (!decoder || !block || !pinsn || !piext)
746 		return -pte_internal;
747 
748 	/* There's nothing to do if there is no room in @block. */
749 	ninsn = block->ninsn + 1;
750 	if (!ninsn)
751 		return 0;
752 
753 	/* The truncated instruction must be last. */
754 	if (block->truncated)
755 		return 0;
756 
757 	memset(&insn, 0, sizeof(insn));
758 	memset(&iext, 0, sizeof(iext));
759 
760 	insn.mode = decoder->mode;
761 	insn.ip = decoder->ip;
762 
763 	status = pt_insn_decode(&insn, &iext, decoder->image, &decoder->asid);
764 	if (status < 0)
765 		return status;
766 
767 	/* We do not switch sections inside a block. */
768 	if (insn.isid != block->isid) {
769 		if (!pt_blk_block_is_empty(block))
770 			return 0;
771 
772 		block->isid = insn.isid;
773 	}
774 
775 	/* If we couldn't read @insn's memory in one chunk from @insn.isid, we
776 	 * provide the memory in @block.
777 	 */
778 	if (insn.truncated) {
779 		memcpy(block->raw, insn.raw, insn.size);
780 		block->size = insn.size;
781 		block->truncated = 1;
782 	}
783 
784 	/* Log calls' return addresses for return compression. */
785 	status = pt_blk_log_call(decoder, &insn, &iext);
786 	if (status < 0)
787 		return status;
788 
789 	/* We have a new instruction. */
790 	block->iclass = insn.iclass;
791 	block->end_ip = insn.ip;
792 	block->ninsn = ninsn;
793 
794 	*pinsn = insn;
795 	*piext = iext;
796 
797 	return 1;
798 }
799 
800 
801 /* Proceed to a particular type of instruction without using trace.
802  *
803  * Proceed until we reach an instruction for which @predicate returns a positive
804  * integer or until:
805  *
806  *   - @predicate returns an error:  return error
807  *   - @block is full:               return zero
808  *   - @block would switch sections: return zero
809  *   - we would need trace:          return -pte_bad_query
810  *
811  * Provide the last instruction that was reached in @insn and @iext.
812  *
813  * Update @decoder->ip to point to the last IP that was reached.  If we fail due
814  * to lack of trace or if we reach a desired instruction, this is @insn->ip;
815  * otherwise this is the next instruction's IP.
816  *
817  * Returns a positive integer if a suitable instruction was reached.
818  * Returns zero if no such instruction was reached.
819  * Returns a negative error code otherwise.
820  */
821 static int pt_blk_proceed_to_insn(struct pt_block_decoder *decoder,
822 				  struct pt_block *block,
823 				  struct pt_insn *insn,
824 				  struct pt_insn_ext *iext,
825 				  int (*predicate)(const struct pt_insn *,
826 						   const struct pt_insn_ext *))
827 {
828 	int status;
829 
830 	if (!decoder || !insn || !predicate)
831 		return -pte_internal;
832 
833 	for (;;) {
834 		status = pt_blk_proceed_one_insn(decoder, block, insn, iext);
835 		if (status <= 0)
836 			return status;
837 
838 		/* We're done if this instruction matches the spec (positive
839 		 * status) or we run into an error (negative status).
840 		 */
841 		status = predicate(insn, iext);
842 		if (status != 0)
843 			return status;
844 
845 		/* Let's see if we can proceed to the next IP without trace. */
846 		status = pt_insn_next_ip(&decoder->ip, insn, iext);
847 		if (status < 0)
848 			return status;
849 
850 		/* End the block if the user asked us to.
851 		 *
852 		 * We only need to take care about direct near branches.
853 		 * Indirect and far branches require trace and will naturally
854 		 * end a block.
855 		 */
856 		if ((decoder->flags.variant.block.end_on_call &&
857 		     (insn->iclass == ptic_call)) ||
858 		    (decoder->flags.variant.block.end_on_jump &&
859 		     (insn->iclass == ptic_jump)))
860 			return 0;
861 	}
862 }
863 
864 /* Proceed to a particular IP without using trace.
865  *
866  * Proceed until we reach @ip or until:
867  *
868  *   - @block is full:               return zero
869  *   - @block would switch sections: return zero
870  *   - we would need trace:          return -pte_bad_query
871  *
872  * Provide the last instruction that was reached in @insn and @iext.  If we
873  * reached @ip, this is the instruction preceding it.
874  *
875  * Update @decoder->ip to point to the last IP that was reached.  If we fail due
876  * to lack of trace, this is @insn->ip; otherwise this is the next instruction's
877  * IP.
878  *
879  * Returns a positive integer if @ip was reached.
880  * Returns zero if no such instruction was reached.
881  * Returns a negative error code otherwise.
882  */
883 static int pt_blk_proceed_to_ip(struct pt_block_decoder *decoder,
884 				struct pt_block *block, struct pt_insn *insn,
885 				struct pt_insn_ext *iext, uint64_t ip)
886 {
887 	int status;
888 
889 	if (!decoder || !insn)
890 		return -pte_internal;
891 
892 	for (;;) {
893 		/* We're done when we reach @ip.  We may not even have to decode
894 		 * a single instruction in some cases.
895 		 */
896 		if (decoder->ip == ip)
897 			return 1;
898 
899 		status = pt_blk_proceed_one_insn(decoder, block, insn, iext);
900 		if (status <= 0)
901 			return status;
902 
903 		/* Let's see if we can proceed to the next IP without trace. */
904 		status = pt_insn_next_ip(&decoder->ip, insn, iext);
905 		if (status < 0)
906 			return status;
907 
908 		/* End the block if the user asked us to.
909 		 *
910 		 * We only need to take care about direct near branches.
911 		 * Indirect and far branches require trace and will naturally
912 		 * end a block.
913 		 *
914 		 * The call at the end of the block may have reached @ip; make
915 		 * sure to indicate that.
916 		 */
917 		if ((decoder->flags.variant.block.end_on_call &&
918 		     (insn->iclass == ptic_call)) ||
919 		    (decoder->flags.variant.block.end_on_jump &&
920 		     (insn->iclass == ptic_jump))) {
921 			return (decoder->ip == ip ? 1 : 0);
922 		}
923 	}
924 }
925 
926 /* Proceed to a particular IP with trace, if necessary.
927  *
928  * Proceed until we reach @ip or until:
929  *
930  *   - @block is full:               return zero
931  *   - @block would switch sections: return zero
932  *   - we need trace:                return zero
933  *
934  * Update @decoder->ip to point to the last IP that was reached.
935  *
936  * A return of zero ends @block.
937  *
938  * Returns a positive integer if @ip was reached.
939  * Returns zero if no such instruction was reached.
940  * Returns a negative error code otherwise.
941  */
942 static int pt_blk_proceed_to_ip_with_trace(struct pt_block_decoder *decoder,
943 					   struct pt_block *block,
944 					   uint64_t ip)
945 {
946 	struct pt_insn_ext iext;
947 	struct pt_insn insn;
948 	int status;
949 
950 	/* Try to reach @ip without trace.
951 	 *
952 	 * We're also OK if @block overflowed or we switched sections and we
953 	 * have to try again in the next iteration.
954 	 */
955 	status = pt_blk_proceed_to_ip(decoder, block, &insn, &iext, ip);
956 	if (status != -pte_bad_query)
957 		return status;
958 
959 	/* Needing trace is not an error.  We use trace to determine the next
960 	 * start IP and end the block.
961 	 */
962 	return pt_blk_proceed_with_trace(decoder, &insn, &iext);
963 }
964 
965 static int pt_insn_skl014(const struct pt_insn *insn,
966 			  const struct pt_insn_ext *iext)
967 {
968 	if (!insn || !iext)
969 		return 0;
970 
971 	switch (insn->iclass) {
972 	default:
973 		return 0;
974 
975 	case ptic_call:
976 	case ptic_jump:
977 		return iext->variant.branch.is_direct;
978 
979 	case ptic_other:
980 		return pt_insn_changes_cr3(insn, iext);
981 	}
982 }
983 
984 /* Proceed to the location of a synchronous disabled event with suppressed IP
985  * considering SKL014.
986  *
987  * We have a (synchronous) disabled event pending.  Proceed to the event
988  * location and indicate whether we were able to reach it.
989  *
990  * With SKL014 a TIP.PGD with suppressed IP may also be generated by a direct
991  * unconditional branch that clears FilterEn by jumping out of a filter region
992  * or into a TraceStop region.  Use the filter configuration to determine the
993  * exact branch the event binds to.
994  *
995  * The last instruction that was reached is stored in @insn/@iext.
996  *
997  * Returns a positive integer if the event location was reached.
998  * Returns zero if the event location was not reached.
999  * Returns a negative error code otherwise.
1000  */
1001 static int pt_blk_proceed_skl014(struct pt_block_decoder *decoder,
1002 				 struct pt_block *block, struct pt_insn *insn,
1003 				 struct pt_insn_ext *iext)
1004 {
1005 	const struct pt_conf_addr_filter *addr_filter;
1006 	int status;
1007 
1008 	if (!decoder || !block || !insn || !iext)
1009 		return -pte_internal;
1010 
1011 	addr_filter = &decoder->query.config.addr_filter;
1012 	for (;;) {
1013 		uint64_t ip;
1014 
1015 		status = pt_blk_proceed_to_insn(decoder, block, insn, iext,
1016 						pt_insn_skl014);
1017 		if (status <= 0)
1018 			break;
1019 
1020 		/* The erratum doesn't apply if we can bind the event to a
1021 		 * CR3-changing instruction.
1022 		 */
1023 		if (pt_insn_changes_cr3(insn, iext))
1024 			break;
1025 
1026 		/* Check the filter against the branch target. */
1027 		status = pt_insn_next_ip(&ip, insn, iext);
1028 		if (status < 0)
1029 			break;
1030 
1031 		status = pt_filter_addr_check(addr_filter, ip);
1032 		if (status <= 0) {
1033 			/* We need to flip the indication.
1034 			 *
1035 			 * We reached the event location when @ip lies inside a
1036 			 * tracing-disabled region.
1037 			 */
1038 			if (!status)
1039 				status = 1;
1040 
1041 			break;
1042 		}
1043 
1044 		/* This is not the correct instruction.  Proceed past it and try
1045 		 * again.
1046 		 */
1047 		decoder->ip = ip;
1048 
1049 		/* End the block if the user asked us to.
1050 		 *
1051 		 * We only need to take care about direct near branches.
1052 		 * Indirect and far branches require trace and will naturally
1053 		 * end a block.
1054 		 */
1055 		if ((decoder->flags.variant.block.end_on_call &&
1056 		    (insn->iclass == ptic_call)) ||
1057 		    (decoder->flags.variant.block.end_on_jump &&
1058 		    (insn->iclass == ptic_jump)))
1059 			break;
1060 	}
1061 
1062 	return status;
1063 }
1064 
1065 /* Proceed to the event location for a disabled event.
1066  *
1067  * We have a (synchronous) disabled event pending.  Proceed to the event
1068  * location and indicate whether we were able to reach it.
1069  *
1070  * The last instruction that was reached is stored in @insn/@iext.
1071  *
1072  * Returns a positive integer if the event location was reached.
1073  * Returns zero if the event location was not reached.
1074  * Returns a negative error code otherwise.
1075  */
1076 static int pt_blk_proceed_to_disabled(struct pt_block_decoder *decoder,
1077 				      struct pt_block *block,
1078 				      struct pt_insn *insn,
1079 				      struct pt_insn_ext *iext,
1080 				      const struct pt_event *ev)
1081 {
1082 	if (!decoder || !block || !ev)
1083 		return -pte_internal;
1084 
1085 	if (ev->ip_suppressed) {
1086 		/* Due to SKL014 the TIP.PGD payload may be suppressed also for
1087 		 * direct branches.
1088 		 *
1089 		 * If we don't have a filter configuration we assume that no
1090 		 * address filters were used and the erratum does not apply.
1091 		 *
1092 		 * We might otherwise disable tracing too early.
1093 		 */
1094 		if (decoder->query.config.addr_filter.config.addr_cfg &&
1095 		    decoder->query.config.errata.skl014)
1096 			return pt_blk_proceed_skl014(decoder, block, insn,
1097 						     iext);
1098 
1099 		/* A synchronous disabled event also binds to far branches and
1100 		 * CPL-changing instructions.  Both would require trace,
1101 		 * however, and are thus implicitly handled by erroring out.
1102 		 *
1103 		 * The would-require-trace error is handled by our caller.
1104 		 */
1105 		return pt_blk_proceed_to_insn(decoder, block, insn, iext,
1106 					      pt_insn_changes_cr3);
1107 	} else
1108 		return pt_blk_proceed_to_ip(decoder, block, insn, iext,
1109 					    ev->variant.disabled.ip);
1110 }
1111 
1112 /* Set the expected resume address for a synchronous disable.
1113  *
1114  * On a synchronous disable, @decoder->ip still points to the instruction to
1115  * which the event bound.  That's not where we expect tracing to resume.
1116  *
1117  * For calls, a fair assumption is that tracing resumes after returning from the
1118  * called function.  For other types of instructions, we simply don't know.
1119  *
1120  * Returns zero on success, a negative pt_error_code otherwise.
1121  */
1122 static int pt_blk_set_disable_resume_ip(struct pt_block_decoder *decoder,
1123 					const struct pt_insn *insn)
1124 {
1125 	if (!decoder || !insn)
1126 		return -pte_internal;
1127 
1128 	switch (insn->iclass) {
1129 	case ptic_call:
1130 	case ptic_far_call:
1131 		decoder->ip = insn->ip + insn->size;
1132 		break;
1133 
1134 	default:
1135 		decoder->ip = 0ull;
1136 		break;
1137 	}
1138 
1139 	return 0;
1140 }
1141 
1142 /* Proceed to the event location for an async paging event.
1143  *
1144  * We have an async paging event pending.  Proceed to the event location and
1145  * indicate whether we were able to reach it.  Needing trace in order to proceed
1146  * is not an error in this case but ends the block.
1147  *
1148  * Returns a positive integer if the event location was reached.
1149  * Returns zero if the event location was not reached.
1150  * Returns a negative error code otherwise.
1151  */
1152 static int pt_blk_proceed_to_async_paging(struct pt_block_decoder *decoder,
1153 					  struct pt_block *block,
1154 					  const struct pt_event *ev)
1155 {
1156 	int status;
1157 
1158 	if (!decoder || !ev)
1159 		return -pte_internal;
1160 
1161 	/* Apply the event immediately if we don't have an IP. */
1162 	if (ev->ip_suppressed)
1163 		return 1;
1164 
1165 	status = pt_blk_proceed_to_ip_with_trace(decoder, block,
1166 						 ev->variant.async_paging.ip);
1167 	if (status < 0)
1168 		return status;
1169 
1170 	/* We may have reached the IP. */
1171 	return (decoder->ip == ev->variant.async_paging.ip ? 1 : 0);
1172 }
1173 
1174 /* Proceed to the event location for an async vmcs event.
1175  *
1176  * We have an async vmcs event pending.  Proceed to the event location and
1177  * indicate whether we were able to reach it.  Needing trace in order to proceed
1178  * is not an error in this case but ends the block.
1179  *
1180  * Returns a positive integer if the event location was reached.
1181  * Returns zero if the event location was not reached.
1182  * Returns a negative error code otherwise.
1183  */
1184 static int pt_blk_proceed_to_async_vmcs(struct pt_block_decoder *decoder,
1185 					struct pt_block *block,
1186 					const struct pt_event *ev)
1187 {
1188 	int status;
1189 
1190 	if (!decoder || !ev)
1191 		return -pte_internal;
1192 
1193 	/* Apply the event immediately if we don't have an IP. */
1194 	if (ev->ip_suppressed)
1195 		return 1;
1196 
1197 	status = pt_blk_proceed_to_ip_with_trace(decoder, block,
1198 						 ev->variant.async_vmcs.ip);
1199 	if (status < 0)
1200 		return status;
1201 
1202 	/* We may have reached the IP. */
1203 	return (decoder->ip == ev->variant.async_vmcs.ip ? 1 : 0);
1204 }
1205 
1206 /* Proceed to the event location for an exec mode event.
1207  *
1208  * We have an exec mode event pending.  Proceed to the event location and
1209  * indicate whether we were able to reach it.  Needing trace in order to proceed
1210  * is not an error in this case but ends the block.
1211  *
1212  * Returns a positive integer if the event location was reached.
1213  * Returns zero if the event location was not reached.
1214  * Returns a negative error code otherwise.
1215  */
1216 static int pt_blk_proceed_to_exec_mode(struct pt_block_decoder *decoder,
1217 				       struct pt_block *block,
1218 				       const struct pt_event *ev)
1219 {
1220 	int status;
1221 
1222 	if (!decoder || !ev)
1223 		return -pte_internal;
1224 
1225 	/* Apply the event immediately if we don't have an IP. */
1226 	if (ev->ip_suppressed)
1227 		return 1;
1228 
1229 	status = pt_blk_proceed_to_ip_with_trace(decoder, block,
1230 						 ev->variant.exec_mode.ip);
1231 	if (status < 0)
1232 		return status;
1233 
1234 	/* We may have reached the IP. */
1235 	return (decoder->ip == ev->variant.exec_mode.ip ? 1 : 0);
1236 }
1237 
1238 /* Proceed to the event location for a ptwrite event.
1239  *
1240  * We have a ptwrite event pending.  Proceed to the event location and indicate
1241  * whether we were able to reach it.
1242  *
1243  * In case of the event binding to a ptwrite instruction, we pass beyond that
1244  * instruction and update the event to provide the instruction's IP.
1245  *
1246  * In the case of the event binding to an IP provided in the event, we move
1247  * beyond the instruction at that IP.
1248  *
1249  * Returns a positive integer if the event location was reached.
1250  * Returns zero if the event location was not reached.
1251  * Returns a negative error code otherwise.
1252  */
1253 static int pt_blk_proceed_to_ptwrite(struct pt_block_decoder *decoder,
1254 				     struct pt_block *block,
1255 				     struct pt_insn *insn,
1256 				     struct pt_insn_ext *iext,
1257 				     struct pt_event *ev)
1258 {
1259 	int status;
1260 
1261 	if (!insn || !ev)
1262 		return -pte_internal;
1263 
1264 	/* If we don't have an IP, the event binds to the next PTWRITE
1265 	 * instruction.
1266 	 *
1267 	 * If we have an IP it still binds to the next PTWRITE instruction but
1268 	 * now the IP tells us where that instruction is.  This makes most sense
1269 	 * when tracing is disabled and we don't have any other means of finding
1270 	 * the PTWRITE instruction.  We nevertheless distinguish the two cases,
1271 	 * here.
1272 	 *
1273 	 * In both cases, we move beyond the PTWRITE instruction, so it will be
1274 	 * the last instruction in the current block and @decoder->ip will point
1275 	 * to the instruction following it.
1276 	 */
1277 	if (ev->ip_suppressed) {
1278 		status = pt_blk_proceed_to_insn(decoder, block, insn, iext,
1279 						pt_insn_is_ptwrite);
1280 		if (status <= 0)
1281 			return status;
1282 
1283 		/* We now know the IP of the PTWRITE instruction corresponding
1284 		 * to this event.  Fill it in to make it more convenient for the
1285 		 * user to process the event.
1286 		 */
1287 		ev->variant.ptwrite.ip = insn->ip;
1288 		ev->ip_suppressed = 0;
1289 	} else {
1290 		status = pt_blk_proceed_to_ip(decoder, block, insn, iext,
1291 					      ev->variant.ptwrite.ip);
1292 		if (status <= 0)
1293 			return status;
1294 
1295 		/* We reached the PTWRITE instruction and @decoder->ip points to
1296 		 * it; @insn/@iext still contain the preceding instruction.
1297 		 *
1298 		 * Proceed beyond the PTWRITE to account for it.  Note that we
1299 		 * may still overflow the block, which would cause us to
1300 		 * postpone both instruction and event to the next block.
1301 		 */
1302 		status = pt_blk_proceed_one_insn(decoder, block, insn, iext);
1303 		if (status <= 0)
1304 			return status;
1305 	}
1306 
1307 	return 1;
1308 }
1309 
1310 /* Try to work around erratum SKD022.
1311  *
1312  * If we get an asynchronous disable on VMLAUNCH or VMRESUME, the FUP that
1313  * caused the disable to be asynchronous might have been bogous.
1314  *
1315  * Returns a positive integer if the erratum has been handled.
1316  * Returns zero if the erratum does not apply.
1317  * Returns a negative error code otherwise.
1318  */
1319 static int pt_blk_handle_erratum_skd022(struct pt_block_decoder *decoder,
1320 					struct pt_event *ev)
1321 {
1322 	struct pt_insn_ext iext;
1323 	struct pt_insn insn;
1324 	int errcode;
1325 
1326 	if (!decoder || !ev)
1327 		return -pte_internal;
1328 
1329 	insn.mode = decoder->mode;
1330 	insn.ip = ev->variant.async_disabled.at;
1331 
1332 	errcode = pt_insn_decode(&insn, &iext, decoder->image, &decoder->asid);
1333 	if (errcode < 0)
1334 		return 0;
1335 
1336 	switch (iext.iclass) {
1337 	default:
1338 		/* The erratum does not apply. */
1339 		return 0;
1340 
1341 	case PTI_INST_VMLAUNCH:
1342 	case PTI_INST_VMRESUME:
1343 		/* The erratum may apply.  We can't be sure without a lot more
1344 		 * analysis.  Let's assume it does.
1345 		 *
1346 		 * We turn the async disable into a sync disable.  Our caller
1347 		 * will restart event processing.
1348 		 */
1349 		ev->type = ptev_disabled;
1350 		ev->variant.disabled.ip = ev->variant.async_disabled.ip;
1351 
1352 		return 1;
1353 	}
1354 }
1355 
1356 /* Postpone proceeding past @insn/@iext and indicate a pending event.
1357  *
1358  * There may be further events pending on @insn/@iext.  Postpone proceeding past
1359  * @insn/@iext until we processed all events that bind to it.
1360  *
1361  * Returns a non-negative pt_status_flag bit-vector indicating a pending event
1362  * on success, a negative pt_error_code otherwise.
1363  */
1364 static int pt_blk_postpone_insn(struct pt_block_decoder *decoder,
1365 				const struct pt_insn *insn,
1366 				const struct pt_insn_ext *iext)
1367 {
1368 	if (!decoder || !insn || !iext)
1369 		return -pte_internal;
1370 
1371 	/* Only one can be active. */
1372 	if (decoder->process_insn)
1373 		return -pte_internal;
1374 
1375 	decoder->process_insn = 1;
1376 	decoder->insn = *insn;
1377 	decoder->iext = *iext;
1378 
1379 	return pt_blk_status(decoder, pts_event_pending);
1380 }
1381 
1382 /* Remove any postponed instruction from @decoder.
1383  *
1384  * Returns zero on success, a negative pt_error_code otherwise.
1385  */
1386 static int pt_blk_clear_postponed_insn(struct pt_block_decoder *decoder)
1387 {
1388 	if (!decoder)
1389 		return -pte_internal;
1390 
1391 	decoder->process_insn = 0;
1392 	decoder->bound_paging = 0;
1393 	decoder->bound_vmcs = 0;
1394 	decoder->bound_ptwrite = 0;
1395 
1396 	return 0;
1397 }
1398 
1399 /* Proceed past a postponed instruction.
1400  *
1401  * If an instruction has been postponed in @decoder, proceed past it.
1402  *
1403  * Returns zero on success, a negative pt_error_code otherwise.
1404  */
1405 static int pt_blk_proceed_postponed_insn(struct pt_block_decoder *decoder)
1406 {
1407 	int status;
1408 
1409 	if (!decoder)
1410 		return -pte_internal;
1411 
1412 	/* There's nothing to do if we have no postponed instruction. */
1413 	if (!decoder->process_insn)
1414 		return 0;
1415 
1416 	/* There's nothing to do if tracing got disabled. */
1417 	if (!decoder->enabled)
1418 		return pt_blk_clear_postponed_insn(decoder);
1419 
1420 	status = pt_insn_next_ip(&decoder->ip, &decoder->insn, &decoder->iext);
1421 	if (status < 0) {
1422 		if (status != -pte_bad_query)
1423 			return status;
1424 
1425 		status = pt_blk_proceed_with_trace(decoder, &decoder->insn,
1426 						   &decoder->iext);
1427 		if (status < 0)
1428 			return status;
1429 	}
1430 
1431 	return pt_blk_clear_postponed_insn(decoder);
1432 }
1433 
1434 /* Proceed to the next event.
1435  *
1436  * We have an event pending.  Proceed to the event location and indicate the
1437  * event to the user.
1438  *
1439  * On our way to the event location we may also be forced to postpone the event
1440  * to the next block, e.g. if we overflow the number of instructions in the
1441  * block or if we need trace in order to reach the event location.
1442  *
1443  * If we're not able to reach the event location, we return zero.  This is what
1444  * pt_blk_status() would return since:
1445  *
1446  *   - we suppress pts_eos as long as we're processing events
1447  *   - we do not set pts_ip_suppressed since tracing must be enabled
1448  *
1449  * Returns a non-negative pt_status_flag bit-vector on success, a negative error
1450  * code otherwise.
1451  */
1452 static int pt_blk_proceed_event(struct pt_block_decoder *decoder,
1453 				struct pt_block *block)
1454 {
1455 	struct pt_insn_ext iext;
1456 	struct pt_insn insn;
1457 	struct pt_event *ev;
1458 	int status;
1459 
1460 	if (!decoder || !decoder->process_event || !block)
1461 		return -pte_internal;
1462 
1463 	ev = &decoder->event;
1464 	switch (ev->type) {
1465 	case ptev_enabled:
1466 		break;
1467 
1468 	case ptev_disabled:
1469 		status = pt_blk_proceed_to_disabled(decoder, block, &insn,
1470 						    &iext, ev);
1471 		if (status <= 0) {
1472 			/* A synchronous disable event also binds to the next
1473 			 * indirect or conditional branch, i.e. to any branch
1474 			 * that would have required trace.
1475 			 */
1476 			if (status != -pte_bad_query)
1477 				return status;
1478 
1479 			status = pt_blk_set_disable_resume_ip(decoder, &insn);
1480 			if (status < 0)
1481 				return status;
1482 		}
1483 
1484 		break;
1485 
1486 	case ptev_async_disabled:
1487 		status = pt_blk_proceed_to_ip(decoder, block, &insn, &iext,
1488 					      ev->variant.async_disabled.at);
1489 		if (status <= 0)
1490 			return status;
1491 
1492 		if (decoder->query.config.errata.skd022) {
1493 			status = pt_blk_handle_erratum_skd022(decoder, ev);
1494 			if (status != 0) {
1495 				if (status < 0)
1496 					return status;
1497 
1498 				/* If the erratum hits, we modify the event.
1499 				 * Try again.
1500 				 */
1501 				return pt_blk_proceed_event(decoder, block);
1502 			}
1503 		}
1504 
1505 		break;
1506 
1507 	case ptev_async_branch:
1508 		status = pt_blk_proceed_to_ip(decoder, block, &insn, &iext,
1509 					      ev->variant.async_branch.from);
1510 		if (status <= 0)
1511 			return status;
1512 
1513 		break;
1514 
1515 	case ptev_paging:
1516 		if (!decoder->enabled)
1517 			break;
1518 
1519 		status = pt_blk_proceed_to_insn(decoder, block, &insn, &iext,
1520 						pt_insn_binds_to_pip);
1521 		if (status <= 0)
1522 			return status;
1523 
1524 		/* We bound a paging event.  Make sure we do not bind further
1525 		 * paging events to this instruction.
1526 		 */
1527 		decoder->bound_paging = 1;
1528 
1529 		return pt_blk_postpone_insn(decoder, &insn, &iext);
1530 
1531 	case ptev_async_paging:
1532 		status = pt_blk_proceed_to_async_paging(decoder, block, ev);
1533 		if (status <= 0)
1534 			return status;
1535 
1536 		break;
1537 
1538 	case ptev_vmcs:
1539 		if (!decoder->enabled)
1540 			break;
1541 
1542 		status = pt_blk_proceed_to_insn(decoder, block, &insn, &iext,
1543 						pt_insn_binds_to_vmcs);
1544 		if (status <= 0)
1545 			return status;
1546 
1547 		/* We bound a vmcs event.  Make sure we do not bind further vmcs
1548 		 * events to this instruction.
1549 		 */
1550 		decoder->bound_vmcs = 1;
1551 
1552 		return pt_blk_postpone_insn(decoder, &insn, &iext);
1553 
1554 	case ptev_async_vmcs:
1555 		status = pt_blk_proceed_to_async_vmcs(decoder, block, ev);
1556 		if (status <= 0)
1557 			return status;
1558 
1559 		break;
1560 
1561 	case ptev_overflow:
1562 		break;
1563 
1564 	case ptev_exec_mode:
1565 		status = pt_blk_proceed_to_exec_mode(decoder, block, ev);
1566 		if (status <= 0)
1567 			return status;
1568 
1569 		break;
1570 
1571 	case ptev_tsx:
1572 		if (ev->ip_suppressed)
1573 			break;
1574 
1575 		status = pt_blk_proceed_to_ip(decoder, block, &insn, &iext,
1576 					      ev->variant.tsx.ip);
1577 		if (status <= 0)
1578 			return status;
1579 
1580 		break;
1581 
1582 	case ptev_stop:
1583 		break;
1584 
1585 	case ptev_exstop:
1586 		if (!decoder->enabled || ev->ip_suppressed)
1587 			break;
1588 
1589 		status = pt_blk_proceed_to_ip(decoder, block, &insn, &iext,
1590 					      ev->variant.exstop.ip);
1591 		if (status <= 0)
1592 			return status;
1593 
1594 		break;
1595 
1596 	case ptev_mwait:
1597 		if (!decoder->enabled || ev->ip_suppressed)
1598 			break;
1599 
1600 		status = pt_blk_proceed_to_ip(decoder, block, &insn, &iext,
1601 					      ev->variant.mwait.ip);
1602 		if (status <= 0)
1603 			return status;
1604 
1605 		break;
1606 
1607 	case ptev_pwre:
1608 	case ptev_pwrx:
1609 		break;
1610 
1611 	case ptev_ptwrite:
1612 		if (!decoder->enabled)
1613 			break;
1614 
1615 		status = pt_blk_proceed_to_ptwrite(decoder, block, &insn,
1616 						   &iext, ev);
1617 		if (status <= 0)
1618 			return status;
1619 
1620 		/* We bound a ptwrite event.  Make sure we do not bind further
1621 		 * ptwrite events to this instruction.
1622 		 */
1623 		decoder->bound_ptwrite = 1;
1624 
1625 		return pt_blk_postpone_insn(decoder, &insn, &iext);
1626 
1627 	case ptev_tick:
1628 	case ptev_cbr:
1629 	case ptev_mnt:
1630 		break;
1631 	}
1632 
1633 	return pt_blk_status(decoder, pts_event_pending);
1634 }
1635 
1636 /* Proceed to the next decision point without using the block cache.
1637  *
1638  * Tracing is enabled and we don't have an event pending.  Proceed as far as
1639  * we get without trace.  Stop when we either:
1640  *
1641  *   - need trace in order to continue
1642  *   - overflow the max number of instructions in a block
1643  *
1644  * We actually proceed one instruction further to get the start IP for the next
1645  * block.  This only updates @decoder's internal state, though.
1646  *
1647  * Returns zero on success, a negative error code otherwise.
1648  */
1649 static int pt_blk_proceed_no_event_uncached(struct pt_block_decoder *decoder,
1650 					    struct pt_block *block)
1651 {
1652 	struct pt_insn_ext iext;
1653 	struct pt_insn insn;
1654 	int status;
1655 
1656 	if (!decoder || !block)
1657 		return -pte_internal;
1658 
1659 	/* This is overly conservative, really.  We shouldn't get a bad-query
1660 	 * status unless we decoded at least one instruction successfully.
1661 	 */
1662 	memset(&insn, 0, sizeof(insn));
1663 	memset(&iext, 0, sizeof(iext));
1664 
1665 	/* Proceed as far as we get without trace. */
1666 	status = pt_blk_proceed_to_insn(decoder, block, &insn, &iext,
1667 					pt_insn_false);
1668 	if (status < 0) {
1669 		if (status != -pte_bad_query)
1670 			return status;
1671 
1672 		return pt_blk_proceed_with_trace(decoder, &insn, &iext);
1673 	}
1674 
1675 	return 0;
1676 }
1677 
/* Check if @ip lies inside the mapped section @msec.
 *
 * The section covers the half-open address range from pt_msec_begin(@msec)
 * up to, but not including, pt_msec_end(@msec).
 *
 * @msec is not checked here; a NULL @msec is left to pt_msec_begin() and
 * pt_msec_end() to deal with.
 *
 * Returns non-zero if @ip is inside that range.
 * Returns zero if it isn't.
 */
static inline int pt_blk_is_in_section(const struct pt_mapped_section *msec,
				       uint64_t ip)
{
	const uint64_t first = pt_msec_begin(msec);
	const uint64_t limit = pt_msec_end(msec);

	if (ip < first)
		return 0;

	return (ip < limit);
}
1693 
1694 /* Insert a trampoline block cache entry.
1695  *
1696  * Add a trampoline block cache entry at @ip to continue at @nip, where @nip
1697  * must be the next instruction after @ip.
1698  *
1699  * Both @ip and @nip must be section-relative
1700  *
1701  * Returns zero on success, a negative error code otherwise.
1702  */
1703 static inline int pt_blk_add_trampoline(struct pt_block_cache *bcache,
1704 					uint64_t ip, uint64_t nip,
1705 					enum pt_exec_mode mode)
1706 {
1707 	struct pt_bcache_entry bce;
1708 	int64_t disp;
1709 
1710 	/* The displacement from @ip to @nip for the trampoline. */
1711 	disp = (int64_t) (nip - ip);
1712 
1713 	memset(&bce, 0, sizeof(bce));
1714 	bce.displacement = (int32_t) disp;
1715 	bce.ninsn = 1;
1716 	bce.mode = mode;
1717 	bce.qualifier = ptbq_again;
1718 
1719 	/* If we can't reach @nip without overflowing the displacement field, we
1720 	 * have to stop and re-decode the instruction at @ip.
1721 	 */
1722 	if ((int64_t) bce.displacement != disp) {
1723 
1724 		memset(&bce, 0, sizeof(bce));
1725 		bce.ninsn = 1;
1726 		bce.mode = mode;
1727 		bce.qualifier = ptbq_decode;
1728 	}
1729 
1730 	return pt_bcache_add(bcache, ip, bce);
1731 }
1732 
1733 /* Insert a decode block cache entry.
1734  *
1735  * Add a decode block cache entry at @ioff.
1736  *
1737  * Returns zero on success, a negative error code otherwise.
1738  */
1739 static inline int pt_blk_add_decode(struct pt_block_cache *bcache,
1740 				    uint64_t ioff, enum pt_exec_mode mode)
1741 {
1742 	struct pt_bcache_entry bce;
1743 
1744 	memset(&bce, 0, sizeof(bce));
1745 	bce.ninsn = 1;
1746 	bce.mode = mode;
1747 	bce.qualifier = ptbq_decode;
1748 
1749 	return pt_bcache_add(bcache, ioff, bce);
1750 }
1751 
enum {
	/* The upper bound on the number of steps we take when filling the
	 * block cache (0x400).
	 */
	bcache_fill_steps	= 1024
};
1756 
1757 /* Proceed to the next instruction and fill the block cache for @decoder->ip.
1758  *
1759  * Tracing is enabled and we don't have an event pending.  The current IP is not
1760  * yet cached.
1761  *
1762  * Proceed one instruction without using the block cache, then try to proceed
1763  * further using the block cache.
1764  *
1765  * On our way back, add a block cache entry for the IP before proceeding.  Note
1766  * that the recursion is bounded by @steps and ultimately by the maximum number
1767  * of instructions in a block.
1768  *
1769  * Returns zero on success, a negative error code otherwise.
1770  */
1771 static int
1772 pt_blk_proceed_no_event_fill_cache(struct pt_block_decoder *decoder,
1773 				   struct pt_block *block,
1774 				   struct pt_block_cache *bcache,
1775 				   const struct pt_mapped_section *msec,
1776 				   size_t steps)
1777 {
1778 	struct pt_bcache_entry bce;
1779 	struct pt_insn_ext iext;
1780 	struct pt_insn insn;
1781 	uint64_t nip, dip;
1782 	int64_t disp, ioff, noff;
1783 	int status;
1784 
1785 	if (!decoder || !steps)
1786 		return -pte_internal;
1787 
1788 	/* Proceed one instruction by decoding and examining it.
1789 	 *
1790 	 * Note that we also return on a status of zero that indicates that the
1791 	 * instruction didn't fit into @block.
1792 	 */
1793 	status = pt_blk_proceed_one_insn(decoder, block, &insn, &iext);
1794 	if (status <= 0)
1795 		return status;
1796 
1797 	ioff = pt_msec_unmap(msec, insn.ip);
1798 
1799 	/* Let's see if we can proceed to the next IP without trace.
1800 	 *
1801 	 * If we can't, this is certainly a decision point.
1802 	 */
1803 	status = pt_insn_next_ip(&decoder->ip, &insn, &iext);
1804 	if (status < 0) {
1805 		if (status != -pte_bad_query)
1806 			return status;
1807 
1808 		memset(&bce, 0, sizeof(bce));
1809 		bce.ninsn = 1;
1810 		bce.mode = insn.mode;
1811 		bce.isize = insn.size;
1812 
1813 		/* Clear the instruction size in case of overflows. */
1814 		if ((uint8_t) bce.isize != insn.size)
1815 			bce.isize = 0;
1816 
1817 		switch (insn.iclass) {
1818 		case ptic_ptwrite:
1819 		case ptic_error:
1820 		case ptic_other:
1821 			return -pte_internal;
1822 
1823 		case ptic_jump:
1824 			/* A direct jump doesn't require trace. */
1825 			if (iext.variant.branch.is_direct)
1826 				return -pte_internal;
1827 
1828 			bce.qualifier = ptbq_indirect;
1829 			break;
1830 
1831 		case ptic_call:
1832 			/* A direct call doesn't require trace. */
1833 			if (iext.variant.branch.is_direct)
1834 				return -pte_internal;
1835 
1836 			bce.qualifier = ptbq_ind_call;
1837 			break;
1838 
1839 		case ptic_return:
1840 			bce.qualifier = ptbq_return;
1841 			break;
1842 
1843 		case ptic_cond_jump:
1844 			bce.qualifier = ptbq_cond;
1845 			break;
1846 
1847 		case ptic_far_call:
1848 		case ptic_far_return:
1849 		case ptic_far_jump:
1850 			bce.qualifier = ptbq_indirect;
1851 			break;
1852 		}
1853 
1854 		/* If the block was truncated, we have to decode its last
1855 		 * instruction each time.
1856 		 *
1857 		 * We could have skipped the above switch and size assignment in
1858 		 * this case but this is already a slow and hopefully infrequent
1859 		 * path.
1860 		 */
1861 		if (block->truncated)
1862 			bce.qualifier = ptbq_decode;
1863 
1864 		status = pt_bcache_add(bcache, ioff, bce);
1865 		if (status < 0)
1866 			return status;
1867 
1868 		return pt_blk_proceed_with_trace(decoder, &insn, &iext);
1869 	}
1870 
1871 	/* The next instruction's IP. */
1872 	nip = decoder->ip;
1873 	noff = pt_msec_unmap(msec, nip);
1874 
1875 	/* Even if we were able to proceed without trace, we might have to stop
1876 	 * here for various reasons:
1877 	 *
1878 	 *   - at near direct calls to update the return-address stack
1879 	 *
1880 	 *     We are forced to re-decode @insn to get the branch displacement.
1881 	 *
1882 	 *     Even though it is constant, we don't cache it to avoid increasing
1883 	 *     the size of a cache entry.  Note that the displacement field is
1884 	 *     zero for this entry and we might be tempted to use it - but other
1885 	 *     entries that point to this decision point will have non-zero
1886 	 *     displacement.
1887 	 *
1888 	 *     We could proceed after a near direct call but we migh as well
1889 	 *     postpone it to the next iteration.  Make sure to end the block if
1890 	 *     @decoder->flags.variant.block.end_on_call is set, though.
1891 	 *
1892 	 *   - at near direct backwards jumps to detect section splits
1893 	 *
1894 	 *     In case the current section is split underneath us, we must take
1895 	 *     care to detect that split.
1896 	 *
1897 	 *     There is one corner case where the split is in the middle of a
1898 	 *     linear sequence of instructions that branches back into the
1899 	 *     originating section.
1900 	 *
1901 	 *     Calls, indirect branches, and far branches are already covered
1902 	 *     since they either require trace or already require us to stop
1903 	 *     (i.e. near direct calls) for other reasons.  That leaves near
1904 	 *     direct backward jumps.
1905 	 *
1906 	 *     Instead of the decode stop at the jump instruction we're using we
1907 	 *     could have made sure that other block cache entries that extend
1908 	 *     this one insert a trampoline to the jump's entry.  This would
1909 	 *     have been a bit more complicated.
1910 	 *
1911 	 *   - if we switched sections
1912 	 *
1913 	 *     This ends a block just like a branch that requires trace.
1914 	 *
1915 	 *     We need to re-decode @insn in order to determine the start IP of
1916 	 *     the next block.
1917 	 *
1918 	 *   - if the block is truncated
1919 	 *
1920 	 *     We need to read the last instruction's memory from multiple
1921 	 *     sections and provide it to the user.
1922 	 *
1923 	 *     We could still use the block cache but then we'd have to handle
1924 	 *     this case for each qualifier.  Truncation is hopefully rare and
1925 	 *     having to read the memory for the instruction from multiple
1926 	 *     sections is already slow.  Let's rather keep things simple and
1927 	 *     route it through the decode flow, where we already have
1928 	 *     everything in place.
1929 	 */
1930 	switch (insn.iclass) {
1931 	case ptic_call:
1932 		return pt_blk_add_decode(bcache, ioff, insn.mode);
1933 
1934 	case ptic_jump:
1935 		/* An indirect branch requires trace and should have been
1936 		 * handled above.
1937 		 */
1938 		if (!iext.variant.branch.is_direct)
1939 			return -pte_internal;
1940 
1941 		if (iext.variant.branch.displacement < 0 ||
1942 		    decoder->flags.variant.block.end_on_jump)
1943 			return pt_blk_add_decode(bcache, ioff, insn.mode);
1944 
1945 		fallthrough;
1946 	default:
1947 		if (!pt_blk_is_in_section(msec, nip) || block->truncated)
1948 			return pt_blk_add_decode(bcache, ioff, insn.mode);
1949 
1950 		break;
1951 	}
1952 
1953 	/* We proceeded one instruction.  Let's see if we have a cache entry for
1954 	 * the next instruction.
1955 	 */
1956 	status = pt_bcache_lookup(&bce, bcache, noff);
1957 	if (status < 0)
1958 		return status;
1959 
1960 	/* If we don't have a valid cache entry, yet, fill the cache some more.
1961 	 *
1962 	 * On our way back, we add a cache entry for this instruction based on
1963 	 * the cache entry of the succeeding instruction.
1964 	 */
1965 	if (!pt_bce_is_valid(bce)) {
1966 		/* If we exceeded the maximum number of allowed steps, we insert
1967 		 * a trampoline to the next instruction.
1968 		 *
1969 		 * The next time we encounter the same code, we will use the
1970 		 * trampoline to jump directly to where we left off this time
1971 		 * and continue from there.
1972 		 */
1973 		steps -= 1;
1974 		if (!steps)
1975 			return pt_blk_add_trampoline(bcache, ioff, noff,
1976 						     insn.mode);
1977 
1978 		status = pt_blk_proceed_no_event_fill_cache(decoder, block,
1979 							    bcache, msec,
1980 							    steps);
1981 		if (status < 0)
1982 			return status;
1983 
1984 		/* Let's see if we have more luck this time. */
1985 		status = pt_bcache_lookup(&bce, bcache, noff);
1986 		if (status < 0)
1987 			return status;
1988 
1989 		/* If we still don't have a valid cache entry, we're done.  Most
1990 		 * likely, @block overflowed and we couldn't proceed past the
1991 		 * next instruction.
1992 		 */
1993 		if (!pt_bce_is_valid(bce))
1994 			return 0;
1995 	}
1996 
1997 	/* We must not have switched execution modes.
1998 	 *
1999 	 * This would require an event and we're on the no-event flow.
2000 	 */
2001 	if (pt_bce_exec_mode(bce) != insn.mode)
2002 		return -pte_internal;
2003 
2004 	/* The decision point IP and the displacement from @insn.ip. */
2005 	dip = nip + bce.displacement;
2006 	disp = (int64_t) (dip - insn.ip);
2007 
2008 	/* We may have switched sections if the section was split.  See
2009 	 * pt_blk_proceed_no_event_cached() for a more elaborate comment.
2010 	 *
2011 	 * We're not adding a block cache entry since this won't apply to the
2012 	 * original section which may be shared with other decoders.
2013 	 *
2014 	 * We will instead take the slow path until the end of the section.
2015 	 */
2016 	if (!pt_blk_is_in_section(msec, dip))
2017 		return 0;
2018 
2019 	/* Let's try to reach @nip's decision point from @insn.ip.
2020 	 *
2021 	 * There are two fields that may overflow: @bce.ninsn and
2022 	 * @bce.displacement.
2023 	 */
2024 	bce.ninsn += 1;
2025 	bce.displacement = (int32_t) disp;
2026 
2027 	/* If none of them overflowed, we're done.
2028 	 *
2029 	 * If one or both overflowed, let's try to insert a trampoline, i.e. we
2030 	 * try to reach @dip via a ptbq_again entry to @nip.
2031 	 */
2032 	if (!bce.ninsn || ((int64_t) bce.displacement != disp))
2033 		return pt_blk_add_trampoline(bcache, ioff, noff, insn.mode);
2034 
2035 	/* We're done.  Add the cache entry.
2036 	 *
2037 	 * There's a chance that other decoders updated the cache entry in the
2038 	 * meantime.  They should have come to the same conclusion as we,
2039 	 * though, and the cache entries should be identical.
2040 	 *
2041 	 * Cache updates are atomic so even if the two versions were not
2042 	 * identical, we wouldn't care because they are both correct.
2043 	 */
2044 	return pt_bcache_add(bcache, ioff, bce);
2045 }
2046 
2047 /* Proceed at a potentially truncated instruction.
2048  *
2049  * We were not able to decode the instruction at @decoder->ip in @decoder's
2050  * cached section.  This is typically caused by not having enough bytes.
2051  *
2052  * Try to decode the instruction again using the entire image.  If this succeeds
2053  * we expect to end up with an instruction that was truncated in the section it
2054  * started.  We provide the full instruction in this case and end the block.
2055  *
2056  * Returns zero on success, a negative error code otherwise.
2057  */
2058 static int pt_blk_proceed_truncated(struct pt_block_decoder *decoder,
2059 				    struct pt_block *block)
2060 {
2061 	struct pt_insn_ext iext;
2062 	struct pt_insn insn;
2063 	int errcode;
2064 
2065 	if (!decoder || !block)
2066 		return -pte_internal;
2067 
2068 	memset(&iext, 0, sizeof(iext));
2069 	memset(&insn, 0, sizeof(insn));
2070 
2071 	insn.mode = decoder->mode;
2072 	insn.ip = decoder->ip;
2073 
2074 	errcode = pt_insn_decode(&insn, &iext, decoder->image, &decoder->asid);
2075 	if (errcode < 0)
2076 		return errcode;
2077 
2078 	/* We shouldn't use this function if the instruction isn't truncated. */
2079 	if (!insn.truncated)
2080 		return -pte_internal;
2081 
2082 	/* Provide the instruction in the block.  This ends the block. */
2083 	memcpy(block->raw, insn.raw, insn.size);
2084 	block->iclass = insn.iclass;
2085 	block->size = insn.size;
2086 	block->truncated = 1;
2087 
2088 	/* Log calls' return addresses for return compression. */
2089 	errcode = pt_blk_log_call(decoder, &insn, &iext);
2090 	if (errcode < 0)
2091 		return errcode;
2092 
2093 	/* Let's see if we can proceed to the next IP without trace.
2094 	 *
2095 	 * The truncated instruction ends the block but we still need to get the
2096 	 * next block's start IP.
2097 	 */
2098 	errcode = pt_insn_next_ip(&decoder->ip, &insn, &iext);
2099 	if (errcode < 0) {
2100 		if (errcode != -pte_bad_query)
2101 			return errcode;
2102 
2103 		return pt_blk_proceed_with_trace(decoder, &insn, &iext);
2104 	}
2105 
2106 	return 0;
2107 }
2108 
2109 /* Proceed to the next decision point using the block cache.
2110  *
2111  * Tracing is enabled and we don't have an event pending.  We already set
2112  * @block's isid.  All reads are done within @msec as we're not switching
2113  * sections between blocks.
2114  *
2115  * Proceed as far as we get without trace.  Stop when we either:
2116  *
2117  *   - need trace in order to continue
2118  *   - overflow the max number of instructions in a block
2119  *
2120  * We actually proceed one instruction further to get the start IP for the next
2121  * block.  This only updates @decoder's internal state, though.
2122  *
2123  * Returns zero on success, a negative error code otherwise.
2124  */
2125 static int pt_blk_proceed_no_event_cached(struct pt_block_decoder *decoder,
2126 					  struct pt_block *block,
2127 					  struct pt_block_cache *bcache,
2128 					  const struct pt_mapped_section *msec)
2129 {
2130 	struct pt_bcache_entry bce;
2131 	uint16_t binsn, ninsn;
2132 	uint64_t offset, nip;
2133 	int status;
2134 
2135 	if (!decoder || !block)
2136 		return -pte_internal;
2137 
2138 	offset = pt_msec_unmap(msec, decoder->ip);
2139 	status = pt_bcache_lookup(&bce, bcache, offset);
2140 	if (status < 0)
2141 		return status;
2142 
2143 	/* If we don't find a valid cache entry, fill the cache. */
2144 	if (!pt_bce_is_valid(bce))
2145 		return pt_blk_proceed_no_event_fill_cache(decoder, block,
2146 							  bcache, msec,
2147 							  bcache_fill_steps);
2148 
2149 	/* If we switched sections, the origianl section must have been split
2150 	 * underneath us.  A split preserves the block cache of the original
2151 	 * section.
2152 	 *
2153 	 * Crossing sections requires ending the block so we can indicate the
2154 	 * proper isid for the entire block.
2155 	 *
2156 	 * Plus there's the chance that the new section that caused the original
2157 	 * section to split changed instructions.
2158 	 *
2159 	 * This check will also cover changes to a linear sequence of code we
2160 	 * would otherwise have jumped over as long as the start and end are in
2161 	 * different sub-sections.
2162 	 *
2163 	 * Since we stop on every (backwards) branch (through an artificial stop
2164 	 * in the case of a near direct backward branch) we will detect all
2165 	 * section splits.
2166 	 *
2167 	 * Switch to the slow path until we reach the end of this section.
2168 	 */
2169 	nip = decoder->ip + bce.displacement;
2170 	if (!pt_blk_is_in_section(msec, nip))
2171 		return pt_blk_proceed_no_event_uncached(decoder, block);
2172 
2173 	/* We have a valid cache entry.  Let's first check if the way to the
2174 	 * decision point still fits into @block.
2175 	 *
2176 	 * If it doesn't, we end the block without filling it as much as we
2177 	 * could since this would require us to switch to the slow path.
2178 	 *
2179 	 * On the next iteration, we will start with an empty block, which is
2180 	 * guaranteed to have enough room for at least one block cache entry.
2181 	 */
2182 	binsn = block->ninsn;
2183 	ninsn = binsn + (uint16_t) bce.ninsn;
2184 	if (ninsn < binsn)
2185 		return 0;
2186 
2187 	/* Jump ahead to the decision point and proceed from there.
2188 	 *
2189 	 * We're not switching execution modes so even if @block already has an
2190 	 * execution mode, it will be the one we're going to set.
2191 	 */
2192 	decoder->ip = nip;
2193 
2194 	/* We don't know the instruction class so we should be setting it to
2195 	 * ptic_error.  Since we will be able to fill it back in later in most
2196 	 * cases, we move the clearing to the switch cases that don't.
2197 	 */
2198 	block->end_ip = nip;
2199 	block->ninsn = ninsn;
2200 	block->mode = pt_bce_exec_mode(bce);
2201 
2202 
2203 	switch (pt_bce_qualifier(bce)) {
2204 	case ptbq_again:
2205 		/* We're not able to reach the actual decision point due to
2206 		 * overflows so we inserted a trampoline.
2207 		 *
2208 		 * We don't know the instruction and it is not guaranteed that
2209 		 * we will proceed further (e.g. if @block overflowed).  Let's
2210 		 * clear any previously stored instruction class which has
2211 		 * become invalid when we updated @block->ninsn.
2212 		 */
2213 		block->iclass = ptic_error;
2214 
2215 		return pt_blk_proceed_no_event_cached(decoder, block, bcache,
2216 						      msec);
2217 
2218 	case ptbq_cond:
2219 		/* We're at a conditional branch. */
2220 		block->iclass = ptic_cond_jump;
2221 
2222 		/* Let's first check whether we know the size of the
2223 		 * instruction.  If we do, we might get away without decoding
2224 		 * the instruction.
2225 		 *
2226 		 * If we don't know the size we might as well do the full decode
2227 		 * and proceed-with-trace flow we do for ptbq_decode.
2228 		 */
2229 		if (bce.isize) {
2230 			uint64_t ip;
2231 			int taken;
2232 
2233 			/* If the branch is not taken, we don't need to decode
2234 			 * the instruction at @decoder->ip.
2235 			 *
2236 			 * If it is taken, we have to implement everything here.
2237 			 * We can't use the normal decode and proceed-with-trace
2238 			 * flow since we already consumed the TNT bit.
2239 			 */
2240 			status = pt_blk_cond_branch(decoder, &taken);
2241 			if (status < 0)
2242 				return status;
2243 
2244 			/* Preserve the query decoder's response which indicates
2245 			 * upcoming events.
2246 			 */
2247 			decoder->status = status;
2248 
2249 			ip = decoder->ip;
2250 			if (taken) {
2251 				struct pt_insn_ext iext;
2252 				struct pt_insn insn;
2253 
2254 				memset(&iext, 0, sizeof(iext));
2255 				memset(&insn, 0, sizeof(insn));
2256 
2257 				insn.mode = pt_bce_exec_mode(bce);
2258 				insn.ip = ip;
2259 
2260 				status = pt_blk_decode_in_section(&insn, &iext,
2261 								  msec);
2262 				if (status < 0)
2263 					return status;
2264 
2265 				ip += iext.variant.branch.displacement;
2266 			}
2267 
2268 			decoder->ip = ip + bce.isize;
2269 			break;
2270 		}
2271 
2272 		fallthrough;
2273 	case ptbq_decode: {
2274 		struct pt_insn_ext iext;
2275 		struct pt_insn insn;
2276 
2277 		/* We need to decode the instruction at @decoder->ip and decide
2278 		 * what to do based on that.
2279 		 *
2280 		 * We already accounted for the instruction so we can't just
2281 		 * call pt_blk_proceed_one_insn().
2282 		 */
2283 
2284 		memset(&iext, 0, sizeof(iext));
2285 		memset(&insn, 0, sizeof(insn));
2286 
2287 		insn.mode = pt_bce_exec_mode(bce);
2288 		insn.ip = decoder->ip;
2289 
2290 		status = pt_blk_decode_in_section(&insn, &iext, msec);
2291 		if (status < 0) {
2292 			if (status != -pte_bad_insn)
2293 				return status;
2294 
2295 			return pt_blk_proceed_truncated(decoder, block);
2296 		}
2297 
2298 		/* We just decoded @insn so we know the instruction class. */
2299 		block->iclass = insn.iclass;
2300 
2301 		/* Log calls' return addresses for return compression. */
2302 		status = pt_blk_log_call(decoder, &insn, &iext);
2303 		if (status < 0)
2304 			return status;
2305 
2306 		/* Let's see if we can proceed to the next IP without trace.
2307 		 *
2308 		 * Note that we also stop due to displacement overflows or to
2309 		 * maintain the return-address stack for near direct calls.
2310 		 */
2311 		status = pt_insn_next_ip(&decoder->ip, &insn, &iext);
2312 		if (status < 0) {
2313 			if (status != -pte_bad_query)
2314 				return status;
2315 
2316 			/* We can't, so let's proceed with trace, which
2317 			 * completes the block.
2318 			 */
2319 			return pt_blk_proceed_with_trace(decoder, &insn, &iext);
2320 		}
2321 
2322 		/* End the block if the user asked us to.
2323 		 *
2324 		 * We only need to take care about direct near branches.
2325 		 * Indirect and far branches require trace and will naturally
2326 		 * end a block.
2327 		 */
2328 		if ((decoder->flags.variant.block.end_on_call &&
2329 		     (insn.iclass == ptic_call)) ||
2330 		    (decoder->flags.variant.block.end_on_jump &&
2331 		     (insn.iclass == ptic_jump)))
2332 			break;
2333 
2334 		/* If we can proceed without trace and we stay in @msec we may
2335 		 * proceed further.
2336 		 *
2337 		 * We're done if we switch sections, though.
2338 		 */
2339 		if (!pt_blk_is_in_section(msec, decoder->ip))
2340 			break;
2341 
2342 		return pt_blk_proceed_no_event_cached(decoder, block, bcache,
2343 						      msec);
2344 	}
2345 
2346 	case ptbq_ind_call: {
2347 		uint64_t ip;
2348 
2349 		/* We're at a near indirect call. */
2350 		block->iclass = ptic_call;
2351 
2352 		/* We need to update the return-address stack and query the
2353 		 * destination IP.
2354 		 */
2355 		ip = decoder->ip;
2356 
2357 		/* If we already know the size of the instruction, we don't need
2358 		 * to re-decode it.
2359 		 */
2360 		if (bce.isize)
2361 			ip += bce.isize;
2362 		else {
2363 			struct pt_insn_ext iext;
2364 			struct pt_insn insn;
2365 
2366 			memset(&iext, 0, sizeof(iext));
2367 			memset(&insn, 0, sizeof(insn));
2368 
2369 			insn.mode = pt_bce_exec_mode(bce);
2370 			insn.ip = ip;
2371 
2372 			status = pt_blk_decode_in_section(&insn, &iext, msec);
2373 			if (status < 0)
2374 				return status;
2375 
2376 			ip += insn.size;
2377 		}
2378 
2379 		status = pt_retstack_push(&decoder->retstack, ip);
2380 		if (status < 0)
2381 			return status;
2382 
2383 		status = pt_blk_indirect_branch(decoder, &decoder->ip);
2384 		if (status < 0)
2385 			return status;
2386 
2387 		/* Preserve the query decoder's response which indicates
2388 		 * upcoming events.
2389 		 */
2390 		decoder->status = status;
2391 		break;
2392 	}
2393 
2394 	case ptbq_return: {
2395 		int taken;
2396 
2397 		/* We're at a near return. */
2398 		block->iclass = ptic_return;
2399 
2400 		/* Check for a compressed return. */
2401 		status = pt_blk_cond_branch(decoder, &taken);
2402 		if (status < 0) {
2403 			if (status != -pte_bad_query)
2404 				return status;
2405 
2406 			/* The return is not compressed.  We need another query
2407 			 * to determine the destination IP.
2408 			 */
2409 			status = pt_blk_indirect_branch(decoder, &decoder->ip);
2410 			if (status < 0)
2411 				return status;
2412 
2413 			/* Preserve the query decoder's response which indicates
2414 			 * upcoming events.
2415 			 */
2416 			decoder->status = status;
2417 			break;
2418 		}
2419 
2420 		/* Preserve the query decoder's response which indicates
2421 		 * upcoming events.
2422 		 */
2423 		decoder->status = status;
2424 
2425 		/* A compressed return is indicated by a taken conditional
2426 		 * branch.
2427 		 */
2428 		if (!taken)
2429 			return -pte_bad_retcomp;
2430 
2431 		return pt_retstack_pop(&decoder->retstack, &decoder->ip);
2432 	}
2433 
2434 	case ptbq_indirect:
2435 		/* We're at an indirect jump or far transfer.
2436 		 *
2437 		 * We don't know the exact instruction class and there's no
2438 		 * reason to decode the instruction for any other purpose.
2439 		 *
2440 		 * Indicate that we don't know the instruction class and leave
2441 		 * it to our caller to decode the instruction if needed.
2442 		 */
2443 		block->iclass = ptic_error;
2444 
2445 		/* This is neither a near call nor return so we don't need to
2446 		 * touch the return-address stack.
2447 		 *
2448 		 * Just query the destination IP.
2449 		 */
2450 		status = pt_blk_indirect_branch(decoder, &decoder->ip);
2451 		if (status < 0)
2452 			return status;
2453 
2454 		/* Preserve the query decoder's response which indicates
2455 		 * upcoming events.
2456 		 */
2457 		decoder->status = status;
2458 		break;
2459 	}
2460 
2461 	return 0;
2462 }
2463 
2464 static int pt_blk_msec_fill(struct pt_block_decoder *decoder,
2465 			    const struct pt_mapped_section **pmsec)
2466 {
2467 	const struct pt_mapped_section *msec;
2468 	struct pt_section *section;
2469 	int isid, errcode;
2470 
2471 	if (!decoder || !pmsec)
2472 		return -pte_internal;
2473 
2474 	isid = pt_msec_cache_fill(&decoder->scache, &msec,  decoder->image,
2475 				  &decoder->asid, decoder->ip);
2476 	if (isid < 0)
2477 		return isid;
2478 
2479 	section = pt_msec_section(msec);
2480 	if (!section)
2481 		return -pte_internal;
2482 
2483 	*pmsec = msec;
2484 
2485 	errcode = pt_section_request_bcache(section);
2486 	if (errcode < 0)
2487 		return errcode;
2488 
2489 	return isid;
2490 }
2491 
2492 static inline int pt_blk_msec_lookup(struct pt_block_decoder *decoder,
2493 				     const struct pt_mapped_section **pmsec)
2494 {
2495 	int isid;
2496 
2497 	if (!decoder)
2498 		return -pte_internal;
2499 
2500 	isid = pt_msec_cache_read(&decoder->scache, pmsec, decoder->image,
2501 				  decoder->ip);
2502 	if (isid < 0) {
2503 		if (isid != -pte_nomap)
2504 			return isid;
2505 
2506 		return pt_blk_msec_fill(decoder, pmsec);
2507 	}
2508 
2509 	return isid;
2510 }
2511 
2512 /* Proceed to the next decision point - try using the cache.
2513  *
2514  * Tracing is enabled and we don't have an event pending.  Proceed as far as
2515  * we get without trace.  Stop when we either:
2516  *
2517  *   - need trace in order to continue
2518  *   - overflow the max number of instructions in a block
2519  *
2520  * We actually proceed one instruction further to get the start IP for the next
2521  * block.  This only updates @decoder's internal state, though.
2522  *
2523  * Returns zero on success, a negative error code otherwise.
2524  */
2525 static int pt_blk_proceed_no_event(struct pt_block_decoder *decoder,
2526 				   struct pt_block *block)
2527 {
2528 	const struct pt_mapped_section *msec;
2529 	struct pt_block_cache *bcache;
2530 	struct pt_section *section;
2531 	int isid;
2532 
2533 	if (!decoder || !block)
2534 		return -pte_internal;
2535 
2536 	isid = pt_blk_msec_lookup(decoder, &msec);
2537 	if (isid < 0) {
2538 		if (isid != -pte_nomap)
2539 			return isid;
2540 
2541 		/* Even if there is no such section in the image, we may still
2542 		 * read the memory via the callback function.
2543 		 */
2544 		return pt_blk_proceed_no_event_uncached(decoder, block);
2545 	}
2546 
2547 	/* We do not switch sections inside a block. */
2548 	if (isid != block->isid) {
2549 		if (!pt_blk_block_is_empty(block))
2550 			return 0;
2551 
2552 		block->isid = isid;
2553 	}
2554 
2555 	section = pt_msec_section(msec);
2556 	if (!section)
2557 		return -pte_internal;
2558 
2559 	bcache = pt_section_bcache(section);
2560 	if (!bcache)
2561 		return pt_blk_proceed_no_event_uncached(decoder, block);
2562 
2563 	return pt_blk_proceed_no_event_cached(decoder, block, bcache, msec);
2564 }
2565 
2566 /* Proceed to the next event or decision point.
2567  *
2568  * Returns a non-negative pt_status_flag bit-vector on success, a negative error
2569  * code otherwise.
2570  */
2571 static int pt_blk_proceed(struct pt_block_decoder *decoder,
2572 			  struct pt_block *block)
2573 {
2574 	int status;
2575 
2576 	status = pt_blk_fetch_event(decoder);
2577 	if (status != 0) {
2578 		if (status < 0)
2579 			return status;
2580 
2581 		return pt_blk_proceed_event(decoder, block);
2582 	}
2583 
2584 	/* If tracing is disabled we should either be out of trace or we should
2585 	 * have taken the event flow above.
2586 	 */
2587 	if (!decoder->enabled) {
2588 		if (decoder->status & pts_eos)
2589 			return -pte_eos;
2590 
2591 		return -pte_no_enable;
2592 	}
2593 
2594 	status = pt_blk_proceed_no_event(decoder, block);
2595 	if (status < 0)
2596 		return status;
2597 
2598 	return pt_blk_proceed_trailing_event(decoder, block);
2599 }
2600 
enum {
	/* Upper bound on the number of steps (0x100) we are willing to take
	 * when checking whether the event location can be reached.
	 */
	bdm64_max_steps	= 256
};
2607 
2608 /* Try to work around erratum BDM64.
2609  *
2610  * If we got a transaction abort immediately following a branch that produced
2611  * trace, the trace for that branch might have been corrupted.
2612  *
2613  * Returns a positive integer if the erratum was handled.
2614  * Returns zero if the erratum does not seem to apply.
2615  * Returns a negative error code otherwise.
2616  */
2617 static int pt_blk_handle_erratum_bdm64(struct pt_block_decoder *decoder,
2618 				       const struct pt_block *block,
2619 				       const struct pt_event *ev)
2620 {
2621 	struct pt_insn_ext iext;
2622 	struct pt_insn insn;
2623 	int status;
2624 
2625 	if (!decoder || !block || !ev)
2626 		return -pte_internal;
2627 
2628 	/* This only affects aborts. */
2629 	if (!ev->variant.tsx.aborted)
2630 		return 0;
2631 
2632 	/* This only affects branches that require trace.
2633 	 *
2634 	 * If the erratum hits, that branch ended the current block and brought
2635 	 * us to the trailing event flow.
2636 	 */
2637 	if (pt_blk_block_is_empty(block))
2638 		return 0;
2639 
2640 	insn.mode = block->mode;
2641 	insn.ip = block->end_ip;
2642 
2643 	status = pt_insn_decode(&insn, &iext, decoder->image, &decoder->asid);
2644 	if (status < 0)
2645 		return 0;
2646 
2647 	if (!pt_insn_is_branch(&insn, &iext))
2648 		return 0;
2649 
2650 	/* Let's check if we can reach the event location from here.
2651 	 *
2652 	 * If we can, let's assume the erratum did not hit.  We might still be
2653 	 * wrong but we're not able to tell.
2654 	 */
2655 	status = pt_insn_range_is_contiguous(decoder->ip, ev->variant.tsx.ip,
2656 					     decoder->mode, decoder->image,
2657 					     &decoder->asid, bdm64_max_steps);
2658 	if (status > 0)
2659 		return status;
2660 
2661 	/* We can't reach the event location.  This could either mean that we
2662 	 * stopped too early (and status is zero) or that the erratum hit.
2663 	 *
2664 	 * We assume the latter and pretend that the previous branch brought us
2665 	 * to the event location, instead.
2666 	 */
2667 	decoder->ip = ev->variant.tsx.ip;
2668 
2669 	return 1;
2670 }
2671 
2672 /* Check whether a trailing TSX event should be postponed.
2673  *
2674  * This involves handling erratum BDM64.
2675  *
2676  * Returns a positive integer if the event is to be postponed.
2677  * Returns zero if the event should be processed.
2678  * Returns a negative error code otherwise.
2679  */
2680 static inline int pt_blk_postpone_trailing_tsx(struct pt_block_decoder *decoder,
2681 					       struct pt_block *block,
2682 					       const struct pt_event *ev)
2683 {
2684 	int status;
2685 
2686 	if (!decoder || !ev)
2687 		return -pte_internal;
2688 
2689 	if (ev->ip_suppressed)
2690 		return 0;
2691 
2692 	if (block && decoder->query.config.errata.bdm64) {
2693 		status = pt_blk_handle_erratum_bdm64(decoder, block, ev);
2694 		if (status < 0)
2695 			return 1;
2696 	}
2697 
2698 	if (decoder->ip != ev->variant.tsx.ip)
2699 		return 1;
2700 
2701 	return 0;
2702 }
2703 
2704 /* Proceed with events that bind to the current decoder IP.
2705  *
2706  * This function is used in the following scenarios:
2707  *
2708  *   - we just synchronized onto the trace stream
2709  *   - we ended a block and proceeded to the next IP
2710  *   - we processed an event that was indicated by this function
2711  *
2712  * Check if there is an event at the current IP that needs to be indicated to
2713  * the user.
2714  *
2715  * Returns a non-negative pt_status_flag bit-vector on success, a negative error
2716  * code otherwise.
2717  */
2718 static int pt_blk_proceed_trailing_event(struct pt_block_decoder *decoder,
2719 					 struct pt_block *block)
2720 {
2721 	struct pt_event *ev;
2722 	int status;
2723 
2724 	if (!decoder)
2725 		return -pte_internal;
2726 
2727 	status = pt_blk_fetch_event(decoder);
2728 	if (status <= 0) {
2729 		if (status < 0)
2730 			return status;
2731 
2732 		status = pt_blk_proceed_postponed_insn(decoder);
2733 		if (status < 0)
2734 			return status;
2735 
2736 		return pt_blk_status(decoder, 0);
2737 	}
2738 
2739 	ev = &decoder->event;
2740 	switch (ev->type) {
2741 	case ptev_disabled:
2742 		/* Synchronous disable events are normally indicated on the
2743 		 * event flow.
2744 		 */
2745 		if (!decoder->process_insn)
2746 			break;
2747 
2748 		/* A sync disable may bind to a CR3 changing instruction. */
2749 		if (ev->ip_suppressed &&
2750 		    pt_insn_changes_cr3(&decoder->insn, &decoder->iext))
2751 			return pt_blk_status(decoder, pts_event_pending);
2752 
2753 		/* Or it binds to the next branch that would require trace.
2754 		 *
2755 		 * Try to complete processing the current instruction by
2756 		 * proceeding past it.  If that fails because it would require
2757 		 * trace, we can apply the disabled event.
2758 		 */
2759 		status = pt_insn_next_ip(&decoder->ip, &decoder->insn,
2760 					 &decoder->iext);
2761 		if (status < 0) {
2762 			if (status != -pte_bad_query)
2763 				return status;
2764 
2765 			status = pt_blk_set_disable_resume_ip(decoder,
2766 							      &decoder->insn);
2767 			if (status < 0)
2768 				return status;
2769 
2770 			return pt_blk_status(decoder, pts_event_pending);
2771 		}
2772 
2773 		/* We proceeded past the current instruction. */
2774 		status = pt_blk_clear_postponed_insn(decoder);
2775 		if (status < 0)
2776 			return status;
2777 
2778 		/* This might have brought us to the disable IP. */
2779 		if (!ev->ip_suppressed &&
2780 		    decoder->ip == ev->variant.disabled.ip)
2781 			return pt_blk_status(decoder, pts_event_pending);
2782 
2783 		break;
2784 
2785 	case ptev_enabled:
2786 		/* This event does not bind to an instruction. */
2787 		status = pt_blk_proceed_postponed_insn(decoder);
2788 		if (status < 0)
2789 			return status;
2790 
2791 		return pt_blk_status(decoder, pts_event_pending);
2792 
2793 	case ptev_async_disabled:
2794 		/* This event does not bind to an instruction. */
2795 		status = pt_blk_proceed_postponed_insn(decoder);
2796 		if (status < 0)
2797 			return status;
2798 
2799 		if (decoder->ip != ev->variant.async_disabled.at)
2800 			break;
2801 
2802 		if (decoder->query.config.errata.skd022) {
2803 			status = pt_blk_handle_erratum_skd022(decoder, ev);
2804 			if (status != 0) {
2805 				if (status < 0)
2806 					return status;
2807 
2808 				/* If the erratum applies, the event is modified
2809 				 * to a synchronous disable event that will be
2810 				 * processed on the next pt_blk_proceed_event()
2811 				 * call.  We're done.
2812 				 */
2813 				break;
2814 			}
2815 		}
2816 
2817 		return pt_blk_status(decoder, pts_event_pending);
2818 
2819 	case ptev_async_branch:
2820 		/* This event does not bind to an instruction. */
2821 		status = pt_blk_proceed_postponed_insn(decoder);
2822 		if (status < 0)
2823 			return status;
2824 
2825 		if (decoder->ip != ev->variant.async_branch.from)
2826 			break;
2827 
2828 		return pt_blk_status(decoder, pts_event_pending);
2829 
2830 	case ptev_paging:
2831 		/* We apply the event immediately if we're not tracing. */
2832 		if (!decoder->enabled)
2833 			return pt_blk_status(decoder, pts_event_pending);
2834 
2835 		/* Synchronous paging events are normally indicated on the event
2836 		 * flow, unless they bind to the same instruction as a previous
2837 		 * event.
2838 		 *
2839 		 * We bind at most one paging event to an instruction, though.
2840 		 */
2841 		if (!decoder->process_insn || decoder->bound_paging)
2842 			break;
2843 
2844 		/* We're done if we're not binding to the currently postponed
2845 		 * instruction.  We will process the event on the normal event
2846 		 * flow in the next iteration.
2847 		 */
2848 		if (!pt_insn_binds_to_pip(&decoder->insn, &decoder->iext))
2849 			break;
2850 
2851 		/* We bound a paging event.  Make sure we do not bind further
2852 		 * paging events to this instruction.
2853 		 */
2854 		decoder->bound_paging = 1;
2855 
2856 		return pt_blk_status(decoder, pts_event_pending);
2857 
2858 	case ptev_async_paging:
2859 		/* This event does not bind to an instruction. */
2860 		status = pt_blk_proceed_postponed_insn(decoder);
2861 		if (status < 0)
2862 			return status;
2863 
2864 		if (!ev->ip_suppressed &&
2865 		    decoder->ip != ev->variant.async_paging.ip)
2866 			break;
2867 
2868 		return pt_blk_status(decoder, pts_event_pending);
2869 
2870 	case ptev_vmcs:
2871 		/* We apply the event immediately if we're not tracing. */
2872 		if (!decoder->enabled)
2873 			return pt_blk_status(decoder, pts_event_pending);
2874 
2875 		/* Synchronous vmcs events are normally indicated on the event
2876 		 * flow, unless they bind to the same instruction as a previous
2877 		 * event.
2878 		 *
2879 		 * We bind at most one vmcs event to an instruction, though.
2880 		 */
2881 		if (!decoder->process_insn || decoder->bound_vmcs)
2882 			break;
2883 
2884 		/* We're done if we're not binding to the currently postponed
2885 		 * instruction.  We will process the event on the normal event
2886 		 * flow in the next iteration.
2887 		 */
2888 		if (!pt_insn_binds_to_vmcs(&decoder->insn, &decoder->iext))
2889 			break;
2890 
2891 		/* We bound a vmcs event.  Make sure we do not bind further vmcs
2892 		 * events to this instruction.
2893 		 */
2894 		decoder->bound_vmcs = 1;
2895 
2896 		return pt_blk_status(decoder, pts_event_pending);
2897 
2898 	case ptev_async_vmcs:
2899 		/* This event does not bind to an instruction. */
2900 		status = pt_blk_proceed_postponed_insn(decoder);
2901 		if (status < 0)
2902 			return status;
2903 
2904 		if (!ev->ip_suppressed &&
2905 		    decoder->ip != ev->variant.async_vmcs.ip)
2906 			break;
2907 
2908 		return pt_blk_status(decoder, pts_event_pending);
2909 
2910 	case ptev_overflow:
2911 		/* This event does not bind to an instruction. */
2912 		status = pt_blk_proceed_postponed_insn(decoder);
2913 		if (status < 0)
2914 			return status;
2915 
2916 		return pt_blk_status(decoder, pts_event_pending);
2917 
2918 	case ptev_exec_mode:
2919 		/* This event does not bind to an instruction. */
2920 		status = pt_blk_proceed_postponed_insn(decoder);
2921 		if (status < 0)
2922 			return status;
2923 
2924 		if (!ev->ip_suppressed &&
2925 		    decoder->ip != ev->variant.exec_mode.ip)
2926 			break;
2927 
2928 		return pt_blk_status(decoder, pts_event_pending);
2929 
2930 	case ptev_tsx:
2931 		/* This event does not bind to an instruction. */
2932 		status = pt_blk_proceed_postponed_insn(decoder);
2933 		if (status < 0)
2934 			return status;
2935 
2936 		status = pt_blk_postpone_trailing_tsx(decoder, block, ev);
2937 		if (status != 0) {
2938 			if (status < 0)
2939 				return status;
2940 
2941 			break;
2942 		}
2943 
2944 		return pt_blk_status(decoder, pts_event_pending);
2945 
2946 	case ptev_stop:
2947 		/* This event does not bind to an instruction. */
2948 		status = pt_blk_proceed_postponed_insn(decoder);
2949 		if (status < 0)
2950 			return status;
2951 
2952 		return pt_blk_status(decoder, pts_event_pending);
2953 
2954 	case ptev_exstop:
2955 		/* This event does not bind to an instruction. */
2956 		status = pt_blk_proceed_postponed_insn(decoder);
2957 		if (status < 0)
2958 			return status;
2959 
2960 		if (!ev->ip_suppressed && decoder->enabled &&
2961 		    decoder->ip != ev->variant.exstop.ip)
2962 			break;
2963 
2964 		return pt_blk_status(decoder, pts_event_pending);
2965 
2966 	case ptev_mwait:
2967 		/* This event does not bind to an instruction. */
2968 		status = pt_blk_proceed_postponed_insn(decoder);
2969 		if (status < 0)
2970 			return status;
2971 
2972 		if (!ev->ip_suppressed && decoder->enabled &&
2973 		    decoder->ip != ev->variant.mwait.ip)
2974 			break;
2975 
2976 		return pt_blk_status(decoder, pts_event_pending);
2977 
2978 	case ptev_pwre:
2979 	case ptev_pwrx:
2980 		/* This event does not bind to an instruction. */
2981 		status = pt_blk_proceed_postponed_insn(decoder);
2982 		if (status < 0)
2983 			return status;
2984 
2985 		return pt_blk_status(decoder, pts_event_pending);
2986 
2987 	case ptev_ptwrite:
2988 		/* We apply the event immediately if we're not tracing. */
2989 		if (!decoder->enabled)
2990 			return pt_blk_status(decoder, pts_event_pending);
2991 
2992 		/* Ptwrite events are normally indicated on the event flow,
2993 		 * unless they bind to the same instruction as a previous event.
2994 		 *
2995 		 * We bind at most one ptwrite event to an instruction, though.
2996 		 */
2997 		if (!decoder->process_insn || decoder->bound_ptwrite)
2998 			break;
2999 
3000 		/* We're done if we're not binding to the currently postponed
3001 		 * instruction.  We will process the event on the normal event
3002 		 * flow in the next iteration.
3003 		 */
3004 		if (!ev->ip_suppressed ||
3005 		    !pt_insn_is_ptwrite(&decoder->insn, &decoder->iext))
3006 			break;
3007 
3008 		/* We bound a ptwrite event.  Make sure we do not bind further
3009 		 * ptwrite events to this instruction.
3010 		 */
3011 		decoder->bound_ptwrite = 1;
3012 
3013 		return pt_blk_status(decoder, pts_event_pending);
3014 
3015 	case ptev_tick:
3016 	case ptev_cbr:
3017 	case ptev_mnt:
3018 		/* This event does not bind to an instruction. */
3019 		status = pt_blk_proceed_postponed_insn(decoder);
3020 		if (status < 0)
3021 			return status;
3022 
3023 		return pt_blk_status(decoder, pts_event_pending);
3024 	}
3025 
3026 	/* No further events.  Proceed past any postponed instruction. */
3027 	status = pt_blk_proceed_postponed_insn(decoder);
3028 	if (status < 0)
3029 		return status;
3030 
3031 	return pt_blk_status(decoder, 0);
3032 }
3033 
3034 int pt_blk_next(struct pt_block_decoder *decoder, struct pt_block *ublock,
3035 		size_t size)
3036 {
3037 	struct pt_block block, *pblock;
3038 	int errcode, status;
3039 
3040 	if (!decoder || !ublock)
3041 		return -pte_invalid;
3042 
3043 	pblock = size == sizeof(block) ? ublock : &block;
3044 
3045 	/* Zero-initialize the block in case of error returns. */
3046 	memset(pblock, 0, sizeof(*pblock));
3047 
3048 	/* Fill in a few things from the current decode state.
3049 	 *
3050 	 * This reflects the state of the last pt_blk_next() or pt_blk_start()
3051 	 * call.  Note that, unless we stop with tracing disabled, we proceed
3052 	 * already to the start IP of the next block.
3053 	 *
3054 	 * Some of the state may later be overwritten as we process events.
3055 	 */
3056 	pblock->ip = decoder->ip;
3057 	pblock->mode = decoder->mode;
3058 	if (decoder->speculative)
3059 		pblock->speculative = 1;
3060 
3061 	/* Proceed one block. */
3062 	status = pt_blk_proceed(decoder, pblock);
3063 
3064 	errcode = block_to_user(ublock, size, pblock);
3065 	if (errcode < 0)
3066 		return errcode;
3067 
3068 	return status;
3069 }
3070 
3071 /* Process an enabled event.
3072  *
3073  * Returns zero on success, a negative error code otherwise.
3074  */
3075 static int pt_blk_process_enabled(struct pt_block_decoder *decoder,
3076 				  const struct pt_event *ev)
3077 {
3078 	if (!decoder || !ev)
3079 		return -pte_internal;
3080 
3081 	/* This event can't be a status update. */
3082 	if (ev->status_update)
3083 		return -pte_bad_context;
3084 
3085 	/* We must have an IP in order to start decoding. */
3086 	if (ev->ip_suppressed)
3087 		return -pte_noip;
3088 
3089 	/* We must currently be disabled. */
3090 	if (decoder->enabled)
3091 		return -pte_bad_context;
3092 
3093 	decoder->ip = ev->variant.enabled.ip;
3094 	decoder->enabled = 1;
3095 	decoder->process_event = 0;
3096 
3097 	return 0;
3098 }
3099 
3100 /* Process a disabled event.
3101  *
3102  * Returns zero on success, a negative error code otherwise.
3103  */
3104 static int pt_blk_process_disabled(struct pt_block_decoder *decoder,
3105 				   const struct pt_event *ev)
3106 {
3107 	if (!decoder || !ev)
3108 		return -pte_internal;
3109 
3110 	/* This event can't be a status update. */
3111 	if (ev->status_update)
3112 		return -pte_bad_context;
3113 
3114 	/* We must currently be enabled. */
3115 	if (!decoder->enabled)
3116 		return -pte_bad_context;
3117 
3118 	/* We preserve @decoder->ip.  This is where we expect tracing to resume
3119 	 * and we'll indicate that on the subsequent enabled event if tracing
3120 	 * actually does resume from there.
3121 	 */
3122 	decoder->enabled = 0;
3123 	decoder->process_event = 0;
3124 
3125 	return 0;
3126 }
3127 
3128 /* Process an asynchronous branch event.
3129  *
3130  * Returns zero on success, a negative error code otherwise.
3131  */
3132 static int pt_blk_process_async_branch(struct pt_block_decoder *decoder,
3133 				       const struct pt_event *ev)
3134 {
3135 	if (!decoder || !ev)
3136 		return -pte_internal;
3137 
3138 	/* This event can't be a status update. */
3139 	if (ev->status_update)
3140 		return -pte_bad_context;
3141 
3142 	/* We must currently be enabled. */
3143 	if (!decoder->enabled)
3144 		return -pte_bad_context;
3145 
3146 	/* Jump to the branch destination.  We will continue from there in the
3147 	 * next iteration.
3148 	 */
3149 	decoder->ip = ev->variant.async_branch.to;
3150 	decoder->process_event = 0;
3151 
3152 	return 0;
3153 }
3154 
3155 /* Process a paging event.
3156  *
3157  * Returns zero on success, a negative error code otherwise.
3158  */
3159 static int pt_blk_process_paging(struct pt_block_decoder *decoder,
3160 				 const struct pt_event *ev)
3161 {
3162 	uint64_t cr3;
3163 	int errcode;
3164 
3165 	if (!decoder || !ev)
3166 		return -pte_internal;
3167 
3168 	cr3 = ev->variant.paging.cr3;
3169 	if (decoder->asid.cr3 != cr3) {
3170 		errcode = pt_msec_cache_invalidate(&decoder->scache);
3171 		if (errcode < 0)
3172 			return errcode;
3173 
3174 		decoder->asid.cr3 = cr3;
3175 	}
3176 
3177 	decoder->process_event = 0;
3178 
3179 	return 0;
3180 }
3181 
3182 /* Process a vmcs event.
3183  *
3184  * Returns zero on success, a negative error code otherwise.
3185  */
3186 static int pt_blk_process_vmcs(struct pt_block_decoder *decoder,
3187 			       const struct pt_event *ev)
3188 {
3189 	uint64_t vmcs;
3190 	int errcode;
3191 
3192 	if (!decoder || !ev)
3193 		return -pte_internal;
3194 
3195 	vmcs = ev->variant.vmcs.base;
3196 	if (decoder->asid.vmcs != vmcs) {
3197 		errcode = pt_msec_cache_invalidate(&decoder->scache);
3198 		if (errcode < 0)
3199 			return errcode;
3200 
3201 		decoder->asid.vmcs = vmcs;
3202 	}
3203 
3204 	decoder->process_event = 0;
3205 
3206 	return 0;
3207 }
3208 
3209 /* Process an overflow event.
3210  *
3211  * Returns zero on success, a negative error code otherwise.
3212  */
3213 static int pt_blk_process_overflow(struct pt_block_decoder *decoder,
3214 				   const struct pt_event *ev)
3215 {
3216 	if (!decoder || !ev)
3217 		return -pte_internal;
3218 
3219 	/* This event can't be a status update. */
3220 	if (ev->status_update)
3221 		return -pte_bad_context;
3222 
3223 	/* If the IP is suppressed, the overflow resolved while tracing was
3224 	 * disabled.  Otherwise it resolved while tracing was enabled.
3225 	 */
3226 	if (ev->ip_suppressed) {
3227 		/* Tracing is disabled.  It doesn't make sense to preserve the
3228 		 * previous IP.  This will just be misleading.  Even if tracing
3229 		 * had been disabled before, as well, we might have missed the
3230 		 * re-enable in the overflow.
3231 		 */
3232 		decoder->enabled = 0;
3233 		decoder->ip = 0ull;
3234 	} else {
3235 		/* Tracing is enabled and we're at the IP at which the overflow
3236 		 * resolved.
3237 		 */
3238 		decoder->enabled = 1;
3239 		decoder->ip = ev->variant.overflow.ip;
3240 	}
3241 
3242 	/* We don't know the TSX state.  Let's assume we execute normally.
3243 	 *
3244 	 * We also don't know the execution mode.  Let's keep what we have
3245 	 * in case we don't get an update before we have to decode the next
3246 	 * instruction.
3247 	 */
3248 	decoder->speculative = 0;
3249 	decoder->process_event = 0;
3250 
3251 	return 0;
3252 }
3253 
3254 /* Process an exec mode event.
3255  *
3256  * Returns zero on success, a negative error code otherwise.
3257  */
3258 static int pt_blk_process_exec_mode(struct pt_block_decoder *decoder,
3259 				    const struct pt_event *ev)
3260 {
3261 	enum pt_exec_mode mode;
3262 
3263 	if (!decoder || !ev)
3264 		return -pte_internal;
3265 
3266 	/* Use status update events to diagnose inconsistencies. */
3267 	mode = ev->variant.exec_mode.mode;
3268 	if (ev->status_update && decoder->enabled &&
3269 	    decoder->mode != ptem_unknown && decoder->mode != mode)
3270 		return -pte_bad_status_update;
3271 
3272 	decoder->mode = mode;
3273 	decoder->process_event = 0;
3274 
3275 	return 0;
3276 }
3277 
3278 /* Process a tsx event.
3279  *
3280  * Returns zero on success, a negative error code otherwise.
3281  */
3282 static int pt_blk_process_tsx(struct pt_block_decoder *decoder,
3283 			      const struct pt_event *ev)
3284 {
3285 	if (!decoder || !ev)
3286 		return -pte_internal;
3287 
3288 	decoder->speculative = ev->variant.tsx.speculative;
3289 	decoder->process_event = 0;
3290 
3291 	return 0;
3292 }
3293 
3294 /* Process a stop event.
3295  *
3296  * Returns zero on success, a negative error code otherwise.
3297  */
3298 static int pt_blk_process_stop(struct pt_block_decoder *decoder,
3299 			       const struct pt_event *ev)
3300 {
3301 	if (!decoder || !ev)
3302 		return -pte_internal;
3303 
3304 	/* This event can't be a status update. */
3305 	if (ev->status_update)
3306 		return -pte_bad_context;
3307 
3308 	/* Tracing is always disabled before it is stopped. */
3309 	if (decoder->enabled)
3310 		return -pte_bad_context;
3311 
3312 	decoder->process_event = 0;
3313 
3314 	return 0;
3315 }
3316 
3317 int pt_blk_event(struct pt_block_decoder *decoder, struct pt_event *uevent,
3318 		 size_t size)
3319 {
3320 	struct pt_event *ev;
3321 	int status;
3322 
3323 	if (!decoder || !uevent)
3324 		return -pte_invalid;
3325 
3326 	/* We must currently process an event. */
3327 	if (!decoder->process_event)
3328 		return -pte_bad_query;
3329 
3330 	ev = &decoder->event;
3331 	switch (ev->type) {
3332 	case ptev_enabled:
3333 		/* Indicate that tracing resumes from the IP at which tracing
3334 		 * had been disabled before (with some special treatment for
3335 		 * calls).
3336 		 */
3337 		if (ev->variant.enabled.ip == decoder->ip)
3338 			ev->variant.enabled.resumed = 1;
3339 
3340 		status = pt_blk_process_enabled(decoder, ev);
3341 		if (status < 0)
3342 			return status;
3343 
3344 		break;
3345 
3346 	case ptev_async_disabled:
3347 		if (decoder->ip != ev->variant.async_disabled.at)
3348 			return -pte_bad_query;
3349 
3350 		fallthrough;
3351 	case ptev_disabled:
3352 
3353 		status = pt_blk_process_disabled(decoder, ev);
3354 		if (status < 0)
3355 			return status;
3356 
3357 		break;
3358 
3359 	case ptev_async_branch:
3360 		if (decoder->ip != ev->variant.async_branch.from)
3361 			return -pte_bad_query;
3362 
3363 		status = pt_blk_process_async_branch(decoder, ev);
3364 		if (status < 0)
3365 			return status;
3366 
3367 		break;
3368 
3369 	case ptev_async_paging:
3370 		if (!ev->ip_suppressed &&
3371 		    decoder->ip != ev->variant.async_paging.ip)
3372 			return -pte_bad_query;
3373 
3374 		fallthrough;
3375 	case ptev_paging:
3376 		status = pt_blk_process_paging(decoder, ev);
3377 		if (status < 0)
3378 			return status;
3379 
3380 		break;
3381 
3382 	case ptev_async_vmcs:
3383 		if (!ev->ip_suppressed &&
3384 		    decoder->ip != ev->variant.async_vmcs.ip)
3385 			return -pte_bad_query;
3386 
3387 		fallthrough;
3388 	case ptev_vmcs:
3389 		status = pt_blk_process_vmcs(decoder, ev);
3390 		if (status < 0)
3391 			return status;
3392 
3393 		break;
3394 
3395 	case ptev_overflow:
3396 		status = pt_blk_process_overflow(decoder, ev);
3397 		if (status < 0)
3398 			return status;
3399 
3400 		break;
3401 
3402 	case ptev_exec_mode:
3403 		if (!ev->ip_suppressed &&
3404 		    decoder->ip != ev->variant.exec_mode.ip)
3405 			return -pte_bad_query;
3406 
3407 		status = pt_blk_process_exec_mode(decoder, ev);
3408 		if (status < 0)
3409 			return status;
3410 
3411 		break;
3412 
3413 	case ptev_tsx:
3414 		if (!ev->ip_suppressed && decoder->ip != ev->variant.tsx.ip)
3415 			return -pte_bad_query;
3416 
3417 		status = pt_blk_process_tsx(decoder, ev);
3418 		if (status < 0)
3419 			return status;
3420 
3421 		break;
3422 
3423 	case ptev_stop:
3424 		status = pt_blk_process_stop(decoder, ev);
3425 		if (status < 0)
3426 			return status;
3427 
3428 		break;
3429 
3430 	case ptev_exstop:
3431 		if (!ev->ip_suppressed && decoder->enabled &&
3432 		    decoder->ip != ev->variant.exstop.ip)
3433 			return -pte_bad_query;
3434 
3435 		decoder->process_event = 0;
3436 		break;
3437 
3438 	case ptev_mwait:
3439 		if (!ev->ip_suppressed && decoder->enabled &&
3440 		    decoder->ip != ev->variant.mwait.ip)
3441 			return -pte_bad_query;
3442 
3443 		decoder->process_event = 0;
3444 		break;
3445 
3446 	case ptev_pwre:
3447 	case ptev_pwrx:
3448 	case ptev_ptwrite:
3449 	case ptev_tick:
3450 	case ptev_cbr:
3451 	case ptev_mnt:
3452 		decoder->process_event = 0;
3453 		break;
3454 	}
3455 
3456 	/* Copy the event to the user.  Make sure we're not writing beyond the
3457 	 * memory provided by the user.
3458 	 *
3459 	 * We might truncate details of an event but only for those events the
3460 	 * user can't know about, anyway.
3461 	 */
3462 	if (sizeof(*ev) < size)
3463 		size = sizeof(*ev);
3464 
3465 	memcpy(uevent, ev, size);
3466 
3467 	/* Indicate further events. */
3468 	return pt_blk_proceed_trailing_event(decoder, NULL);
3469 }
3470