1 /*
2 * Performance events:
3 *
4 * Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de>
5 * Copyright (C) 2008-2011, Red Hat, Inc., Ingo Molnar
6 * Copyright (C) 2008-2011, Red Hat, Inc., Peter Zijlstra
7 *
8 * Data type definitions, declarations, prototypes.
9 *
10 * Started by: Thomas Gleixner and Ingo Molnar
11 *
 * For licensing details see kernel-base/COPYING
13 */
14 #ifndef _LINUX_PERF_EVENT_H
15 #define _LINUX_PERF_EVENT_H
16
17 #include <uapi/linux/perf_event.h>
18 #include <uapi/linux/bpf_perf_event.h>
19
20 /*
21 * Kernel-internal data types and definitions:
22 */
23
24 #ifdef CONFIG_PERF_EVENTS
25 # include <asm/perf_event.h>
26 # include <asm/local64.h>
27 #endif
28
29 #ifdef CONFIG_HAVE_HW_BREAKPOINT
30 # include <linux/rhashtable-types.h>
31 # include <asm/hw_breakpoint.h>
32 #endif
33
34 #include <linux/list.h>
35 #include <linux/mutex.h>
36 #include <linux/rculist.h>
37 #include <linux/rcupdate.h>
38 #include <linux/spinlock.h>
39 #include <linux/hrtimer.h>
40 #include <linux/fs.h>
41 #include <linux/pid_namespace.h>
42 #include <linux/workqueue.h>
43 #include <linux/ftrace.h>
44 #include <linux/cpu.h>
45 #include <linux/irq_work.h>
46 #include <linux/static_key.h>
47 #include <linux/jump_label_ratelimit.h>
48 #include <linux/atomic.h>
49 #include <linux/sysfs.h>
50 #include <linux/perf_regs.h>
51 #include <linux/cgroup.h>
52 #include <linux/refcount.h>
53 #include <linux/security.h>
54 #include <linux/static_call.h>
55 #include <linux/lockdep.h>
56
57 #include <asm/local.h>
58
59 struct perf_callchain_entry {
60 u64 nr;
61 u64 ip[]; /* /proc/sys/kernel/perf_event_max_stack */
62 };
63
64 struct perf_callchain_entry_ctx {
65 struct perf_callchain_entry *entry;
66 u32 max_stack;
67 u32 nr;
68 short contexts;
69 bool contexts_maxed;
70 };
71
72 typedef unsigned long (*perf_copy_f)(void *dst, const void *src,
73 unsigned long off, unsigned long len);
74
75 struct perf_raw_frag {
76 union {
77 struct perf_raw_frag *next;
78 unsigned long pad;
79 };
80 perf_copy_f copy;
81 void *data;
82 u32 size;
83 } __packed;
84
85 struct perf_raw_record {
86 struct perf_raw_frag frag;
87 u32 size;
88 };
89
static __always_inline bool perf_raw_frag_last(const struct perf_raw_frag *frag)
91 {
92 return frag->pad < sizeof(u64);
93 }
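
/*
 * Illustrative only: a raw record's payload size can be computed by
 * walking the fragment chain until perf_raw_frag_last() reports the
 * terminating fragment (the same loop perf_sample_save_raw_data() uses
 * further down). example_raw_record_size() is a hypothetical helper:
 *
 *	static u32 example_raw_record_size(struct perf_raw_record *raw)
 *	{
 *		struct perf_raw_frag *frag = &raw->frag;
 *		u32 sum = 0;
 *
 *		do {
 *			sum += frag->size;
 *			if (perf_raw_frag_last(frag))
 *				break;
 *			frag = frag->next;
 *		} while (1);
 *
 *		return sum;
 *	}
 */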
94
95 /*
96 * branch stack layout:
97 * nr: number of taken branches stored in entries[]
98 * hw_idx: The low level index of raw branch records
99 * for the most recent branch.
100 * -1ULL means invalid/unknown.
101 *
 * Note that nr can vary from sample to sample.
 * Branches (to, from) are stored from most recent
104 * to least recent, i.e., entries[0] contains the most
105 * recent branch.
106 * The entries[] is an abstraction of raw branch records,
107 * which may not be stored in age order in HW, e.g. Intel LBR.
108 * The hw_idx is to expose the low level index of raw
109 * branch record for the most recent branch aka entries[0].
110 * The hw_idx index is between -1 (unknown) and max depth,
111 * which can be retrieved in /sys/devices/cpu/caps/branches.
112 * For the architectures whose raw branch records are
113 * already stored in age order, the hw_idx should be 0.
114 */
115 struct perf_branch_stack {
116 u64 nr;
117 u64 hw_idx;
118 struct perf_branch_entry entries[];
119 };
120
121 struct task_struct;
122
123 /*
124 * extra PMU register associated with an event
125 */
126 struct hw_perf_event_extra {
127 u64 config; /* register value */
128 unsigned int reg; /* register address or index */
129 int alloc; /* extra register already allocated */
130 int idx; /* index in shared_regs->regs[] */
131 };
132
133 /**
134 * hw_perf_event::flag values
135 *
136 * PERF_EVENT_FLAG_ARCH bits are reserved for architecture-specific
137 * usage.
138 */
139 #define PERF_EVENT_FLAG_ARCH 0x0fffffff
140 #define PERF_EVENT_FLAG_USER_READ_CNT 0x80000000
141
142 static_assert((PERF_EVENT_FLAG_USER_READ_CNT & PERF_EVENT_FLAG_ARCH) == 0);
143
144 /**
145 * struct hw_perf_event - performance event hardware details:
146 */
147 struct hw_perf_event {
148 #ifdef CONFIG_PERF_EVENTS
149 union {
150 struct { /* hardware */
151 u64 config;
152 u64 config1;
153 u64 last_tag;
154 u64 dyn_constraint;
155 unsigned long config_base;
156 unsigned long event_base;
157 int event_base_rdpmc;
158 int idx;
159 int last_cpu;
160 int flags;
161
162 struct hw_perf_event_extra extra_reg;
163 struct hw_perf_event_extra branch_reg;
164 };
165 struct { /* aux / Intel-PT */
166 u64 aux_config;
167 /*
168 * For AUX area events, aux_paused cannot be a state
169 * flag because it can be updated asynchronously to
170 * state.
171 */
172 unsigned int aux_paused;
173 };
174 struct { /* software */
175 struct hrtimer hrtimer;
176 };
177 struct { /* tracepoint */
178 /* for tp_event->class */
179 struct list_head tp_list;
180 };
181 struct { /* amd_power */
182 u64 pwr_acc;
183 u64 ptsc;
184 };
185 #ifdef CONFIG_HAVE_HW_BREAKPOINT
186 struct { /* breakpoint */
187 /*
188 * Crufty hack to avoid the chicken and egg
189 * problem hw_breakpoint has with context
 * creation and event initialization.
191 */
192 struct arch_hw_breakpoint info;
193 struct rhlist_head bp_list;
194 };
195 #endif
196 struct { /* amd_iommu */
197 u8 iommu_bank;
198 u8 iommu_cntr;
199 u16 padding;
200 u64 conf;
201 u64 conf1;
202 };
203 };
204 /*
 * If the event is a per-task event, this will point to the task in
206 * question. See the comment in perf_event_alloc().
207 */
208 struct task_struct *target;
209
210 /*
211 * PMU would store hardware filter configuration
212 * here.
213 */
214 void *addr_filters;
215
216 /* Last sync'ed generation of filters */
217 unsigned long addr_filters_gen;
218
219 /*
220 * hw_perf_event::state flags; used to track the PERF_EF_* state.
221 */
222
223 /* the counter is stopped */
224 #define PERF_HES_STOPPED 0x01
225
226 /* event->count up-to-date */
227 #define PERF_HES_UPTODATE 0x02
228
229 #define PERF_HES_ARCH 0x04
230
231 int state;
232
233 /*
234 * The last observed hardware counter value, updated with a
235 * local64_cmpxchg() such that pmu::read() can be called nested.
236 */
237 local64_t prev_count;
238
239 /*
240 * The period to start the next sample with.
241 */
242 u64 sample_period;
243
244 union {
245 struct { /* Sampling */
246 /*
247 * The period we started this sample with.
248 */
249 u64 last_period;
250
251 /*
252 * However much is left of the current period;
253 * note that this is a full 64bit value and
254 * allows for generation of periods longer
255 * than hardware might allow.
256 */
257 local64_t period_left;
258 };
259 struct { /* Topdown events counting for context switch */
260 u64 saved_metric;
261 u64 saved_slots;
262 };
263 };
264
265 /*
266 * State for throttling the event, see __perf_event_overflow() and
267 * perf_adjust_freq_unthr_context().
268 */
269 u64 interrupts_seq;
270 u64 interrupts;
271
272 /*
273 * State for freq target events, see __perf_event_overflow() and
274 * perf_adjust_freq_unthr_context().
275 */
276 u64 freq_time_stamp;
277 u64 freq_count_stamp;
278 #endif /* CONFIG_PERF_EVENTS */
279 };
280
281 struct perf_event;
282 struct perf_event_pmu_context;
283
284 /*
285 * Common implementation detail of pmu::{start,commit,cancel}_txn
286 */
287
288 /* txn to add/schedule event on PMU */
289 #define PERF_PMU_TXN_ADD 0x1
290
291 /* txn to read event group from PMU */
292 #define PERF_PMU_TXN_READ 0x2
293
294 /**
295 * pmu::capabilities flags
296 */
297 #define PERF_PMU_CAP_NO_INTERRUPT 0x0001
298 #define PERF_PMU_CAP_NO_NMI 0x0002
299 #define PERF_PMU_CAP_AUX_NO_SG 0x0004
300 #define PERF_PMU_CAP_EXTENDED_REGS 0x0008
301 #define PERF_PMU_CAP_EXCLUSIVE 0x0010
302 #define PERF_PMU_CAP_ITRACE 0x0020
303 #define PERF_PMU_CAP_NO_EXCLUDE 0x0040
304 #define PERF_PMU_CAP_AUX_OUTPUT 0x0080
305 #define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0100
306 #define PERF_PMU_CAP_AUX_PAUSE 0x0200
307 #define PERF_PMU_CAP_AUX_PREFER_LARGE 0x0400
308
309 /**
310 * pmu::scope
311 */
312 enum perf_pmu_scope {
313 PERF_PMU_SCOPE_NONE = 0,
314 PERF_PMU_SCOPE_CORE,
315 PERF_PMU_SCOPE_DIE,
316 PERF_PMU_SCOPE_CLUSTER,
317 PERF_PMU_SCOPE_PKG,
318 PERF_PMU_SCOPE_SYS_WIDE,
319 PERF_PMU_MAX_SCOPE,
320 };
321
322 struct perf_output_handle;
323
324 #define PMU_NULL_DEV ((void *)(~0UL))
325
326 /**
327 * struct pmu - generic performance monitoring unit
328 */
329 struct pmu {
330 struct list_head entry;
331
332 spinlock_t events_lock;
333 struct list_head events;
334
335 struct module *module;
336 struct device *dev;
337 struct device *parent;
338 const struct attribute_group **attr_groups;
339 const struct attribute_group **attr_update;
340 const char *name;
341 int type;
342
343 /*
344 * various common per-pmu feature flags
345 */
346 int capabilities;
347
348 /*
349 * PMU scope
350 */
351 unsigned int scope;
352
353 struct perf_cpu_pmu_context * __percpu *cpu_pmu_context;
354 atomic_t exclusive_cnt; /* < 0: cpu; > 0: tsk */
355 int task_ctx_nr;
356 int hrtimer_interval_ms;
357
358 /* number of address filters this PMU can do */
359 unsigned int nr_addr_filters;
360
361 /*
362 * Fully disable/enable this PMU, can be used to protect from the PMI
363 * as well as for lazy/batch writing of the MSRs.
364 */
365 void (*pmu_enable) (struct pmu *pmu); /* optional */
366 void (*pmu_disable) (struct pmu *pmu); /* optional */
367
368 /*
369 * Try and initialize the event for this PMU.
370 *
371 * Returns:
372 * -ENOENT -- @event is not for this PMU
373 *
374 * -ENODEV -- @event is for this PMU but PMU not present
375 * -EBUSY -- @event is for this PMU but PMU temporarily unavailable
376 * -EINVAL -- @event is for this PMU but @event is not valid
377 * -EOPNOTSUPP -- @event is for this PMU, @event is valid, but not supported
378 * -EACCES -- @event is for this PMU, @event is valid, but no privileges
379 *
380 * 0 -- @event is for this PMU and valid
381 *
382 * Other error return values are allowed.
383 */
384 int (*event_init) (struct perf_event *event);
385
386 /*
387 * Notification that the event was mapped or unmapped. Called
388 * in the context of the mapping task.
389 */
390 void (*event_mapped) (struct perf_event *event, struct mm_struct *mm); /* optional */
391 void (*event_unmapped) (struct perf_event *event, struct mm_struct *mm); /* optional */
392
393 /*
 * Flags for ->add()/->del()/->start()/->stop(). There are
395 * matching hw_perf_event::state flags.
396 */
397
398 /* start the counter when adding */
399 #define PERF_EF_START 0x01
400
401 /* reload the counter when starting */
402 #define PERF_EF_RELOAD 0x02
403
404 /* update the counter when stopping */
405 #define PERF_EF_UPDATE 0x04
406
407 /* AUX area event, pause tracing */
408 #define PERF_EF_PAUSE 0x08
409
410 /* AUX area event, resume tracing */
411 #define PERF_EF_RESUME 0x10
412
413 /*
414 * Adds/Removes a counter to/from the PMU, can be done inside a
415 * transaction, see the ->*_txn() methods.
416 *
417 * The add/del callbacks will reserve all hardware resources required
418 * to service the event, this includes any counter constraint
419 * scheduling etc.
420 *
421 * Called with IRQs disabled and the PMU disabled on the CPU the event
422 * is on.
423 *
424 * ->add() called without PERF_EF_START should result in the same state
425 * as ->add() followed by ->stop().
426 *
427 * ->del() must always PERF_EF_UPDATE stop an event. If it calls
428 * ->stop() that must deal with already being stopped without
429 * PERF_EF_UPDATE.
430 */
431 int (*add) (struct perf_event *event, int flags);
432 void (*del) (struct perf_event *event, int flags);
433
434 /*
435 * Starts/Stops a counter present on the PMU.
436 *
437 * The PMI handler should stop the counter when perf_event_overflow()
438 * returns !0. ->start() will be used to continue.
439 *
440 * Also used to change the sample period.
441 *
442 * Called with IRQs disabled and the PMU disabled on the CPU the event
 * is on -- will be called from NMI context when the PMU generates
444 * NMIs.
445 *
446 * ->stop() with PERF_EF_UPDATE will read the counter and update
447 * period/count values like ->read() would.
448 *
449 * ->start() with PERF_EF_RELOAD will reprogram the counter
450 * value, must be preceded by a ->stop() with PERF_EF_UPDATE.
451 *
452 * ->stop() with PERF_EF_PAUSE will stop as simply as possible. Will not
453 * overlap another ->stop() with PERF_EF_PAUSE nor ->start() with
454 * PERF_EF_RESUME.
455 *
456 * ->start() with PERF_EF_RESUME will start as simply as possible but
457 * only if the counter is not otherwise stopped. Will not overlap
458 * another ->start() with PERF_EF_RESUME nor ->stop() with
459 * PERF_EF_PAUSE.
460 *
461 * Notably, PERF_EF_PAUSE/PERF_EF_RESUME *can* be concurrent with other
462 * ->stop()/->start() invocations, just not itself.
463 */
464 void (*start) (struct perf_event *event, int flags);
465 void (*stop) (struct perf_event *event, int flags);
466
467 /*
468 * Updates the counter value of the event.
469 *
470 * For sampling capable PMUs this will also update the software period
471 * hw_perf_event::period_left field.
472 */
473 void (*read) (struct perf_event *event);
474
475 /*
476 * Group events scheduling is treated as a transaction, add
477 * group events as a whole and perform one schedulability test.
478 * If the test fails, roll back the whole group
479 *
480 * Start the transaction, after this ->add() doesn't need to
481 * do schedulability tests.
482 *
483 * Optional.
484 */
485 void (*start_txn) (struct pmu *pmu, unsigned int txn_flags);
486 /*
487 * If ->start_txn() disabled the ->add() schedulability test
488 * then ->commit_txn() is required to perform one. On success
489 * the transaction is closed. On error the transaction is kept
490 * open until ->cancel_txn() is called.
491 *
492 * Optional.
493 */
494 int (*commit_txn) (struct pmu *pmu);
495 /*
496 * Will cancel the transaction, assumes ->del() is called
497 * for each successful ->add() during the transaction.
498 *
499 * Optional.
500 */
501 void (*cancel_txn) (struct pmu *pmu);
502
503 /*
504 * Will return the value for perf_event_mmap_page::index for this event,
505 * if no implementation is provided it will default to 0 (see
506 * perf_event_idx_default).
507 */
int (*event_idx) (struct perf_event *event); /* optional */
509
510 /*
511 * context-switches callback
512 */
513 void (*sched_task) (struct perf_event_pmu_context *pmu_ctx,
514 struct task_struct *task, bool sched_in);
515
516 /*
517 * Kmem cache of PMU specific data
518 */
519 struct kmem_cache *task_ctx_cache;
520
521 /*
522 * Set up pmu-private data structures for an AUX area
523 */
524 void *(*setup_aux) (struct perf_event *event, void **pages,
525 int nr_pages, bool overwrite);
526 /* optional */
527
528 /*
529 * Free pmu-private AUX data structures
530 */
531 void (*free_aux) (void *aux); /* optional */
532
533 /*
534 * Take a snapshot of the AUX buffer without touching the event
535 * state, so that preempting ->start()/->stop() callbacks does
536 * not interfere with their logic. Called in PMI context.
537 *
538 * Returns the size of AUX data copied to the output handle.
539 *
540 * Optional.
541 */
542 long (*snapshot_aux) (struct perf_event *event,
543 struct perf_output_handle *handle,
544 unsigned long size);
545
546 /*
547 * Validate address range filters: make sure the HW supports the
548 * requested configuration and number of filters; return 0 if the
549 * supplied filters are valid, -errno otherwise.
550 *
551 * Runs in the context of the ioctl()ing process and is not serialized
552 * with the rest of the PMU callbacks.
553 */
554 int (*addr_filters_validate) (struct list_head *filters);
555 /* optional */
556
557 /*
558 * Synchronize address range filter configuration:
559 * translate hw-agnostic filters into hardware configuration in
560 * event::hw::addr_filters.
561 *
562 * Runs as a part of filter sync sequence that is done in ->start()
563 * callback by calling perf_event_addr_filters_sync().
564 *
565 * May (and should) traverse event::addr_filters::list, for which its
566 * caller provides necessary serialization.
567 */
568 void (*addr_filters_sync) (struct perf_event *event);
569 /* optional */
570
571 /*
572 * Check if event can be used for aux_output purposes for
573 * events of this PMU.
574 *
575 * Runs from perf_event_open(). Should return 0 for "no match"
576 * or non-zero for "match".
577 */
578 int (*aux_output_match) (struct perf_event *event);
579 /* optional */
580
581 /*
582 * Skip programming this PMU on the given CPU. Typically needed for
583 * big.LITTLE things.
584 */
585 bool (*filter) (struct pmu *pmu, int cpu); /* optional */
586
587 /*
588 * Check period value for PERF_EVENT_IOC_PERIOD ioctl.
589 */
590 int (*check_period) (struct perf_event *event, u64 value); /* optional */
591 };
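
/*
 * Illustrative sketch only, not kernel code: the minimal callback set a
 * counting-only PMU driver might wire up, and how it would register
 * itself. Everything prefixed with "example_" is hypothetical (in
 * particular example_hw_read_counter()); locking, hotplug and real
 * hardware programming are omitted.
 *
 *	static int example_event_init(struct perf_event *event)
 *	{
 *		if (event->attr.type != event->pmu->type)
 *			return -ENOENT;
 *		if (is_sampling_event(event))
 *			return -EOPNOTSUPP;
 *		return 0;
 *	}
 *
 *	static void example_read(struct perf_event *event)
 *	{
 *		u64 prev = local64_read(&event->hw.prev_count);
 *		u64 now  = example_hw_read_counter(event->hw.idx);
 *
 *		local64_set(&event->hw.prev_count, now);
 *		local64_add(now - prev, &event->count);
 *	}
 *
 *	static void example_start(struct perf_event *event, int flags)
 *	{
 *		local64_set(&event->hw.prev_count,
 *			    example_hw_read_counter(event->hw.idx));
 *		event->hw.state = 0;
 *	}
 *
 *	static void example_stop(struct perf_event *event, int flags)
 *	{
 *		if (flags & PERF_EF_UPDATE)
 *			example_read(event);
 *		event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
 *	}
 *
 *	static int example_add(struct perf_event *event, int flags)
 *	{
 *		event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
 *		if (flags & PERF_EF_START)
 *			example_start(event, PERF_EF_RELOAD);
 *		return 0;
 *	}
 *
 *	static void example_del(struct perf_event *event, int flags)
 *	{
 *		example_stop(event, PERF_EF_UPDATE);
 *	}
 *
 *	static struct pmu example_pmu = {
 *		.task_ctx_nr	= perf_invalid_context,
 *		.capabilities	= PERF_PMU_CAP_NO_INTERRUPT,
 *		.event_init	= example_event_init,
 *		.add		= example_add,
 *		.del		= example_del,
 *		.start		= example_start,
 *		.stop		= example_stop,
 *		.read		= example_read,
 *	};
 *
 * At driver init time the PMU would then be registered with
 * perf_pmu_register(&example_pmu, "example", -1), and torn down again
 * with perf_pmu_unregister().
 */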
592
593 enum perf_addr_filter_action_t {
594 PERF_ADDR_FILTER_ACTION_STOP = 0,
595 PERF_ADDR_FILTER_ACTION_START,
596 PERF_ADDR_FILTER_ACTION_FILTER,
597 };
598
599 /**
600 * struct perf_addr_filter - address range filter definition
601 * @entry: event's filter list linkage
602 * @path: object file's path for file-based filters
603 * @offset: filter range offset
604 * @size: filter range size (size==0 means single address trigger)
605 * @action: filter/start/stop
606 *
607 * This is a hardware-agnostic filter configuration as specified by the user.
608 */
609 struct perf_addr_filter {
610 struct list_head entry;
611 struct path path;
612 unsigned long offset;
613 unsigned long size;
614 enum perf_addr_filter_action_t action;
615 };
616
617 /**
618 * struct perf_addr_filters_head - container for address range filters
619 * @list: list of filters for this event
620 * @lock: spinlock that serializes accesses to the @list and event's
621 * (and its children's) filter generations.
622 * @nr_file_filters: number of file-based filters
623 *
624 * A child event will use parent's @list (and therefore @lock), so they are
625 * bundled together; see perf_event_addr_filters().
626 */
627 struct perf_addr_filters_head {
628 struct list_head list;
629 raw_spinlock_t lock;
630 unsigned int nr_file_filters;
631 };
632
633 struct perf_addr_filter_range {
634 unsigned long start;
635 unsigned long size;
636 };
637
638 /*
639 * The normal states are:
640 *
 *        ACTIVE    --.
 *           ^        |
 *           |        |
 *    sched_{in,out}()|
 *           |        |
 *           v        |
 *    ,---> INACTIVE -+ <-.
 *    |               |   |
 *    |  {dis,en}able()
 * sched_in()         |   |
 *    |      OFF   <--' --+
 *    |                   |
 *    `---> ERROR   ------'
654 *
655 * That is:
656 *
657 * sched_in: INACTIVE -> {ACTIVE,ERROR}
658 * sched_out: ACTIVE -> INACTIVE
659 * disable: {ACTIVE,INACTIVE} -> OFF
660 * enable: {OFF,ERROR} -> INACTIVE
661 *
662 * Where {OFF,ERROR} are disabled states.
663 *
664 * Then we have the {EXIT,REVOKED,DEAD} states which are various shades of
665 * defunct events:
666 *
 * - EXIT means the task that the event was assigned to died, but child events
668 * still live, and further children can still be created. But the event
669 * itself will never be active again. It can only transition to
670 * {REVOKED,DEAD};
671 *
672 * - REVOKED means the PMU the event was associated with is gone; all
673 * functionality is stopped but the event is still alive. Can only
674 * transition to DEAD;
675 *
 * - DEAD means the event really is dying: tearing down state and freeing bits.
677 *
678 */
679 enum perf_event_state {
680 PERF_EVENT_STATE_DEAD = -5,
681 PERF_EVENT_STATE_REVOKED = -4, /* pmu gone, must not touch */
682 PERF_EVENT_STATE_EXIT = -3, /* task died, still inherit */
683 PERF_EVENT_STATE_ERROR = -2, /* scheduling error, can enable */
684 PERF_EVENT_STATE_OFF = -1,
685 PERF_EVENT_STATE_INACTIVE = 0,
686 PERF_EVENT_STATE_ACTIVE = 1,
687 };
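
/*
 * Illustrative only (hypothetical helper): because all of the defunct
 * states sit at or below EXIT in the ordering above, "is this event
 * defunct?" style checks reduce to a simple comparison:
 *
 *	static inline bool example_event_is_defunct(struct perf_event *event)
 *	{
 *		return event->state <= PERF_EVENT_STATE_EXIT;
 *	}
 */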
688
689 struct file;
690 struct perf_sample_data;
691
692 typedef void (*perf_overflow_handler_t)(struct perf_event *,
693 struct perf_sample_data *,
694 struct pt_regs *regs);
695
696 /*
697 * Event capabilities. For event_caps and groups caps.
698 *
699 * PERF_EV_CAP_SOFTWARE: Is a software event.
700 * PERF_EV_CAP_READ_ACTIVE_PKG: A CPU event (or cgroup event) that can be read
701 * from any CPU in the package where it is active.
702 * PERF_EV_CAP_SIBLING: An event with this flag must be a group sibling and
703 * cannot be a group leader. If an event with this flag is detached from the
704 * group it is scheduled out and moved into an unrecoverable ERROR state.
705 * PERF_EV_CAP_READ_SCOPE: A CPU event that can be read from any CPU of the
706 * PMU scope where it is active.
707 */
708 #define PERF_EV_CAP_SOFTWARE BIT(0)
709 #define PERF_EV_CAP_READ_ACTIVE_PKG BIT(1)
710 #define PERF_EV_CAP_SIBLING BIT(2)
711 #define PERF_EV_CAP_READ_SCOPE BIT(3)
712
713 #define SWEVENT_HLIST_BITS 8
714 #define SWEVENT_HLIST_SIZE (1 << SWEVENT_HLIST_BITS)
715
716 struct swevent_hlist {
717 struct hlist_head heads[SWEVENT_HLIST_SIZE];
718 struct rcu_head rcu_head;
719 };
720
721 #define PERF_ATTACH_CONTEXT 0x0001
722 #define PERF_ATTACH_GROUP 0x0002
723 #define PERF_ATTACH_TASK 0x0004
724 #define PERF_ATTACH_TASK_DATA 0x0008
725 #define PERF_ATTACH_GLOBAL_DATA 0x0010
726 #define PERF_ATTACH_SCHED_CB 0x0020
727 #define PERF_ATTACH_CHILD 0x0040
728 #define PERF_ATTACH_EXCLUSIVE 0x0080
729 #define PERF_ATTACH_CALLCHAIN 0x0100
730 #define PERF_ATTACH_ITRACE 0x0200
731
732 struct bpf_prog;
733 struct perf_cgroup;
734 struct perf_buffer;
735
736 struct pmu_event_list {
737 raw_spinlock_t lock;
738 struct list_head list;
739 };
740
741 /*
 * event->sibling_list is modified while holding both ctx->lock and ctx->mutex;
 * as such, iteration must hold either lock. However, since ctx->lock is an IRQ
744 * safe lock, and is only held by the CPU doing the modification, having IRQs
745 * disabled is sufficient since it will hold-off the IPIs.
746 */
747 #ifdef CONFIG_PROVE_LOCKING
748 # define lockdep_assert_event_ctx(event) \
749 WARN_ON_ONCE(__lockdep_enabled && \
750 (this_cpu_read(hardirqs_enabled) && \
751 lockdep_is_held(&(event)->ctx->mutex) != LOCK_STATE_HELD))
752 #else
753 # define lockdep_assert_event_ctx(event)
754 #endif
755
756 #define for_each_sibling_event(sibling, event) \
757 lockdep_assert_event_ctx(event); \
758 if ((event)->group_leader == (event)) \
759 list_for_each_entry((sibling), &(event)->sibling_list, sibling_list)
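
/*
 * Illustrative usage (hypothetical function): visit every sibling of a
 * group leader with ctx->mutex held (or IRQs disabled), which is what
 * lockdep_assert_event_ctx() above checks:
 *
 *	static void example_visit_group(struct perf_event *leader)
 *	{
 *		struct perf_event *sibling;
 *
 *		for_each_sibling_event(sibling, leader)
 *			pr_debug("sibling on pmu %s\n", sibling->pmu->name);
 *	}
 */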
760
761 /**
762 * struct perf_event - performance event kernel representation:
763 */
764 struct perf_event {
765 #ifdef CONFIG_PERF_EVENTS
766 /*
767 * entry onto perf_event_context::event_list;
768 * modifications require ctx->lock
769 * RCU safe iterations.
770 */
771 struct list_head event_entry;
772
773 /*
774 * Locked for modification by both ctx->mutex and ctx->lock; holding
 * either suffices for read.
776 */
777 struct list_head sibling_list;
778 struct list_head active_list;
779 /*
780 * Node on the pinned or flexible tree located at the event context;
781 */
782 struct rb_node group_node;
783 u64 group_index;
784 /*
785 * We need storage to track the entries in perf_pmu_migrate_context; we
786 * cannot use the event_entry because of RCU and we want to keep the
 * group intact, which avoids us using the other two entries.
788 */
789 struct list_head migrate_entry;
790
791 struct hlist_node hlist_entry;
792 struct list_head active_entry;
793 int nr_siblings;
794
795 /* Not serialized. Only written during event initialization. */
796 int event_caps;
797 /* The cumulative AND of all event_caps for events in this group. */
798 int group_caps;
799
800 unsigned int group_generation;
801 struct perf_event *group_leader;
802 /*
803 * event->pmu will always point to pmu in which this event belongs.
804 * Whereas event->pmu_ctx->pmu may point to other pmu when group of
805 * different pmu events is created.
806 */
807 struct pmu *pmu;
808 void *pmu_private;
809
810 enum perf_event_state state;
811 unsigned int attach_state;
812 local64_t count;
813 atomic64_t child_count;
814
815 /*
816 * These are the total time in nanoseconds that the event
817 * has been enabled (i.e. eligible to run, and the task has
818 * been scheduled in, if this is a per-task event)
819 * and running (scheduled onto the CPU), respectively.
820 */
821 u64 total_time_enabled;
822 u64 total_time_running;
823 u64 tstamp;
824
825 struct perf_event_attr attr;
826 u16 header_size;
827 u16 id_header_size;
828 u16 read_size;
829 struct hw_perf_event hw;
830
831 struct perf_event_context *ctx;
832 /*
833 * event->pmu_ctx points to perf_event_pmu_context in which the event
834 * is added. This pmu_ctx can be of other pmu for sw event when that
835 * sw event is part of a group which also contains non-sw events.
836 */
837 struct perf_event_pmu_context *pmu_ctx;
838 atomic_long_t refcount;
839
840 /*
841 * These accumulate total time (in nanoseconds) that children
842 * events have been enabled and running, respectively.
843 */
844 atomic64_t child_total_time_enabled;
845 atomic64_t child_total_time_running;
846
847 /*
848 * Protect attach/detach and child_list:
849 */
850 struct mutex child_mutex;
851 struct list_head child_list;
852 struct perf_event *parent;
853
854 int oncpu;
855 int cpu;
856
857 struct list_head owner_entry;
858 struct task_struct *owner;
859
860 /* mmap bits */
861 struct mutex mmap_mutex;
862 atomic_t mmap_count;
863
864 struct perf_buffer *rb;
865 struct list_head rb_entry;
866 unsigned long rcu_batches;
867 int rcu_pending;
868
869 /* poll related */
870 wait_queue_head_t waitq;
871 struct fasync_struct *fasync;
872
873 /* delayed work for NMIs and such */
874 unsigned int pending_wakeup;
875 unsigned int pending_kill;
876 unsigned int pending_disable;
877 unsigned long pending_addr; /* SIGTRAP */
878 struct irq_work pending_irq;
879 struct irq_work pending_disable_irq;
880 struct callback_head pending_task;
881 unsigned int pending_work;
882
883 atomic_t event_limit;
884
885 /* address range filters */
886 struct perf_addr_filters_head addr_filters;
/* vma address array for file-based filters */
888 struct perf_addr_filter_range *addr_filter_ranges;
889 unsigned long addr_filters_gen;
890
891 /* for aux_output events */
892 struct perf_event *aux_event;
893
894 void (*destroy)(struct perf_event *);
895 struct rcu_head rcu_head;
896
897 struct pid_namespace *ns;
898 u64 id;
899
900 atomic64_t lost_samples;
901
902 u64 (*clock)(void);
903 perf_overflow_handler_t overflow_handler;
904 void *overflow_handler_context;
905 struct bpf_prog *prog;
906 u64 bpf_cookie;
907
908 #ifdef CONFIG_EVENT_TRACING
909 struct trace_event_call *tp_event;
910 struct event_filter *filter;
911 # ifdef CONFIG_FUNCTION_TRACER
912 struct ftrace_ops ftrace_ops;
913 # endif
914 #endif
915
916 #ifdef CONFIG_CGROUP_PERF
struct perf_cgroup *cgrp; /* cgroup the event is attached to */
918 #endif
919
920 #ifdef CONFIG_SECURITY
921 void *security;
922 #endif
923 struct list_head sb_list;
924 struct list_head pmu_list;
925
926 /*
 * Certain events get forwarded to another PMU internally by over-
 * writing the kernel copy of event->attr.type without the user being
 * aware of it. event->orig_type contains the original 'type' requested
 * by the user.
931 */
932 u32 orig_type;
933 #endif /* CONFIG_PERF_EVENTS */
934 };
935
936 /*
937 * ,-----------------------[1:n]------------------------.
938 * V V
939 * perf_event_context <-[1:n]-> perf_event_pmu_context <-[1:n]- perf_event
940 * | |
941 * `--[n:1]-> pmu <-[1:n]--'
942 *
943 *
944 * struct perf_event_pmu_context lifetime is refcount based and RCU freed
945 * (similar to perf_event_context). Locking is as if it were a member of
946 * perf_event_context; specifically:
947 *
948 * modification, both: ctx->mutex && ctx->lock
949 * reading, either: ctx->mutex || ctx->lock
950 *
951 * There is one exception to this; namely put_pmu_ctx() isn't always called
952 * with ctx->mutex held; this means that as long as we can guarantee the epc
953 * has events the above rules hold.
954 *
 * Specifically, sys_perf_event_open()'s group_leader case depends on
 * ctx->mutex pinning the configuration. Since we hold a reference on
 * group_leader (through the filedesc) it can't go away, therefore its
 * associated pmu_ctx must exist and cannot change due to ctx->mutex.
959 *
960 * perf_event holds a refcount on perf_event_context
961 * perf_event holds a refcount on perf_event_pmu_context
962 */
963 struct perf_event_pmu_context {
964 struct pmu *pmu;
965 struct perf_event_context *ctx;
966
967 struct list_head pmu_ctx_entry;
968
969 struct list_head pinned_active;
970 struct list_head flexible_active;
971
972 /* Used to identify the per-cpu perf_event_pmu_context */
973 unsigned int embedded : 1;
974
975 unsigned int nr_events;
976 unsigned int nr_cgroups;
977 unsigned int nr_freq;
978
979 atomic_t refcount; /* event <-> epc */
980 struct rcu_head rcu_head;
981
982 /*
 * Set when one or more (plausibly active) events can't be scheduled
 * due to PMU overcommit or PMU constraints. Tolerant of events that
 * need not be active due to scheduling constraints, such as cgroup
 * events.
987 */
988 int rotate_necessary;
989 };
990
static inline bool perf_pmu_ctx_is_active(struct perf_event_pmu_context *epc)
992 {
993 return !list_empty(&epc->flexible_active) || !list_empty(&epc->pinned_active);
994 }
995
996 struct perf_event_groups {
997 struct rb_root tree;
998 u64 index;
999 };
1000
1001
1002 /**
1003 * struct perf_event_context - event context structure
1004 *
1005 * Used as a container for task events and CPU events as well:
1006 */
1007 struct perf_event_context {
1008 /*
1009 * Protect the states of the events in the list,
1010 * nr_active, and the list:
1011 */
1012 raw_spinlock_t lock;
1013 /*
1014 * Protect the list of events. Locking either mutex or lock
1015 * is sufficient to ensure the list doesn't change; to change
1016 * the list you need to lock both the mutex and the spinlock.
1017 */
1018 struct mutex mutex;
1019
1020 struct list_head pmu_ctx_list;
1021 struct perf_event_groups pinned_groups;
1022 struct perf_event_groups flexible_groups;
1023 struct list_head event_list;
1024
1025 int nr_events;
1026 int nr_user;
1027 int is_active;
1028
1029 int nr_stat;
1030 int nr_freq;
1031 int rotate_disable;
1032
1033 refcount_t refcount; /* event <-> ctx */
1034 struct task_struct *task;
1035
1036 /*
1037 * Context clock, runs when context enabled.
1038 */
1039 u64 time;
1040 u64 timestamp;
1041 u64 timeoffset;
1042
1043 /*
1044 * These fields let us detect when two contexts have both
1045 * been cloned (inherited) from a common ancestor.
1046 */
1047 struct perf_event_context *parent_ctx;
1048 u64 parent_gen;
1049 u64 generation;
1050 int pin_count;
1051 #ifdef CONFIG_CGROUP_PERF
1052 int nr_cgroups; /* cgroup evts */
1053 #endif
1054 struct rcu_head rcu_head;
1055
1056 /*
1057 * The count of events for which using the switch-out fast path
1058 * should be avoided.
1059 *
1060 * Sum (event->pending_work + events with
1061 * (attr->inherit && (attr->sample_type & PERF_SAMPLE_READ)))
1062 *
 * The SIGTRAP is targeted at ctx->task; as such, ctx->task must not be
 * changed until the signal is delivered.
1065 */
1066 local_t nr_no_switch_fast;
1067 };
1068
1069 /**
1070 * struct perf_ctx_data - PMU specific data for a task
 * @rcu_head: To avoid races when freeing PMU specific data
1072 * @refcount: To track users
1073 * @global: To track system-wide users
1074 * @ctx_cache: Kmem cache of PMU specific data
1075 * @data: PMU specific data
1076 *
1077 * Currently, the struct is only used in Intel LBR call stack mode to
1078 * save/restore the call stack of a task on context switches.
1079 *
 * The rcu_head is used to prevent races when freeing the data.
 * The data is only allocated when Intel LBR call stack mode is enabled.
 * The data will be freed when the mode is disabled.
 * The content of the data is only accessed on context switch, which
 * should be protected by rcu_read_lock().
1085 *
1086 * Because of the alignment requirement of Intel Arch LBR, the Kmem cache
1087 * is used to allocate the PMU specific data. The ctx_cache is to track
1088 * the Kmem cache.
1089 *
1090 * Careful: Struct perf_ctx_data is added as a pointer in struct task_struct.
1091 * When system-wide Intel LBR call stack mode is enabled, a buffer with
1092 * constant size will be allocated for each task.
1093 * Also, system memory consumption can further grow when the size of
1094 * struct perf_ctx_data enlarges.
1095 */
1096 struct perf_ctx_data {
1097 struct rcu_head rcu_head;
1098 refcount_t refcount;
1099 int global;
1100 struct kmem_cache *ctx_cache;
1101 void *data;
1102 };
1103
1104 struct perf_cpu_pmu_context {
1105 struct perf_event_pmu_context epc;
1106 struct perf_event_pmu_context *task_epc;
1107
1108 struct list_head sched_cb_entry;
1109 int sched_cb_usage;
1110
1111 int active_oncpu;
1112 int exclusive;
1113 int pmu_disable_count;
1114
1115 raw_spinlock_t hrtimer_lock;
1116 struct hrtimer hrtimer;
1117 ktime_t hrtimer_interval;
1118 unsigned int hrtimer_active;
1119 };
1120
1121 /**
1122 * struct perf_event_cpu_context - per cpu event context structure
1123 */
1124 struct perf_cpu_context {
1125 struct perf_event_context ctx;
1126 struct perf_event_context *task_ctx;
1127 int online;
1128
1129 #ifdef CONFIG_CGROUP_PERF
1130 struct perf_cgroup *cgrp;
1131 #endif
1132
1133 /*
1134 * Per-CPU storage for iterators used in visit_groups_merge. The default
1135 * storage is of size 2 to hold the CPU and any CPU event iterators.
1136 */
1137 int heap_size;
1138 struct perf_event **heap;
1139 struct perf_event *heap_default[2];
1140 };
1141
1142 struct perf_output_handle {
1143 struct perf_event *event;
1144 struct perf_buffer *rb;
1145 unsigned long wakeup;
1146 unsigned long size;
1147 union {
1148 u64 flags; /* perf_output*() */
1149 u64 aux_flags; /* perf_aux_output*() */
1150 struct {
1151 u64 skip_read : 1;
1152 };
1153 };
1154 union {
1155 void *addr;
1156 unsigned long head;
1157 };
1158 int page;
1159 };
1160
1161 struct bpf_perf_event_data_kern {
1162 bpf_user_pt_regs_t *regs;
1163 struct perf_sample_data *data;
1164 struct perf_event *event;
1165 };
1166
1167 #ifdef CONFIG_CGROUP_PERF
1168
1169 /*
1170 * perf_cgroup_info keeps track of time_enabled for a cgroup.
1171 * This is a per-cpu dynamically allocated data structure.
1172 */
1173 struct perf_cgroup_info {
1174 u64 time;
1175 u64 timestamp;
1176 u64 timeoffset;
1177 int active;
1178 };
1179
1180 struct perf_cgroup {
1181 struct cgroup_subsys_state css;
1182 struct perf_cgroup_info __percpu *info;
1183 };
1184
1185 /*
1186 * Must ensure cgroup is pinned (css_get) before calling
1187 * this function. In other words, we cannot call this function
1188 * if there is no cgroup event for the current CPU context.
1189 */
1190 static inline struct perf_cgroup *
perf_cgroup_from_task(struct task_struct *task, struct perf_event_context *ctx)
1192 {
1193 return container_of(task_css_check(task, perf_event_cgrp_id,
1194 ctx ? lockdep_is_held(&ctx->lock)
1195 : true),
1196 struct perf_cgroup, css);
1197 }
1198 #endif /* CONFIG_CGROUP_PERF */
1199
1200 #ifdef CONFIG_PERF_EVENTS
1201
1202 extern struct perf_event_context *perf_cpu_task_ctx(void);
1203
1204 extern void *perf_aux_output_begin(struct perf_output_handle *handle,
1205 struct perf_event *event);
1206 extern void perf_aux_output_end(struct perf_output_handle *handle,
1207 unsigned long size);
1208 extern int perf_aux_output_skip(struct perf_output_handle *handle,
1209 unsigned long size);
1210 extern void *perf_get_aux(struct perf_output_handle *handle);
1211 extern void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags);
1212 extern void perf_event_itrace_started(struct perf_event *event);
1213
1214 extern int perf_pmu_register(struct pmu *pmu, const char *name, int type);
1215 extern int perf_pmu_unregister(struct pmu *pmu);
1216
1217 extern void __perf_event_task_sched_in(struct task_struct *prev,
1218 struct task_struct *task);
1219 extern void __perf_event_task_sched_out(struct task_struct *prev,
1220 struct task_struct *next);
1221 extern int perf_event_init_task(struct task_struct *child, u64 clone_flags);
1222 extern void perf_event_exit_task(struct task_struct *child);
1223 extern void perf_event_free_task(struct task_struct *task);
1224 extern void perf_event_delayed_put(struct task_struct *task);
1225 extern struct file *perf_event_get(unsigned int fd);
1226 extern const struct perf_event *perf_get_event(struct file *file);
1227 extern const struct perf_event_attr *perf_event_attrs(struct perf_event *event);
1228 extern void perf_event_print_debug(void);
1229 extern void perf_pmu_disable(struct pmu *pmu);
1230 extern void perf_pmu_enable(struct pmu *pmu);
1231 extern void perf_sched_cb_dec(struct pmu *pmu);
1232 extern void perf_sched_cb_inc(struct pmu *pmu);
1233 extern int perf_event_task_disable(void);
1234 extern int perf_event_task_enable(void);
1235
1236 extern void perf_pmu_resched(struct pmu *pmu);
1237
1238 extern int perf_event_refresh(struct perf_event *event, int refresh);
1239 extern void perf_event_update_userpage(struct perf_event *event);
1240 extern int perf_event_release_kernel(struct perf_event *event);
1241
1242 extern struct perf_event *
1243 perf_event_create_kernel_counter(struct perf_event_attr *attr,
1244 int cpu,
1245 struct task_struct *task,
1246 perf_overflow_handler_t callback,
1247 void *context);
1248
1249 extern void perf_pmu_migrate_context(struct pmu *pmu,
1250 int src_cpu, int dst_cpu);
1251 extern int perf_event_read_local(struct perf_event *event, u64 *value,
1252 u64 *enabled, u64 *running);
1253 extern u64 perf_event_read_value(struct perf_event *event,
1254 u64 *enabled, u64 *running);
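
/*
 * Illustrative only: the typical in-kernel usage pattern is to create a
 * counter with perf_event_create_kernel_counter(), read it with
 * perf_event_read_value() and drop it with perf_event_release_kernel().
 * example_count_cycles_on() is hypothetical and error handling is
 * abbreviated:
 *
 *	static u64 example_count_cycles_on(int cpu)
 *	{
 *		struct perf_event_attr attr = {
 *			.type	= PERF_TYPE_HARDWARE,
 *			.config	= PERF_COUNT_HW_CPU_CYCLES,
 *			.size	= sizeof(attr),
 *			.pinned	= 1,
 *		};
 *		struct perf_event *event;
 *		u64 enabled, running, count;
 *
 *		event = perf_event_create_kernel_counter(&attr, cpu, NULL,
 *							 NULL, NULL);
 *		if (IS_ERR(event))
 *			return 0;
 *
 *		(... let it count for a while ...)
 *
 *		count = perf_event_read_value(event, &enabled, &running);
 *		perf_event_release_kernel(event);
 *		return count;
 *	}
 */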
1255
1256 extern struct perf_callchain_entry *perf_callchain(struct perf_event *event, struct pt_regs *regs);
1257
static inline bool branch_sample_no_flags(const struct perf_event *event)
1259 {
1260 return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_FLAGS;
1261 }
1262
static inline bool branch_sample_no_cycles(const struct perf_event *event)
1264 {
1265 return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_CYCLES;
1266 }
1267
static inline bool branch_sample_type(const struct perf_event *event)
1269 {
1270 return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_TYPE_SAVE;
1271 }
1272
static inline bool branch_sample_hw_index(const struct perf_event *event)
1274 {
1275 return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX;
1276 }
1277
static inline bool branch_sample_priv(const struct perf_event *event)
1279 {
1280 return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_PRIV_SAVE;
1281 }
1282
static inline bool branch_sample_counters(const struct perf_event *event)
1284 {
1285 return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS;
1286 }
1287
static inline bool branch_sample_call_stack(const struct perf_event *event)
1289 {
1290 return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK;
1291 }
1292
1293 struct perf_sample_data {
1294 /*
1295 * Fields set by perf_sample_data_init() unconditionally,
1296 * group so as to minimize the cachelines touched.
1297 */
1298 u64 sample_flags;
1299 u64 period;
1300 u64 dyn_size;
1301
1302 /*
1303 * Fields commonly set by __perf_event_header__init_id(),
1304 * group so as to minimize the cachelines touched.
1305 */
1306 u64 type;
1307 struct {
1308 u32 pid;
1309 u32 tid;
1310 } tid_entry;
1311 u64 time;
1312 u64 id;
1313 struct {
1314 u32 cpu;
1315 u32 reserved;
1316 } cpu_entry;
1317
1318 /*
1319 * The other fields, optionally {set,used} by
1320 * perf_{prepare,output}_sample().
1321 */
1322 u64 ip;
1323 struct perf_callchain_entry *callchain;
1324 struct perf_raw_record *raw;
1325 struct perf_branch_stack *br_stack;
1326 u64 *br_stack_cntr;
1327 union perf_sample_weight weight;
1328 union perf_mem_data_src data_src;
1329 u64 txn;
1330
1331 struct perf_regs regs_user;
1332 struct perf_regs regs_intr;
1333 u64 stack_user_size;
1334
1335 u64 stream_id;
1336 u64 cgroup;
1337 u64 addr;
1338 u64 phys_addr;
1339 u64 data_page_size;
1340 u64 code_page_size;
1341 u64 aux_size;
1342 } ____cacheline_aligned;
1343
1344 /* default value for data source */
1345 #define PERF_MEM_NA (PERF_MEM_S(OP, NA) |\
1346 PERF_MEM_S(LVL, NA) |\
1347 PERF_MEM_S(SNOOP, NA) |\
1348 PERF_MEM_S(LOCK, NA) |\
1349 PERF_MEM_S(TLB, NA) |\
1350 PERF_MEM_S(LVLNUM, NA))
1351
static inline void perf_sample_data_init(struct perf_sample_data *data,
1353 u64 addr, u64 period)
1354 {
1355 /* remaining struct members initialized in perf_prepare_sample() */
1356 data->sample_flags = PERF_SAMPLE_PERIOD;
1357 data->period = period;
1358 data->dyn_size = 0;
1359
1360 if (addr) {
1361 data->addr = addr;
1362 data->sample_flags |= PERF_SAMPLE_ADDR;
1363 }
1364 }
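
/*
 * Illustrative only (hypothetical driver code): the canonical overflow
 * path of a sampling PMU's interrupt handler. It initializes the on-stack
 * sample data with the period that just expired and lets
 * perf_event_overflow() decide whether the event must be stopped
 * (throttling, event_limit, ...):
 *
 *	static void example_handle_overflow(struct perf_event *event,
 *					    struct pt_regs *regs)
 *	{
 *		struct perf_sample_data data;
 *		struct hw_perf_event *hwc = &event->hw;
 *
 *		perf_sample_data_init(&data, 0, hwc->last_period);
 *
 *		if (perf_event_overflow(event, &data, regs))
 *			event->pmu->stop(event, 0);
 *	}
 */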
1365
static inline void perf_sample_save_callchain(struct perf_sample_data *data,
1367 struct perf_event *event,
1368 struct pt_regs *regs)
1369 {
1370 int size = 1;
1371
1372 if (!(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN))
1373 return;
1374 if (WARN_ON_ONCE(data->sample_flags & PERF_SAMPLE_CALLCHAIN))
1375 return;
1376
1377 data->callchain = perf_callchain(event, regs);
1378 size += data->callchain->nr;
1379
1380 data->dyn_size += size * sizeof(u64);
1381 data->sample_flags |= PERF_SAMPLE_CALLCHAIN;
1382 }
1383
static inline void perf_sample_save_raw_data(struct perf_sample_data *data,
1385 struct perf_event *event,
1386 struct perf_raw_record *raw)
1387 {
1388 struct perf_raw_frag *frag = &raw->frag;
1389 u32 sum = 0;
1390 int size;
1391
1392 if (!(event->attr.sample_type & PERF_SAMPLE_RAW))
1393 return;
1394 if (WARN_ON_ONCE(data->sample_flags & PERF_SAMPLE_RAW))
1395 return;
1396
1397 do {
1398 sum += frag->size;
1399 if (perf_raw_frag_last(frag))
1400 break;
1401 frag = frag->next;
1402 } while (1);
1403
1404 size = round_up(sum + sizeof(u32), sizeof(u64));
1405 raw->size = size - sizeof(u32);
1406 frag->pad = raw->size - sum;
1407
1408 data->raw = raw;
1409 data->dyn_size += size;
1410 data->sample_flags |= PERF_SAMPLE_RAW;
1411 }
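
/*
 * Illustrative only: saving a single-fragment raw payload into a sample,
 * as tracepoint-style events do. "data", "event", "payload" and
 * "payload_size" are assumed to come from the surrounding handler;
 * leaving ->next (aka ->pad) zeroed marks the terminating fragment for
 * perf_raw_frag_last():
 *
 *	struct perf_raw_record raw = {
 *		.frag = {
 *			.size	= payload_size,
 *			.data	= payload,
 *		},
 *	};
 *
 *	perf_sample_save_raw_data(&data, event, &raw);
 */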
1412
static inline bool has_branch_stack(struct perf_event *event)
1414 {
1415 return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK;
1416 }
1417
static inline void perf_sample_save_brstack(struct perf_sample_data *data,
1419 struct perf_event *event,
1420 struct perf_branch_stack *brs,
1421 u64 *brs_cntr)
1422 {
1423 int size = sizeof(u64); /* nr */
1424
1425 if (!has_branch_stack(event))
1426 return;
1427 if (WARN_ON_ONCE(data->sample_flags & PERF_SAMPLE_BRANCH_STACK))
1428 return;
1429
1430 if (branch_sample_hw_index(event))
1431 size += sizeof(u64);
1432
1433 brs->nr = min_t(u16, event->attr.sample_max_stack, brs->nr);
1434
1435 size += brs->nr * sizeof(struct perf_branch_entry);
1436
1437 /*
1438 * The extension space for counters is appended after the
1439 * struct perf_branch_stack. It is used to store the occurrences
1440 * of events of each branch.
1441 */
1442 if (brs_cntr)
1443 size += brs->nr * sizeof(u64);
1444
1445 data->br_stack = brs;
1446 data->br_stack_cntr = brs_cntr;
1447 data->dyn_size += size;
1448 data->sample_flags |= PERF_SAMPLE_BRANCH_STACK;
1449 }
1450
static inline u32 perf_sample_data_size(struct perf_sample_data *data,
1452 struct perf_event *event)
1453 {
1454 u32 size = sizeof(struct perf_event_header);
1455
1456 size += event->header_size + event->id_header_size;
1457 size += data->dyn_size;
1458
1459 return size;
1460 }
1461
1462 /*
1463 * Clear all bitfields in the perf_branch_entry.
1464 * The to and from fields are not cleared because they are
 * systematically modified by the caller.
1466 */
static inline void perf_clear_branch_entry_bitfields(struct perf_branch_entry *br)
1468 {
1469 br->mispred = 0;
1470 br->predicted = 0;
1471 br->in_tx = 0;
1472 br->abort = 0;
1473 br->cycles = 0;
1474 br->type = 0;
1475 br->spec = PERF_BR_SPEC_NA;
1476 br->reserved = 0;
1477 }
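
/*
 * Illustrative only (hypothetical helper): how a driver typically fills
 * one entry of its branch stack buffer before handing the stack to
 * perf_sample_save_brstack():
 *
 *	static void example_fill_branch(struct perf_branch_entry *br,
 *					u64 from, u64 to, bool mispred)
 *	{
 *		perf_clear_branch_entry_bitfields(br);
 *		br->from	= from;
 *		br->to		= to;
 *		br->mispred	= mispred;
 *		br->predicted	= !mispred;
 *	}
 */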
1478
1479 extern void perf_output_sample(struct perf_output_handle *handle,
1480 struct perf_event_header *header,
1481 struct perf_sample_data *data,
1482 struct perf_event *event);
1483 extern void perf_prepare_sample(struct perf_sample_data *data,
1484 struct perf_event *event,
1485 struct pt_regs *regs);
1486 extern void perf_prepare_header(struct perf_event_header *header,
1487 struct perf_sample_data *data,
1488 struct perf_event *event,
1489 struct pt_regs *regs);
1490
1491 extern int perf_event_overflow(struct perf_event *event,
1492 struct perf_sample_data *data,
1493 struct pt_regs *regs);
1494
1495 extern void perf_event_output_forward(struct perf_event *event,
1496 struct perf_sample_data *data,
1497 struct pt_regs *regs);
1498 extern void perf_event_output_backward(struct perf_event *event,
1499 struct perf_sample_data *data,
1500 struct pt_regs *regs);
1501 extern int perf_event_output(struct perf_event *event,
1502 struct perf_sample_data *data,
1503 struct pt_regs *regs);
1504
1505 static inline bool
is_default_overflow_handler(struct perf_event *event)
1507 {
1508 perf_overflow_handler_t overflow_handler = event->overflow_handler;
1509
1510 if (likely(overflow_handler == perf_event_output_forward))
1511 return true;
1512 if (unlikely(overflow_handler == perf_event_output_backward))
1513 return true;
1514 return false;
1515 }
1516
1517 extern void
1518 perf_event_header__init_id(struct perf_event_header *header,
1519 struct perf_sample_data *data,
1520 struct perf_event *event);
1521 extern void
1522 perf_event__output_id_sample(struct perf_event *event,
1523 struct perf_output_handle *handle,
1524 struct perf_sample_data *sample);
1525
1526 extern void
1527 perf_log_lost_samples(struct perf_event *event, u64 lost);
1528
static inline bool event_has_any_exclude_flag(struct perf_event *event)
1530 {
1531 struct perf_event_attr *attr = &event->attr;
1532
1533 return attr->exclude_idle || attr->exclude_user ||
1534 attr->exclude_kernel || attr->exclude_hv ||
1535 attr->exclude_guest || attr->exclude_host;
1536 }
1537
static inline bool is_sampling_event(struct perf_event *event)
1539 {
1540 return event->attr.sample_period != 0;
1541 }
1542
1543 /*
1544 * Return 1 for a software event, 0 for a hardware event
1545 */
static inline int is_software_event(struct perf_event *event)
1547 {
1548 return event->event_caps & PERF_EV_CAP_SOFTWARE;
1549 }
1550
1551 /*
1552 * Return 1 for event in sw context, 0 for event in hw context
1553 */
static inline int in_software_context(struct perf_event *event)
1555 {
1556 return event->pmu_ctx->pmu->task_ctx_nr == perf_sw_context;
1557 }
1558
static inline int is_exclusive_pmu(struct pmu *pmu)
1560 {
1561 return pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE;
1562 }
1563
1564 extern struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX];
1565
1566 extern void ___perf_sw_event(u32, u64, struct pt_regs *, u64);
1567 extern void __perf_sw_event(u32, u64, struct pt_regs *, u64);
1568
1569 #ifndef perf_arch_fetch_caller_regs
static inline void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { }
1571 #endif
1572
1573 /*
1574 * When generating a perf sample in-line, instead of from an interrupt /
1575 * exception, we lack a pt_regs. This is typically used from software events
1576 * like: SW_CONTEXT_SWITCHES, SW_MIGRATIONS and the tie-in with tracepoints.
1577 *
1578 * We typically don't need a full set, but (for x86) do require:
1579 * - ip for PERF_SAMPLE_IP
1580 * - cs for user_mode() tests
1581 * - sp for PERF_SAMPLE_CALLCHAIN
1582 * - eflags for MISC bits and CALLCHAIN (see: perf_hw_regs())
1583 *
1584 * NOTE: assumes @regs is otherwise already 0 filled; this is important for
1585 * things like PERF_SAMPLE_REGS_INTR.
1586 */
static inline void perf_fetch_caller_regs(struct pt_regs *regs)
1588 {
1589 perf_arch_fetch_caller_regs(regs, CALLER_ADDR0);
1590 }
1591
1592 static __always_inline void
perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
1594 {
1595 if (static_key_false(&perf_swevent_enabled[event_id]))
1596 __perf_sw_event(event_id, nr, regs, addr);
1597 }
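
/*
 * Illustrative usage: generic kernel code emits a software event with a
 * count increment and the address that triggered it, for example the way
 * the page fault path does:
 *
 *	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 */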
1598
1599 DECLARE_PER_CPU(struct pt_regs, __perf_regs[4]);
1600
1601 /*
1602 * 'Special' version for the scheduler, it hard assumes no recursion,
1603 * which is guaranteed by us not actually scheduling inside other swevents
1604 * because those disable preemption.
1605 */
static __always_inline void __perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)
1607 {
1608 struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);
1609
1610 perf_fetch_caller_regs(regs);
1611 ___perf_sw_event(event_id, nr, regs, addr);
1612 }
1613
1614 extern struct static_key_false perf_sched_events;
1615
static __always_inline bool __perf_sw_enabled(int swevt)
1617 {
1618 return static_key_false(&perf_swevent_enabled[swevt]);
1619 }
1620
static inline void perf_event_task_migrate(struct task_struct *task)
1622 {
1623 if (__perf_sw_enabled(PERF_COUNT_SW_CPU_MIGRATIONS))
1624 task->sched_migrated = 1;
1625 }
1626
static inline void perf_event_task_sched_in(struct task_struct *prev,
1628 struct task_struct *task)
1629 {
1630 if (static_branch_unlikely(&perf_sched_events))
1631 __perf_event_task_sched_in(prev, task);
1632
1633 if (__perf_sw_enabled(PERF_COUNT_SW_CPU_MIGRATIONS) &&
1634 task->sched_migrated) {
1635 __perf_sw_event_sched(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 0);
1636 task->sched_migrated = 0;
1637 }
1638 }
1639
static inline void perf_event_task_sched_out(struct task_struct *prev,
1641 struct task_struct *next)
1642 {
1643 if (__perf_sw_enabled(PERF_COUNT_SW_CONTEXT_SWITCHES))
1644 __perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0);
1645
1646 #ifdef CONFIG_CGROUP_PERF
1647 if (__perf_sw_enabled(PERF_COUNT_SW_CGROUP_SWITCHES) &&
1648 perf_cgroup_from_task(prev, NULL) !=
1649 perf_cgroup_from_task(next, NULL))
1650 __perf_sw_event_sched(PERF_COUNT_SW_CGROUP_SWITCHES, 1, 0);
1651 #endif
1652
1653 if (static_branch_unlikely(&perf_sched_events))
1654 __perf_event_task_sched_out(prev, next);
1655 }
1656
1657 extern void perf_event_mmap(struct vm_area_struct *vma);
1658
1659 extern void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len,
1660 bool unregister, const char *sym);
1661 extern void perf_event_bpf_event(struct bpf_prog *prog,
1662 enum perf_bpf_event_type type,
1663 u16 flags);
1664
1665 #define PERF_GUEST_ACTIVE 0x01
1666 #define PERF_GUEST_USER 0x02
1667
1668 struct perf_guest_info_callbacks {
1669 unsigned int (*state)(void);
1670 unsigned long (*get_ip)(void);
1671 unsigned int (*handle_intel_pt_intr)(void);
1672 };
1673
1674 #ifdef CONFIG_GUEST_PERF_EVENTS
1675
1676 extern struct perf_guest_info_callbacks __rcu *perf_guest_cbs;
1677
1678 DECLARE_STATIC_CALL(__perf_guest_state, *perf_guest_cbs->state);
1679 DECLARE_STATIC_CALL(__perf_guest_get_ip, *perf_guest_cbs->get_ip);
1680 DECLARE_STATIC_CALL(__perf_guest_handle_intel_pt_intr, *perf_guest_cbs->handle_intel_pt_intr);
1681
static inline unsigned int perf_guest_state(void)
1683 {
1684 return static_call(__perf_guest_state)();
1685 }
1686
static inline unsigned long perf_guest_get_ip(void)
1688 {
1689 return static_call(__perf_guest_get_ip)();
1690 }
1691
static inline unsigned int perf_guest_handle_intel_pt_intr(void)
1693 {
1694 return static_call(__perf_guest_handle_intel_pt_intr)();
1695 }
1696
1697 extern void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs);
1698 extern void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs);
1699
1700 #else /* !CONFIG_GUEST_PERF_EVENTS: */
1701
static inline unsigned int perf_guest_state(void) { return 0; }
static inline unsigned long perf_guest_get_ip(void) { return 0; }
static inline unsigned int perf_guest_handle_intel_pt_intr(void) { return 0; }
1705
1706 #endif /* !CONFIG_GUEST_PERF_EVENTS */
1707
1708 extern void perf_event_exec(void);
1709 extern void perf_event_comm(struct task_struct *tsk, bool exec);
1710 extern void perf_event_namespaces(struct task_struct *tsk);
1711 extern void perf_event_fork(struct task_struct *tsk);
1712 extern void perf_event_text_poke(const void *addr,
1713 const void *old_bytes, size_t old_len,
1714 const void *new_bytes, size_t new_len);
1715
1716 /* Callchains */
1717 DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry);
1718
1719 extern void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs);
1720 extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs);
1721 extern struct perf_callchain_entry *
1722 get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
1723 u32 max_stack, bool crosstask, bool add_mark);
1724 extern int get_callchain_buffers(int max_stack);
1725 extern void put_callchain_buffers(void);
1726 extern struct perf_callchain_entry *get_callchain_entry(int *rctx);
1727 extern void put_callchain_entry(int rctx);
1728
1729 extern int sysctl_perf_event_max_stack;
1730 extern int sysctl_perf_event_max_contexts_per_stack;
1731
static inline int perf_callchain_store_context(struct perf_callchain_entry_ctx *ctx, u64 ip)
1733 {
1734 if (ctx->contexts < sysctl_perf_event_max_contexts_per_stack) {
1735 struct perf_callchain_entry *entry = ctx->entry;
1736
1737 entry->ip[entry->nr++] = ip;
1738 ++ctx->contexts;
1739 return 0;
1740 } else {
1741 ctx->contexts_maxed = true;
1742 return -1; /* no more room, stop walking the stack */
1743 }
1744 }
1745
static inline int perf_callchain_store(struct perf_callchain_entry_ctx *ctx, u64 ip)
1747 {
1748 if (ctx->nr < ctx->max_stack && !ctx->contexts_maxed) {
1749 struct perf_callchain_entry *entry = ctx->entry;
1750
1751 entry->ip[entry->nr++] = ip;
1752 ++ctx->nr;
1753 return 0;
1754 } else {
1755 return -1; /* no more room, stop walking the stack */
1756 }
1757 }
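
/*
 * Illustrative only: an architecture's callchain walker records one entry
 * per frame until perf_callchain_store() reports that the entry buffer is
 * full. The example_* frame-walking helpers are hypothetical:
 *
 *	void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
 *				   struct pt_regs *regs)
 *	{
 *		struct example_stackframe frame;
 *
 *		example_frame_init(&frame, regs);
 *		do {
 *			if (perf_callchain_store(entry, frame.pc))
 *				break;
 *		} while (!example_unwind_frame(&frame));
 *	}
 */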
1758
1759 extern int sysctl_perf_event_paranoid;
1760 extern int sysctl_perf_event_sample_rate;
1761
1762 extern void perf_sample_event_took(u64 sample_len_ns);
1763
1764 /* Access to perf_event_open(2) syscall. */
1765 #define PERF_SECURITY_OPEN 0
1766
1767 /* Finer grained perf_event_open(2) access control. */
1768 #define PERF_SECURITY_CPU 1
1769 #define PERF_SECURITY_KERNEL 2
1770 #define PERF_SECURITY_TRACEPOINT 3
1771
static inline int perf_is_paranoid(void)
1773 {
1774 return sysctl_perf_event_paranoid > -1;
1775 }
1776
1777 extern int perf_allow_kernel(void);
1778
static inline int perf_allow_cpu(void)
1780 {
1781 if (sysctl_perf_event_paranoid > 0 && !perfmon_capable())
1782 return -EACCES;
1783
1784 return security_perf_event_open(PERF_SECURITY_CPU);
1785 }
1786
static inline int perf_allow_tracepoint(void)
1788 {
1789 if (sysctl_perf_event_paranoid > -1 && !perfmon_capable())
1790 return -EPERM;
1791
1792 return security_perf_event_open(PERF_SECURITY_TRACEPOINT);
1793 }
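
/*
 * Illustrative sketch (not part of this header): open paths are expected to
 * consult these helpers before granting broader access, e.g. for an event
 * that wants kernel-side samples:
 *
 *	if (!attr->exclude_kernel) {
 *		err = perf_allow_kernel();
 *		if (err)
 *			return err;	// blocked by perf_event_paranoid / LSM
 *	}
 */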

extern int perf_exclude_event(struct perf_event *event, struct pt_regs *regs);

extern void perf_event_init(void);
extern void perf_tp_event(u16 event_type, u64 count, void *record,
			  int entry_size, struct pt_regs *regs,
			  struct hlist_head *head, int rctx,
			  struct task_struct *task);
extern void perf_bp_event(struct perf_event *event, void *data);

extern unsigned long perf_misc_flags(struct perf_event *event, struct pt_regs *regs);
extern unsigned long perf_instruction_pointer(struct perf_event *event,
					      struct pt_regs *regs);

#ifndef perf_arch_misc_flags
# define perf_arch_misc_flags(regs) \
		(user_mode(regs) ? PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL)
# define perf_arch_instruction_pointer(regs)	instruction_pointer(regs)
#endif
#ifndef perf_arch_bpf_user_pt_regs
# define perf_arch_bpf_user_pt_regs(regs) regs
#endif

#ifndef perf_arch_guest_misc_flags
static inline unsigned long perf_arch_guest_misc_flags(struct pt_regs *regs)
{
	unsigned long guest_state = perf_guest_state();

	if (!(guest_state & PERF_GUEST_ACTIVE))
		return 0;

	if (guest_state & PERF_GUEST_USER)
		return PERF_RECORD_MISC_GUEST_USER;
	else
		return PERF_RECORD_MISC_GUEST_KERNEL;
}
# define perf_arch_guest_misc_flags(regs)	perf_arch_guest_misc_flags(regs)
#endif
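
/*
 * Illustrative sketch (not part of this header): an architecture overrides
 * the fallbacks above by providing its own macros in <asm/perf_event.h>,
 * e.g. (arch_perf_misc_flags()/arch_perf_ip() are hypothetical helpers):
 *
 *	#define perf_arch_misc_flags(regs)		arch_perf_misc_flags(regs)
 *	#define perf_arch_instruction_pointer(regs)	arch_perf_ip(regs)
 */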

static inline bool needs_branch_stack(struct perf_event *event)
{
	return event->attr.branch_sample_type != 0;
}

static inline bool has_aux(struct perf_event *event)
{
	return event->pmu && event->pmu->setup_aux;
}

static inline bool has_aux_action(struct perf_event *event)
{
	return event->attr.aux_sample_size ||
	       event->attr.aux_pause ||
	       event->attr.aux_resume;
}

static inline bool is_write_backward(struct perf_event *event)
{
	return !!event->attr.write_backward;
}

static inline bool has_addr_filter(struct perf_event *event)
{
	return event->pmu->nr_addr_filters;
}

/*
 * An inherited event uses its parent's filters
 */
static inline struct perf_addr_filters_head *
perf_event_addr_filters(struct perf_event *event)
{
	struct perf_addr_filters_head *ifh = &event->addr_filters;

	if (event->parent)
		ifh = &event->parent->addr_filters;

	return ifh;
}

static inline struct fasync_struct **perf_event_fasync(struct perf_event *event)
{
	/* Only the parent has fasync state */
	if (event->parent)
		event = event->parent;
	return &event->fasync;
}

extern void perf_event_addr_filters_sync(struct perf_event *event);
extern void perf_report_aux_output_id(struct perf_event *event, u64 hw_id);

extern int perf_output_begin(struct perf_output_handle *handle,
			     struct perf_sample_data *data,
			     struct perf_event *event, unsigned int size);
extern int perf_output_begin_forward(struct perf_output_handle *handle,
				     struct perf_sample_data *data,
				     struct perf_event *event,
				     unsigned int size);
extern int perf_output_begin_backward(struct perf_output_handle *handle,
				      struct perf_sample_data *data,
				      struct perf_event *event,
				      unsigned int size);

extern void perf_output_end(struct perf_output_handle *handle);
extern unsigned int perf_output_copy(struct perf_output_handle *handle,
				     const void *buf, unsigned int len);
extern unsigned int perf_output_skip(struct perf_output_handle *handle,
				     unsigned int len);
extern long perf_output_copy_aux(struct perf_output_handle *aux_handle,
				 struct perf_output_handle *handle,
				 unsigned long from, unsigned long to);
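
/*
 * Illustrative sketch (not part of this header): emitting a record follows a
 * begin/put/end pattern around the ring buffer; perf_output_put() (defined
 * later in this header) copies one object into the buffer. The header below
 * is caller-built and only an example:
 *
 *	struct perf_output_handle handle;
 *	struct perf_sample_data sample;
 *	int err;
 *
 *	err = perf_output_begin(&handle, &sample, event, header.size);
 *	if (err)
 *		return;			// no space / buffer not available
 *	perf_output_put(&handle, header);
 *	// ... perf_output_copy() / perf_output_skip() for the payload ...
 *	perf_output_end(&handle);
 */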
extern int perf_swevent_get_recursion_context(void);
extern void perf_swevent_put_recursion_context(int rctx);
extern u64 perf_swevent_set_period(struct perf_event *event);
extern void perf_event_enable(struct perf_event *event);
extern void perf_event_disable(struct perf_event *event);
extern void perf_event_disable_local(struct perf_event *event);
extern void perf_event_disable_inatomic(struct perf_event *event);
extern void perf_event_task_tick(void);
extern int perf_event_account_interrupt(struct perf_event *event);
extern int perf_event_period(struct perf_event *event, u64 value);
extern u64 perf_event_pause(struct perf_event *event, bool reset);

#else /* !CONFIG_PERF_EVENTS: */

static inline void *
perf_aux_output_begin(struct perf_output_handle *handle,
		      struct perf_event *event)				{ return NULL; }
static inline void
perf_aux_output_end(struct perf_output_handle *handle, unsigned long size)
									{ }
static inline int
perf_aux_output_skip(struct perf_output_handle *handle,
		     unsigned long size)				{ return -EINVAL; }
static inline void *
perf_get_aux(struct perf_output_handle *handle)				{ return NULL; }
static inline void
perf_event_task_migrate(struct task_struct *task)			{ }
static inline void
perf_event_task_sched_in(struct task_struct *prev,
			 struct task_struct *task)			{ }
static inline void
perf_event_task_sched_out(struct task_struct *prev,
			  struct task_struct *next)			{ }
static inline int perf_event_init_task(struct task_struct *child,
				       u64 clone_flags)			{ return 0; }
static inline void perf_event_exit_task(struct task_struct *child)	{ }
static inline void perf_event_free_task(struct task_struct *task)	{ }
static inline void perf_event_delayed_put(struct task_struct *task)	{ }
static inline struct file *perf_event_get(unsigned int fd)		{ return ERR_PTR(-EINVAL); }
static inline const struct perf_event *perf_get_event(struct file *file)
{
	return ERR_PTR(-EINVAL);
}
static inline const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
{
	return ERR_PTR(-EINVAL);
}
static inline int perf_event_read_local(struct perf_event *event, u64 *value,
					u64 *enabled, u64 *running)
{
	return -EINVAL;
}
static inline void perf_event_print_debug(void)				{ }
static inline int perf_event_task_disable(void)				{ return -EINVAL; }
static inline int perf_event_task_enable(void)				{ return -EINVAL; }
static inline int perf_event_refresh(struct perf_event *event, int refresh)
{
	return -EINVAL;
}

static inline void
perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)	{ }
static inline void
perf_bp_event(struct perf_event *event, void *data)			{ }

static inline void perf_event_mmap(struct vm_area_struct *vma)		{ }

typedef int (perf_ksymbol_get_name_f)(char *name, int name_len, void *data);
static inline void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len,
				      bool unregister, const char *sym)	{ }
static inline void perf_event_bpf_event(struct bpf_prog *prog,
					enum perf_bpf_event_type type,
					u16 flags)			{ }
static inline void perf_event_exec(void)				{ }
static inline void perf_event_comm(struct task_struct *tsk, bool exec)	{ }
static inline void perf_event_namespaces(struct task_struct *tsk)	{ }
static inline void perf_event_fork(struct task_struct *tsk)		{ }
static inline void perf_event_text_poke(const void *addr,
					const void *old_bytes,
					size_t old_len,
					const void *new_bytes,
					size_t new_len)			{ }
static inline void perf_event_init(void)				{ }
static inline int perf_swevent_get_recursion_context(void)		{ return -1; }
static inline void perf_swevent_put_recursion_context(int rctx)		{ }
static inline u64 perf_swevent_set_period(struct perf_event *event)	{ return 0; }
static inline void perf_event_enable(struct perf_event *event)		{ }
static inline void perf_event_disable(struct perf_event *event)		{ }
static inline int __perf_event_disable(void *info)			{ return -1; }
static inline void perf_event_task_tick(void)				{ }
static inline int perf_event_release_kernel(struct perf_event *event)	{ return 0; }
static inline int
perf_event_period(struct perf_event *event, u64 value)			{ return -EINVAL; }
static inline u64
perf_event_pause(struct perf_event *event, bool reset)			{ return 0; }
static inline int
perf_exclude_event(struct perf_event *event, struct pt_regs *regs)	{ return 0; }

#endif /* !CONFIG_PERF_EVENTS */

#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
extern void perf_restore_debug_store(void);
#else
static inline void perf_restore_debug_store(void)			{ }
#endif

#define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x))

struct perf_pmu_events_attr {
	struct device_attribute	attr;
	u64			id;
	const char		*event_str;
};

struct perf_pmu_events_ht_attr {
	struct device_attribute	attr;
	u64			id;
	const char		*event_str_ht;
	const char		*event_str_noht;
};

struct perf_pmu_events_hybrid_attr {
	struct device_attribute	attr;
	u64			id;
	const char		*event_str;
	u64			pmu_type;
};

struct perf_pmu_format_hybrid_attr {
	struct device_attribute	attr;
	u64			pmu_type;
};

ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr,
			      char *page);

#define PMU_EVENT_ATTR(_name, _var, _id, _show)				\
static struct perf_pmu_events_attr _var = {				\
	.attr = __ATTR(_name, 0444, _show, NULL),			\
	.id   =  _id,							\
};

#define PMU_EVENT_ATTR_STRING(_name, _var, _str)			\
static struct perf_pmu_events_attr _var = {				\
	.attr		= __ATTR(_name, 0444, perf_event_sysfs_show, NULL), \
	.id		= 0,						\
	.event_str	= _str,						\
};

#define PMU_EVENT_ATTR_ID(_name, _show, _id)				\
	(&((struct perf_pmu_events_attr[]) {				\
		{ .attr = __ATTR(_name, 0444, _show, NULL),		\
		  .id = _id, }						\
	})[0].attr.attr)

#define PMU_FORMAT_ATTR_SHOW(_name, _format)				\
static ssize_t								\
_name##_show(struct device *dev,					\
	     struct device_attribute *attr,				\
	     char *page)						\
{									\
	BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);			\
	return sprintf(page, _format "\n");				\
}									\

#define PMU_FORMAT_ATTR(_name, _format)					\
	PMU_FORMAT_ATTR_SHOW(_name, _format)				\
									\
static struct device_attribute format_attr_##_name = __ATTR_RO(_name)
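
/*
 * Illustrative sketch (not part of this header): PMU drivers use these macros
 * to declare their sysfs "events" and "format" attributes. The names and
 * encodings below are examples only, not definitions from this header:
 *
 *	PMU_FORMAT_ATTR(event, "config:0-7");
 *	PMU_EVENT_ATTR_STRING(cpu-cycles, evattr_cpu_cycles, "event=0x3c");
 */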

/* Performance counter hotplug functions */
#ifdef CONFIG_PERF_EVENTS
extern int perf_event_init_cpu(unsigned int cpu);
extern int perf_event_exit_cpu(unsigned int cpu);
#else
# define perf_event_init_cpu	NULL
# define perf_event_exit_cpu	NULL
#endif

extern void arch_perf_update_userpage(struct perf_event *event,
				      struct perf_event_mmap_page *userpg,
				      u64 now);

/*
 * Snapshot branch stack on software events.
 *
 * Branch stack can be very useful in understanding software events. For
 * example, when a long function, e.g. sys_perf_event_open, returns an
 * errno, it is not obvious why the function failed. Branch stack can
 * provide very helpful information in such scenarios.
 *
 * On a software event, it is necessary to stop the hardware branch recorder
 * quickly. Otherwise, the hardware register/buffer will be flushed with
 * entries of the triggering event. Therefore, a static call is used to
 * stop the hardware recorder.
 */

/*
 * cnt is the number of entries allocated for entries[].
 * Returns the number of entries copied into entries[].
 */
typedef int (perf_snapshot_branch_stack_t)(struct perf_branch_entry *entries,
					   unsigned int cnt);
DECLARE_STATIC_CALL(perf_snapshot_branch_stack, perf_snapshot_branch_stack_t);
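
/*
 * Illustrative sketch (not part of this header): a software-event consumer
 * (e.g. a BPF branch-snapshot helper) invokes the static call as early as
 * possible in its handler so the recorder is stopped before it is flushed:
 *
 *	struct perf_branch_entry entries[16];	// sizing is the caller's choice
 *	int copied;
 *
 *	copied = static_call(perf_snapshot_branch_stack)(entries,
 *							 ARRAY_SIZE(entries));
 *	// entries[0..copied-1] now hold the snapshot
 */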

#ifndef PERF_NEEDS_LOPWR_CB
static inline void perf_lopwr_cb(bool mode)
{
}
#endif

#endif /* _LINUX_PERF_EVENT_H */