xref: /linux/include/linux/perf_event.h (revision 17ef32ae66b1afc9fa6dbea40eb18a13edba9c31)
1 /*
2  * Performance events:
3  *
4  *    Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de>
5  *    Copyright (C) 2008-2011, Red Hat, Inc., Ingo Molnar
6  *    Copyright (C) 2008-2011, Red Hat, Inc., Peter Zijlstra
7  *
8  * Data type definitions, declarations, prototypes.
9  *
10  *    Started by: Thomas Gleixner and Ingo Molnar
11  *
12  * For licencing details see kernel-base/COPYING
13  */
14 #ifndef _LINUX_PERF_EVENT_H
15 #define _LINUX_PERF_EVENT_H
16 
17 #include <uapi/linux/perf_event.h>
18 #include <uapi/linux/bpf_perf_event.h>
19 
20 /*
21  * Kernel-internal data types and definitions:
22  */
23 
24 #ifdef CONFIG_PERF_EVENTS
25 # include <asm/perf_event.h>
26 # include <asm/local64.h>
27 #endif
28 
29 #ifdef CONFIG_HAVE_HW_BREAKPOINT
30 # include <linux/rhashtable-types.h>
31 # include <asm/hw_breakpoint.h>
32 #endif
33 
34 #include <linux/list.h>
35 #include <linux/mutex.h>
36 #include <linux/rculist.h>
37 #include <linux/rcupdate.h>
38 #include <linux/spinlock.h>
39 #include <linux/hrtimer.h>
40 #include <linux/fs.h>
41 #include <linux/pid_namespace.h>
42 #include <linux/workqueue.h>
43 #include <linux/ftrace.h>
44 #include <linux/cpu.h>
45 #include <linux/irq_work.h>
46 #include <linux/static_key.h>
47 #include <linux/jump_label_ratelimit.h>
48 #include <linux/atomic.h>
49 #include <linux/sysfs.h>
50 #include <linux/perf_regs.h>
51 #include <linux/cgroup.h>
52 #include <linux/refcount.h>
53 #include <linux/security.h>
54 #include <linux/static_call.h>
55 #include <linux/lockdep.h>
56 
57 #include <asm/local.h>
58 
59 struct perf_callchain_entry {
60 	u64				nr;
61 	u64				ip[]; /* /proc/sys/kernel/perf_event_max_stack */
62 };
63 
64 struct perf_callchain_entry_ctx {
65 	struct perf_callchain_entry	*entry;
66 	u32				max_stack;
67 	u32				nr;
68 	short				contexts;
69 	bool				contexts_maxed;
70 };
71 
72 typedef unsigned long (*perf_copy_f)(void *dst, const void *src,
73 				     unsigned long off, unsigned long len);
74 
75 struct perf_raw_frag {
76 	union {
77 		struct perf_raw_frag	*next;
78 		unsigned long		pad;
79 	};
80 	perf_copy_f			copy;
81 	void				*data;
82 	u32				size;
83 } __packed;
84 
85 struct perf_raw_record {
86 	struct perf_raw_frag		frag;
87 	u32				size;
88 };
89 
perf_raw_frag_last(const struct perf_raw_frag * frag)90 static __always_inline bool perf_raw_frag_last(const struct perf_raw_frag *frag)
91 {
92 	return frag->pad < sizeof(u64);
93 }
94 
95 /*
96  * branch stack layout:
97  *  nr: number of taken branches stored in entries[]
98  *  hw_idx: The low level index of raw branch records
99  *          for the most recent branch.
100  *          -1ULL means invalid/unknown.
101  *
102  * Note that nr can vary from sample to sample
103  * branches (to, from) are stored from most recent
104  * to least recent, i.e., entries[0] contains the most
105  * recent branch.
106  * The entries[] is an abstraction of raw branch records,
107  * which may not be stored in age order in HW, e.g. Intel LBR.
108  * The hw_idx is to expose the low level index of raw
109  * branch record for the most recent branch aka entries[0].
110  * The hw_idx index is between -1 (unknown) and max depth,
111  * which can be retrieved in /sys/devices/cpu/caps/branches.
112  * For the architectures whose raw branch records are
113  * already stored in age order, the hw_idx should be 0.
114  */
115 struct perf_branch_stack {
116 	u64				nr;
117 	u64				hw_idx;
118 	struct perf_branch_entry	entries[];
119 };
120 
121 struct task_struct;
122 
123 /*
124  * extra PMU register associated with an event
125  */
126 struct hw_perf_event_extra {
127 	u64				config;	/* register value */
128 	unsigned int			reg;	/* register address or index */
129 	int				alloc;	/* extra register already allocated */
130 	int				idx;	/* index in shared_regs->regs[] */
131 };
132 
133 /**
134  * hw_perf_event::flag values
135  *
136  * PERF_EVENT_FLAG_ARCH bits are reserved for architecture-specific
137  * usage.
138  */
139 #define PERF_EVENT_FLAG_ARCH		0x0fffffff
140 #define PERF_EVENT_FLAG_USER_READ_CNT	0x80000000
141 
142 static_assert((PERF_EVENT_FLAG_USER_READ_CNT & PERF_EVENT_FLAG_ARCH) == 0);
143 
144 /**
145  * struct hw_perf_event - performance event hardware details:
146  */
147 struct hw_perf_event {
148 #ifdef CONFIG_PERF_EVENTS
149 	union {
150 		struct { /* hardware */
151 			u64		config;
152 			u64		config1;
153 			u64		last_tag;
154 			u64		dyn_constraint;
155 			unsigned long	config_base;
156 			unsigned long	event_base;
157 			int		event_base_rdpmc;
158 			int		idx;
159 			int		last_cpu;
160 			int		flags;
161 
162 			struct hw_perf_event_extra extra_reg;
163 			struct hw_perf_event_extra branch_reg;
164 		};
165 		struct { /* aux / Intel-PT */
166 			u64		aux_config;
167 			/*
168 			 * For AUX area events, aux_paused cannot be a state
169 			 * flag because it can be updated asynchronously to
170 			 * state.
171 			 */
172 			unsigned int	aux_paused;
173 		};
174 		struct { /* software */
175 			struct hrtimer	hrtimer;
176 		};
177 		struct { /* tracepoint */
178 			/* for tp_event->class */
179 			struct list_head	tp_list;
180 		};
181 		struct { /* amd_power */
182 			u64	pwr_acc;
183 			u64	ptsc;
184 		};
185 #ifdef CONFIG_HAVE_HW_BREAKPOINT
186 		struct { /* breakpoint */
187 			/*
188 			 * Crufty hack to avoid the chicken and egg
189 			 * problem hw_breakpoint has with context
190 			 * creation and event initalization.
191 			 */
192 			struct arch_hw_breakpoint	info;
193 			struct rhlist_head		bp_list;
194 		};
195 #endif
196 		struct { /* amd_iommu */
197 			u8	iommu_bank;
198 			u8	iommu_cntr;
199 			u16	padding;
200 			u64	conf;
201 			u64	conf1;
202 		};
203 	};
204 	/*
205 	 * If the event is a per task event, this will point to the task in
206 	 * question. See the comment in perf_event_alloc().
207 	 */
208 	struct task_struct		*target;
209 
210 	/*
211 	 * PMU would store hardware filter configuration
212 	 * here.
213 	 */
214 	void				*addr_filters;
215 
216 	/* Last sync'ed generation of filters */
217 	unsigned long			addr_filters_gen;
218 
219 /*
220  * hw_perf_event::state flags; used to track the PERF_EF_* state.
221  */
222 
223 /* the counter is stopped */
224 #define PERF_HES_STOPPED		0x01
225 
226 /* event->count up-to-date */
227 #define PERF_HES_UPTODATE		0x02
228 
229 #define PERF_HES_ARCH			0x04
230 
231 	int				state;
232 
233 	/*
234 	 * The last observed hardware counter value, updated with a
235 	 * local64_cmpxchg() such that pmu::read() can be called nested.
236 	 */
237 	local64_t			prev_count;
238 
239 	/*
240 	 * The period to start the next sample with.
241 	 */
242 	u64				sample_period;
243 
244 	union {
245 		struct { /* Sampling */
246 			/*
247 			 * The period we started this sample with.
248 			 */
249 			u64				last_period;
250 
251 			/*
252 			 * However much is left of the current period;
253 			 * note that this is a full 64bit value and
254 			 * allows for generation of periods longer
255 			 * than hardware might allow.
256 			 */
257 			local64_t			period_left;
258 		};
259 		struct { /* Topdown events counting for context switch */
260 			u64				saved_metric;
261 			u64				saved_slots;
262 		};
263 	};
264 
265 	/*
266 	 * State for throttling the event, see __perf_event_overflow() and
267 	 * perf_adjust_freq_unthr_context().
268 	 */
269 	u64                             interrupts_seq;
270 	u64				interrupts;
271 
272 	/*
273 	 * State for freq target events, see __perf_event_overflow() and
274 	 * perf_adjust_freq_unthr_context().
275 	 */
276 	u64				freq_time_stamp;
277 	u64				freq_count_stamp;
278 #endif /* CONFIG_PERF_EVENTS */
279 };
280 
281 struct perf_event;
282 struct perf_event_pmu_context;
283 
284 /*
285  * Common implementation detail of pmu::{start,commit,cancel}_txn
286  */
287 
288 /* txn to add/schedule event on PMU */
289 #define PERF_PMU_TXN_ADD		0x1
290 
291 /* txn to read event group from PMU */
292 #define PERF_PMU_TXN_READ		0x2
293 
294 /**
295  * pmu::capabilities flags
296  */
297 #define PERF_PMU_CAP_NO_INTERRUPT	0x0001
298 #define PERF_PMU_CAP_NO_NMI		0x0002
299 #define PERF_PMU_CAP_AUX_NO_SG		0x0004
300 #define PERF_PMU_CAP_EXTENDED_REGS	0x0008
301 #define PERF_PMU_CAP_EXCLUSIVE		0x0010
302 #define PERF_PMU_CAP_ITRACE		0x0020
303 #define PERF_PMU_CAP_NO_EXCLUDE		0x0040
304 #define PERF_PMU_CAP_AUX_OUTPUT		0x0080
305 #define PERF_PMU_CAP_EXTENDED_HW_TYPE	0x0100
306 #define PERF_PMU_CAP_AUX_PAUSE		0x0200
307 #define PERF_PMU_CAP_AUX_PREFER_LARGE	0x0400
308 
309 /**
310  * pmu::scope
311  */
312 enum perf_pmu_scope {
313 	PERF_PMU_SCOPE_NONE = 0,
314 	PERF_PMU_SCOPE_CORE,
315 	PERF_PMU_SCOPE_DIE,
316 	PERF_PMU_SCOPE_CLUSTER,
317 	PERF_PMU_SCOPE_PKG,
318 	PERF_PMU_SCOPE_SYS_WIDE,
319 	PERF_PMU_MAX_SCOPE,
320 };
321 
322 struct perf_output_handle;
323 
324 #define PMU_NULL_DEV	((void *)(~0UL))
325 
326 /**
327  * struct pmu - generic performance monitoring unit
328  */
329 struct pmu {
330 	struct list_head		entry;
331 
332 	spinlock_t			events_lock;
333 	struct list_head		events;
334 
335 	struct module			*module;
336 	struct device			*dev;
337 	struct device			*parent;
338 	const struct attribute_group	**attr_groups;
339 	const struct attribute_group	**attr_update;
340 	const char			*name;
341 	int				type;
342 
343 	/*
344 	 * various common per-pmu feature flags
345 	 */
346 	int				capabilities;
347 
348 	/*
349 	 * PMU scope
350 	 */
351 	unsigned int			scope;
352 
353 	struct perf_cpu_pmu_context * __percpu *cpu_pmu_context;
354 	atomic_t			exclusive_cnt; /* < 0: cpu; > 0: tsk */
355 	int				task_ctx_nr;
356 	int				hrtimer_interval_ms;
357 
358 	/* number of address filters this PMU can do */
359 	unsigned int			nr_addr_filters;
360 
361 	/*
362 	 * Fully disable/enable this PMU, can be used to protect from the PMI
363 	 * as well as for lazy/batch writing of the MSRs.
364 	 */
365 	void (*pmu_enable)		(struct pmu *pmu); /* optional */
366 	void (*pmu_disable)		(struct pmu *pmu); /* optional */
367 
368 	/*
369 	 * Try and initialize the event for this PMU.
370 	 *
371 	 * Returns:
372 	 *  -ENOENT	-- @event is not for this PMU
373 	 *
374 	 *  -ENODEV	-- @event is for this PMU but PMU not present
375 	 *  -EBUSY	-- @event is for this PMU but PMU temporarily unavailable
376 	 *  -EINVAL	-- @event is for this PMU but @event is not valid
377 	 *  -EOPNOTSUPP -- @event is for this PMU, @event is valid, but not supported
378 	 *  -EACCES	-- @event is for this PMU, @event is valid, but no privileges
379 	 *
380 	 *  0		-- @event is for this PMU and valid
381 	 *
382 	 * Other error return values are allowed.
383 	 */
384 	int (*event_init)		(struct perf_event *event);
385 
386 	/*
387 	 * Notification that the event was mapped or unmapped.  Called
388 	 * in the context of the mapping task.
389 	 */
390 	void (*event_mapped)		(struct perf_event *event, struct mm_struct *mm); /* optional */
391 	void (*event_unmapped)		(struct perf_event *event, struct mm_struct *mm); /* optional */
392 
393 	/*
394 	 * Flags for ->add()/->del()/ ->start()/->stop(). There are
395 	 * matching hw_perf_event::state flags.
396 	 */
397 
398 /* start the counter when adding    */
399 #define PERF_EF_START			0x01
400 
401 /* reload the counter when starting */
402 #define PERF_EF_RELOAD			0x02
403 
404 /* update the counter when stopping */
405 #define PERF_EF_UPDATE			0x04
406 
407 /* AUX area event, pause tracing */
408 #define PERF_EF_PAUSE			0x08
409 
410 /* AUX area event, resume tracing */
411 #define PERF_EF_RESUME			0x10
412 
413 	/*
414 	 * Adds/Removes a counter to/from the PMU, can be done inside a
415 	 * transaction, see the ->*_txn() methods.
416 	 *
417 	 * The add/del callbacks will reserve all hardware resources required
418 	 * to service the event, this includes any counter constraint
419 	 * scheduling etc.
420 	 *
421 	 * Called with IRQs disabled and the PMU disabled on the CPU the event
422 	 * is on.
423 	 *
424 	 * ->add() called without PERF_EF_START should result in the same state
425 	 *  as ->add() followed by ->stop().
426 	 *
427 	 * ->del() must always PERF_EF_UPDATE stop an event. If it calls
428 	 *  ->stop() that must deal with already being stopped without
429 	 *  PERF_EF_UPDATE.
430 	 */
431 	int  (*add)			(struct perf_event *event, int flags);
432 	void (*del)			(struct perf_event *event, int flags);
433 
434 	/*
435 	 * Starts/Stops a counter present on the PMU.
436 	 *
437 	 * The PMI handler should stop the counter when perf_event_overflow()
438 	 * returns !0. ->start() will be used to continue.
439 	 *
440 	 * Also used to change the sample period.
441 	 *
442 	 * Called with IRQs disabled and the PMU disabled on the CPU the event
443 	 * is on -- will be called from NMI context with the PMU generates
444 	 * NMIs.
445 	 *
446 	 * ->stop() with PERF_EF_UPDATE will read the counter and update
447 	 *  period/count values like ->read() would.
448 	 *
449 	 * ->start() with PERF_EF_RELOAD will reprogram the counter
450 	 *  value, must be preceded by a ->stop() with PERF_EF_UPDATE.
451 	 *
452 	 * ->stop() with PERF_EF_PAUSE will stop as simply as possible. Will not
453 	 * overlap another ->stop() with PERF_EF_PAUSE nor ->start() with
454 	 * PERF_EF_RESUME.
455 	 *
456 	 * ->start() with PERF_EF_RESUME will start as simply as possible but
457 	 * only if the counter is not otherwise stopped. Will not overlap
458 	 * another ->start() with PERF_EF_RESUME nor ->stop() with
459 	 * PERF_EF_PAUSE.
460 	 *
461 	 * Notably, PERF_EF_PAUSE/PERF_EF_RESUME *can* be concurrent with other
462 	 * ->stop()/->start() invocations, just not itself.
463 	 */
464 	void (*start)			(struct perf_event *event, int flags);
465 	void (*stop)			(struct perf_event *event, int flags);
466 
467 	/*
468 	 * Updates the counter value of the event.
469 	 *
470 	 * For sampling capable PMUs this will also update the software period
471 	 * hw_perf_event::period_left field.
472 	 */
473 	void (*read)			(struct perf_event *event);
474 
475 	/*
476 	 * Group events scheduling is treated as a transaction, add
477 	 * group events as a whole and perform one schedulability test.
478 	 * If the test fails, roll back the whole group
479 	 *
480 	 * Start the transaction, after this ->add() doesn't need to
481 	 * do schedulability tests.
482 	 *
483 	 * Optional.
484 	 */
485 	void (*start_txn)		(struct pmu *pmu, unsigned int txn_flags);
486 	/*
487 	 * If ->start_txn() disabled the ->add() schedulability test
488 	 * then ->commit_txn() is required to perform one. On success
489 	 * the transaction is closed. On error the transaction is kept
490 	 * open until ->cancel_txn() is called.
491 	 *
492 	 * Optional.
493 	 */
494 	int  (*commit_txn)		(struct pmu *pmu);
495 	/*
496 	 * Will cancel the transaction, assumes ->del() is called
497 	 * for each successful ->add() during the transaction.
498 	 *
499 	 * Optional.
500 	 */
501 	void (*cancel_txn)		(struct pmu *pmu);
502 
503 	/*
504 	 * Will return the value for perf_event_mmap_page::index for this event,
505 	 * if no implementation is provided it will default to 0 (see
506 	 * perf_event_idx_default).
507 	 */
508 	int (*event_idx)		(struct perf_event *event); /*optional */
509 
510 	/*
511 	 * context-switches callback
512 	 */
513 	void (*sched_task)		(struct perf_event_pmu_context *pmu_ctx,
514 					 struct task_struct *task, bool sched_in);
515 
516 	/*
517 	 * Kmem cache of PMU specific data
518 	 */
519 	struct kmem_cache		*task_ctx_cache;
520 
521 	/*
522 	 * Set up pmu-private data structures for an AUX area
523 	 */
524 	void *(*setup_aux)		(struct perf_event *event, void **pages,
525 					 int nr_pages, bool overwrite);
526 					/* optional */
527 
528 	/*
529 	 * Free pmu-private AUX data structures
530 	 */
531 	void (*free_aux)		(void *aux); /* optional */
532 
533 	/*
534 	 * Take a snapshot of the AUX buffer without touching the event
535 	 * state, so that preempting ->start()/->stop() callbacks does
536 	 * not interfere with their logic. Called in PMI context.
537 	 *
538 	 * Returns the size of AUX data copied to the output handle.
539 	 *
540 	 * Optional.
541 	 */
542 	long (*snapshot_aux)		(struct perf_event *event,
543 					 struct perf_output_handle *handle,
544 					 unsigned long size);
545 
546 	/*
547 	 * Validate address range filters: make sure the HW supports the
548 	 * requested configuration and number of filters; return 0 if the
549 	 * supplied filters are valid, -errno otherwise.
550 	 *
551 	 * Runs in the context of the ioctl()ing process and is not serialized
552 	 * with the rest of the PMU callbacks.
553 	 */
554 	int (*addr_filters_validate)	(struct list_head *filters);
555 					/* optional */
556 
557 	/*
558 	 * Synchronize address range filter configuration:
559 	 * translate hw-agnostic filters into hardware configuration in
560 	 * event::hw::addr_filters.
561 	 *
562 	 * Runs as a part of filter sync sequence that is done in ->start()
563 	 * callback by calling perf_event_addr_filters_sync().
564 	 *
565 	 * May (and should) traverse event::addr_filters::list, for which its
566 	 * caller provides necessary serialization.
567 	 */
568 	void (*addr_filters_sync)	(struct perf_event *event);
569 					/* optional */
570 
571 	/*
572 	 * Check if event can be used for aux_output purposes for
573 	 * events of this PMU.
574 	 *
575 	 * Runs from perf_event_open(). Should return 0 for "no match"
576 	 * or non-zero for "match".
577 	 */
578 	int (*aux_output_match)		(struct perf_event *event);
579 					/* optional */
580 
581 	/*
582 	 * Skip programming this PMU on the given CPU. Typically needed for
583 	 * big.LITTLE things.
584 	 */
585 	bool (*filter)			(struct pmu *pmu, int cpu); /* optional */
586 
587 	/*
588 	 * Check period value for PERF_EVENT_IOC_PERIOD ioctl.
589 	 */
590 	int (*check_period)		(struct perf_event *event, u64 value); /* optional */
591 };
592 
593 enum perf_addr_filter_action_t {
594 	PERF_ADDR_FILTER_ACTION_STOP = 0,
595 	PERF_ADDR_FILTER_ACTION_START,
596 	PERF_ADDR_FILTER_ACTION_FILTER,
597 };
598 
599 /**
600  * struct perf_addr_filter - address range filter definition
601  * @entry:	event's filter list linkage
602  * @path:	object file's path for file-based filters
603  * @offset:	filter range offset
604  * @size:	filter range size (size==0 means single address trigger)
605  * @action:	filter/start/stop
606  *
607  * This is a hardware-agnostic filter configuration as specified by the user.
608  */
609 struct perf_addr_filter {
610 	struct list_head		entry;
611 	struct path			path;
612 	unsigned long			offset;
613 	unsigned long			size;
614 	enum perf_addr_filter_action_t	action;
615 };
616 
617 /**
618  * struct perf_addr_filters_head - container for address range filters
619  * @list:	list of filters for this event
620  * @lock:	spinlock that serializes accesses to the @list and event's
621  *		(and its children's) filter generations.
622  * @nr_file_filters:	number of file-based filters
623  *
624  * A child event will use parent's @list (and therefore @lock), so they are
625  * bundled together; see perf_event_addr_filters().
626  */
627 struct perf_addr_filters_head {
628 	struct list_head		list;
629 	raw_spinlock_t			lock;
630 	unsigned int			nr_file_filters;
631 };
632 
633 struct perf_addr_filter_range {
634 	unsigned long			start;
635 	unsigned long			size;
636 };
637 
638 /*
639  * The normal states are:
640  *
641  *            ACTIVE    --.
642  *               ^        |
643  *               |        |
644  *       sched_{in,out}() |
645  *               |        |
646  *               v        |
647  *      ,---> INACTIVE  --+ <-.
648  *      |                 |   |
649  *      |                {dis,en}able()
650  *   sched_in()           |   |
651  *      |       OFF    <--' --+
652  *      |                     |
653  *      `--->  ERROR    ------'
654  *
655  * That is:
656  *
657  * sched_in:       INACTIVE          -> {ACTIVE,ERROR}
658  * sched_out:      ACTIVE            -> INACTIVE
659  * disable:        {ACTIVE,INACTIVE} -> OFF
660  * enable:         {OFF,ERROR}       -> INACTIVE
661  *
662  * Where {OFF,ERROR} are disabled states.
663  *
664  * Then we have the {EXIT,REVOKED,DEAD} states which are various shades of
665  * defunct events:
666  *
667  *  - EXIT means task that the even was assigned to died, but child events
668  *    still live, and further children can still be created. But the event
669  *    itself will never be active again. It can only transition to
670  *    {REVOKED,DEAD};
671  *
672  *  - REVOKED means the PMU the event was associated with is gone; all
673  *    functionality is stopped but the event is still alive. Can only
674  *    transition to DEAD;
675  *
676  *  - DEAD event really is DYING tearing down state and freeing bits.
677  *
678  */
679 enum perf_event_state {
680 	PERF_EVENT_STATE_DEAD		= -5,
681 	PERF_EVENT_STATE_REVOKED	= -4, /* pmu gone, must not touch */
682 	PERF_EVENT_STATE_EXIT		= -3, /* task died, still inherit */
683 	PERF_EVENT_STATE_ERROR		= -2, /* scheduling error, can enable */
684 	PERF_EVENT_STATE_OFF		= -1,
685 	PERF_EVENT_STATE_INACTIVE	=  0,
686 	PERF_EVENT_STATE_ACTIVE		=  1,
687 };
688 
689 struct file;
690 struct perf_sample_data;
691 
692 typedef void (*perf_overflow_handler_t)(struct perf_event *,
693 					struct perf_sample_data *,
694 					struct pt_regs *regs);
695 
696 /*
697  * Event capabilities. For event_caps and groups caps.
698  *
699  * PERF_EV_CAP_SOFTWARE: Is a software event.
700  * PERF_EV_CAP_READ_ACTIVE_PKG: A CPU event (or cgroup event) that can be read
701  * from any CPU in the package where it is active.
702  * PERF_EV_CAP_SIBLING: An event with this flag must be a group sibling and
703  * cannot be a group leader. If an event with this flag is detached from the
704  * group it is scheduled out and moved into an unrecoverable ERROR state.
705  * PERF_EV_CAP_READ_SCOPE: A CPU event that can be read from any CPU of the
706  * PMU scope where it is active.
707  */
708 #define PERF_EV_CAP_SOFTWARE		BIT(0)
709 #define PERF_EV_CAP_READ_ACTIVE_PKG	BIT(1)
710 #define PERF_EV_CAP_SIBLING		BIT(2)
711 #define PERF_EV_CAP_READ_SCOPE		BIT(3)
712 
713 #define SWEVENT_HLIST_BITS		8
714 #define SWEVENT_HLIST_SIZE		(1 << SWEVENT_HLIST_BITS)
715 
716 struct swevent_hlist {
717 	struct hlist_head		heads[SWEVENT_HLIST_SIZE];
718 	struct rcu_head			rcu_head;
719 };
720 
721 #define PERF_ATTACH_CONTEXT		0x0001
722 #define PERF_ATTACH_GROUP		0x0002
723 #define PERF_ATTACH_TASK		0x0004
724 #define PERF_ATTACH_TASK_DATA		0x0008
725 #define PERF_ATTACH_GLOBAL_DATA		0x0010
726 #define PERF_ATTACH_SCHED_CB		0x0020
727 #define PERF_ATTACH_CHILD		0x0040
728 #define PERF_ATTACH_EXCLUSIVE		0x0080
729 #define PERF_ATTACH_CALLCHAIN		0x0100
730 #define PERF_ATTACH_ITRACE		0x0200
731 
732 struct bpf_prog;
733 struct perf_cgroup;
734 struct perf_buffer;
735 
736 struct pmu_event_list {
737 	raw_spinlock_t			lock;
738 	struct list_head		list;
739 };
740 
741 /*
742  * event->sibling_list is modified whole holding both ctx->lock and ctx->mutex
743  * as such iteration must hold either lock. However, since ctx->lock is an IRQ
744  * safe lock, and is only held by the CPU doing the modification, having IRQs
745  * disabled is sufficient since it will hold-off the IPIs.
746  */
747 #ifdef CONFIG_PROVE_LOCKING
748 # define lockdep_assert_event_ctx(event)			\
749 	WARN_ON_ONCE(__lockdep_enabled &&			\
750 		     (this_cpu_read(hardirqs_enabled) &&	\
751 		      lockdep_is_held(&(event)->ctx->mutex) != LOCK_STATE_HELD))
752 #else
753 # define lockdep_assert_event_ctx(event)
754 #endif
755 
756 #define for_each_sibling_event(sibling, event)			\
757 	lockdep_assert_event_ctx(event);			\
758 	if ((event)->group_leader == (event))			\
759 		list_for_each_entry((sibling), &(event)->sibling_list, sibling_list)
760 
761 /**
762  * struct perf_event - performance event kernel representation:
763  */
764 struct perf_event {
765 #ifdef CONFIG_PERF_EVENTS
766 	/*
767 	 * entry onto perf_event_context::event_list;
768 	 *   modifications require ctx->lock
769 	 *   RCU safe iterations.
770 	 */
771 	struct list_head		event_entry;
772 
773 	/*
774 	 * Locked for modification by both ctx->mutex and ctx->lock; holding
775 	 * either sufficies for read.
776 	 */
777 	struct list_head		sibling_list;
778 	struct list_head		active_list;
779 	/*
780 	 * Node on the pinned or flexible tree located at the event context;
781 	 */
782 	struct rb_node			group_node;
783 	u64				group_index;
784 	/*
785 	 * We need storage to track the entries in perf_pmu_migrate_context; we
786 	 * cannot use the event_entry because of RCU and we want to keep the
787 	 * group in tact which avoids us using the other two entries.
788 	 */
789 	struct list_head		migrate_entry;
790 
791 	struct hlist_node		hlist_entry;
792 	struct list_head		active_entry;
793 	int				nr_siblings;
794 
795 	/* Not serialized. Only written during event initialization. */
796 	int				event_caps;
797 	/* The cumulative AND of all event_caps for events in this group. */
798 	int				group_caps;
799 
800 	unsigned int			group_generation;
801 	struct perf_event		*group_leader;
802 	/*
803 	 * event->pmu will always point to pmu in which this event belongs.
804 	 * Whereas event->pmu_ctx->pmu may point to other pmu when group of
805 	 * different pmu events is created.
806 	 */
807 	struct pmu			*pmu;
808 	void				*pmu_private;
809 
810 	enum perf_event_state		state;
811 	unsigned int			attach_state;
812 	local64_t			count;
813 	atomic64_t			child_count;
814 
815 	/*
816 	 * These are the total time in nanoseconds that the event
817 	 * has been enabled (i.e. eligible to run, and the task has
818 	 * been scheduled in, if this is a per-task event)
819 	 * and running (scheduled onto the CPU), respectively.
820 	 */
821 	u64				total_time_enabled;
822 	u64				total_time_running;
823 	u64				tstamp;
824 
825 	struct perf_event_attr		attr;
826 	u16				header_size;
827 	u16				id_header_size;
828 	u16				read_size;
829 	struct hw_perf_event		hw;
830 
831 	struct perf_event_context	*ctx;
832 	/*
833 	 * event->pmu_ctx points to perf_event_pmu_context in which the event
834 	 * is added. This pmu_ctx can be of other pmu for sw event when that
835 	 * sw event is part of a group which also contains non-sw events.
836 	 */
837 	struct perf_event_pmu_context	*pmu_ctx;
838 	atomic_long_t			refcount;
839 
840 	/*
841 	 * These accumulate total time (in nanoseconds) that children
842 	 * events have been enabled and running, respectively.
843 	 */
844 	atomic64_t			child_total_time_enabled;
845 	atomic64_t			child_total_time_running;
846 
847 	/*
848 	 * Protect attach/detach and child_list:
849 	 */
850 	struct mutex			child_mutex;
851 	struct list_head		child_list;
852 	struct perf_event		*parent;
853 
854 	int				oncpu;
855 	int				cpu;
856 
857 	struct list_head		owner_entry;
858 	struct task_struct		*owner;
859 
860 	/* mmap bits */
861 	struct mutex			mmap_mutex;
862 	atomic_t			mmap_count;
863 
864 	struct perf_buffer		*rb;
865 	struct list_head		rb_entry;
866 	unsigned long			rcu_batches;
867 	int				rcu_pending;
868 
869 	/* poll related */
870 	wait_queue_head_t		waitq;
871 	struct fasync_struct		*fasync;
872 
873 	/* delayed work for NMIs and such */
874 	unsigned int			pending_wakeup;
875 	unsigned int			pending_kill;
876 	unsigned int			pending_disable;
877 	unsigned long			pending_addr;	/* SIGTRAP */
878 	struct irq_work			pending_irq;
879 	struct irq_work			pending_disable_irq;
880 	struct callback_head		pending_task;
881 	unsigned int			pending_work;
882 
883 	atomic_t			event_limit;
884 
885 	/* address range filters */
886 	struct perf_addr_filters_head	addr_filters;
887 	/* vma address array for file-based filders */
888 	struct perf_addr_filter_range	*addr_filter_ranges;
889 	unsigned long			addr_filters_gen;
890 
891 	/* for aux_output events */
892 	struct perf_event		*aux_event;
893 
894 	void (*destroy)(struct perf_event *);
895 	struct rcu_head			rcu_head;
896 
897 	struct pid_namespace		*ns;
898 	u64				id;
899 
900 	atomic64_t			lost_samples;
901 
902 	u64				(*clock)(void);
903 	perf_overflow_handler_t		overflow_handler;
904 	void				*overflow_handler_context;
905 	struct bpf_prog			*prog;
906 	u64				bpf_cookie;
907 
908 #ifdef CONFIG_EVENT_TRACING
909 	struct trace_event_call		*tp_event;
910 	struct event_filter		*filter;
911 # ifdef CONFIG_FUNCTION_TRACER
912 	struct ftrace_ops               ftrace_ops;
913 # endif
914 #endif
915 
916 #ifdef CONFIG_CGROUP_PERF
917 	struct perf_cgroup		*cgrp; /* cgroup event is attach to */
918 #endif
919 
920 #ifdef CONFIG_SECURITY
921 	void *security;
922 #endif
923 	struct list_head		sb_list;
924 	struct list_head		pmu_list;
925 
926 	/*
927 	 * Certain events gets forwarded to another pmu internally by over-
928 	 * writing kernel copy of event->attr.type without user being aware
929 	 * of it. event->orig_type contains original 'type' requested by
930 	 * user.
931 	 */
932 	u32				orig_type;
933 #endif /* CONFIG_PERF_EVENTS */
934 };
935 
936 /*
937  *           ,-----------------------[1:n]------------------------.
938  *           V                                                    V
939  * perf_event_context <-[1:n]-> perf_event_pmu_context <-[1:n]- perf_event
940  *                                        |                       |
941  *                                        `--[n:1]-> pmu <-[1:n]--'
942  *
943  *
944  * struct perf_event_pmu_context  lifetime is refcount based and RCU freed
945  * (similar to perf_event_context). Locking is as if it were a member of
946  * perf_event_context; specifically:
947  *
948  *   modification, both: ctx->mutex && ctx->lock
949  *   reading, either:    ctx->mutex || ctx->lock
950  *
951  * There is one exception to this; namely put_pmu_ctx() isn't always called
952  * with ctx->mutex held; this means that as long as we can guarantee the epc
953  * has events the above rules hold.
954  *
955  * Specificially, sys_perf_event_open()'s group_leader case depends on
956  * ctx->mutex pinning the configuration. Since we hold a reference on
957  * group_leader (through the filedesc) it can't go away, therefore it's
958  * associated pmu_ctx must exist and cannot change due to ctx->mutex.
959  *
960  * perf_event holds a refcount on perf_event_context
961  * perf_event holds a refcount on perf_event_pmu_context
962  */
963 struct perf_event_pmu_context {
964 	struct pmu			*pmu;
965 	struct perf_event_context       *ctx;
966 
967 	struct list_head		pmu_ctx_entry;
968 
969 	struct list_head		pinned_active;
970 	struct list_head		flexible_active;
971 
972 	/* Used to identify the per-cpu perf_event_pmu_context */
973 	unsigned int			embedded : 1;
974 
975 	unsigned int			nr_events;
976 	unsigned int			nr_cgroups;
977 	unsigned int			nr_freq;
978 
979 	atomic_t			refcount; /* event <-> epc */
980 	struct rcu_head			rcu_head;
981 
982 	/*
983 	 * Set when one or more (plausibly active) event can't be scheduled
984 	 * due to pmu overcommit or pmu constraints, except tolerant to
985 	 * events not necessary to be active due to scheduling constraints,
986 	 * such as cgroups.
987 	 */
988 	int				rotate_necessary;
989 };
990 
perf_pmu_ctx_is_active(struct perf_event_pmu_context * epc)991 static inline bool perf_pmu_ctx_is_active(struct perf_event_pmu_context *epc)
992 {
993 	return !list_empty(&epc->flexible_active) || !list_empty(&epc->pinned_active);
994 }
995 
996 struct perf_event_groups {
997 	struct rb_root			tree;
998 	u64				index;
999 };
1000 
1001 
1002 /**
1003  * struct perf_event_context - event context structure
1004  *
1005  * Used as a container for task events and CPU events as well:
1006  */
1007 struct perf_event_context {
1008 	/*
1009 	 * Protect the states of the events in the list,
1010 	 * nr_active, and the list:
1011 	 */
1012 	raw_spinlock_t			lock;
1013 	/*
1014 	 * Protect the list of events.  Locking either mutex or lock
1015 	 * is sufficient to ensure the list doesn't change; to change
1016 	 * the list you need to lock both the mutex and the spinlock.
1017 	 */
1018 	struct mutex			mutex;
1019 
1020 	struct list_head		pmu_ctx_list;
1021 	struct perf_event_groups	pinned_groups;
1022 	struct perf_event_groups	flexible_groups;
1023 	struct list_head		event_list;
1024 
1025 	int				nr_events;
1026 	int				nr_user;
1027 	int				is_active;
1028 
1029 	int				nr_stat;
1030 	int				nr_freq;
1031 	int				rotate_disable;
1032 
1033 	refcount_t			refcount; /* event <-> ctx */
1034 	struct task_struct		*task;
1035 
1036 	/*
1037 	 * Context clock, runs when context enabled.
1038 	 */
1039 	u64				time;
1040 	u64				timestamp;
1041 	u64				timeoffset;
1042 
1043 	/*
1044 	 * These fields let us detect when two contexts have both
1045 	 * been cloned (inherited) from a common ancestor.
1046 	 */
1047 	struct perf_event_context	*parent_ctx;
1048 	u64				parent_gen;
1049 	u64				generation;
1050 	int				pin_count;
1051 #ifdef CONFIG_CGROUP_PERF
1052 	int				nr_cgroups;	 /* cgroup evts */
1053 #endif
1054 	struct rcu_head			rcu_head;
1055 
1056 	/*
1057 	 * The count of events for which using the switch-out fast path
1058 	 * should be avoided.
1059 	 *
1060 	 * Sum (event->pending_work + events with
1061 	 *    (attr->inherit && (attr->sample_type & PERF_SAMPLE_READ)))
1062 	 *
1063 	 * The SIGTRAP is targeted at ctx->task, as such it won't do changing
1064 	 * that until the signal is delivered.
1065 	 */
1066 	local_t				nr_no_switch_fast;
1067 };
1068 
1069 /**
1070  * struct perf_ctx_data - PMU specific data for a task
1071  * @rcu_head:  To avoid the race on free PMU specific data
1072  * @refcount:  To track users
1073  * @global:    To track system-wide users
1074  * @ctx_cache: Kmem cache of PMU specific data
1075  * @data:      PMU specific data
1076  *
1077  * Currently, the struct is only used in Intel LBR call stack mode to
1078  * save/restore the call stack of a task on context switches.
1079  *
1080  * The rcu_head is used to prevent the race on free the data.
1081  * The data only be allocated when Intel LBR call stack mode is enabled.
1082  * The data will be freed when the mode is disabled.
1083  * The content of the data will only be accessed in context switch, which
1084  * should be protected by rcu_read_lock().
1085  *
1086  * Because of the alignment requirement of Intel Arch LBR, the Kmem cache
1087  * is used to allocate the PMU specific data. The ctx_cache is to track
1088  * the Kmem cache.
1089  *
1090  * Careful: Struct perf_ctx_data is added as a pointer in struct task_struct.
1091  * When system-wide Intel LBR call stack mode is enabled, a buffer with
1092  * constant size will be allocated for each task.
1093  * Also, system memory consumption can further grow when the size of
1094  * struct perf_ctx_data enlarges.
1095  */
1096 struct perf_ctx_data {
1097 	struct rcu_head			rcu_head;
1098 	refcount_t			refcount;
1099 	int				global;
1100 	struct kmem_cache		*ctx_cache;
1101 	void				*data;
1102 };
1103 
1104 struct perf_cpu_pmu_context {
1105 	struct perf_event_pmu_context	epc;
1106 	struct perf_event_pmu_context	*task_epc;
1107 
1108 	struct list_head		sched_cb_entry;
1109 	int				sched_cb_usage;
1110 
1111 	int				active_oncpu;
1112 	int				exclusive;
1113 	int				pmu_disable_count;
1114 
1115 	raw_spinlock_t			hrtimer_lock;
1116 	struct hrtimer			hrtimer;
1117 	ktime_t				hrtimer_interval;
1118 	unsigned int			hrtimer_active;
1119 };
1120 
1121 /**
1122  * struct perf_event_cpu_context - per cpu event context structure
1123  */
1124 struct perf_cpu_context {
1125 	struct perf_event_context	ctx;
1126 	struct perf_event_context	*task_ctx;
1127 	int				online;
1128 
1129 #ifdef CONFIG_CGROUP_PERF
1130 	struct perf_cgroup		*cgrp;
1131 #endif
1132 
1133 	/*
1134 	 * Per-CPU storage for iterators used in visit_groups_merge. The default
1135 	 * storage is of size 2 to hold the CPU and any CPU event iterators.
1136 	 */
1137 	int				heap_size;
1138 	struct perf_event		**heap;
1139 	struct perf_event		*heap_default[2];
1140 };
1141 
1142 struct perf_output_handle {
1143 	struct perf_event		*event;
1144 	struct perf_buffer		*rb;
1145 	unsigned long			wakeup;
1146 	unsigned long			size;
1147 	union {
1148 		u64			flags;		/* perf_output*() */
1149 		u64			aux_flags;	/* perf_aux_output*() */
1150 		struct {
1151 			u64		skip_read : 1;
1152 		};
1153 	};
1154 	union {
1155 		void			*addr;
1156 		unsigned long		head;
1157 	};
1158 	int				page;
1159 };
1160 
1161 struct bpf_perf_event_data_kern {
1162 	bpf_user_pt_regs_t *regs;
1163 	struct perf_sample_data *data;
1164 	struct perf_event *event;
1165 };
1166 
1167 #ifdef CONFIG_CGROUP_PERF
1168 
1169 /*
1170  * perf_cgroup_info keeps track of time_enabled for a cgroup.
1171  * This is a per-cpu dynamically allocated data structure.
1172  */
1173 struct perf_cgroup_info {
1174 	u64				time;
1175 	u64				timestamp;
1176 	u64				timeoffset;
1177 	int				active;
1178 };
1179 
1180 struct perf_cgroup {
1181 	struct cgroup_subsys_state	css;
1182 	struct perf_cgroup_info	__percpu *info;
1183 };
1184 
1185 /*
1186  * Must ensure cgroup is pinned (css_get) before calling
1187  * this function. In other words, we cannot call this function
1188  * if there is no cgroup event for the current CPU context.
1189  */
1190 static inline struct perf_cgroup *
perf_cgroup_from_task(struct task_struct * task,struct perf_event_context * ctx)1191 perf_cgroup_from_task(struct task_struct *task, struct perf_event_context *ctx)
1192 {
1193 	return container_of(task_css_check(task, perf_event_cgrp_id,
1194 					   ctx ? lockdep_is_held(&ctx->lock)
1195 					       : true),
1196 			    struct perf_cgroup, css);
1197 }
1198 #endif /* CONFIG_CGROUP_PERF */
1199 
1200 #ifdef CONFIG_PERF_EVENTS
1201 
1202 extern struct perf_event_context *perf_cpu_task_ctx(void);
1203 
1204 extern void *perf_aux_output_begin(struct perf_output_handle *handle,
1205 				   struct perf_event *event);
1206 extern void perf_aux_output_end(struct perf_output_handle *handle,
1207 				unsigned long size);
1208 extern int perf_aux_output_skip(struct perf_output_handle *handle,
1209 				unsigned long size);
1210 extern void *perf_get_aux(struct perf_output_handle *handle);
1211 extern void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags);
1212 extern void perf_event_itrace_started(struct perf_event *event);
1213 
1214 extern int perf_pmu_register(struct pmu *pmu, const char *name, int type);
1215 extern int perf_pmu_unregister(struct pmu *pmu);
1216 
1217 extern void __perf_event_task_sched_in(struct task_struct *prev,
1218 				       struct task_struct *task);
1219 extern void __perf_event_task_sched_out(struct task_struct *prev,
1220 					struct task_struct *next);
1221 extern int perf_event_init_task(struct task_struct *child, u64 clone_flags);
1222 extern void perf_event_exit_task(struct task_struct *child);
1223 extern void perf_event_free_task(struct task_struct *task);
1224 extern void perf_event_delayed_put(struct task_struct *task);
1225 extern struct file *perf_event_get(unsigned int fd);
1226 extern const struct perf_event *perf_get_event(struct file *file);
1227 extern const struct perf_event_attr *perf_event_attrs(struct perf_event *event);
1228 extern void perf_event_print_debug(void);
1229 extern void perf_pmu_disable(struct pmu *pmu);
1230 extern void perf_pmu_enable(struct pmu *pmu);
1231 extern void perf_sched_cb_dec(struct pmu *pmu);
1232 extern void perf_sched_cb_inc(struct pmu *pmu);
1233 extern int perf_event_task_disable(void);
1234 extern int perf_event_task_enable(void);
1235 
1236 extern void perf_pmu_resched(struct pmu *pmu);
1237 
1238 extern int perf_event_refresh(struct perf_event *event, int refresh);
1239 extern void perf_event_update_userpage(struct perf_event *event);
1240 extern int perf_event_release_kernel(struct perf_event *event);
1241 
1242 extern struct perf_event *
1243 perf_event_create_kernel_counter(struct perf_event_attr *attr,
1244 				 int cpu,
1245 				 struct task_struct *task,
1246 				 perf_overflow_handler_t callback,
1247 				 void *context);
1248 
1249 extern void perf_pmu_migrate_context(struct pmu *pmu,
1250 				     int src_cpu, int dst_cpu);
1251 extern int perf_event_read_local(struct perf_event *event, u64 *value,
1252 				 u64 *enabled, u64 *running);
1253 extern u64 perf_event_read_value(struct perf_event *event,
1254 				 u64 *enabled, u64 *running);
1255 
1256 extern struct perf_callchain_entry *perf_callchain(struct perf_event *event, struct pt_regs *regs);
1257 
branch_sample_no_flags(const struct perf_event * event)1258 static inline bool branch_sample_no_flags(const struct perf_event *event)
1259 {
1260 	return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_FLAGS;
1261 }
1262 
branch_sample_no_cycles(const struct perf_event * event)1263 static inline bool branch_sample_no_cycles(const struct perf_event *event)
1264 {
1265 	return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_CYCLES;
1266 }
1267 
branch_sample_type(const struct perf_event * event)1268 static inline bool branch_sample_type(const struct perf_event *event)
1269 {
1270 	return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_TYPE_SAVE;
1271 }
1272 
branch_sample_hw_index(const struct perf_event * event)1273 static inline bool branch_sample_hw_index(const struct perf_event *event)
1274 {
1275 	return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX;
1276 }
1277 
branch_sample_priv(const struct perf_event * event)1278 static inline bool branch_sample_priv(const struct perf_event *event)
1279 {
1280 	return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_PRIV_SAVE;
1281 }
1282 
branch_sample_counters(const struct perf_event * event)1283 static inline bool branch_sample_counters(const struct perf_event *event)
1284 {
1285 	return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS;
1286 }
1287 
branch_sample_call_stack(const struct perf_event * event)1288 static inline bool branch_sample_call_stack(const struct perf_event *event)
1289 {
1290 	return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK;
1291 }
1292 
1293 struct perf_sample_data {
1294 	/*
1295 	 * Fields set by perf_sample_data_init() unconditionally,
1296 	 * group so as to minimize the cachelines touched.
1297 	 */
1298 	u64				sample_flags;
1299 	u64				period;
1300 	u64				dyn_size;
1301 
1302 	/*
1303 	 * Fields commonly set by __perf_event_header__init_id(),
1304 	 * group so as to minimize the cachelines touched.
1305 	 */
1306 	u64				type;
1307 	struct {
1308 		u32	pid;
1309 		u32	tid;
1310 	}				tid_entry;
1311 	u64				time;
1312 	u64				id;
1313 	struct {
1314 		u32	cpu;
1315 		u32	reserved;
1316 	}				cpu_entry;
1317 
1318 	/*
1319 	 * The other fields, optionally {set,used} by
1320 	 * perf_{prepare,output}_sample().
1321 	 */
1322 	u64				ip;
1323 	struct perf_callchain_entry	*callchain;
1324 	struct perf_raw_record		*raw;
1325 	struct perf_branch_stack	*br_stack;
1326 	u64				*br_stack_cntr;
1327 	union perf_sample_weight	weight;
1328 	union  perf_mem_data_src	data_src;
1329 	u64				txn;
1330 
1331 	struct perf_regs		regs_user;
1332 	struct perf_regs		regs_intr;
1333 	u64				stack_user_size;
1334 
1335 	u64				stream_id;
1336 	u64				cgroup;
1337 	u64				addr;
1338 	u64				phys_addr;
1339 	u64				data_page_size;
1340 	u64				code_page_size;
1341 	u64				aux_size;
1342 } ____cacheline_aligned;
1343 
1344 /* default value for data source */
1345 #define PERF_MEM_NA (PERF_MEM_S(OP, NA)   |\
1346 		    PERF_MEM_S(LVL, NA)   |\
1347 		    PERF_MEM_S(SNOOP, NA) |\
1348 		    PERF_MEM_S(LOCK, NA)  |\
1349 		    PERF_MEM_S(TLB, NA)   |\
1350 		    PERF_MEM_S(LVLNUM, NA))
1351 
perf_sample_data_init(struct perf_sample_data * data,u64 addr,u64 period)1352 static inline void perf_sample_data_init(struct perf_sample_data *data,
1353 					 u64 addr, u64 period)
1354 {
1355 	/* remaining struct members initialized in perf_prepare_sample() */
1356 	data->sample_flags = PERF_SAMPLE_PERIOD;
1357 	data->period = period;
1358 	data->dyn_size = 0;
1359 
1360 	if (addr) {
1361 		data->addr = addr;
1362 		data->sample_flags |= PERF_SAMPLE_ADDR;
1363 	}
1364 }
1365 
perf_sample_save_callchain(struct perf_sample_data * data,struct perf_event * event,struct pt_regs * regs)1366 static inline void perf_sample_save_callchain(struct perf_sample_data *data,
1367 					      struct perf_event *event,
1368 					      struct pt_regs *regs)
1369 {
1370 	int size = 1;
1371 
1372 	if (!(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN))
1373 		return;
1374 	if (WARN_ON_ONCE(data->sample_flags & PERF_SAMPLE_CALLCHAIN))
1375 		return;
1376 
1377 	data->callchain = perf_callchain(event, regs);
1378 	size += data->callchain->nr;
1379 
1380 	data->dyn_size += size * sizeof(u64);
1381 	data->sample_flags |= PERF_SAMPLE_CALLCHAIN;
1382 }
1383 
perf_sample_save_raw_data(struct perf_sample_data * data,struct perf_event * event,struct perf_raw_record * raw)1384 static inline void perf_sample_save_raw_data(struct perf_sample_data *data,
1385 					     struct perf_event *event,
1386 					     struct perf_raw_record *raw)
1387 {
1388 	struct perf_raw_frag *frag = &raw->frag;
1389 	u32 sum = 0;
1390 	int size;
1391 
1392 	if (!(event->attr.sample_type & PERF_SAMPLE_RAW))
1393 		return;
1394 	if (WARN_ON_ONCE(data->sample_flags & PERF_SAMPLE_RAW))
1395 		return;
1396 
1397 	do {
1398 		sum += frag->size;
1399 		if (perf_raw_frag_last(frag))
1400 			break;
1401 		frag = frag->next;
1402 	} while (1);
1403 
1404 	size = round_up(sum + sizeof(u32), sizeof(u64));
1405 	raw->size = size - sizeof(u32);
1406 	frag->pad = raw->size - sum;
1407 
1408 	data->raw = raw;
1409 	data->dyn_size += size;
1410 	data->sample_flags |= PERF_SAMPLE_RAW;
1411 }
1412 
has_branch_stack(struct perf_event * event)1413 static inline bool has_branch_stack(struct perf_event *event)
1414 {
1415 	return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK;
1416 }
1417 
perf_sample_save_brstack(struct perf_sample_data * data,struct perf_event * event,struct perf_branch_stack * brs,u64 * brs_cntr)1418 static inline void perf_sample_save_brstack(struct perf_sample_data *data,
1419 					    struct perf_event *event,
1420 					    struct perf_branch_stack *brs,
1421 					    u64 *brs_cntr)
1422 {
1423 	int size = sizeof(u64); /* nr */
1424 
1425 	if (!has_branch_stack(event))
1426 		return;
1427 	if (WARN_ON_ONCE(data->sample_flags & PERF_SAMPLE_BRANCH_STACK))
1428 		return;
1429 
1430 	if (branch_sample_hw_index(event))
1431 		size += sizeof(u64);
1432 
1433 	brs->nr = min_t(u16, event->attr.sample_max_stack, brs->nr);
1434 
1435 	size += brs->nr * sizeof(struct perf_branch_entry);
1436 
1437 	/*
1438 	 * The extension space for counters is appended after the
1439 	 * struct perf_branch_stack. It is used to store the occurrences
1440 	 * of events of each branch.
1441 	 */
1442 	if (brs_cntr)
1443 		size += brs->nr * sizeof(u64);
1444 
1445 	data->br_stack = brs;
1446 	data->br_stack_cntr = brs_cntr;
1447 	data->dyn_size += size;
1448 	data->sample_flags |= PERF_SAMPLE_BRANCH_STACK;
1449 }
1450 
perf_sample_data_size(struct perf_sample_data * data,struct perf_event * event)1451 static inline u32 perf_sample_data_size(struct perf_sample_data *data,
1452 					struct perf_event *event)
1453 {
1454 	u32 size = sizeof(struct perf_event_header);
1455 
1456 	size += event->header_size + event->id_header_size;
1457 	size += data->dyn_size;
1458 
1459 	return size;
1460 }
1461 
1462 /*
1463  * Clear all bitfields in the perf_branch_entry.
1464  * The to and from fields are not cleared because they are
1465  * systematically modified by caller.
1466  */
perf_clear_branch_entry_bitfields(struct perf_branch_entry * br)1467 static inline void perf_clear_branch_entry_bitfields(struct perf_branch_entry *br)
1468 {
1469 	br->mispred	= 0;
1470 	br->predicted	= 0;
1471 	br->in_tx	= 0;
1472 	br->abort	= 0;
1473 	br->cycles	= 0;
1474 	br->type	= 0;
1475 	br->spec	= PERF_BR_SPEC_NA;
1476 	br->reserved	= 0;
1477 }
1478 
1479 extern void perf_output_sample(struct perf_output_handle *handle,
1480 			       struct perf_event_header *header,
1481 			       struct perf_sample_data *data,
1482 			       struct perf_event *event);
1483 extern void perf_prepare_sample(struct perf_sample_data *data,
1484 				struct perf_event *event,
1485 				struct pt_regs *regs);
1486 extern void perf_prepare_header(struct perf_event_header *header,
1487 				struct perf_sample_data *data,
1488 				struct perf_event *event,
1489 				struct pt_regs *regs);
1490 
1491 extern int perf_event_overflow(struct perf_event *event,
1492 				 struct perf_sample_data *data,
1493 				 struct pt_regs *regs);
1494 
1495 extern void perf_event_output_forward(struct perf_event *event,
1496 				     struct perf_sample_data *data,
1497 				     struct pt_regs *regs);
1498 extern void perf_event_output_backward(struct perf_event *event,
1499 				       struct perf_sample_data *data,
1500 				       struct pt_regs *regs);
1501 extern int perf_event_output(struct perf_event *event,
1502 			     struct perf_sample_data *data,
1503 			     struct pt_regs *regs);
1504 
1505 static inline bool
is_default_overflow_handler(struct perf_event * event)1506 is_default_overflow_handler(struct perf_event *event)
1507 {
1508 	perf_overflow_handler_t overflow_handler = event->overflow_handler;
1509 
1510 	if (likely(overflow_handler == perf_event_output_forward))
1511 		return true;
1512 	if (unlikely(overflow_handler == perf_event_output_backward))
1513 		return true;
1514 	return false;
1515 }
1516 
1517 extern void
1518 perf_event_header__init_id(struct perf_event_header *header,
1519 			   struct perf_sample_data *data,
1520 			   struct perf_event *event);
1521 extern void
1522 perf_event__output_id_sample(struct perf_event *event,
1523 			     struct perf_output_handle *handle,
1524 			     struct perf_sample_data *sample);
1525 
1526 extern void
1527 perf_log_lost_samples(struct perf_event *event, u64 lost);
1528 
event_has_any_exclude_flag(struct perf_event * event)1529 static inline bool event_has_any_exclude_flag(struct perf_event *event)
1530 {
1531 	struct perf_event_attr *attr = &event->attr;
1532 
1533 	return attr->exclude_idle || attr->exclude_user ||
1534 	       attr->exclude_kernel || attr->exclude_hv ||
1535 	       attr->exclude_guest || attr->exclude_host;
1536 }
1537 
is_sampling_event(struct perf_event * event)1538 static inline bool is_sampling_event(struct perf_event *event)
1539 {
1540 	return event->attr.sample_period != 0;
1541 }
1542 
1543 /*
1544  * Return 1 for a software event, 0 for a hardware event
1545  */
is_software_event(struct perf_event * event)1546 static inline int is_software_event(struct perf_event *event)
1547 {
1548 	return event->event_caps & PERF_EV_CAP_SOFTWARE;
1549 }
1550 
1551 /*
1552  * Return 1 for event in sw context, 0 for event in hw context
1553  */
in_software_context(struct perf_event * event)1554 static inline int in_software_context(struct perf_event *event)
1555 {
1556 	return event->pmu_ctx->pmu->task_ctx_nr == perf_sw_context;
1557 }
1558 
is_exclusive_pmu(struct pmu * pmu)1559 static inline int is_exclusive_pmu(struct pmu *pmu)
1560 {
1561 	return pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE;
1562 }
1563 
1564 extern struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX];
1565 
1566 extern void ___perf_sw_event(u32, u64, struct pt_regs *, u64);
1567 extern void __perf_sw_event(u32, u64, struct pt_regs *, u64);
1568 
1569 #ifndef perf_arch_fetch_caller_regs
perf_arch_fetch_caller_regs(struct pt_regs * regs,unsigned long ip)1570 static inline void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { }
1571 #endif
1572 
1573 /*
1574  * When generating a perf sample in-line, instead of from an interrupt /
1575  * exception, we lack a pt_regs. This is typically used from software events
1576  * like: SW_CONTEXT_SWITCHES, SW_MIGRATIONS and the tie-in with tracepoints.
1577  *
1578  * We typically don't need a full set, but (for x86) do require:
1579  * - ip for PERF_SAMPLE_IP
1580  * - cs for user_mode() tests
1581  * - sp for PERF_SAMPLE_CALLCHAIN
1582  * - eflags for MISC bits and CALLCHAIN (see: perf_hw_regs())
1583  *
1584  * NOTE: assumes @regs is otherwise already 0 filled; this is important for
1585  * things like PERF_SAMPLE_REGS_INTR.
1586  */
perf_fetch_caller_regs(struct pt_regs * regs)1587 static inline void perf_fetch_caller_regs(struct pt_regs *regs)
1588 {
1589 	perf_arch_fetch_caller_regs(regs, CALLER_ADDR0);
1590 }
1591 
1592 static __always_inline void
perf_sw_event(u32 event_id,u64 nr,struct pt_regs * regs,u64 addr)1593 perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
1594 {
1595 	if (static_key_false(&perf_swevent_enabled[event_id]))
1596 		__perf_sw_event(event_id, nr, regs, addr);
1597 }
1598 
1599 DECLARE_PER_CPU(struct pt_regs, __perf_regs[4]);
1600 
1601 /*
1602  * 'Special' version for the scheduler, it hard assumes no recursion,
1603  * which is guaranteed by us not actually scheduling inside other swevents
1604  * because those disable preemption.
1605  */
__perf_sw_event_sched(u32 event_id,u64 nr,u64 addr)1606 static __always_inline void __perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)
1607 {
1608 	struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);
1609 
1610 	perf_fetch_caller_regs(regs);
1611 	___perf_sw_event(event_id, nr, regs, addr);
1612 }
1613 
1614 extern struct static_key_false perf_sched_events;
1615 
__perf_sw_enabled(int swevt)1616 static __always_inline bool __perf_sw_enabled(int swevt)
1617 {
1618 	return static_key_false(&perf_swevent_enabled[swevt]);
1619 }
1620 
perf_event_task_migrate(struct task_struct * task)1621 static inline void perf_event_task_migrate(struct task_struct *task)
1622 {
1623 	if (__perf_sw_enabled(PERF_COUNT_SW_CPU_MIGRATIONS))
1624 		task->sched_migrated = 1;
1625 }
1626 
perf_event_task_sched_in(struct task_struct * prev,struct task_struct * task)1627 static inline void perf_event_task_sched_in(struct task_struct *prev,
1628 					    struct task_struct *task)
1629 {
1630 	if (static_branch_unlikely(&perf_sched_events))
1631 		__perf_event_task_sched_in(prev, task);
1632 
1633 	if (__perf_sw_enabled(PERF_COUNT_SW_CPU_MIGRATIONS) &&
1634 	    task->sched_migrated) {
1635 		__perf_sw_event_sched(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 0);
1636 		task->sched_migrated = 0;
1637 	}
1638 }
1639 
perf_event_task_sched_out(struct task_struct * prev,struct task_struct * next)1640 static inline void perf_event_task_sched_out(struct task_struct *prev,
1641 					     struct task_struct *next)
1642 {
1643 	if (__perf_sw_enabled(PERF_COUNT_SW_CONTEXT_SWITCHES))
1644 		__perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0);
1645 
1646 #ifdef CONFIG_CGROUP_PERF
1647 	if (__perf_sw_enabled(PERF_COUNT_SW_CGROUP_SWITCHES) &&
1648 	    perf_cgroup_from_task(prev, NULL) !=
1649 	    perf_cgroup_from_task(next, NULL))
1650 		__perf_sw_event_sched(PERF_COUNT_SW_CGROUP_SWITCHES, 1, 0);
1651 #endif
1652 
1653 	if (static_branch_unlikely(&perf_sched_events))
1654 		__perf_event_task_sched_out(prev, next);
1655 }
1656 
1657 extern void perf_event_mmap(struct vm_area_struct *vma);
1658 
1659 extern void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len,
1660 			       bool unregister, const char *sym);
1661 extern void perf_event_bpf_event(struct bpf_prog *prog,
1662 				 enum perf_bpf_event_type type,
1663 				 u16 flags);
1664 
1665 #define PERF_GUEST_ACTIVE		0x01
1666 #define PERF_GUEST_USER			0x02
1667 
1668 struct perf_guest_info_callbacks {
1669 	unsigned int			(*state)(void);
1670 	unsigned long			(*get_ip)(void);
1671 	unsigned int			(*handle_intel_pt_intr)(void);
1672 };
1673 
1674 #ifdef CONFIG_GUEST_PERF_EVENTS
1675 
1676 extern struct perf_guest_info_callbacks __rcu *perf_guest_cbs;
1677 
1678 DECLARE_STATIC_CALL(__perf_guest_state, *perf_guest_cbs->state);
1679 DECLARE_STATIC_CALL(__perf_guest_get_ip, *perf_guest_cbs->get_ip);
1680 DECLARE_STATIC_CALL(__perf_guest_handle_intel_pt_intr, *perf_guest_cbs->handle_intel_pt_intr);
1681 
perf_guest_state(void)1682 static inline unsigned int perf_guest_state(void)
1683 {
1684 	return static_call(__perf_guest_state)();
1685 }
1686 
perf_guest_get_ip(void)1687 static inline unsigned long perf_guest_get_ip(void)
1688 {
1689 	return static_call(__perf_guest_get_ip)();
1690 }
1691 
perf_guest_handle_intel_pt_intr(void)1692 static inline unsigned int perf_guest_handle_intel_pt_intr(void)
1693 {
1694 	return static_call(__perf_guest_handle_intel_pt_intr)();
1695 }
1696 
1697 extern void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs);
1698 extern void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs);
1699 
1700 #else /* !CONFIG_GUEST_PERF_EVENTS: */
1701 
perf_guest_state(void)1702 static inline unsigned int perf_guest_state(void)		 { return 0; }
perf_guest_get_ip(void)1703 static inline unsigned long perf_guest_get_ip(void)		 { return 0; }
perf_guest_handle_intel_pt_intr(void)1704 static inline unsigned int perf_guest_handle_intel_pt_intr(void) { return 0; }
1705 
1706 #endif /* !CONFIG_GUEST_PERF_EVENTS */
1707 
1708 extern void perf_event_exec(void);
1709 extern void perf_event_comm(struct task_struct *tsk, bool exec);
1710 extern void perf_event_namespaces(struct task_struct *tsk);
1711 extern void perf_event_fork(struct task_struct *tsk);
1712 extern void perf_event_text_poke(const void *addr,
1713 				 const void *old_bytes, size_t old_len,
1714 				 const void *new_bytes, size_t new_len);
1715 
1716 /* Callchains */
1717 DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry);
1718 
1719 extern void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs);
1720 extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs);
1721 extern struct perf_callchain_entry *
1722 get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
1723 		   u32 max_stack, bool crosstask, bool add_mark);
1724 extern int get_callchain_buffers(int max_stack);
1725 extern void put_callchain_buffers(void);
1726 extern struct perf_callchain_entry *get_callchain_entry(int *rctx);
1727 extern void put_callchain_entry(int rctx);
1728 
1729 extern int sysctl_perf_event_max_stack;
1730 extern int sysctl_perf_event_max_contexts_per_stack;
1731 
perf_callchain_store_context(struct perf_callchain_entry_ctx * ctx,u64 ip)1732 static inline int perf_callchain_store_context(struct perf_callchain_entry_ctx *ctx, u64 ip)
1733 {
1734 	if (ctx->contexts < sysctl_perf_event_max_contexts_per_stack) {
1735 		struct perf_callchain_entry *entry = ctx->entry;
1736 
1737 		entry->ip[entry->nr++] = ip;
1738 		++ctx->contexts;
1739 		return 0;
1740 	} else {
1741 		ctx->contexts_maxed = true;
1742 		return -1; /* no more room, stop walking the stack */
1743 	}
1744 }
1745 
perf_callchain_store(struct perf_callchain_entry_ctx * ctx,u64 ip)1746 static inline int perf_callchain_store(struct perf_callchain_entry_ctx *ctx, u64 ip)
1747 {
1748 	if (ctx->nr < ctx->max_stack && !ctx->contexts_maxed) {
1749 		struct perf_callchain_entry *entry = ctx->entry;
1750 
1751 		entry->ip[entry->nr++] = ip;
1752 		++ctx->nr;
1753 		return 0;
1754 	} else {
1755 		return -1; /* no more room, stop walking the stack */
1756 	}
1757 }
1758 
1759 extern int sysctl_perf_event_paranoid;
1760 extern int sysctl_perf_event_sample_rate;
1761 
1762 extern void perf_sample_event_took(u64 sample_len_ns);
1763 
1764 /* Access to perf_event_open(2) syscall. */
1765 #define PERF_SECURITY_OPEN		0
1766 
1767 /* Finer grained perf_event_open(2) access control. */
1768 #define PERF_SECURITY_CPU		1
1769 #define PERF_SECURITY_KERNEL		2
1770 #define PERF_SECURITY_TRACEPOINT	3
1771 
perf_is_paranoid(void)1772 static inline int perf_is_paranoid(void)
1773 {
1774 	return sysctl_perf_event_paranoid > -1;
1775 }
1776 
1777 extern int perf_allow_kernel(void);
1778 
perf_allow_cpu(void)1779 static inline int perf_allow_cpu(void)
1780 {
1781 	if (sysctl_perf_event_paranoid > 0 && !perfmon_capable())
1782 		return -EACCES;
1783 
1784 	return security_perf_event_open(PERF_SECURITY_CPU);
1785 }
1786 
perf_allow_tracepoint(void)1787 static inline int perf_allow_tracepoint(void)
1788 {
1789 	if (sysctl_perf_event_paranoid > -1 && !perfmon_capable())
1790 		return -EPERM;
1791 
1792 	return security_perf_event_open(PERF_SECURITY_TRACEPOINT);
1793 }
1794 
1795 extern int perf_exclude_event(struct perf_event *event, struct pt_regs *regs);
1796 
1797 extern void perf_event_init(void);
1798 extern void perf_tp_event(u16 event_type, u64 count, void *record,
1799 			  int entry_size, struct pt_regs *regs,
1800 			  struct hlist_head *head, int rctx,
1801 			  struct task_struct *task);
1802 extern void perf_bp_event(struct perf_event *event, void *data);
1803 
1804 extern unsigned long perf_misc_flags(struct perf_event *event, struct pt_regs *regs);
1805 extern unsigned long perf_instruction_pointer(struct perf_event *event,
1806 					      struct pt_regs *regs);
1807 
1808 #ifndef perf_arch_misc_flags
1809 # define perf_arch_misc_flags(regs) \
1810 		(user_mode(regs) ? PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL)
1811 # define perf_arch_instruction_pointer(regs)	instruction_pointer(regs)
1812 #endif
1813 #ifndef perf_arch_bpf_user_pt_regs
1814 # define perf_arch_bpf_user_pt_regs(regs) regs
1815 #endif
1816 
1817 #ifndef perf_arch_guest_misc_flags
perf_arch_guest_misc_flags(struct pt_regs * regs)1818 static inline unsigned long perf_arch_guest_misc_flags(struct pt_regs *regs)
1819 {
1820 	unsigned long guest_state = perf_guest_state();
1821 
1822 	if (!(guest_state & PERF_GUEST_ACTIVE))
1823 		return 0;
1824 
1825 	if (guest_state & PERF_GUEST_USER)
1826 		return PERF_RECORD_MISC_GUEST_USER;
1827 	else
1828 		return PERF_RECORD_MISC_GUEST_KERNEL;
1829 }
1830 # define perf_arch_guest_misc_flags(regs)	perf_arch_guest_misc_flags(regs)
1831 #endif
1832 
needs_branch_stack(struct perf_event * event)1833 static inline bool needs_branch_stack(struct perf_event *event)
1834 {
1835 	return event->attr.branch_sample_type != 0;
1836 }
1837 
has_aux(struct perf_event * event)1838 static inline bool has_aux(struct perf_event *event)
1839 {
1840 	return event->pmu && event->pmu->setup_aux;
1841 }
1842 
has_aux_action(struct perf_event * event)1843 static inline bool has_aux_action(struct perf_event *event)
1844 {
1845 	return event->attr.aux_sample_size ||
1846 	       event->attr.aux_pause ||
1847 	       event->attr.aux_resume;
1848 }
1849 
is_write_backward(struct perf_event * event)1850 static inline bool is_write_backward(struct perf_event *event)
1851 {
1852 	return !!event->attr.write_backward;
1853 }
1854 
has_addr_filter(struct perf_event * event)1855 static inline bool has_addr_filter(struct perf_event *event)
1856 {
1857 	return event->pmu->nr_addr_filters;
1858 }
1859 
1860 /*
1861  * An inherited event uses parent's filters
1862  */
1863 static inline struct perf_addr_filters_head *
perf_event_addr_filters(struct perf_event * event)1864 perf_event_addr_filters(struct perf_event *event)
1865 {
1866 	struct perf_addr_filters_head *ifh = &event->addr_filters;
1867 
1868 	if (event->parent)
1869 		ifh = &event->parent->addr_filters;
1870 
1871 	return ifh;
1872 }
1873 
perf_event_fasync(struct perf_event * event)1874 static inline struct fasync_struct **perf_event_fasync(struct perf_event *event)
1875 {
1876 	/* Only the parent has fasync state */
1877 	if (event->parent)
1878 		event = event->parent;
1879 	return &event->fasync;
1880 }
1881 
1882 extern void perf_event_addr_filters_sync(struct perf_event *event);
1883 extern void perf_report_aux_output_id(struct perf_event *event, u64 hw_id);
1884 
1885 extern int perf_output_begin(struct perf_output_handle *handle,
1886 			     struct perf_sample_data *data,
1887 			     struct perf_event *event, unsigned int size);
1888 extern int perf_output_begin_forward(struct perf_output_handle *handle,
1889 				     struct perf_sample_data *data,
1890 				     struct perf_event *event,
1891 				     unsigned int size);
1892 extern int perf_output_begin_backward(struct perf_output_handle *handle,
1893 				      struct perf_sample_data *data,
1894 				      struct perf_event *event,
1895 				      unsigned int size);
1896 
1897 extern void perf_output_end(struct perf_output_handle *handle);
1898 extern unsigned int perf_output_copy(struct perf_output_handle *handle,
1899 				     const void *buf, unsigned int len);
1900 extern unsigned int perf_output_skip(struct perf_output_handle *handle,
1901 				     unsigned int len);
1902 extern long perf_output_copy_aux(struct perf_output_handle *aux_handle,
1903 				 struct perf_output_handle *handle,
1904 				 unsigned long from, unsigned long to);
1905 extern int perf_swevent_get_recursion_context(void);
1906 extern void perf_swevent_put_recursion_context(int rctx);
1907 extern u64 perf_swevent_set_period(struct perf_event *event);
1908 extern void perf_event_enable(struct perf_event *event);
1909 extern void perf_event_disable(struct perf_event *event);
1910 extern void perf_event_disable_local(struct perf_event *event);
1911 extern void perf_event_disable_inatomic(struct perf_event *event);
1912 extern void perf_event_task_tick(void);
1913 extern int perf_event_account_interrupt(struct perf_event *event);
1914 extern int perf_event_period(struct perf_event *event, u64 value);
1915 extern u64 perf_event_pause(struct perf_event *event, bool reset);
1916 
1917 #else /* !CONFIG_PERF_EVENTS: */
1918 
1919 static inline void *
perf_aux_output_begin(struct perf_output_handle * handle,struct perf_event * event)1920 perf_aux_output_begin(struct perf_output_handle *handle,
1921 		      struct perf_event *event)				{ return NULL; }
1922 static inline void
perf_aux_output_end(struct perf_output_handle * handle,unsigned long size)1923 perf_aux_output_end(struct perf_output_handle *handle, unsigned long size)
1924 									{ }
1925 static inline int
perf_aux_output_skip(struct perf_output_handle * handle,unsigned long size)1926 perf_aux_output_skip(struct perf_output_handle *handle,
1927 		     unsigned long size)				{ return -EINVAL; }
1928 static inline void *
perf_get_aux(struct perf_output_handle * handle)1929 perf_get_aux(struct perf_output_handle *handle)				{ return NULL; }
1930 static inline void
perf_event_task_migrate(struct task_struct * task)1931 perf_event_task_migrate(struct task_struct *task)			{ }
1932 static inline void
perf_event_task_sched_in(struct task_struct * prev,struct task_struct * task)1933 perf_event_task_sched_in(struct task_struct *prev,
1934 			 struct task_struct *task)			{ }
1935 static inline void
perf_event_task_sched_out(struct task_struct * prev,struct task_struct * next)1936 perf_event_task_sched_out(struct task_struct *prev,
1937 			  struct task_struct *next)			{ }
perf_event_init_task(struct task_struct * child,u64 clone_flags)1938 static inline int perf_event_init_task(struct task_struct *child,
1939 				       u64 clone_flags)			{ return 0; }
perf_event_exit_task(struct task_struct * child)1940 static inline void perf_event_exit_task(struct task_struct *child)	{ }
perf_event_free_task(struct task_struct * task)1941 static inline void perf_event_free_task(struct task_struct *task)	{ }
perf_event_delayed_put(struct task_struct * task)1942 static inline void perf_event_delayed_put(struct task_struct *task)	{ }
perf_event_get(unsigned int fd)1943 static inline struct file *perf_event_get(unsigned int fd)	{ return ERR_PTR(-EINVAL); }
perf_get_event(struct file * file)1944 static inline const struct perf_event *perf_get_event(struct file *file)
1945 {
1946 	return ERR_PTR(-EINVAL);
1947 }
perf_event_attrs(struct perf_event * event)1948 static inline const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
1949 {
1950 	return ERR_PTR(-EINVAL);
1951 }
perf_event_read_local(struct perf_event * event,u64 * value,u64 * enabled,u64 * running)1952 static inline int perf_event_read_local(struct perf_event *event, u64 *value,
1953 					u64 *enabled, u64 *running)
1954 {
1955 	return -EINVAL;
1956 }
perf_event_print_debug(void)1957 static inline void perf_event_print_debug(void)				{ }
perf_event_task_disable(void)1958 static inline int perf_event_task_disable(void)				{ return -EINVAL; }
perf_event_task_enable(void)1959 static inline int perf_event_task_enable(void)				{ return -EINVAL; }
perf_event_refresh(struct perf_event * event,int refresh)1960 static inline int perf_event_refresh(struct perf_event *event, int refresh)
1961 {
1962 	return -EINVAL;
1963 }
1964 
1965 static inline void
perf_sw_event(u32 event_id,u64 nr,struct pt_regs * regs,u64 addr)1966 perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)	{ }
1967 static inline void
perf_bp_event(struct perf_event * event,void * data)1968 perf_bp_event(struct perf_event *event, void *data)			{ }
1969 
perf_event_mmap(struct vm_area_struct * vma)1970 static inline void perf_event_mmap(struct vm_area_struct *vma)		{ }
1971 
1972 typedef int (perf_ksymbol_get_name_f)(char *name, int name_len, void *data);
perf_event_ksymbol(u16 ksym_type,u64 addr,u32 len,bool unregister,const char * sym)1973 static inline void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len,
1974 				      bool unregister, const char *sym)	{ }
perf_event_bpf_event(struct bpf_prog * prog,enum perf_bpf_event_type type,u16 flags)1975 static inline void perf_event_bpf_event(struct bpf_prog *prog,
1976 					enum perf_bpf_event_type type,
1977 					u16 flags)			{ }
perf_event_exec(void)1978 static inline void perf_event_exec(void)				{ }
perf_event_comm(struct task_struct * tsk,bool exec)1979 static inline void perf_event_comm(struct task_struct *tsk, bool exec)	{ }
perf_event_namespaces(struct task_struct * tsk)1980 static inline void perf_event_namespaces(struct task_struct *tsk)	{ }
perf_event_fork(struct task_struct * tsk)1981 static inline void perf_event_fork(struct task_struct *tsk)		{ }
perf_event_text_poke(const void * addr,const void * old_bytes,size_t old_len,const void * new_bytes,size_t new_len)1982 static inline void perf_event_text_poke(const void *addr,
1983 					const void *old_bytes,
1984 					size_t old_len,
1985 					const void *new_bytes,
1986 					size_t new_len)			{ }
perf_event_init(void)1987 static inline void perf_event_init(void)				{ }
perf_swevent_get_recursion_context(void)1988 static inline int  perf_swevent_get_recursion_context(void)		{ return -1; }
perf_swevent_put_recursion_context(int rctx)1989 static inline void perf_swevent_put_recursion_context(int rctx)		{ }
perf_swevent_set_period(struct perf_event * event)1990 static inline u64 perf_swevent_set_period(struct perf_event *event)	{ return 0; }
perf_event_enable(struct perf_event * event)1991 static inline void perf_event_enable(struct perf_event *event)		{ }
perf_event_disable(struct perf_event * event)1992 static inline void perf_event_disable(struct perf_event *event)		{ }
__perf_event_disable(void * info)1993 static inline int __perf_event_disable(void *info)			{ return -1; }
perf_event_task_tick(void)1994 static inline void perf_event_task_tick(void)				{ }
perf_event_release_kernel(struct perf_event * event)1995 static inline int perf_event_release_kernel(struct perf_event *event)	{ return 0; }
1996 static inline int
perf_event_period(struct perf_event * event,u64 value)1997 perf_event_period(struct perf_event *event, u64 value)			{ return -EINVAL; }
1998 static inline u64
perf_event_pause(struct perf_event * event,bool reset)1999 perf_event_pause(struct perf_event *event, bool reset)			{ return 0; }
2000 static inline int
perf_exclude_event(struct perf_event * event,struct pt_regs * regs)2001 perf_exclude_event(struct perf_event *event, struct pt_regs *regs)	{ return 0; }
2002 
2003 #endif /* !CONFIG_PERF_EVENTS */
2004 
2005 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
2006 extern void perf_restore_debug_store(void);
2007 #else
perf_restore_debug_store(void)2008 static inline void perf_restore_debug_store(void)			{ }
2009 #endif
2010 
2011 #define perf_output_put(handle, x)	perf_output_copy((handle), &(x), sizeof(x))
2012 
2013 struct perf_pmu_events_attr {
2014 	struct device_attribute		attr;
2015 	u64				id;
2016 	const char			*event_str;
2017 };
2018 
2019 struct perf_pmu_events_ht_attr {
2020 	struct device_attribute		attr;
2021 	u64				id;
2022 	const char			*event_str_ht;
2023 	const char			*event_str_noht;
2024 };
2025 
2026 struct perf_pmu_events_hybrid_attr {
2027 	struct device_attribute		attr;
2028 	u64				id;
2029 	const char			*event_str;
2030 	u64				pmu_type;
2031 };
2032 
2033 struct perf_pmu_format_hybrid_attr {
2034 	struct device_attribute		attr;
2035 	u64				pmu_type;
2036 };
2037 
2038 ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr,
2039 			      char *page);
2040 
2041 #define PMU_EVENT_ATTR(_name, _var, _id, _show)				\
2042 static struct perf_pmu_events_attr _var = {				\
2043 	.attr = __ATTR(_name, 0444, _show, NULL),			\
2044 	.id   =  _id,							\
2045 };
2046 
2047 #define PMU_EVENT_ATTR_STRING(_name, _var, _str)			    \
2048 static struct perf_pmu_events_attr _var = {				    \
2049 	.attr		= __ATTR(_name, 0444, perf_event_sysfs_show, NULL), \
2050 	.id		= 0,						    \
2051 	.event_str	= _str,						    \
2052 };
2053 
2054 #define PMU_EVENT_ATTR_ID(_name, _show, _id)				\
2055 	(&((struct perf_pmu_events_attr[]) {				\
2056 		{ .attr = __ATTR(_name, 0444, _show, NULL),		\
2057 		  .id = _id, }						\
2058 	})[0].attr.attr)
2059 
2060 #define PMU_FORMAT_ATTR_SHOW(_name, _format)				\
2061 static ssize_t								\
2062 _name##_show(struct device *dev,					\
2063 			       struct device_attribute *attr,		\
2064 			       char *page)				\
2065 {									\
2066 	BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);			\
2067 	return sprintf(page, _format "\n");				\
2068 }									\
2069 
2070 #define PMU_FORMAT_ATTR(_name, _format)					\
2071 	PMU_FORMAT_ATTR_SHOW(_name, _format)				\
2072 									\
2073 static struct device_attribute format_attr_##_name = __ATTR_RO(_name)
2074 
2075 /* Performance counter hotplug functions */
2076 #ifdef CONFIG_PERF_EVENTS
2077 extern int perf_event_init_cpu(unsigned int cpu);
2078 extern int perf_event_exit_cpu(unsigned int cpu);
2079 #else
2080 # define perf_event_init_cpu		NULL
2081 # define perf_event_exit_cpu		NULL
2082 #endif
2083 
2084 extern void arch_perf_update_userpage(struct perf_event *event,
2085 				      struct perf_event_mmap_page *userpg,
2086 				      u64 now);
2087 
2088 /*
2089  * Snapshot branch stack on software events.
2090  *
2091  * Branch stack can be very useful in understanding software events. For
2092  * example, when a long function, e.g. sys_perf_event_open, returns an
2093  * errno, it is not obvious why the function failed. Branch stack could
2094  * provide very helpful information in this type of scenarios.
2095  *
2096  * On software event, it is necessary to stop the hardware branch recorder
2097  * fast. Otherwise, the hardware register/buffer will be flushed with
2098  * entries of the triggering event. Therefore, static call is used to
2099  * stop the hardware recorder.
2100  */
2101 
2102 /*
2103  * cnt is the number of entries allocated for entries.
2104  * Return number of entries copied to .
2105  */
2106 typedef int (perf_snapshot_branch_stack_t)(struct perf_branch_entry *entries,
2107 					   unsigned int cnt);
2108 DECLARE_STATIC_CALL(perf_snapshot_branch_stack, perf_snapshot_branch_stack_t);
2109 
2110 #ifndef PERF_NEEDS_LOPWR_CB
perf_lopwr_cb(bool mode)2111 static inline void perf_lopwr_cb(bool mode)
2112 {
2113 }
2114 #endif
2115 
2116 #endif /* _LINUX_PERF_EVENT_H */
2117