/*
 * Linux performance counter support for ARC
 *
 * Copyright (C) 2011-2013 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 */

#ifndef __ASM_PERF_EVENT_H
#define __ASM_PERF_EVENT_H

/* The real maximum varies per CPU; this is the maximum supported by the driver */
#define ARC_PMU_MAX_HWEVENTS	64

#define ARC_REG_CC_BUILD	0xF6
#define ARC_REG_CC_INDEX	0x240
#define ARC_REG_CC_NAME0	0x241
#define ARC_REG_CC_NAME1	0x242
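
/*
 * Illustrative sketch, not part of the original header: the countable
 * conditions can plausibly be enumerated by writing an index into
 * ARC_REG_CC_INDEX and reading the 8-character condition name back
 * from the two NAME registers. Assumes read_aux_reg()/write_aux_reg()
 * from asm/arcregs.h; the union-based copy is for illustration only.
 */
static inline void arc_cc_read_name(unsigned int idx, char name[9])
{
	union {
		unsigned int word[2];
		char str[8];
	} cc;
	int i;

	write_aux_reg(ARC_REG_CC_INDEX, idx);
	cc.word[0] = read_aux_reg(ARC_REG_CC_NAME0);
	cc.word[1] = read_aux_reg(ARC_REG_CC_NAME1);

	for (i = 0; i < 8; i++)	/* copy out and NUL-terminate the name */
		name[i] = cc.str[i];
	name[8] = '\0';
}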

#define ARC_REG_PCT_BUILD	0xF5
#define ARC_REG_PCT_COUNTL	0x250
#define ARC_REG_PCT_COUNTH	0x251
#define ARC_REG_PCT_SNAPL	0x252
#define ARC_REG_PCT_SNAPH	0x253
#define ARC_REG_PCT_CONFIG	0x254
#define ARC_REG_PCT_CONTROL	0x255
#define ARC_REG_PCT_INDEX	0x256

#define ARC_REG_PCT_CONTROL_CC	(1 << 16)	/* clear counts */
#define ARC_REG_PCT_CONTROL_SN	(1 << 17)	/* snapshot */
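
/*
 * Illustrative sketch, an assumption based on the bit names above:
 * CC and SN look like trigger bits, so writing them to the CONTROL
 * register should clear all counters and latch their current values
 * into the SNAP registers, respectively.
 */
static inline void arc_pct_clear_and_snapshot(void)
{
	write_aux_reg(ARC_REG_PCT_CONTROL,
		      ARC_REG_PCT_CONTROL_CC | ARC_REG_PCT_CONTROL_SN);
}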

struct arc_reg_pct_build {
#ifdef CONFIG_CPU_BIG_ENDIAN
	unsigned int m:8, c:8, r:6, s:2, v:8;
#else
	unsigned int v:8, s:2, r:6, c:8, m:8;
#endif
};

struct arc_reg_cc_build {
#ifdef CONFIG_CPU_BIG_ENDIAN
	unsigned int c:16, r:8, v:8;
#else
	unsigned int v:8, r:8, c:16;
#endif
};
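
/*
 * Illustrative sketch, not from the original header: the BUILD
 * registers are Build Configuration Registers, so a driver would
 * typically probe them with READ_BCR() from asm/arcregs.h and treat a
 * zero version field ('v') as "block not present". The 'c' field is
 * assumed to hold the number of counters (or countable conditions).
 */
static inline int arc_pct_present(void)
{
	struct arc_reg_pct_build pct_bcr;

	READ_BCR(ARC_REG_PCT_BUILD, pct_bcr);
	return pct_bcr.v != 0;
}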

#define PERF_COUNT_ARC_DCLM	(PERF_COUNT_HW_MAX + 0)
#define PERF_COUNT_ARC_DCSM	(PERF_COUNT_HW_MAX + 1)
#define PERF_COUNT_ARC_ICM	(PERF_COUNT_HW_MAX + 2)
#define PERF_COUNT_ARC_BPOK	(PERF_COUNT_HW_MAX + 3)
#define PERF_COUNT_ARC_EDTLB	(PERF_COUNT_HW_MAX + 4)
#define PERF_COUNT_ARC_EITLB	(PERF_COUNT_HW_MAX + 5)
#define PERF_COUNT_ARC_HW_MAX	(PERF_COUNT_HW_MAX + 6)

/*
 * The "generalized" performance events seem to really be a copy
 * of the available events on x86 processors; the mapping to ARC
 * events is not always possible 1-to-1. Fortunately, there doesn't
 * seem to be an exact definition for these events, so we can cheat
 * a bit where necessary.
 *
 * In particular, the following PERF events may behave a bit differently
 * compared to other architectures:
 *
 * PERF_COUNT_HW_CPU_CYCLES
 *	Cycles not spent in the halted state
 *
 * PERF_COUNT_HW_REF_CPU_CYCLES
 *	Reference cycles not spent in the halted state; the same as
 *	PERF_COUNT_HW_CPU_CYCLES for now, as we don't do Dynamic
 *	Voltage/Frequency Scaling (yet)
 *
 * PERF_COUNT_HW_BUS_CYCLES
 *	Unclear what this means; Intel uses 0x013c, which according to
 *	their datasheet means "unhalted reference cycles". That sounds
 *	similar to PERF_COUNT_HW_REF_CPU_CYCLES, so we use the same
 *	counter for it.
 *
 * PERF_COUNT_HW_STALLED_CYCLES_BACKEND
 * PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
 *	The ARC 700 can either measure stalls per pipeline stage or all
 *	stalls combined; for now we assign all stalls to
 *	STALLED_CYCLES_BACKEND and all pipeline flushes (e.g. those caused
 *	by branch mispredicts) to STALLED_CYCLES_FRONTEND.
 *
 *	We could start multiple performance counters and combine everything
 *	afterwards, but that would make things complicated.
 *
 *	Note that I$ misses aren't counted by either of the two!
 */

static const char * const arc_pmu_ev_hw_map[] = {
	[PERF_COUNT_HW_CPU_CYCLES] = "crun",
	[PERF_COUNT_HW_REF_CPU_CYCLES] = "crun",
	[PERF_COUNT_HW_BUS_CYCLES] = "crun",
	[PERF_COUNT_HW_INSTRUCTIONS] = "iall",
	[PERF_COUNT_HW_BRANCH_MISSES] = "bpfail",
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmp",
	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = "bflush",
	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = "bstall",
	[PERF_COUNT_ARC_DCLM] = "dclm",
	[PERF_COUNT_ARC_DCSM] = "dcsm",
	[PERF_COUNT_ARC_ICM] = "icm",
	[PERF_COUNT_ARC_BPOK] = "bpok",
	[PERF_COUNT_ARC_EDTLB] = "edtlb",
	[PERF_COUNT_ARC_EITLB] = "eitlb",
};
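
/*
 * Illustrative sketch, an assumption about how the map above would be
 * used: resolve a generic perf event to a hardware condition index by
 * comparing names against the conditions enumerated through
 * ARC_REG_CC_INDEX/NAME0/NAME1 (see the arc_cc_read_name() sketch
 * above). n_conditions would come from the 'c' field of the CC BUILD
 * register; ARRAY_SIZE()/strcmp() are the usual kernel helpers from
 * linux/kernel.h and linux/string.h. Unset map slots are NULL.
 */
static inline int arc_pmu_map_hw_event(unsigned int ev,
				       unsigned int n_conditions)
{
	char name[9];
	unsigned int i;

	if (ev >= ARRAY_SIZE(arc_pmu_ev_hw_map) || !arc_pmu_ev_hw_map[ev])
		return -1;

	for (i = 0; i < n_conditions; i++) {
		arc_cc_read_name(i, name);
		if (!strcmp(name, arc_pmu_ev_hw_map[ev]))
			return i;
	}

	return -1;
}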

#define C(_x)			PERF_COUNT_HW_CACHE_##_x
#define CACHE_OP_UNSUPPORTED	0xffff

static const unsigned arc_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
	[C(L1D)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= PERF_COUNT_ARC_DCLM,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= PERF_COUNT_ARC_DCSM,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(L1I)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= PERF_COUNT_ARC_ICM,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(LL)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(DTLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= PERF_COUNT_ARC_EDTLB,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(ITLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= PERF_COUNT_ARC_EITLB,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(BPU)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
			[C(RESULT_MISS)]	= PERF_COUNT_HW_BRANCH_MISSES,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(NODE)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
};
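
/*
 * Illustrative sketch, an assumption following the standard perf ABI
 * encoding for cache events, config = type | (op << 8) | (result << 16):
 * decode the config, index the table above, and reject anything marked
 * CACHE_OP_UNSUPPORTED. u64 and EINVAL/ENOENT come from linux/types.h
 * and linux/errno.h.
 */
static inline int arc_pmu_cache_event(u64 config)
{
	unsigned int type   = (config >>  0) & 0xff;
	unsigned int op     = (config >>  8) & 0xff;
	unsigned int result = (config >> 16) & 0xff;
	int ret;

	if (type >= C(MAX) || op >= C(OP_MAX) || result >= C(RESULT_MAX))
		return -EINVAL;

	ret = arc_pmu_cache_map[type][op][result];
	if (ret == CACHE_OP_UNSUPPORTED)
		return -ENOENT;

	return ret;
}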

#endif /* __ASM_PERF_EVENT_H */