xref: /freebsd/usr.sbin/pmcstudy/pmcstudy.c (revision 8ddb146abcdf061be9f2c0db7e391697dafad85c)
1 /*-
2  * Copyright (c) 2014-2015 Netflix, Inc.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer,
9  *    in this position and unchanged.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. The name of the author may not be used to endorse or promote products
14  *    derived from this software without specific prior written permission
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 #include <sys/types.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <unistd.h>
31 #include <string.h>
32 #include <strings.h>
33 #include <sys/errno.h>
34 #include <signal.h>
35 #include <sys/wait.h>
36 #include <getopt.h>
37 #include "eval_expr.h"
38 __FBSDID("$FreeBSD$");
39 
/*
 * File-scope tunables and state.  Non-static globals below are each
 * declared "extern" immediately before being defined.
 * NOTE(review): presumably done to keep missing-declaration warnings
 * quiet -- confirm against the build flags.
 */
static int max_pmc_counters = 1;
static int run_all = 0;

#define MAX_COUNTER_SLOTS 1024	/* Size of counters.vals[] */
#define MAX_NLEN 64		/* Size of counters.counter_name[] */
#define MAX_CPU 64		/* Size of glob_cpu[] */
static int verbose = 0;

extern char **environ;		/* Handed to execve() by my_popen() */
extern struct expression *master_exp;
struct expression *master_exp=NULL;

#define PMC_INITIAL_ALLOC 512
extern char **valid_pmcs;
char **valid_pmcs = NULL;
extern int valid_pmc_cnt;
int valid_pmc_cnt=0;
extern int pmc_allocated_cnt;
int pmc_allocated_cnt=0;
59 
60 /*
61  * The following two varients on popen and pclose with
62  * the cavet that they get you the PID so that you
63  * can supply it to pclose so it can send a SIGTERM
64  *  to the process.
65  */
66 static FILE *
67 my_popen(const char *command, const char *dir, pid_t *p_pid)
68 {
69 	FILE *io_out, *io_in;
70 	int pdesin[2], pdesout[2];
71 	char *argv[4];
72 	pid_t pid;
73 	char cmd[4];
74 	char cmd2[1024];
75 	char arg1[4];
76 
77 	if ((strcmp(dir, "r") != 0) &&
78 	    (strcmp(dir, "w") != 0)) {
79 		errno = EINVAL;
80 		return(NULL);
81 	}
82 	if (pipe(pdesin) < 0)
83 		return (NULL);
84 
85 	if (pipe(pdesout) < 0) {
86 		(void)close(pdesin[0]);
87 		(void)close(pdesin[1]);
88 		return (NULL);
89 	}
90 	strcpy(cmd, "sh");
91 	strcpy(arg1, "-c");
92 	strcpy(cmd2, command);
93 	argv[0] = cmd;
94 	argv[1] = arg1;
95 	argv[2] = cmd2;
96 	argv[3] = NULL;
97 
98 	switch (pid = fork()) {
99 	case -1:			/* Error. */
100 		(void)close(pdesin[0]);
101 		(void)close(pdesin[1]);
102 		(void)close(pdesout[0]);
103 		(void)close(pdesout[1]);
104 		return (NULL);
105 		/* NOTREACHED */
106 	case 0:				/* Child. */
107 		/* Close out un-used sides */
108 		(void)close(pdesin[1]);
109 		(void)close(pdesout[0]);
110 		/* Now prepare the stdin of the process */
111 		close(0);
112 		(void)dup(pdesin[0]);
113 		(void)close(pdesin[0]);
114 		/* Now prepare the stdout of the process */
115 		close(1);
116 		(void)dup(pdesout[1]);
117 		/* And lets do stderr just in case */
118 		close(2);
119 		(void)dup(pdesout[1]);
120 		(void)close(pdesout[1]);
121 		/* Now run it */
122 		execve("/bin/sh", argv, environ);
123 		exit(127);
124 		/* NOTREACHED */
125 	}
126 	/* Parent; assume fdopen can't fail. */
127 	/* Store the pid */
128 	*p_pid = pid;
129 	if (strcmp(dir, "r") != 0) {
130 		io_out = fdopen(pdesin[1], "w");
131 		(void)close(pdesin[0]);
132 		(void)close(pdesout[0]);
133 		(void)close(pdesout[1]);
134 		return(io_out);
135  	} else {
136 		/* Prepare the input stream */
137 		io_in = fdopen(pdesout[0], "r");
138 		(void)close(pdesout[1]);
139 		(void)close(pdesin[0]);
140 		(void)close(pdesin[1]);
141 		return (io_in);
142 	}
143 }
144 
/*
 * my_pclose --
 *	Close a stream obtained from my_popen(), send SIGTERM to the
 *	child "the_pid" and wait for it to be reaped.  Nothing is
 *	returned; the child's exit status is discarded.
 *
 *	A NULL stream is skipped, and a non-positive pid is never passed
 *	to kill(2), where it would address a process group or every
 *	process instead of a single child.
 */
static void
my_pclose(FILE *io, pid_t the_pid)
{
	int pstat;
	pid_t pid;

	if (io != NULL)
		(void)fclose(io);
	if (the_pid <= 0)
		return;
	/* Die if you are not dead! */
	(void)kill(the_pid, SIGTERM);
	/* Retry the wait when it is interrupted by a signal. */
	do {
		pid = wait4(the_pid, &pstat, 0, (struct rusage *)0);
	} while (pid == -1 && errno == EINTR);
}
166 
/*
 * One named counter and its collected samples.  Entries are chained
 * through next_cpu; find_counter() walks that chain comparing
 * counter_name.
 */
struct counters {
	struct counters *next_cpu;		/* Next entry in the chain */
	char counter_name[MAX_NLEN];		/* Name of counter */
	int cpu;				/* CPU we are on */
	int pos;				/* Index we are filling to. */
	uint64_t vals[MAX_COUNTER_SLOTS];	/* Up to MAX_COUNTER_SLOTS sampled values */
	uint64_t sum;				/* Summary of entries */
};
175 
/* Per-CPU table of counter pointers, MAX_CPU entries. */
extern struct counters *glob_cpu[MAX_CPU];
struct counters *glob_cpu[MAX_CPU];

/* NOTE(review): presumably the base of the allocated counters -- confirm. */
extern struct counters *cnts;
struct counters *cnts=NULL;

/* NOTE(review): presumably the number of entries behind cnts -- confirm. */
extern int ncnts;
int ncnts=0;

/*
 * Selected evaluation routine.  It has the same signature as the
 * per-metric functions in this file: (counter chain, sample index).
 */
extern int (*expression)(struct counters *, int);
int (*expression)(struct counters *, int);

static const char *threshold=NULL;
static const char *command;
190 
/*
 * Description of one named test.
 * NOTE(review): field meanings below are read off the field names and
 * types only; confirm against the tables that populate this struct.
 */
struct cpu_entry {
	const char *name;			/* Test name */
	const char *thresh;			/* Threshold text */
	const char *command;			/* Command string */
	int (*func)(struct counters *, int);	/* Evaluation routine */
	int counters_required;			/* Counters the test needs */
};
198 
/*
 * A CPU family: its name, a table of cpu_entry tests, and the routine
 * that explains a test by name.
 */
struct cpu_type {
	char cputype[32];		/* CPU type name */
	int number;			/* NOTE(review): presumably the count of ents -- confirm */
	struct cpu_entry *ents;		/* Table of tests */
	void (*explain)(const char *name);	/* Takes a test name; same signature as explain_name_*() */
};
extern struct cpu_type the_cpu;
struct cpu_type the_cpu;
207 
/*
 * explain_name_sb --
 *	Print, on stdout, the formula behind the test called "name" and
 *	the threshold at which its result deserves attention.  An
 *	unrecognized name is reported as unknown.
 *
 *	The "splitload" text now carries its opening parenthesis, so the
 *	printed formula reads "(... * 5) / ..." like the computation it
 *	describes.
 */
static void
explain_name_sb(const char *name)
{
	static const struct {
		const char *key;	/* Test name */
		const char *text;	/* Formula text, newline terminated */
		const char *thresh;	/* Threshold hint */
	} notes[] = {
		{ "allocstall1",
		  "Examine PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "allocstall2",
		  "Examine PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "br_miss",
		  "Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "splitload",
		  "Examine (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "cache2",
		  "Examine ((MEM_LOAD_RETIRED.L3_HIT * 26) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P\n"
		  "**Note we have it labeled MEM_LOAD_UOPS_RETIRED.LLC_HIT not MEM_LOAD_RETIRED.L3_HIT\n",
		  "thresh >= .2" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "frontendstall",
		  "Examine IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4)\n",
		  "thresh >= .15" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  "          MACHINE_CLEARS.SMC + \n"
		  "          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (CPU_CLK_UNHALTED.THREAD_P * 4)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .9" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "dtlbmissstore",
		  "Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .05" },
	};
	const char *mythresh;
	size_t i, nnotes;

	nnotes = sizeof(notes) / sizeof(notes[0]);
	for (i = 0; i < nnotes; i++) {
		if (strcmp(name, notes[i].key) == 0)
			break;
	}
	if (i < nnotes) {
		printf("%s", notes[i].text);
		mythresh = notes[i].thresh;
	} else {
		printf("Unknown name:%s\n", name);
		mythresh = "unknown entry";
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
286 
/*
 * explain_name_ib --
 *	Print, on stdout, the formula behind the test called "name" and
 *	the threshold at which its result deserves attention.  An
 *	unrecognized name is reported as unknown.
 */
static void
explain_name_ib(const char *name)
{
	static const struct {
		const char *key;	/* Test name */
		const char *text;	/* Formula text, newline terminated */
		const char *thresh;	/* Threshold hint */
	} notes[] = {
		{ "br_miss",
		  "Examine ((BR_MISP_RETIRED.ALL_BRANCHES /(BR_MISP_RETIRED.ALL_BRANCHES +\n"
		  "         MACHINE_CLEAR.COUNT) * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES)\n"
		  "/ (4 * CPU_CLK_UNHALTED.THREAD))))\n",
		  "thresh >= .2" },
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .9" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "cache2",
		  "Examine (MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "itlbmiss",
		  "Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "icachemiss",
		  "Examine (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "lcpstall",
		  "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "datashare",
		  "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "splitload",
		  "Examine  ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) *\n"
		  "         LD_BLOCKS.NO_SR)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "dtlbmissstore",
		  "Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .05" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  "          MACHINE_CLEARS.SMC + \n"
		  "          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
	};
	const char *mythresh;
	size_t i, nnotes;

	nnotes = sizeof(notes) / sizeof(notes[0]);
	for (i = 0; i < nnotes; i++) {
		if (strcmp(name, notes[i].key) == 0)
			break;
	}
	if (i < nnotes) {
		printf("%s", notes[i].text);
		mythresh = notes[i].thresh;
	} else {
		printf("Unknown name:%s\n", name);
		mythresh = "unknown entry";
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
368 
369 
/*
 * explain_name_has --
 *	Print, on stdout, the formula behind the test called "name" and
 *	the threshold at which its result deserves attention.  An
 *	unrecognized name is reported as unknown.
 */
static void
explain_name_has(const char *name)
{
	static const struct {
		const char *key;	/* Test name */
		const char *text;	/* Formula text, newline terminated */
		const char *thresh;	/* Threshold hint */
	} notes[] = {
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .75" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "itlbmiss",
		  "Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "icachemiss",
		  "Examine (36 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "lcpstall",
		  "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "cache2",
		  "Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))\n"
		  "          / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "datashare",
		  "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "splitload",
		  "Examine  (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "br_miss",
		  "Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD\n",
		  "thresh >= .2" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  "          MACHINE_CLEARS.SMC + \n"
		  "          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
	};
	const char *mythresh;
	size_t i, nnotes;

	nnotes = sizeof(notes) / sizeof(notes[0]);
	for (i = 0; i < nnotes; i++) {
		if (strcmp(name, notes[i].key) == 0)
			break;
	}
	if (i < nnotes) {
		printf("%s", notes[i].text);
		mythresh = notes[i].thresh;
	} else {
		printf("Unknown name:%s\n", name);
		mythresh = "unknown entry";
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
447 
448 
449 
450 static struct counters *
451 find_counter(struct counters *base, const char *name)
452 {
453 	struct counters *at;
454 	int len;
455 
456 	at = base;
457 	len = strlen(name);
458 	while(at) {
459 		if (strncmp(at->counter_name, name, len) == 0) {
460 			return(at);
461 		}
462 		at = at->next_cpu;
463 	}
464 	printf("Can't find counter %s\n", name);
465 	printf("We have:\n");
466 	at = base;
467 	while(at) {
468 		printf("- %s\n", at->counter_name);
469 		at = at->next_cpu;
470 	}
471 	exit(-1);
472 }
473 
474 static int
475 allocstall1(struct counters *cpu, int pos)
476 {
477 /*  1  - PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW/CPU_CLK_UNHALTED.THREAD_P (thresh > .05)*/
478 	int ret;
479 	struct counters *partial;
480 	struct counters *unhalt;
481 	double un, par, res;
482 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
483 	partial = find_counter(cpu, "PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW");
484 	if (pos != -1) {
485 		par = partial->vals[pos] * 1.0;
486 		un = unhalt->vals[pos] * 1.0;
487 	} else {
488 		par = partial->sum * 1.0;
489 		un = unhalt->sum * 1.0;
490 	}
491 	res = par/un;
492 	ret = printf("%1.3f", res);
493 	return(ret);
494 }
495 
496 static int
497 allocstall2(struct counters *cpu, int pos)
498 {
499 /*  2  - PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
500 	int ret;
501 	struct counters *partial;
502 	struct counters *unhalt;
503 	double un, par, res;
504 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
505 	partial = find_counter(cpu, "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP");
506 	if (pos != -1) {
507 		par = partial->vals[pos] * 1.0;
508 		un = unhalt->vals[pos] * 1.0;
509 	} else {
510 		par = partial->sum * 1.0;
511 		un = unhalt->sum * 1.0;
512 	}
513 	res = par/un;
514 	ret = printf("%1.3f", res);
515 	return(ret);
516 }
517 
518 static int
519 br_mispredict(struct counters *cpu, int pos)
520 {
521 	struct counters *brctr;
522 	struct counters *unhalt;
523 	int ret;
524 /*  3  - (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
525 	double br, un, con, res;
526 	con = 20.0;
527 
528 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
529         brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
530 	if (pos != -1) {
531 		br = brctr->vals[pos] * 1.0;
532 		un = unhalt->vals[pos] * 1.0;
533 	} else {
534 		br = brctr->sum * 1.0;
535 		un = unhalt->sum * 1.0;
536 	}
537 	res = (con * br)/un;
538  	ret = printf("%1.3f", res);
539 	return(ret);
540 }
541 
542 static int
543 br_mispredictib(struct counters *cpu, int pos)
544 {
545 	struct counters *brctr;
546 	struct counters *unhalt;
547 	struct counters *clear, *clear2, *clear3;
548 	struct counters *uops;
549 	struct counters *recv;
550 	struct counters *iss;
551 /*	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",*/
552 	int ret;
553         /*
554 	 * (BR_MISP_RETIRED.ALL_BRANCHES /
555 	 *         (BR_MISP_RETIRED.ALL_BRANCHES +
556 	 *          MACHINE_CLEAR.COUNT) *
557 	 *	   ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) / (4 * CPU_CLK_UNHALTED.THREAD)))
558 	 *
559 	 */
560 	double br, cl, cl2, cl3, uo, re, un, con, res, is;
561 	con = 4.0;
562 
563 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
564         brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
565 	clear = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
566 	clear2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
567 	clear3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
568 	uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
569 	iss = find_counter(cpu, "UOPS_ISSUED.ANY");
570 	recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES");
571 	if (pos != -1) {
572 		br = brctr->vals[pos] * 1.0;
573 		cl = clear->vals[pos] * 1.0;
574 		cl2 = clear2->vals[pos] * 1.0;
575 		cl3 = clear3->vals[pos] * 1.0;
576 		uo = uops->vals[pos] * 1.0;
577 		re = recv->vals[pos] * 1.0;
578 		is = iss->vals[pos] * 1.0;
579 		un = unhalt->vals[pos] * 1.0;
580 	} else {
581 		br = brctr->sum * 1.0;
582 		cl = clear->sum * 1.0;
583 		cl2 = clear2->sum * 1.0;
584 		cl3 = clear3->sum * 1.0;
585 		uo = uops->sum * 1.0;
586 		re = recv->sum * 1.0;
587 		is = iss->sum * 1.0;
588 		un = unhalt->sum * 1.0;
589 	}
590 	res = (br/(br + cl + cl2 + cl3) * ((is - uo + con * re) / (con * un)));
591  	ret = printf("%1.3f", res);
592 	return(ret);
593 }
594 
595 
596 static int
597 br_mispredict_broad(struct counters *cpu, int pos)
598 {
599 	struct counters *brctr;
600 	struct counters *unhalt;
601 	struct counters *clear;
602 	struct counters *uops;
603 	struct counters *uops_ret;
604 	struct counters *recv;
605 	int ret;
606 	double br, cl, uo, uo_r, re, con, un, res;
607 
608 	con = 4.0;
609 
610 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
611         brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
612 	clear = find_counter(cpu, "MACHINE_CLEARS.CYCLES");
613 	uops = find_counter(cpu, "UOPS_ISSUED.ANY");
614 	uops_ret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
615 	recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES");
616 
617 	if (pos != -1) {
618 		un = unhalt->vals[pos] * 1.0;
619 		br = brctr->vals[pos] * 1.0;
620 		cl = clear->vals[pos] * 1.0;
621 		uo = uops->vals[pos] * 1.0;
622 		uo_r = uops_ret->vals[pos] * 1.0;
623 		re = recv->vals[pos] * 1.0;
624 	} else {
625 		un = unhalt->sum * 1.0;
626 		br = brctr->sum * 1.0;
627 		cl = clear->sum * 1.0;
628 		uo = uops->sum * 1.0;
629 		uo_r = uops_ret->sum * 1.0;
630 		re = recv->sum * 1.0;
631 	}
632 	res = br / (br + cl) * (uo - uo_r + con * re) / (un * con);
633  	ret = printf("%1.3f", res);
634 	return(ret);
635 }
636 
637 static int
638 splitloadib(struct counters *cpu, int pos)
639 {
640 	int ret;
641 	struct counters *mem;
642 	struct counters *l1d, *ldblock;
643 	struct counters *unhalt;
644 	double un, memd, res, l1, ldb;
645         /*
646 	 * ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) * LD_BLOCKS.NO_SR) / CPU_CLK_UNHALTED.THREAD_P
647 	 * "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
648 	 */
649 
650 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
651 	mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L1_MISS");
652 	l1d = find_counter(cpu, "L1D_PEND_MISS.PENDING");
653 	ldblock = find_counter(cpu, "LD_BLOCKS.NO_SR");
654 	if (pos != -1) {
655 		memd = mem->vals[pos] * 1.0;
656 		l1 = l1d->vals[pos] * 1.0;
657 		ldb = ldblock->vals[pos] * 1.0;
658 		un = unhalt->vals[pos] * 1.0;
659 	} else {
660 		memd = mem->sum * 1.0;
661 		l1 = l1d->sum * 1.0;
662 		ldb = ldblock->sum * 1.0;
663 		un = unhalt->sum * 1.0;
664 	}
665 	res = ((l1 / memd) * ldb)/un;
666 	ret = printf("%1.3f", res);
667 	return(ret);
668 }
669 
670 
671 static int
672 splitload(struct counters *cpu, int pos)
673 {
674 	int ret;
675 	struct counters *mem;
676 	struct counters *unhalt;
677 	double con, un, memd, res;
678 /*  4  - (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/
679 
680 	con = 5.0;
681 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
682 	mem = find_counter(cpu, "MEM_UOPS_RETIRED.SPLIT_LOADS");
683 	if (pos != -1) {
684 		memd = mem->vals[pos] * 1.0;
685 		un = unhalt->vals[pos] * 1.0;
686 	} else {
687 		memd = mem->sum * 1.0;
688 		un = unhalt->sum * 1.0;
689 	}
690 	res = (memd * con)/un;
691 	ret = printf("%1.3f", res);
692 	return(ret);
693 }
694 
695 
696 static int
697 splitload_sb(struct counters *cpu, int pos)
698 {
699 	int ret;
700 	struct counters *mem;
701 	struct counters *unhalt;
702 	double con, un, memd, res;
703 /*  4  - (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/
704 
705 	con = 5.0;
706 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
707 	mem = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_LOADS");
708 	if (pos != -1) {
709 		memd = mem->vals[pos] * 1.0;
710 		un = unhalt->vals[pos] * 1.0;
711 	} else {
712 		memd = mem->sum * 1.0;
713 		un = unhalt->sum * 1.0;
714 	}
715 	res = (memd * con)/un;
716 	ret = printf("%1.3f", res);
717 	return(ret);
718 }
719 
720 
721 static int
722 splitstore_sb(struct counters *cpu, int pos)
723 {
724         /*  5  - MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES (thresh > 0.01) */
725 	int ret;
726 	struct counters *mem_split;
727 	struct counters *mem_stores;
728 	double memsplit, memstore, res;
729 	mem_split = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_STORES");
730 	mem_stores = find_counter(cpu, "MEM_UOP_RETIRED.ALL_STORES");
731 	if (pos != -1) {
732 		memsplit = mem_split->vals[pos] * 1.0;
733 		memstore = mem_stores->vals[pos] * 1.0;
734 	} else {
735 		memsplit = mem_split->sum * 1.0;
736 		memstore = mem_stores->sum * 1.0;
737 	}
738 	res = memsplit/memstore;
739 	ret = printf("%1.3f", res);
740 	return(ret);
741 }
742 
743 
744 
745 static int
746 splitstore(struct counters *cpu, int pos)
747 {
748         /*  5  - MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES (thresh > 0.01) */
749 	int ret;
750 	struct counters *mem_split;
751 	struct counters *mem_stores;
752 	double memsplit, memstore, res;
753 	mem_split = find_counter(cpu, "MEM_UOPS_RETIRED.SPLIT_STORES");
754 	mem_stores = find_counter(cpu, "MEM_UOPS_RETIRED.ALL_STORES");
755 	if (pos != -1) {
756 		memsplit = mem_split->vals[pos] * 1.0;
757 		memstore = mem_stores->vals[pos] * 1.0;
758 	} else {
759 		memsplit = mem_split->sum * 1.0;
760 		memstore = mem_stores->sum * 1.0;
761 	}
762 	res = memsplit/memstore;
763 	ret = printf("%1.3f", res);
764 	return(ret);
765 }
766 
767 
768 static int
769 contested(struct counters *cpu, int pos)
770 {
771         /*  6  - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
772 	int ret;
773 	struct counters *mem;
774 	struct counters *unhalt;
775 	double con, un, memd, res;
776 
777 	con = 60.0;
778 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
779 	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
780 	if (pos != -1) {
781 		memd = mem->vals[pos] * 1.0;
782 		un = unhalt->vals[pos] * 1.0;
783 	} else {
784 		memd = mem->sum * 1.0;
785 		un = unhalt->sum * 1.0;
786 	}
787 	res = (memd * con)/un;
788 	ret = printf("%1.3f", res);
789 	return(ret);
790 }
791 
792 static int
793 contested_has(struct counters *cpu, int pos)
794 {
795         /*  6  - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
796 	int ret;
797 	struct counters *mem;
798 	struct counters *unhalt;
799 	double con, un, memd, res;
800 
801 	con = 84.0;
802 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
803 	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
804 	if (pos != -1) {
805 		memd = mem->vals[pos] * 1.0;
806 		un = unhalt->vals[pos] * 1.0;
807 	} else {
808 		memd = mem->sum * 1.0;
809 		un = unhalt->sum * 1.0;
810 	}
811 	res = (memd * con)/un;
812 	ret = printf("%1.3f", res);
813 	return(ret);
814 }
815 
816 static int
817 contestedbroad(struct counters *cpu, int pos)
818 {
819         /*  6  - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
820 	int ret;
821 	struct counters *mem;
822 	struct counters *mem2;
823 	struct counters *unhalt;
824 	double con, un, memd, memtoo, res;
825 
826 	con = 84.0;
827 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
828 	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
829 	mem2 = find_counter(cpu,"MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS");
830 
831 	if (pos != -1) {
832 		memd = mem->vals[pos] * 1.0;
833 		memtoo = mem2->vals[pos] * 1.0;
834 		un = unhalt->vals[pos] * 1.0;
835 	} else {
836 		memd = mem->sum * 1.0;
837 		memtoo = mem2->sum * 1.0;
838 		un = unhalt->sum * 1.0;
839 	}
840 	res = ((memd * con) + memtoo)/un;
841 	ret = printf("%1.3f", res);
842 	return(ret);
843 }
844 
845 
846 static int
847 blockstoreforward(struct counters *cpu, int pos)
848 {
849         /*  7  - (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .05)*/
850 	int ret;
851 	struct counters *ldb;
852 	struct counters *unhalt;
853 	double con, un, ld, res;
854 
855 	con = 13.0;
856 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
857 	ldb = find_counter(cpu, "LD_BLOCKS_STORE_FORWARD");
858 	if (pos != -1) {
859 		ld = ldb->vals[pos] * 1.0;
860 		un = unhalt->vals[pos] * 1.0;
861 	} else {
862 		ld = ldb->sum * 1.0;
863 		un = unhalt->sum * 1.0;
864 	}
865 	res = (ld * con)/un;
866 	ret = printf("%1.3f", res);
867 	return(ret);
868 }
869 
870 static int
871 cache2(struct counters *cpu, int pos)
872 {
873 	/* ** Suspect ***
874 	 *  8  - ((MEM_LOAD_RETIRED.L3_HIT * 26) + (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) +
875 	 *        (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
876 	 */
877 	int ret;
878 	struct counters *mem1, *mem2, *mem3;
879 	struct counters *unhalt;
880 	double con1, con2, con3, un, me_1, me_2, me_3, res;
881 
882 	con1 = 26.0;
883 	con2 = 43.0;
884 	con3 = 60.0;
885 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
886 /* Call for MEM_LOAD_RETIRED.L3_HIT possibly MEM_LOAD_UOPS_RETIRED.LLC_HIT ?*/
887 	mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
888 	mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
889 	mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
890 	if (pos != -1) {
891 		me_1 = mem1->vals[pos] * 1.0;
892 		me_2 = mem2->vals[pos] * 1.0;
893 		me_3 = mem3->vals[pos] * 1.0;
894 		un = unhalt->vals[pos] * 1.0;
895 	} else {
896 		me_1 = mem1->sum * 1.0;
897 		me_2 = mem2->sum * 1.0;
898 		me_3 = mem3->sum * 1.0;
899 		un = unhalt->sum * 1.0;
900 	}
901 	res = ((me_1 * con1) + (me_2 * con2) + (me_3 * con3))/un;
902 	ret = printf("%1.3f", res);
903 	return(ret);
904 }
905 
906 static int
907 datasharing(struct counters *cpu, int pos)
908 {
909 	/*
910 	 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
911 	 */
912 	int ret;
913 	struct counters *mem;
914 	struct counters *unhalt;
915 	double con, res, me, un;
916 
917 	con = 43.0;
918 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
919 	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
920 	if (pos != -1) {
921 		me = mem->vals[pos] * 1.0;
922 		un = unhalt->vals[pos] * 1.0;
923 	} else {
924 		me = mem->sum * 1.0;
925 		un = unhalt->sum * 1.0;
926 	}
927 	res = (me * con)/un;
928 	ret = printf("%1.3f", res);
929 	return(ret);
930 
931 }
932 
933 
934 static int
935 datasharing_has(struct counters *cpu, int pos)
936 {
937 	/*
938 	 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
939 	 */
940 	int ret;
941 	struct counters *mem;
942 	struct counters *unhalt;
943 	double con, res, me, un;
944 
945 	con = 72.0;
946 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
947 	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
948 	if (pos != -1) {
949 		me = mem->vals[pos] * 1.0;
950 		un = unhalt->vals[pos] * 1.0;
951 	} else {
952 		me = mem->sum * 1.0;
953 		un = unhalt->sum * 1.0;
954 	}
955 	res = (me * con)/un;
956 	ret = printf("%1.3f", res);
957 	return(ret);
958 
959 }
960 
961 
962 static int
963 cache2ib(struct counters *cpu, int pos)
964 {
965         /*
966 	 *  (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
967 	 */
968 	int ret;
969 	struct counters *mem;
970 	struct counters *unhalt;
971 	double con, un, me, res;
972 
973 	con = 29.0;
974 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
975 	mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
976 	if (pos != -1) {
977 		me = mem->vals[pos] * 1.0;
978 		un = unhalt->vals[pos] * 1.0;
979 	} else {
980 		me = mem->sum * 1.0;
981 		un = unhalt->sum * 1.0;
982 	}
983 	res = (con * me)/un;
984 	ret = printf("%1.3f", res);
985 	return(ret);
986 }
987 
988 static int
989 cache2has(struct counters *cpu, int pos)
990 {
991 	/*
992 	 * Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \
993 	 *          (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) +
994 	 *          (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))
995 	 *           / CPU_CLK_UNHALTED.THREAD_P
996 	 */
997 	int ret;
998 	struct counters *mem1, *mem2, *mem3;
999 	struct counters *unhalt;
1000 	double con1, con2, con3, un, me1, me2, me3, res;
1001 
1002 	con1 = 36.0;
1003 	con2 = 72.0;
1004 	con3 = 84.0;
1005 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1006 	mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
1007 	mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
1008 	mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
1009 	if (pos != -1) {
1010 		me1 = mem1->vals[pos] * 1.0;
1011 		me2 = mem2->vals[pos] * 1.0;
1012 		me3 = mem3->vals[pos] * 1.0;
1013 		un = unhalt->vals[pos] * 1.0;
1014 	} else {
1015 		me1 = mem1->sum * 1.0;
1016 		me2 = mem2->sum * 1.0;
1017 		me3 = mem3->sum * 1.0;
1018 		un = unhalt->sum * 1.0;
1019 	}
1020 	res = ((me1 * con1) + (me2 * con2) + (me3 * con3))/un;
1021 	ret = printf("%1.3f", res);
1022 	return(ret);
1023 }
1024 
1025 
1026 static int
1027 cache2broad(struct counters *cpu, int pos)
1028 {
1029         /*
1030 	 *  (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
1031 	 */
1032 	int ret;
1033 	struct counters *mem;
1034 	struct counters *unhalt;
1035 	double con, un, me, res;
1036 
1037 	con = 36.0;
1038 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1039 	mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L3_HIT");
1040 	if (pos != -1) {
1041 		me = mem->vals[pos] * 1.0;
1042 		un = unhalt->vals[pos] * 1.0;
1043 	} else {
1044 		me = mem->sum * 1.0;
1045 		un = unhalt->sum * 1.0;
1046 	}
1047 	res = (con * me)/un;
1048 	ret = printf("%1.3f", res);
1049 	return(ret);
1050 }
1051 
1052 
1053 static int
1054 cache1(struct counters *cpu, int pos)
1055 {
1056 	/*  9  - (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
1057 	int ret;
1058 	struct counters *mem;
1059 	struct counters *unhalt;
1060 	double con, un, me, res;
1061 
1062 	con = 180.0;
1063 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1064 	mem = find_counter(cpu, "MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS");
1065 	if (pos != -1) {
1066 		me = mem->vals[pos] * 1.0;
1067 		un = unhalt->vals[pos] * 1.0;
1068 	} else {
1069 		me = mem->sum * 1.0;
1070 		un = unhalt->sum * 1.0;
1071 	}
1072 	res = (me * con)/un;
1073 	ret = printf("%1.3f", res);
1074 	return(ret);
1075 }
1076 
1077 static int
1078 cache1ib(struct counters *cpu, int pos)
1079 {
1080 	/*  9  - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
1081 	int ret;
1082 	struct counters *mem;
1083 	struct counters *unhalt;
1084 	double con, un, me, res;
1085 
1086 	con = 180.0;
1087 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1088 	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM");
1089 	if (pos != -1) {
1090 		me = mem->vals[pos] * 1.0;
1091 		un = unhalt->vals[pos] * 1.0;
1092 	} else {
1093 		me = mem->sum * 1.0;
1094 		un = unhalt->sum * 1.0;
1095 	}
1096 	res = (me * con)/un;
1097 	ret = printf("%1.3f", res);
1098 	return(ret);
1099 }
1100 
1101 
1102 static int
1103 cache1broad(struct counters *cpu, int pos)
1104 {
1105 	/*  9  - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
1106 	int ret;
1107 	struct counters *mem;
1108 	struct counters *unhalt;
1109 	double con, un, me, res;
1110 
1111 	con = 180.0;
1112 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1113 	mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L3_MISS");
1114 	if (pos != -1) {
1115 		me = mem->vals[pos] * 1.0;
1116 		un = unhalt->vals[pos] * 1.0;
1117 	} else {
1118 		me = mem->sum * 1.0;
1119 		un = unhalt->sum * 1.0;
1120 	}
1121 	res = (me * con)/un;
1122 	ret = printf("%1.3f", res);
1123 	return(ret);
1124 }
1125 
1126 
1127 static int
1128 dtlb_missload(struct counters *cpu, int pos)
1129 {
1130 	/* 10  - ((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P (t >=.1) */
1131 	int ret;
1132 	struct counters *dtlb_m, *dtlb_d;
1133 	struct counters *unhalt;
1134 	double con, un, d1, d2, res;
1135 
1136 	con = 7.0;
1137 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1138 	dtlb_m = find_counter(cpu, "DTLB_LOAD_MISSES.STLB_HIT");
1139 	dtlb_d = find_counter(cpu, "DTLB_LOAD_MISSES.WALK_DURATION");
1140 	if (pos != -1) {
1141 		d1 = dtlb_m->vals[pos] * 1.0;
1142 		d2 = dtlb_d->vals[pos] * 1.0;
1143 		un = unhalt->vals[pos] * 1.0;
1144 	} else {
1145 		d1 = dtlb_m->sum * 1.0;
1146 		d2 = dtlb_d->sum * 1.0;
1147 		un = unhalt->sum * 1.0;
1148 	}
1149 	res = ((d1 * con) + d2)/un;
1150 	ret = printf("%1.3f", res);
1151 	return(ret);
1152 }
1153 
1154 static int
1155 dtlb_missstore(struct counters *cpu, int pos)
1156 {
1157         /*
1158 	 * ((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION) /
1159 	 * CPU_CLK_UNHALTED.THREAD_P (t >= .1)
1160 	 */
1161         int ret;
1162         struct counters *dtsb_m, *dtsb_d;
1163         struct counters *unhalt;
1164         double con, un, d1, d2, res;
1165 
1166         con = 7.0;
1167         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1168         dtsb_m = find_counter(cpu, "DTLB_STORE_MISSES.STLB_HIT");
1169         dtsb_d = find_counter(cpu, "DTLB_STORE_MISSES.WALK_DURATION");
1170         if (pos != -1) {
1171                 d1 = dtsb_m->vals[pos] * 1.0;
1172                 d2 = dtsb_d->vals[pos] * 1.0;
1173                 un = unhalt->vals[pos] * 1.0;
1174         } else {
1175                 d1 = dtsb_m->sum * 1.0;
1176                 d2 = dtsb_d->sum * 1.0;
1177                 un = unhalt->sum * 1.0;
1178         }
1179         res = ((d1 * con) + d2)/un;
1180         ret = printf("%1.3f", res);
1181         return(ret);
1182 }
1183 
1184 static int
1185 itlb_miss(struct counters *cpu, int pos)
1186 {
1187 	/* ITLB_MISSES.WALK_DURATION / CPU_CLK_UNTHREAD_P  IB */
1188 	int ret;
1189 	struct counters *itlb;
1190 	struct counters *unhalt;
1191 	double un, d1, res;
1192 
1193 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1194 	itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1195 	if (pos != -1) {
1196 		d1 = itlb->vals[pos] * 1.0;
1197 		un = unhalt->vals[pos] * 1.0;
1198 	} else {
1199 		d1 = itlb->sum * 1.0;
1200 		un = unhalt->sum * 1.0;
1201 	}
1202 	res = d1/un;
1203 	ret = printf("%1.3f", res);
1204 	return(ret);
1205 }
1206 
1207 
1208 static int
1209 itlb_miss_broad(struct counters *cpu, int pos)
1210 {
1211 	/* (7 * ITLB_MISSES.STLB_HIT_4K + ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNTHREAD_P   */
1212 	int ret;
1213 	struct counters *itlb;
1214 	struct counters *unhalt;
1215 	struct counters *four_k;
1216 	double un, d1, res, k;
1217 
1218 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1219 	itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1220 	four_k = find_counter(cpu, "ITLB_MISSES.STLB_HIT_4K");
1221 	if (pos != -1) {
1222 		d1 = itlb->vals[pos] * 1.0;
1223 		un = unhalt->vals[pos] * 1.0;
1224 		k = four_k->vals[pos] * 1.0;
1225 	} else {
1226 		d1 = itlb->sum * 1.0;
1227 		un = unhalt->sum * 1.0;
1228 		k = four_k->sum * 1.0;
1229 	}
1230 	res = (7.0 * k + d1)/un;
1231 	ret = printf("%1.3f", res);
1232 	return(ret);
1233 }
1234 
1235 
1236 static int
1237 icache_miss(struct counters *cpu, int pos)
1238 {
1239 	/* (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P IB */
1240 
1241 	int ret;
1242 	struct counters *itlb, *icache;
1243 	struct counters *unhalt;
1244 	double un, d1, ic, res;
1245 
1246 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1247 	itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1248 	icache = find_counter(cpu, "ICACHE.IFETCH_STALL");
1249 	if (pos != -1) {
1250 		d1 = itlb->vals[pos] * 1.0;
1251 		ic = icache->vals[pos] * 1.0;
1252 		un = unhalt->vals[pos] * 1.0;
1253 	} else {
1254 		d1 = itlb->sum * 1.0;
1255 		ic = icache->sum * 1.0;
1256 		un = unhalt->sum * 1.0;
1257 	}
1258 	res = (ic-d1)/un;
1259 	ret = printf("%1.3f", res);
1260 	return(ret);
1261 
1262 }
1263 
1264 static int
1265 icache_miss_has(struct counters *cpu, int pos)
1266 {
1267 	/* (36 * ICACHE.MISSES) / CPU_CLK_UNHALTED.THREAD_P */
1268 
1269 	int ret;
1270 	struct counters *icache;
1271 	struct counters *unhalt;
1272 	double un, con, ic, res;
1273 
1274 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1275 	icache = find_counter(cpu, "ICACHE.MISSES");
1276 	con = 36.0;
1277 	if (pos != -1) {
1278 		ic = icache->vals[pos] * 1.0;
1279 		un = unhalt->vals[pos] * 1.0;
1280 	} else {
1281 		ic = icache->sum * 1.0;
1282 		un = unhalt->sum * 1.0;
1283 	}
1284 	res = (con * ic)/un;
1285 	ret = printf("%1.3f", res);
1286 	return(ret);
1287 
1288 }
1289 
1290 static int
1291 lcp_stall(struct counters *cpu, int pos)
1292 {
1293          /* ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P IB */
1294 	int ret;
1295 	struct counters *ild;
1296 	struct counters *unhalt;
1297 	double un, d1, res;
1298 
1299 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1300 	ild = find_counter(cpu, "ILD_STALL.LCP");
1301 	if (pos != -1) {
1302 		d1 = ild->vals[pos] * 1.0;
1303 		un = unhalt->vals[pos] * 1.0;
1304 	} else {
1305 		d1 = ild->sum * 1.0;
1306 		un = unhalt->sum * 1.0;
1307 	}
1308 	res = d1/un;
1309 	ret = printf("%1.3f", res);
1310 	return(ret);
1311 
1312 }
1313 
1314 
1315 static int
1316 frontendstall(struct counters *cpu, int pos)
1317 {
1318       /* 12  -  IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4) (thresh >= .15) */
1319 	int ret;
1320 	struct counters *idq;
1321 	struct counters *unhalt;
1322 	double con, un, id, res;
1323 
1324 	con = 4.0;
1325 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1326 	idq = find_counter(cpu, "IDQ_UOPS_NOT_DELIVERED.CORE");
1327 	if (pos != -1) {
1328 		id = idq->vals[pos] * 1.0;
1329 		un = unhalt->vals[pos] * 1.0;
1330 	} else {
1331 		id = idq->sum * 1.0;
1332 		un = unhalt->sum * 1.0;
1333 	}
1334 	res = id/(un * con);
1335 	ret = printf("%1.3f", res);
1336 	return(ret);
1337 }
1338 
1339 static int
1340 clears(struct counters *cpu, int pos)
1341 {
1342 	/* 13  - ((MACHINE_CLEARS.MEMORY_ORDERING + MACHINE_CLEARS.SMC + MACHINE_CLEARS.MASKMOV ) * 100 )
1343 	 *         / CPU_CLK_UNHALTED.THREAD_P (thresh  >= .02)*/
1344 
1345 	int ret;
1346 	struct counters *clr1, *clr2, *clr3;
1347 	struct counters *unhalt;
1348 	double con, un, cl1, cl2, cl3, res;
1349 
1350 	con = 100.0;
1351 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1352 	clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
1353 	clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
1354 	clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
1355 
1356 	if (pos != -1) {
1357 		cl1 = clr1->vals[pos] * 1.0;
1358 		cl2 = clr2->vals[pos] * 1.0;
1359 		cl3 = clr3->vals[pos] * 1.0;
1360 		un = unhalt->vals[pos] * 1.0;
1361 	} else {
1362 		cl1 = clr1->sum * 1.0;
1363 		cl2 = clr2->sum * 1.0;
1364 		cl3 = clr3->sum * 1.0;
1365 		un = unhalt->sum * 1.0;
1366 	}
1367 	res = ((cl1 + cl2 + cl3) * con)/un;
1368 	ret = printf("%1.3f", res);
1369 	return(ret);
1370 }
1371 
1372 
1373 
1374 static int
1375 clears_broad(struct counters *cpu, int pos)
1376 {
1377 	int ret;
1378 	struct counters *clr1, *clr2, *clr3, *cyc;
1379 	struct counters *unhalt;
1380 	double con, un, cl1, cl2, cl3, cy, res;
1381 
1382 	con = 100.0;
1383 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1384 	clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
1385 	clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
1386 	clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
1387 	cyc = find_counter(cpu, "MACHINE_CLEARS.CYCLES");
1388 	if (pos != -1) {
1389 		cl1 = clr1->vals[pos] * 1.0;
1390 		cl2 = clr2->vals[pos] * 1.0;
1391 		cl3 = clr3->vals[pos] * 1.0;
1392 		cy = cyc->vals[pos] * 1.0;
1393 		un = unhalt->vals[pos] * 1.0;
1394 	} else {
1395 		cl1 = clr1->sum * 1.0;
1396 		cl2 = clr2->sum * 1.0;
1397 		cl3 = clr3->sum * 1.0;
1398 		cy = cyc->sum * 1.0;
1399 		un = unhalt->sum * 1.0;
1400 	}
1401 	/* Formula not listed but extrapulated to add the cy ?? */
1402 	res = ((cl1 + cl2 + cl3 + cy) * con)/un;
1403 	ret = printf("%1.3f", res);
1404 	return(ret);
1405 }
1406 
1407 
1408 
1409 
1410 
1411 static int
1412 microassist(struct counters *cpu, int pos)
1413 {
1414 	/* 14  - IDQ.MS_CYCLES / CPU_CLK_UNHALTED.THREAD_P (thresh > .05) */
1415 	int ret;
1416 	struct counters *idq;
1417 	struct counters *unhalt;
1418 	double un, id, res, con;
1419 
1420 	con = 4.0;
1421 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1422 	idq = find_counter(cpu, "IDQ.MS_UOPS");
1423 	if (pos != -1) {
1424 		id = idq->vals[pos] * 1.0;
1425 		un = unhalt->vals[pos] * 1.0;
1426 	} else {
1427 		id = idq->sum * 1.0;
1428 		un = unhalt->sum * 1.0;
1429 	}
1430 	res = id/(un * con);
1431 	ret = printf("%1.3f", res);
1432 	return(ret);
1433 }
1434 
1435 
1436 static int
1437 microassist_broad(struct counters *cpu, int pos)
1438 {
1439 	int ret;
1440 	struct counters *idq;
1441 	struct counters *unhalt;
1442 	struct counters *uopiss;
1443 	struct counters *uopret;
1444 	double un, id, res, con, uoi, uor;
1445 
1446 	con = 4.0;
1447 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1448 	idq = find_counter(cpu, "IDQ.MS_UOPS");
1449 	uopiss = find_counter(cpu, "UOPS_ISSUED.ANY");
1450 	uopret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
1451 	if (pos != -1) {
1452 		id = idq->vals[pos] * 1.0;
1453 		un = unhalt->vals[pos] * 1.0;
1454 		uoi = uopiss->vals[pos] * 1.0;
1455 		uor = uopret->vals[pos] * 1.0;
1456 	} else {
1457 		id = idq->sum * 1.0;
1458 		un = unhalt->sum * 1.0;
1459 		uoi = uopiss->sum * 1.0;
1460 		uor = uopret->sum * 1.0;
1461 	}
1462 	res = (uor/uoi) * (id/(un * con));
1463 	ret = printf("%1.3f", res);
1464 	return(ret);
1465 }
1466 
1467 
1468 static int
1469 aliasing(struct counters *cpu, int pos)
1470 {
1471 	/* 15  - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */
1472 	int ret;
1473 	struct counters *ld;
1474 	struct counters *unhalt;
1475 	double un, lds, con, res;
1476 
1477 	con = 5.0;
1478 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1479 	ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS");
1480 	if (pos != -1) {
1481 		lds = ld->vals[pos] * 1.0;
1482 		un = unhalt->vals[pos] * 1.0;
1483 	} else {
1484 		lds = ld->sum * 1.0;
1485 		un = unhalt->sum * 1.0;
1486 	}
1487 	res = (lds * con)/un;
1488 	ret = printf("%1.3f", res);
1489 	return(ret);
1490 }
1491 
1492 static int
1493 aliasing_broad(struct counters *cpu, int pos)
1494 {
1495 	/* 15  - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */
1496 	int ret;
1497 	struct counters *ld;
1498 	struct counters *unhalt;
1499 	double un, lds, con, res;
1500 
1501 	con = 7.0;
1502 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1503 	ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS");
1504 	if (pos != -1) {
1505 		lds = ld->vals[pos] * 1.0;
1506 		un = unhalt->vals[pos] * 1.0;
1507 	} else {
1508 		lds = ld->sum * 1.0;
1509 		un = unhalt->sum * 1.0;
1510 	}
1511 	res = (lds * con)/un;
1512 	ret = printf("%1.3f", res);
1513 	return(ret);
1514 }
1515 
1516 
1517 static int
1518 fpassists(struct counters *cpu, int pos)
1519 {
1520 	/* 16  - FP_ASSIST.ANY/INST_RETIRED.ANY_P */
1521 	int ret;
1522 	struct counters *fp;
1523 	struct counters *inst;
1524 	double un, fpd, res;
1525 
1526 	inst = find_counter(cpu, "INST_RETIRED.ANY_P");
1527 	fp = find_counter(cpu, "FP_ASSIST.ANY");
1528 	if (pos != -1) {
1529 		fpd = fp->vals[pos] * 1.0;
1530 		un = inst->vals[pos] * 1.0;
1531 	} else {
1532 		fpd = fp->sum * 1.0;
1533 		un = inst->sum * 1.0;
1534 	}
1535 	res = fpd/un;
1536 	ret = printf("%1.3f", res);
1537 	return(ret);
1538 }
1539 
1540 static int
1541 otherassistavx(struct counters *cpu, int pos)
1542 {
1543 	/* 17  - (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P thresh  .1*/
1544 	int ret;
1545 	struct counters *oth;
1546 	struct counters *unhalt;
1547 	double un, ot, con, res;
1548 
1549 	con = 75.0;
1550 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1551 	oth = find_counter(cpu, "OTHER_ASSISTS.AVX_TO_SSE");
1552 	if (pos != -1) {
1553 		ot = oth->vals[pos] * 1.0;
1554 		un = unhalt->vals[pos] * 1.0;
1555 	} else {
1556 		ot = oth->sum * 1.0;
1557 		un = unhalt->sum * 1.0;
1558 	}
1559 	res = (ot * con)/un;
1560 	ret = printf("%1.3f", res);
1561 	return(ret);
1562 }
1563 
1564 static int
1565 otherassistsse(struct counters *cpu, int pos)
1566 {
1567 
1568 	int ret;
1569 	struct counters *oth;
1570 	struct counters *unhalt;
1571 	double un, ot, con, res;
1572 
1573 	/* 18     (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P  thresh .1*/
1574 	con = 75.0;
1575 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1576 	oth = find_counter(cpu, "OTHER_ASSISTS.SSE_TO_AVX");
1577 	if (pos != -1) {
1578 		ot = oth->vals[pos] * 1.0;
1579 		un = unhalt->vals[pos] * 1.0;
1580 	} else {
1581 		ot = oth->sum * 1.0;
1582 		un = unhalt->sum * 1.0;
1583 	}
1584 	res = (ot * con)/un;
1585 	ret = printf("%1.3f", res);
1586 	return(ret);
1587 }
1588 
1589 static int
1590 efficiency1(struct counters *cpu, int pos)
1591 {
1592 
1593 	int ret;
1594 	struct counters *uops;
1595 	struct counters *unhalt;
1596 	double un, ot, con, res;
1597 
1598         /* 19 (UOPS_RETIRED.RETIRE_SLOTS/(4*CPU_CLK_UNHALTED.THREAD_P) look if thresh < .9*/
1599 	con = 4.0;
1600 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1601 	uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
1602 	if (pos != -1) {
1603 		ot = uops->vals[pos] * 1.0;
1604 		un = unhalt->vals[pos] * 1.0;
1605 	} else {
1606 		ot = uops->sum * 1.0;
1607 		un = unhalt->sum * 1.0;
1608 	}
1609 	res = ot/(con * un);
1610 	ret = printf("%1.3f", res);
1611 	return(ret);
1612 }
1613 
1614 static int
1615 efficiency2(struct counters *cpu, int pos)
1616 {
1617 
1618 	int ret;
1619 	struct counters *uops;
1620 	struct counters *unhalt;
1621 	double un, ot, res;
1622 
1623         /* 20  - CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P good if > 1. (comp factor)*/
1624 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1625 	uops = find_counter(cpu, "INST_RETIRED.ANY_P");
1626 	if (pos != -1) {
1627 		ot = uops->vals[pos] * 1.0;
1628 		un = unhalt->vals[pos] * 1.0;
1629 	} else {
1630 		ot = uops->sum * 1.0;
1631 		un = unhalt->sum * 1.0;
1632 	}
1633 	res = un/ot;
1634 	ret = printf("%1.3f", res);
1635 	return(ret);
1636 }
1637 
1638 #define SANDY_BRIDGE_COUNT 20
1639 static struct cpu_entry sandy_bridge[SANDY_BRIDGE_COUNT] = {
1640 /*01*/	{ "allocstall1", "thresh > .05",
1641 	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW -w 1",
1642 	  allocstall1, 2 },
1643 /* -- not defined for SB right (partial-rat_stalls) 02*/
1644         { "allocstall2", "thresh > .05",
1645 	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP -w 1",
1646 	  allocstall2, 2 },
1647 /*03*/	{ "br_miss", "thresh >= .2",
1648 	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
1649 	  br_mispredict, 2 },
1650 /*04*/	{ "splitload", "thresh >= .1",
1651 	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1",
1652 	  splitload_sb, 2 },
1653 /* 05*/	{ "splitstore", "thresh >= .01",
1654 	  "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1",
1655 	  splitstore_sb, 2 },
1656 /*06*/	{ "contested", "thresh >= .05",
1657 	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1658 	  contested, 2 },
1659 /*07*/	{ "blockstorefwd", "thresh >= .05",
1660 	  "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1661 	  blockstoreforward, 2 },
1662 /*08*/	{ "cache2", "thresh >= .2",
1663 	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1664 	  cache2, 4 },
1665 /*09*/	{ "cache1", "thresh >= .2",
1666 	  "pmcstat -s MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1667 	  cache1, 2 },
1668 /*10*/	{ "dtlbmissload", "thresh >= .1",
1669 	  "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1670 	  dtlb_missload, 3 },
1671 /*11*/	{ "dtlbmissstore", "thresh >= .05",
1672 	  "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1673 	  dtlb_missstore, 3 },
1674 /*12*/	{ "frontendstall", "thresh >= .15",
1675 	  "pmcstat -s IDQ_UOPS_NOT_DELIVERED.CORE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1676 	  frontendstall, 2 },
1677 /*13*/	{ "clears", "thresh >= .02",
1678 	  "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1679 	  clears, 4 },
1680 /*14*/	{ "microassist", "thresh >= .05",
1681 	  "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1682 	  microassist, 2 },
1683 /*15*/	{ "aliasing_4k", "thresh >= .1",
1684 	  "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1685 	  aliasing, 2 },
1686 /*16*/	{ "fpassist", "look for a excessive value",
1687 	  "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1688 	  fpassists, 2 },
1689 /*17*/	{ "otherassistavx", "look for a excessive value",
1690 	  "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1691 	  otherassistavx, 2},
1692 /*18*/	{ "otherassistsse", "look for a excessive value",
1693 	  "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1694 	  otherassistsse, 2 },
1695 /*19*/	{ "eff1", "thresh < .9",
1696 	  "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1697 	  efficiency1, 2 },
1698 /*20*/	{ "eff2", "thresh > 1.0",
1699 	  "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1700 	  efficiency2, 2 },
1701 };
1702 
1703 
1704 #define IVY_BRIDGE_COUNT 21
1705 static struct cpu_entry ivy_bridge[IVY_BRIDGE_COUNT] = {
1706 /*1*/	{ "eff1", "thresh < .75",
1707 	  "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1708 	  efficiency1, 2 },
1709 /*2*/	{ "eff2", "thresh > 1.0",
1710 	  "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1711 	  efficiency2, 2 },
1712 /*3*/	{ "itlbmiss", "thresh > .05",
1713 	  "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1714 	  itlb_miss, 2 },
1715 /*4*/	{ "icachemiss", "thresh > .05",
1716 	  "pmcstat -s ICACHE.IFETCH_STALL -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1717 	  icache_miss, 3 },
1718 /*5*/	{ "lcpstall", "thresh > .05",
1719 	  "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1720 	  lcp_stall, 2 },
1721 /*6*/	{ "cache1", "thresh >= .2",
1722 	  "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1723 	  cache1ib, 2 },
1724 /*7*/	{ "cache2", "thresh >= .2",
1725 	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1726 	  cache2ib, 2 },
1727 /*8*/	{ "contested", "thresh >= .05",
1728 	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1729 	  contested, 2 },
1730 /*9*/	{ "datashare", "thresh >= .05",
1731 	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1732 	  datasharing, 2 },
1733 /*10*/	{ "blockstorefwd", "thresh >= .05",
1734 	  "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1735 	  blockstoreforward, 2 },
1736 /*11*/	{ "splitload", "thresh >= .1",
1737 	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
1738 	  splitloadib, 4 },
1739 /*12*/	{ "splitstore", "thresh >= .01",
1740 	  "pmcstat -s MEM_UOPS_RETIRED.SPLIT_STORES -s MEM_UOPS_RETIRED.ALL_STORES -w 1",
1741 	  splitstore, 2 },
1742 /*13*/	{ "aliasing_4k", "thresh >= .1",
1743 	  "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1744 	  aliasing, 2 },
1745 /*14*/	{ "dtlbmissload", "thresh >= .1",
1746 	  "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1747 	  dtlb_missload , 3},
1748 /*15*/	{ "dtlbmissstore", "thresh >= .05",
1749 	  "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1750 	  dtlb_missstore, 3 },
1751 /*16*/	{ "br_miss", "thresh >= .2",
1752 	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",
1753 	  br_mispredictib, 8 },
1754 /*17*/	{ "clears", "thresh >= .02",
1755 	  "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1756 	  clears, 4 },
1757 /*18*/	{ "microassist", "thresh >= .05",
1758 	  "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1759 	  microassist, 2 },
1760 /*19*/	{ "fpassist", "look for a excessive value",
1761 	  "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1762 	  fpassists, 2 },
1763 /*20*/	{ "otherassistavx", "look for a excessive value",
1764 	  "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1765 	  otherassistavx , 2},
1766 /*21*/	{ "otherassistsse", "look for a excessive value",
1767 	  "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1768 	  otherassistsse, 2 },
1769 };
1770 
1771 #define HASWELL_COUNT 20
1772 static struct cpu_entry haswell[HASWELL_COUNT] = {
1773 /*1*/	{ "eff1", "thresh < .75",
1774 	  "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1775 	  efficiency1, 2 },
1776 /*2*/	{ "eff2", "thresh > 1.0",
1777 	  "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1778 	  efficiency2, 2 },
1779 /*3*/	{ "itlbmiss", "thresh > .05",
1780 	  "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1781 	  itlb_miss, 2 },
1782 /*4*/	{ "icachemiss", "thresh > .05",
1783 	  "pmcstat -s ICACHE.MISSES -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1784 	  icache_miss_has, 2 },
1785 /*5*/	{ "lcpstall", "thresh > .05",
1786 	  "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1787 	  lcp_stall, 2 },
1788 /*6*/	{ "cache1", "thresh >= .2",
1789 	  "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1790 	  cache1ib, 2 },
1791 /*7*/	{ "cache2", "thresh >= .2",
1792 	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT  -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1793 	  cache2has, 4 },
1794 /*8*/	{ "contested", "thresh >= .05",
1795 	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1796 	  contested_has, 2 },
1797 /*9*/	{ "datashare", "thresh >= .05",
1798 	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1799 	  datasharing_has, 2 },
1800 /*10*/	{ "blockstorefwd", "thresh >= .05",
1801 	  "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1802 	  blockstoreforward, 2 },
1803 /*11*/	{ "splitload", "thresh >= .1",
1804 	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOPS_RETIRED.SPLIT_LOADS -w 1",
1805 	  splitload , 2},
1806 /*12*/	{ "splitstore", "thresh >= .01",
1807 	  "pmcstat -s MEM_UOPS_RETIRED.SPLIT_STORES -s MEM_UOPS_RETIRED.ALL_STORES -w 1",
1808 	  splitstore, 2 },
1809 /*13*/	{ "aliasing_4k", "thresh >= .1",
1810 	  "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1811 	  aliasing, 2 },
1812 /*14*/	{ "dtlbmissload", "thresh >= .1",
1813 	  "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1814 	  dtlb_missload, 3 },
1815 /*15*/	{ "br_miss", "thresh >= .2",
1816 	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
1817 	  br_mispredict, 2 },
1818 /*16*/	{ "clears", "thresh >= .02",
1819 	  "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1820 	  clears, 4 },
1821 /*17*/	{ "microassist", "thresh >= .05",
1822 	  "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1823 	  microassist, 2 },
1824 /*18*/	{ "fpassist", "look for a excessive value",
1825 	  "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1826 	  fpassists, 2 },
1827 /*19*/	{ "otherassistavx", "look for a excessive value",
1828 	  "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1829 	  otherassistavx, 2 },
1830 /*20*/	{ "otherassistsse", "look for a excessive value",
1831 	  "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1832 	  otherassistsse, 2 },
1833 };
1834 
1835 
/*
 * explain_name_broad() -- describe one canned Broadwell expression.
 *
 * Prints the formula text for the expression called `name' followed by
 * a line telling the user which threshold hints at a tuning
 * opportunity.  A name that is not in the table produces an
 * "Unknown name" line and the threshold text "unknown entry".
 *
 * The formulas are plain text for the operator; nothing here is
 * evaluated.  Event-name typos and unbalanced parentheses in the
 * br_miss, microassist and itlbmiss texts have been corrected.
 */
static void
explain_name_broad(const char *name)
{
	static const struct {
		const char *name;	/* expression name as passed to -e */
		const char *formula;	/* text describing what is examined */
		const char *thresh;	/* threshold hint shown to the user */
	} notes[] = {
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .75" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "itlbmiss",
		  "Examine (7 * ITLB_MISSES.STLB_HIT_4K + ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "icachemiss",
		  "Examine ( 36.0 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P ??? may not be right \n",
		  "thresh > .05" },
		{ "lcpstall",
		  "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "cache2",
		  "Examine (36.0 * MEM_LOAD_UOPS_RETIRED.L3_HIT / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .2" },
		{ "contested",
		  "Examine ((MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) +  MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "datashare",
		  "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 7) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "br_miss",
		  "Examine BR_MISP_RETIRED.ALL_BRANCHES_PS / (BR_MISP_RETIRED.ALL_BRANCHES_PS + MACHINE_CLEARS.COUNT) *\n"
		  " (UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) /\n"
		  "(CPU_CLK_UNHALTED.THREAD * 4)\n",
		  "thresh >= .2" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  "          MACHINE_CLEARS.SMC + \n"
		  "          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "microassist",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS/UOPS_ISSUED.ANY) * (IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P))\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
	};
	const size_t nnotes = sizeof(notes) / sizeof(notes[0]);
	const char *mythresh = "unknown entry";
	size_t i;

	for (i = 0; i < nnotes; i++) {
		if (strcmp(name, notes[i].name) == 0)
			break;
	}
	if (i < nnotes) {
		/* The formula text never contains a conversion specifier. */
		printf("%s", notes[i].formula);
		mythresh = notes[i].thresh;
	} else {
		printf("Unknown name:%s\n", name);
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
1904 
1905 
1906 #define BROADWELL_COUNT 17
1907 static struct cpu_entry broadwell[BROADWELL_COUNT] = {
1908 /*1*/	{ "eff1", "thresh < .75",
1909 	  "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1910 	  efficiency1, 2 },
1911 /*2*/	{ "eff2", "thresh > 1.0",
1912 	  "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1913 	  efficiency2, 2 },
1914 /*3*/	{ "itlbmiss", "thresh > .05",
1915 	  "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -s ITLB_MISSES.STLB_HIT_4K -w 1",
1916 	  itlb_miss_broad, 3 },
1917 /*4*/	{ "icachemiss", "thresh > .05",
1918 	  "pmcstat -s ICACHE.MISSES -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1919 	  icache_miss_has, 2 },
1920 /*5*/	{ "lcpstall", "thresh > .05",
1921 	  "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1922 	  lcp_stall, 2 },
1923 /*6*/	{ "cache1", "thresh >= .1",
1924 	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_MISS  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1925 	  cache1broad, 2 },
1926 /*7*/	{ "cache2", "thresh >= .2",
1927 	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_HIT  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1928 	  cache2broad, 2 },
1929 /*8*/	{ "contested", "thresh >= .05",
1930 	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P  -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS -w 1",
1931 	  contestedbroad, 2 },
1932 /*9*/	{ "datashare", "thresh >= .05",
1933 	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1934 	  datasharing_has, 2 },
1935 /*10*/	{ "blockstorefwd", "thresh >= .05",
1936 	  "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1937 	  blockstoreforward, 2 },
1938 /*11*/	{ "aliasing_4k", "thresh >= .1",
1939 	  "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1940 	  aliasing_broad, 2 },
1941 /*12*/	{ "dtlbmissload", "thresh >= .1",
1942 	  "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT_4K -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1943 	  dtlb_missload, 3 },
1944 /*13*/	{ "br_miss", "thresh >= .2",
1945 	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.CYCLES -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",
1946 	  br_mispredict_broad, 7 },
1947 /*14*/	{ "clears", "thresh >= .02",
1948 	  "pmcstat -s MACHINE_CLEARS.CYCLES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1949 	  clears_broad, 5 },
1950 /*15*/	{ "fpassist", "look for a excessive value",
1951 	  "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1952 	  fpassists, 2 },
1953 /*16*/	{ "otherassistavx", "look for a excessive value",
1954 	  "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1955 	  otherassistavx, 2 },
1956 /*17*/	{ "microassist", "thresh >= .2",
1957 	  "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS  -w 1",
1958 	  microassist_broad, 4 },
1959 };
1960 
1961 
1962 static void
1963 set_sandybridge(void)
1964 {
1965 	strcpy(the_cpu.cputype, "SandyBridge PMC");
1966 	the_cpu.number = SANDY_BRIDGE_COUNT;
1967 	the_cpu.ents = sandy_bridge;
1968 	the_cpu.explain = explain_name_sb;
1969 }
1970 
1971 static void
1972 set_ivybridge(void)
1973 {
1974 	strcpy(the_cpu.cputype, "IvyBridge PMC");
1975 	the_cpu.number = IVY_BRIDGE_COUNT;
1976 	the_cpu.ents = ivy_bridge;
1977 	the_cpu.explain = explain_name_ib;
1978 }
1979 
1980 
1981 static void
1982 set_haswell(void)
1983 {
1984 	strcpy(the_cpu.cputype, "HASWELL PMC");
1985 	the_cpu.number = HASWELL_COUNT;
1986 	the_cpu.ents = haswell;
1987 	the_cpu.explain = explain_name_has;
1988 }
1989 
1990 
1991 static void
1992 set_broadwell(void)
1993 {
1994 	strcpy(the_cpu.cputype, "HASWELL PMC");
1995 	the_cpu.number = BROADWELL_COUNT;
1996 	the_cpu.ents = broadwell;
1997 	the_cpu.explain = explain_name_broad;
1998 }
1999 
2000 
2001 static int
2002 set_expression(const char *name)
2003 {
2004 	int found = 0, i;
2005 	for(i=0 ; i< the_cpu.number; i++) {
2006 		if (strcmp(name, the_cpu.ents[i].name) == 0) {
2007 			found = 1;
2008 			expression = the_cpu.ents[i].func;
2009 			command = the_cpu.ents[i].command;
2010 			threshold = the_cpu.ents[i].thresh;
2011 			if  (the_cpu.ents[i].counters_required > max_pmc_counters) {
2012 				printf("Test %s requires that the CPU have %d counters and this CPU has only %d\n",
2013 				       the_cpu.ents[i].name,
2014 				       the_cpu.ents[i].counters_required, max_pmc_counters);
2015 				printf("Sorry this test can not be run\n");
2016 				if (run_all == 0) {
2017 					exit(-1);
2018 				} else {
2019 					return(-1);
2020 				}
2021 			}
2022 			break;
2023 		}
2024 	}
2025 	if (!found) {
2026 		printf("For CPU type %s we have no expression:%s\n",
2027 		       the_cpu.cputype, name);
2028 		exit(-1);
2029 	}
2030 	return(0);
2031 }
2032 
2033 
2034 
2035 
2036 
2037 static int
2038 validate_expression(char *name)
2039 {
2040 	int i, found;
2041 
2042 	found = 0;
2043 	for(i=0 ; i< the_cpu.number; i++) {
2044 		if (strcmp(name, the_cpu.ents[i].name) == 0) {
2045 			found = 1;
2046 			break;
2047 		}
2048 	}
2049 	if (!found) {
2050 		return(-1);
2051 	}
2052 	return (0);
2053 }
2054 
2055 static void
2056 do_expression(struct counters *cpu, int pos)
2057 {
2058 	if (expression == NULL)
2059 		return;
2060 	(*expression)(cpu, pos);
2061 }
2062 
2063 static void
2064 process_header(int idx, char *p)
2065 {
2066 	struct counters *up;
2067 	int i, len, nlen;
2068 	/*
2069 	 * Given header element idx, at p in
2070 	 * form 's/NN/nameof'
2071 	 * process the entry to pull out the name and
2072 	 * the CPU number.
2073 	 */
2074 	if (strncmp(p, "s/", 2)) {
2075 		printf("Check -- invalid header no s/ in %s\n",
2076 		       p);
2077 		return;
2078 	}
2079 	up = &cnts[idx];
2080 	up->cpu = strtol(&p[2], NULL, 10);
2081 	len = strlen(p);
2082 	for (i=2; i<len; i++) {
2083 		if (p[i] == '/') {
2084 			nlen = strlen(&p[(i+1)]);
2085 			if (nlen < (MAX_NLEN-1)) {
2086 				strcpy(up->counter_name, &p[(i+1)]);
2087 			} else {
2088 				strncpy(up->counter_name, &p[(i+1)], (MAX_NLEN-1));
2089 			}
2090 		}
2091 	}
2092 }
2093 
2094 static void
2095 build_counters_from_header(FILE *io)
2096 {
2097 	char buffer[8192], *p;
2098 	int i, len, cnt;
2099 	size_t mlen;
2100 
2101 	/* We have a new start, lets
2102 	 * setup our headers and cpus.
2103 	 */
2104 	if (fgets(buffer, sizeof(buffer), io) == NULL) {
2105 		printf("First line can't be read from file err:%d\n", errno);
2106 		return;
2107 	}
2108 	/*
2109 	 * Ok output is an array of counters. Once
2110 	 * we start to read the values in we must
2111 	 * put them in there slot to match there CPU and
2112 	 * counter being updated. We create a mass array
2113 	 * of the counters, filling in the CPU and
2114 	 * counter name.
2115 	 */
2116 	/* How many do we get? */
2117 	len = strlen(buffer);
2118 	for (i=0, cnt=0; i<len; i++) {
2119 		if (strncmp(&buffer[i], "s/", 2) == 0) {
2120 			cnt++;
2121 			for(;i<len;i++) {
2122 				if (buffer[i] == ' ')
2123 					break;
2124 			}
2125 		}
2126 	}
2127 	mlen = sizeof(struct counters) * cnt;
2128 	cnts = malloc(mlen);
2129 	ncnts = cnt;
2130 	if (cnts == NULL) {
2131 		printf("No memory err:%d\n", errno);
2132 		return;
2133 	}
2134 	memset(cnts, 0, mlen);
2135 	for (i=0, cnt=0; i<len; i++) {
2136 		if (strncmp(&buffer[i], "s/", 2) == 0) {
2137 			p = &buffer[i];
2138 			for(;i<len;i++) {
2139 				if (buffer[i] == ' ') {
2140 					buffer[i] = 0;
2141 					break;
2142 				}
2143 			}
2144 			process_header(cnt, p);
2145 			cnt++;
2146 		}
2147 	}
2148 	if (verbose)
2149 		printf("We have %d entries\n", cnt);
2150 }
2151 extern int max_to_collect;
2152 int max_to_collect = MAX_COUNTER_SLOTS;
2153 
2154 static int
2155 read_a_line(FILE *io)
2156 {
2157 	char buffer[8192], *p, *stop;
2158 	int pos, i;
2159 
2160 	if (fgets(buffer, sizeof(buffer), io) == NULL) {
2161 		return(0);
2162 	}
2163 	p = buffer;
2164 	for (i=0; i<ncnts; i++) {
2165 		pos = cnts[i].pos;
2166 		cnts[i].vals[pos] = strtol(p, &stop, 0);
2167 		cnts[i].pos++;
2168 		cnts[i].sum += cnts[i].vals[pos];
2169 		p = stop;
2170 	}
2171 	return (1);
2172 }
2173 
2174 extern int cpu_count_out;
2175 int cpu_count_out=0;
2176 
2177 static void
2178 print_header(void)
2179 {
2180 	int i, cnt, printed_cnt;
2181 
2182 	printf("*********************************\n");
2183 	for(i=0, cnt=0; i<MAX_CPU; i++) {
2184 		if (glob_cpu[i]) {
2185 			cnt++;
2186 		}
2187 	}
2188 	cpu_count_out = cnt;
2189 	for(i=0, printed_cnt=0; i<MAX_CPU; i++) {
2190 		if (glob_cpu[i]) {
2191 			printf("CPU%d", i);
2192 			printed_cnt++;
2193 		}
2194 		if (printed_cnt == cnt) {
2195 			printf("\n");
2196 			break;
2197 		} else {
2198 			printf("\t");
2199 		}
2200 	}
2201 }
2202 
2203 static void
2204 lace_cpus_together(void)
2205 {
2206 	int i, j, lace_cpu;
2207 	struct counters *cpat, *at;
2208 
2209 	for(i=0; i<ncnts; i++) {
2210 		cpat = &cnts[i];
2211 		if (cpat->next_cpu) {
2212 			/* Already laced in */
2213 			continue;
2214 		}
2215 		lace_cpu = cpat->cpu;
2216 		if (lace_cpu >= MAX_CPU) {
2217 			printf("CPU %d to big\n", lace_cpu);
2218 			continue;
2219 		}
2220 		if (glob_cpu[lace_cpu] == NULL) {
2221 			glob_cpu[lace_cpu] = cpat;
2222 		} else {
2223 			/* Already processed this cpu */
2224 			continue;
2225 		}
2226 		/* Ok look forward for cpu->cpu and link in */
2227 		for(j=(i+1); j<ncnts; j++) {
2228 			at = &cnts[j];
2229 			if (at->next_cpu) {
2230 				continue;
2231 			}
2232 			if (at->cpu == lace_cpu) {
2233 				/* Found one */
2234 				cpat->next_cpu = at;
2235 				cpat = at;
2236 			}
2237 		}
2238 	}
2239 }
2240 
2241 
2242 static void
2243 process_file(char *filename)
2244 {
2245 	FILE *io;
2246 	int i;
2247 	int line_at, not_done;
2248 	pid_t pid_of_command=0;
2249 
2250 	if (filename ==  NULL) {
2251 		io = my_popen(command, "r", &pid_of_command);
2252 		if (io == NULL) {
2253 			printf("Can't popen the command %s\n", command);
2254 			return;
2255 		}
2256 	} else {
2257 		io = fopen(filename, "r");
2258 		if (io == NULL) {
2259 			printf("Can't process file %s err:%d\n",
2260 			       filename, errno);
2261 			return;
2262 		}
2263 	}
2264 	build_counters_from_header(io);
2265 	if (cnts == NULL) {
2266 		/* Nothing we can do */
2267 		printf("Nothing to do -- no counters built\n");
2268 		if (filename) {
2269 			fclose(io);
2270 		} else {
2271 			my_pclose(io, pid_of_command);
2272 		}
2273 		return;
2274 	}
2275 	lace_cpus_together();
2276 	print_header();
2277 	if (verbose) {
2278 		for (i=0; i<ncnts; i++) {
2279 			printf("Counter:%s cpu:%d index:%d\n",
2280 			       cnts[i].counter_name,
2281 			       cnts[i].cpu, i);
2282 		}
2283 	}
2284 	line_at = 0;
2285 	not_done = 1;
2286 	while(not_done) {
2287 		if (read_a_line(io)) {
2288 			line_at++;
2289 		} else {
2290 			break;
2291 		}
2292 		if (line_at >= max_to_collect) {
2293 			not_done = 0;
2294 		}
2295 		if (filename == NULL) {
2296 			int cnt;
2297 			/* For the ones we dynamically open we print now */
2298 			for(i=0, cnt=0; i<MAX_CPU; i++) {
2299 				do_expression(glob_cpu[i], (line_at-1));
2300 				cnt++;
2301 				if (cnt == cpu_count_out) {
2302 					printf("\n");
2303 					break;
2304 				} else {
2305 					printf("\t");
2306 				}
2307 			}
2308 		}
2309 	}
2310 	if (filename) {
2311 		fclose(io);
2312 	} else {
2313 		my_pclose(io, pid_of_command);
2314 	}
2315 }
2316 #if defined(__amd64__)
2317 #define cpuid(in,a,b,c,d)\
2318   asm("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (in));
2319 
2320 static __inline void
2321 do_cpuid(u_int ax, u_int cx, u_int *p)
2322 {
2323 	__asm __volatile("cpuid"
2324 			 : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3])
2325 			 :  "0" (ax), "c" (cx) );
2326 }
2327 
2328 #else
2329 #define cpuid(in, a, b, c, d)
2330 #define do_cpuid(ax, cx, p)
2331 #endif
2332 
2333 static void
2334 get_cpuid_set(void)
2335 {
2336 	unsigned long eax, ebx, ecx, edx;
2337 	int model;
2338 	pid_t pid_of_command=0;
2339 	size_t sz, len;
2340 	FILE *io;
2341 	char linebuf[1024], *str;
2342 	u_int reg[4];
2343 
2344 	eax = ebx = ecx = edx = 0;
2345 
2346 	cpuid(0, eax, ebx, ecx, edx);
2347 	if (ebx == 0x68747541) {
2348 		printf("AMD processors are not supported by this program\n");
2349 		printf("Sorry\n");
2350 		exit(0);
2351 	} else if (ebx == 0x6972794) {
2352 		printf("Cyrix processors are not supported by this program\n");
2353 		printf("Sorry\n");
2354 		exit(0);
2355 	} else if (ebx == 0x756e6547) {
2356 		printf("Genuine Intel\n");
2357 	} else {
2358 		printf("Unknown processor type 0x%lx Only Intel AMD64 types are supported by this routine!\n", ebx);
2359 		exit(0);
2360 	}
2361 	cpuid(1, eax, ebx, ecx, edx);
2362 	model = (((eax & 0xF0000) >> 12) | ((eax & 0xF0) >> 4));
2363 	printf("CPU model is 0x%x id:0x%lx\n", model, eax);
2364 	switch (eax & 0xF00) {
2365 	case 0x500:		/* Pentium family processors */
2366 		printf("Intel Pentium P5\n");
2367 		goto not_supported;
2368 		break;
2369 	case 0x600:		/* Pentium Pro, Celeron, Pentium II & III */
2370 		switch (model) {
2371 		case 0x1:
2372 			printf("Intel Pentium P6\n");
2373 			goto not_supported;
2374 			break;
2375 		case 0x3:
2376 		case 0x5:
2377 			printf("Intel PII\n");
2378 			goto not_supported;
2379 			break;
2380 		case 0x6: case 0x16:
2381 			printf("Intel CL\n");
2382 			goto not_supported;
2383 			break;
2384 		case 0x7: case 0x8: case 0xA: case 0xB:
2385 			printf("Intel PIII\n");
2386 			goto not_supported;
2387 			break;
2388 		case 0x9: case 0xD:
2389 			printf("Intel PM\n");
2390 			goto not_supported;
2391 			break;
2392 		case 0xE:
2393 			printf("Intel CORE\n");
2394 			goto not_supported;
2395 			break;
2396 		case 0xF:
2397 			printf("Intel CORE2\n");
2398 			goto not_supported;
2399 			break;
2400 		case 0x17:
2401 			printf("Intel CORE2EXTREME\n");
2402 			goto not_supported;
2403 			break;
2404 		case 0x1C:	/* Per Intel document 320047-002. */
2405 			printf("Intel ATOM\n");
2406 			goto not_supported;
2407 			break;
2408 		case 0x1A:
2409 		case 0x1E:	/*
2410 				 * Per Intel document 253669-032 9/2009,
2411 				 * pages A-2 and A-57
2412 				 */
2413 		case 0x1F:	/*
2414 				 * Per Intel document 253669-032 9/2009,
2415 				 * pages A-2 and A-57
2416 				 */
2417 			printf("Intel COREI7\n");
2418 			goto not_supported;
2419 			break;
2420 		case 0x2E:
2421 			printf("Intel NEHALEM\n");
2422 			goto not_supported;
2423 			break;
2424 		case 0x25:	/* Per Intel document 253669-033US 12/2009. */
2425 		case 0x2C:	/* Per Intel document 253669-033US 12/2009. */
2426 			printf("Intel WESTMERE\n");
2427 			goto not_supported;
2428 			break;
2429 		case 0x2F:	/* Westmere-EX, seen in wild */
2430 			printf("Intel WESTMERE\n");
2431 			goto not_supported;
2432 			break;
2433 		case 0x2A:	/* Per Intel document 253669-039US 05/2011. */
2434 			printf("Intel SANDYBRIDGE\n");
2435 			set_sandybridge();
2436 			break;
2437 		case 0x2D:	/* Per Intel document 253669-044US 08/2012. */
2438 			printf("Intel SANDYBRIDGE_XEON\n");
2439 			set_sandybridge();
2440 			break;
2441 		case 0x3A:	/* Per Intel document 253669-043US 05/2012. */
2442 			printf("Intel IVYBRIDGE\n");
2443 			set_ivybridge();
2444 			break;
2445 		case 0x3E:	/* Per Intel document 325462-045US 01/2013. */
2446 			printf("Intel IVYBRIDGE_XEON\n");
2447 			set_ivybridge();
2448 			break;
2449 		case 0x3F:	/* Per Intel document 325462-045US 09/2014. */
2450 			printf("Intel HASWELL (Xeon)\n");
2451 			set_haswell();
2452 			break;
2453 		case 0x3C:	/* Per Intel document 325462-045US 01/2013. */
2454 		case 0x45:
2455 		case 0x46:
2456 			printf("Intel HASWELL\n");
2457 			set_haswell();
2458 			break;
2459 
2460 		case 0x4e:
2461 		case 0x5e:
2462 			printf("Intel SKY-LAKE\n");
2463 			goto not_supported;
2464 			break;
2465 		case 0x3D:
2466 		case 0x47:
2467 			printf("Intel BROADWELL\n");
2468 			set_broadwell();
2469 			break;
2470 		case 0x4f:
2471 		case 0x56:
2472 			printf("Intel BROADWEL (Xeon)\n");
2473 			set_broadwell();
2474 			break;
2475 
2476 		case 0x4D:
2477 			/* Per Intel document 330061-001 01/2014. */
2478 			printf("Intel ATOM_SILVERMONT\n");
2479 			goto not_supported;
2480 			break;
2481 		default:
2482 			printf("Intel model 0x%x is not known -- sorry\n",
2483 			       model);
2484 			goto not_supported;
2485 			break;
2486 		}
2487 		break;
2488 	case 0xF00:		/* P4 */
2489 		printf("Intel unknown model %d\n", model);
2490 		goto not_supported;
2491 		break;
2492 	}
2493 	do_cpuid(0xa, 0, reg);
2494 	max_pmc_counters = (reg[3] & 0x0000000f) + 1;
2495 	printf("We have %d PMC counters to work with\n", max_pmc_counters);
2496 	/* Ok lets load the list of all known PMC's */
2497 	io = my_popen("/usr/sbin/pmccontrol -L", "r", &pid_of_command);
2498 	if (valid_pmcs == NULL) {
2499 		/* Likely */
2500 		pmc_allocated_cnt = PMC_INITIAL_ALLOC;
2501 		sz = sizeof(char *) * pmc_allocated_cnt;
2502 		valid_pmcs = malloc(sz);
2503 		if (valid_pmcs == NULL) {
2504 			printf("No memory allocation fails at startup?\n");
2505 			exit(-1);
2506 		}
2507 		memset(valid_pmcs, 0, sz);
2508 	}
2509 
2510 	while (fgets(linebuf, sizeof(linebuf), io) != NULL) {
2511 		if (linebuf[0] != '\t') {
2512 			/* sometimes headers ;-) */
2513 			continue;
2514 		}
2515 		len = strlen(linebuf);
2516 		if (linebuf[(len-1)] == '\n') {
2517 			/* Likely */
2518 			linebuf[(len-1)] = 0;
2519 		}
2520 		str = &linebuf[1];
2521 		len = strlen(str) + 1;
2522 		valid_pmcs[valid_pmc_cnt] = malloc(len);
2523 		if (valid_pmcs[valid_pmc_cnt] == NULL) {
2524 			printf("No memory2 allocation fails at startup?\n");
2525 			exit(-1);
2526 		}
2527 		memset(valid_pmcs[valid_pmc_cnt], 0, len);
2528 		strcpy(valid_pmcs[valid_pmc_cnt], str);
2529 		valid_pmc_cnt++;
2530 		if (valid_pmc_cnt >= pmc_allocated_cnt) {
2531 			/* Got to expand -- unlikely */
2532 			char **more;
2533 
2534 			sz = sizeof(char *) * (pmc_allocated_cnt * 2);
2535 			more = malloc(sz);
2536 			if (more == NULL) {
2537 				printf("No memory3 allocation fails at startup?\n");
2538 				exit(-1);
2539 			}
2540 			memset(more, 0, sz);
2541 			memcpy(more, valid_pmcs, sz);
2542 			pmc_allocated_cnt *= 2;
2543 			free(valid_pmcs);
2544 			valid_pmcs = more;
2545 		}
2546 	}
2547 	my_pclose(io, pid_of_command);
2548 	return;
2549 not_supported:
2550 	printf("Not supported\n");
2551 	exit(-1);
2552 }
2553 
2554 static void
2555 explain_all(void)
2556 {
2557 	int i;
2558 	printf("For CPU's of type %s the following expressions are available:\n",the_cpu.cputype);
2559 	printf("-------------------------------------------------------------\n");
2560 	for(i=0; i<the_cpu.number; i++){
2561 		printf("For -e %s ", the_cpu.ents[i].name);
2562 		(*the_cpu.explain)(the_cpu.ents[i].name);
2563 		printf("----------------------------\n");
2564 	}
2565 }
2566 
/*
 * test_for_a_pmc() -- try to count `pmc' once and report Pass/Failed.
 *
 * `out_so_far' is the number of characters already printed on the
 * current line; the result is padded out to column 50.  pmcstat is run
 * on CPU 0 for the named PMC.  The test passes when the first output
 * line mentions the PMC name and a second line can be read; that
 * second line, minus its leading blanks, is printed right aligned in
 * a 20 character field after "Pass".  A first line containing "ERROR"
 * fails the test.
 *
 * Compared with the earlier version: the command is built with a
 * bounded snprintf(), a name sitting at the very end of the header
 * line is matched too, and leading blanks of the value line are
 * skipped one at a time so no digit of the value is dropped.
 */
static void
test_for_a_pmc(const char *pmc, int out_so_far)
{
	FILE *io;
	pid_t pid_of_command = 0;
	char my_command[1024];
	char line[1024];
	int len, llen, i, j, k;

	for (i = out_so_far; i < 50; i++) {
		printf(" ");
	}
	len = snprintf(my_command, sizeof(my_command),
	    "/usr/sbin/pmcstat -w .25 -c 0 -s %s", pmc);
	if (len < 0 || (size_t)len >= sizeof(my_command)) {
		printf("Failed -- PMC name is too long\n");
		return;
	}
	io = my_popen(my_command, "r", &pid_of_command);
	if (io == NULL) {
		printf("Failed -- popen fails\n");
		return;
	}
	/* What we expect to find in the header line */
	len = strlen(pmc);
	if (fgets(line, sizeof(line), io) == NULL) {
		printf("Failed -- no output from pmstat\n");
		goto out;
	}
	llen = strlen(line);
	if (llen > 0 && line[(llen-1)] == '\n') {
		line[(llen-1)] = 0;
		llen--;
	}
	/* i == llen - len is the last offset where the name still fits. */
	for (i = 2; i <= (llen - len); i++) {
		if (strncmp(&line[i], "ERROR", 5) == 0) {
			printf("Failed %s\n", line);
			goto out;
		}
		if (strncmp(&line[i], pmc, len) != 0)
			continue;

		/* Header matched; the next line carries the count. */
		if (fgets(line, sizeof(line), io) == NULL) {
			printf("Failed -- no second output from pmstat\n");
			goto out;
		}
		for (j = 0; line[j] == ' '; j++)
			;
		printf("Pass");
		len = strlen(&line[j]);
		for (k = len; k < 20; k++) {
			printf(" ");
		}
		if (len) {
			/* The value line still carries its newline. */
			printf("%s", &line[j]);
		} else {
			printf("\n");
		}
		goto out;
	}
	printf("Failed -- '%s' not '%s'\n", line, pmc);
out:
	my_pclose(io, pid_of_command);
}
2639 
/*
 * add_it_to() -- add `name' to the string list `vars' unless present.
 *
 * The first `cur_cnt' entries of `vars' are searched.  If `name' is
 * already among them nothing changes and 0 is returned.  Otherwise a
 * heap copy of `name' is stored in vars[cur_cnt] and 1 is returned;
 * the caller is responsible for bumping its own count.
 *
 * The program exits if vars[cur_cnt] is already occupied or if the
 * copy cannot be allocated.
 */
static int
add_it_to(char **vars, int cur_cnt, char *name)
{
	char **seen, **slot;
	size_t need;

	for (seen = vars; seen < vars + cur_cnt; seen++) {
		if (strcmp(*seen, name) == 0) {
			/* Already have */
			return (0);
		}
	}
	slot = &vars[cur_cnt];
	if (*slot != NULL) {
		printf("Cur_cnt:%d filled with %s??\n",
		       cur_cnt, *slot);
		exit(-1);
	}
	/* Ok its new; need counts the terminating NUL as well */
	need = strlen(name) + 1;
	*slot = malloc(need);
	if (*slot == NULL) {
		printf("No memory %s\n", __FUNCTION__);
		exit(-1);
	}
	memcpy(*slot, name, need);
	return (1);
}
2667 
2668 static char *
2669 build_command_for_exp(struct expression *exp)
2670 {
2671 	/*
2672 	 * Build the pmcstat command to handle
2673 	 * the passed in expression.
2674 	 * /usr/sbin/pmcstat -w 1 -s NNN -s QQQ
2675 	 * where NNN and QQQ represent the PMC's in the expression
2676 	 * uniquely..
2677 	 */
2678 	char forming[1024];
2679 	int cnt_pmc, alloced_pmcs, i;
2680 	struct expression *at;
2681 	char **vars, *cmd;
2682 	size_t mal;
2683 
2684 	alloced_pmcs = cnt_pmc = 0;
2685 	/* first how many do we have */
2686 	at = exp;
2687 	while (at) {
2688 		if (at->type == TYPE_VALUE_PMC) {
2689 			cnt_pmc++;
2690 		}
2691 		at = at->next;
2692 	}
2693 	if (cnt_pmc == 0) {
2694 		printf("No PMC's in your expression -- nothing to do!!\n");
2695 		exit(0);
2696 	}
2697 	mal = cnt_pmc * sizeof(char *);
2698 	vars = malloc(mal);
2699 	if (vars == NULL) {
2700 		printf("No memory\n");
2701 		exit(-1);
2702 	}
2703 	memset(vars, 0, mal);
2704 	at = exp;
2705 	while (at) {
2706 		if (at->type == TYPE_VALUE_PMC) {
2707 			if(add_it_to(vars, alloced_pmcs, at->name)) {
2708 				alloced_pmcs++;
2709 			}
2710 		}
2711 		at = at->next;
2712 	}
2713 	/* Now we have a unique list in vars so create our command */
2714 	mal = 23; /*	"/usr/sbin/pmcstat -w 1"  + \0 */
2715 	for(i=0; i<alloced_pmcs; i++) {
2716 		mal += strlen(vars[i]) + 4;	/* var + " -s " */
2717 	}
2718 	cmd = malloc((mal+2));
2719 	if (cmd == NULL) {
2720 		printf("%s out of mem\n", __FUNCTION__);
2721 		exit(-1);
2722 	}
2723 	memset(cmd, 0, (mal+2));
2724 	strcpy(cmd, "/usr/sbin/pmcstat -w 1");
2725 	at = exp;
2726 	for(i=0; i<alloced_pmcs; i++) {
2727 		sprintf(forming, " -s %s", vars[i]);
2728 		strcat(cmd, forming);
2729 		free(vars[i]);
2730 		vars[i] = NULL;
2731 	}
2732 	free(vars);
2733 	return(cmd);
2734 }
2735 
2736 static int
2737 user_expr(struct counters *cpu, int pos)
2738 {
2739 	int ret;
2740 	double res;
2741 	struct counters *var;
2742 	struct expression *at;
2743 
2744 	at = master_exp;
2745 	while (at) {
2746 		if (at->type == TYPE_VALUE_PMC) {
2747 			var = find_counter(cpu, at->name);
2748 			if (var == NULL) {
2749 				printf("%s:Can't find counter %s?\n", __FUNCTION__, at->name);
2750 				exit(-1);
2751 			}
2752 			if (pos != -1) {
2753 				at->value = var->vals[pos] * 1.0;
2754 			} else {
2755 				at->value = var->sum * 1.0;
2756 			}
2757 		}
2758 		at = at->next;
2759 	}
2760 	res = run_expr(master_exp, 1, NULL);
2761 	ret = printf("%1.3f", res);
2762 	return(ret);
2763 }
2764 
2765 
2766 static void
2767 set_manual_exp(struct expression *exp)
2768 {
2769 	expression = user_expr;
2770 	command = build_command_for_exp(exp);
2771 	threshold = "User defined threshold";
2772 }
2773 
2774 static void
2775 run_tests(void)
2776 {
2777 	int i, lenout;
2778 	printf("Running tests on %d PMC's this may take some time\n", valid_pmc_cnt);
2779 	printf("------------------------------------------------------------------------\n");
2780 	for(i=0; i<valid_pmc_cnt; i++) {
2781 		lenout = printf("%s", valid_pmcs[i]);
2782 		fflush(stdout);
2783 		test_for_a_pmc(valid_pmcs[i], lenout);
2784 	}
2785 }
2786 static void
2787 list_all(void)
2788 {
2789 	int i, cnt, j;
2790 	printf("PMC                                               Abbreviation\n");
2791 	printf("--------------------------------------------------------------\n");
2792 	for(i=0; i<valid_pmc_cnt; i++) {
2793 		cnt = printf("%s", valid_pmcs[i]);
2794 		for(j=cnt; j<52; j++) {
2795 			printf(" ");
2796 		}
2797 		printf("%%%d\n", i);
2798 	}
2799 }
2800 
2801 
2802 int
2803 main(int argc, char **argv)
2804 {
2805 	int i, j, cnt;
2806 	char *filename=NULL;
2807 	const char *name=NULL;
2808 	int help_only = 0;
2809 	int test_mode = 0;
2810 	int test_at = 0;
2811 
2812 	get_cpuid_set();
2813 	memset(glob_cpu, 0, sizeof(glob_cpu));
2814 	while ((i = getopt(argc, argv, "ALHhvm:i:?e:TE:")) != -1) {
2815 		switch (i) {
2816 		case 'A':
2817 			run_all = 1;
2818 			break;
2819 		case 'L':
2820 			list_all();
2821 			return(0);
2822 		case 'H':
2823 			printf("**********************************\n");
2824 			explain_all();
2825 			printf("**********************************\n");
2826 			return(0);
2827 			break;
2828 		case 'T':
2829 			test_mode = 1;
2830 			break;
2831 		case 'E':
2832 			master_exp = parse_expression(optarg);
2833 			if (master_exp) {
2834 				set_manual_exp(master_exp);
2835 			}
2836 			break;
2837 		case 'e':
2838 			if (validate_expression(optarg)) {
2839 				printf("Unknown expression %s\n", optarg);
2840 				return(0);
2841 			}
2842 			name = optarg;
2843 			set_expression(optarg);
2844 			break;
2845 		case 'm':
2846 			max_to_collect = strtol(optarg, NULL, 0);
2847 			if (max_to_collect > MAX_COUNTER_SLOTS) {
2848 				/* You can't collect more than max in array */
2849 				max_to_collect = MAX_COUNTER_SLOTS;
2850 			}
2851 			break;
2852 		case 'v':
2853 			verbose++;
2854 			break;
2855 		case 'h':
2856 			help_only = 1;
2857 			break;
2858 		case 'i':
2859 			filename = optarg;
2860 			break;
2861 		case '?':
2862 		default:
2863 		use:
2864 			printf("Use %s [ -i inputfile -v -m max_to_collect -e expr -E -h -? -H]\n",
2865 			       argv[0]);
2866 			printf("-i inputfile -- use source as inputfile not stdin (if stdin collect)\n");
2867 			printf("-v -- verbose dump debug type things -- you don't want this\n");
2868 			printf("-m N -- maximum to collect is N measurements\n");
2869 			printf("-e expr-name -- Do expression expr-name\n");
2870 			printf("-E 'your expression' -- Do your expression\n");
2871 			printf("-h -- Don't do the expression I put in -e xxx just explain what it does and exit\n");
2872 			printf("-H -- Don't run anything, just explain all canned expressions\n");
2873 			printf("-T -- Test all PMC's defined by this processor\n");
2874 			printf("-A -- Run all canned tests\n");
2875 			return(0);
2876 			break;
2877 		}
2878 	}
2879 	if ((run_all == 0) && (name == NULL) && (filename == NULL) &&
2880 	    (test_mode == 0) && (master_exp == NULL)) {
2881 		printf("Without setting an expression we cannot dynamically gather information\n");
2882 		printf("you must supply a filename (and you probably want verbosity)\n");
2883 		goto use;
2884 	}
2885 	if (run_all && max_to_collect > 10) {
2886 		max_to_collect = 3;
2887 	}
2888 	if (test_mode) {
2889 		run_tests();
2890 		return(0);
2891 	}
2892 	printf("*********************************\n");
2893 	if ((master_exp == NULL) && name) {
2894 		(*the_cpu.explain)(name);
2895 	} else if (master_exp) {
2896 		printf("Examine your expression ");
2897 		print_exp(master_exp);
2898 		printf("User defined threshold\n");
2899 	}
2900 	if (help_only) {
2901 		return(0);
2902 	}
2903 	if (run_all) {
2904 	more:
2905 		name = the_cpu.ents[test_at].name;
2906 		printf("***Test %s (threshold %s)****\n", name, the_cpu.ents[test_at].thresh);
2907 		test_at++;
2908 		if (set_expression(name) == -1) {
2909 			if (test_at >= the_cpu.number) {
2910 				goto done;
2911 			} else
2912 				goto more;
2913 		}
2914 
2915 	}
2916 	process_file(filename);
2917 	if (verbose >= 2) {
2918 		for (i=0; i<ncnts; i++) {
2919 			printf("Counter:%s cpu:%d index:%d\n",
2920 			       cnts[i].counter_name,
2921 			       cnts[i].cpu, i);
2922 			for(j=0; j<cnts[i].pos; j++) {
2923 				printf(" val - %ld\n", (long int)cnts[i].vals[j]);
2924 			}
2925 			printf(" sum - %ld\n", (long int)cnts[i].sum);
2926 		}
2927 	}
2928 	if (expression == NULL) {
2929 		return(0);
2930 	}
2931 	if (max_to_collect > 1) {
2932 		for(i=0, cnt=0; i<MAX_CPU; i++) {
2933 			if (glob_cpu[i]) {
2934 				do_expression(glob_cpu[i], -1);
2935 				cnt++;
2936 				if (cnt == cpu_count_out) {
2937 					printf("\n");
2938 					break;
2939 				} else {
2940 					printf("\t");
2941 				}
2942 			}
2943 		}
2944 	}
2945 	if (run_all && (test_at < the_cpu.number)) {
2946 		memset(glob_cpu, 0, sizeof(glob_cpu));
2947 		ncnts = 0;
2948 		printf("*********************************\n");
2949 		goto more;
2950 	} else if (run_all) {
2951 	done:
2952 		printf("*********************************\n");
2953 	}
2954 	return(0);
2955 }
2956