xref: /freebsd/usr.sbin/pmcstudy/pmcstudy.c (revision a90b9d0159070121c221b966469c3e36d912bf82)
1 /*-
2  * Copyright (c) 2014-2015 Netflix, Inc.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer,
9  *    in this position and unchanged.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. The name of the author may not be used to endorse or promote products
14  *    derived from this software without specific prior written permission
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 #include <sys/types.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <unistd.h>
31 #include <string.h>
32 #include <strings.h>
33 #include <sys/errno.h>
34 #include <signal.h>
35 #include <sys/wait.h>
36 #include <getopt.h>
37 #include "eval_expr.h"
/* NOTE(review): presumably the number of PMCs sampled per run; set elsewhere - confirm. */
static int max_pmc_counters = 1;
/* NOTE(review): looks like a "run every expression" flag; not used in this part of the file. */
static int run_all = 0;

#define MAX_COUNTER_SLOTS 1024	/* number of entries in counters.vals[] */
#define MAX_NLEN 64		/* size of counters.counter_name[] */
#define MAX_CPU 64		/* number of entries in glob_cpu[] */
static int verbose = 0;

/*
 * Each non-static global below is declared extern immediately before
 * it is defined, so the definition always has a prior declaration.
 */
extern char **environ;
extern struct expression *master_exp;
struct expression *master_exp=NULL;

/* NOTE(review): presumably the initial capacity of valid_pmcs - confirm. */
#define PMC_INITIAL_ALLOC 512
extern char **valid_pmcs;
char **valid_pmcs = NULL;
extern int valid_pmc_cnt;
int valid_pmc_cnt=0;
extern int pmc_allocated_cnt;
int pmc_allocated_cnt=0;
57 
/*
 * The following are two variants of popen and pclose, with
 * the caveat that they give you the PID so that you
 * can supply it to pclose, which can then send a SIGTERM
 * to the process.
 */
/*
 * my_popen --
 *	Run `command' under "/bin/sh -c" with its stdin, stdout and stderr
 *	attached to pipes, and return one pipe end as a stdio stream.
 *
 *	dir must be "r" (the stream reads the child's combined
 *	stdout/stderr) or "w" (the stream feeds the child's stdin).  The
 *	pipe that is not handed back is closed in the parent, so in "r"
 *	mode the child sees EOF on stdin and in "w" mode its output pipe
 *	has no reader.
 *
 *	On success the child's pid is stored through p_pid and the stream
 *	is returned.  On failure NULL is returned with errno set (EINVAL
 *	for a bad dir, otherwise the error from pipe(), fork() or
 *	fdopen()) and *p_pid is left untouched.
 */
static FILE *
my_popen(const char *command, const char *dir, pid_t *p_pid)
{
	FILE *io;
	int pdesin[2], pdesout[2];
	int in_fd, out_fd, keep_fd;
	int reading, saved_errno, pstat;
	pid_t pid;

	reading = (strcmp(dir, "r") == 0);
	if (!reading && strcmp(dir, "w") != 0) {
		errno = EINVAL;
		return (NULL);
	}
	if (pipe(pdesin) < 0)
		return (NULL);

	if (pipe(pdesout) < 0) {
		saved_errno = errno;
		(void)close(pdesin[0]);
		(void)close(pdesin[1]);
		errno = saved_errno;
		return (NULL);
	}

	switch (pid = fork()) {
	case -1:			/* Error. */
		saved_errno = errno;
		(void)close(pdesin[0]);
		(void)close(pdesin[1]);
		(void)close(pdesout[0]);
		(void)close(pdesout[1]);
		errno = saved_errno;
		return (NULL);
		/* NOTREACHED */
	case 0:				/* Child. */
		/* Close out un-used sides */
		(void)close(pdesin[1]);
		(void)close(pdesout[0]);
		in_fd = pdesin[0];
		out_fd = pdesout[1];
		/*
		 * If the output pipe landed on descriptor 0 (possible only
		 * when the parent ran with stdin closed), move it out of the
		 * way before stdin is installed over it.
		 */
		if (out_fd == STDIN_FILENO && (out_fd = dup(out_fd)) == -1)
			_exit(127);
		/* Now prepare the stdin of the process */
		if (in_fd != STDIN_FILENO) {
			if (dup2(in_fd, STDIN_FILENO) == -1)
				_exit(127);
			(void)close(in_fd);
		}
		/* Now prepare stdout, and stderr just in case */
		if (dup2(out_fd, STDOUT_FILENO) == -1 ||
		    dup2(out_fd, STDERR_FILENO) == -1)
			_exit(127);
		if (out_fd > STDERR_FILENO)
			(void)close(out_fd);
		/*
		 * Now run it.  The command string is passed through as-is,
		 * so there is no fixed limit on its length, and the child
		 * inherits the current environment.
		 */
		execl("/bin/sh", "sh", "-c", command, (char *)NULL);
		/*
		 * _exit, not exit: the child must not flush stdio buffers
		 * it inherited from the parent.
		 */
		_exit(127);
		/* NOTREACHED */
	}

	/* Parent: keep exactly one pipe end, close the other three. */
	if (reading) {
		keep_fd = pdesout[0];
		(void)close(pdesout[1]);
		(void)close(pdesin[0]);
		(void)close(pdesin[1]);
	} else {
		keep_fd = pdesin[1];
		(void)close(pdesin[0]);
		(void)close(pdesout[0]);
		(void)close(pdesout[1]);
	}
	io = fdopen(keep_fd, reading ? "r" : "w");
	if (io == NULL) {
		/* Don't leak the descriptor or leave the child behind. */
		saved_errno = errno;
		(void)close(keep_fd);
		(void)kill(pid, SIGTERM);
		while (waitpid(pid, &pstat, 0) == -1 && errno == EINTR)
			continue;
		errno = saved_errno;
		return (NULL);
	}
	/* Store the pid */
	*p_pid = pid;
	return (io);
}
142 
/*
 * my_pclose --
 *	Counterpart to my_popen(): close the stream, send SIGTERM to
 *	the_pid and wait for that process, retrying the wait whenever it
 *	is interrupted (EINTR).  Nothing is returned; failures of
 *	fclose(), kill() and wait4() are not reported to the caller.
 */
static void
my_pclose(FILE *io, pid_t the_pid)
{
	int status;

	(void)fclose(io);
	/* The process may still be running, so ask it to terminate. */
	kill(the_pid, SIGTERM);
	for (;;) {
		if (wait4(the_pid, &status, 0, NULL) != -1)
			break;
		if (errno != EINTR)
			break;
	}
}
164 
/*
 * One named counter and its recorded values.  Entries are chained
 * through next_cpu; find_counter() walks that chain by counter_name.
 */
struct counters {
	struct counters *next_cpu;		/* Next entry in the chain */
	char counter_name[MAX_NLEN];		/* Name of counter */
	int cpu;				/* CPU we are on */
	int pos;				/* Index we are filling to. */
	uint64_t vals[MAX_COUNTER_SLOTS];	/* Recorded values, up to MAX_COUNTER_SLOTS of them */
	uint64_t sum;				/* Summary of entries */
};
173 
/* NOTE(review): MAX_CPU list heads, presumably one per CPU number - confirm. */
extern struct counters *glob_cpu[MAX_CPU];
struct counters *glob_cpu[MAX_CPU];

/* NOTE(review): presumably the backing array of counters, ncnts long - confirm. */
extern struct counters *cnts;
struct counters *cnts=NULL;

extern int ncnts;
int ncnts=0;

/*
 * Function with the same signature as the metric routines in this file
 * (counter list, slot index); presumably the selected metric - confirm.
 */
extern int (*expression)(struct counters *, int);
int (*expression)(struct counters *, int);

/* NOTE(review): presumably copied from the chosen cpu_entry - confirm. */
static const char *threshold=NULL;
static const char *command;
188 
/*
 * Description of one metric.
 * NOTE(review): field meanings below are inferred from their names and
 * types; the tables that populate them are outside this part of the file.
 */
struct cpu_entry {
	const char *name;	/* presumably the name given to the explain_name_*() routines */
	const char *thresh;	/* presumably the threshold text for the metric */
	const char *command;	/* presumably the command line used to collect the counters */
	int (*func)(struct counters *, int);	/* same signature as the metric routines in this file */
	int counters_required;	/* presumably how many PMCs func needs */
};
196 
/*
 * A CPU model and its metrics.
 * NOTE(review): the code filling this in is outside this part of the
 * file; the per-field notes are inferred from names and types.
 */
struct cpu_type {
	char cputype[32];	/* presumably the CPU model name */
	int number;		/* presumably the number of entries in ents */
	struct cpu_entry *ents;	/* metric descriptions */
	void (*explain)(const char *name);	/* same signature as the explain_name_*() routines */
};
extern struct cpu_type the_cpu;
struct cpu_type the_cpu;
205 
/*
 * explain_name_sb --
 *	Print to stdout the formula behind the metric called `name',
 *	followed by a line giving the threshold at which the value is
 *	worth looking at.  An unrecognised name prints "Unknown name:"
 *	and uses "unknown entry" as the threshold text.
 *
 *	The "splitload" text now carries its opening parenthesis, which
 *	was previously missing.
 */
static void
explain_name_sb(const char *name)
{
	static const struct {
		const char *key;
		const char *thresh;
		const char *formula;
	} tbl[] = {
		{ "allocstall1", "thresh > .05",
		  "Examine PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW / CPU_CLK_UNHALTED.THREAD_P\n" },
		{ "allocstall2", "thresh > .05",
		  "Examine PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P\n" },
		{ "br_miss", "thresh >= .2",
		  "Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P\n" },
		{ "splitload", "thresh >= .1",
		  "Examine (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n" },
		{ "splitstore", "thresh >= .01",
		  "Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n" },
		{ "contested", "thresh >= .05",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n" },
		{ "blockstorefwd", "thresh >= .05",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n" },
		{ "cache2", "thresh >= .2",
		  "Examine ((MEM_LOAD_RETIRED.L3_HIT * 26) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P\n"
		  "**Note we have it labeled MEM_LOAD_UOPS_RETIRED.LLC_HIT not MEM_LOAD_RETIRED.L3_HIT\n" },
		{ "cache1", "thresh >= .2",
		  "Examine (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P\n" },
		{ "dtlbmissload", "thresh >= .1",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n" },
		{ "frontendstall", "thresh >= .15",
		  "Examine IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4)\n" },
		{ "clears", "thresh >= .02",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  "          MACHINE_CLEARS.SMC + \n"
		  "          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n" },
		{ "microassist", "thresh >= .05",
		  "Examine IDQ.MS_CYCLES / (CPU_CLK_UNHALTED.THREAD_P * 4)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n" },
		{ "aliasing_4k", "thresh >= .1",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n" },
		{ "fpassist", "look for a excessive value",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n" },
		{ "otherassistavx", "look for a excessive value",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n" },
		{ "otherassistsse", "look for a excessive value",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n" },
		{ "eff1", "thresh < .9",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n" },
		{ "eff2", "thresh > 1.0",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n" },
		{ "dtlbmissstore", "thresh >= .05",
		  "Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n" },
	};
	const size_t ntbl = sizeof(tbl) / sizeof(tbl[0]);
	const char *mythresh;
	size_t i;

	for (i = 0; i < ntbl; i++) {
		if (strcmp(name, tbl[i].key) == 0)
			break;
	}
	if (i < ntbl) {
		/* The formula text contains no conversions; emit it verbatim. */
		fputs(tbl[i].formula, stdout);
		mythresh = tbl[i].thresh;
	} else {
		printf("Unknown name:%s\n", name);
		mythresh = "unknown entry";
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
284 
/*
 * explain_name_ib --
 *	Print to stdout the formula behind the metric called `name',
 *	followed by a line giving the threshold at which the value is
 *	worth looking at.  An unrecognised name prints "Unknown name:"
 *	and uses "unknown entry" as the threshold text.
 */
static void
explain_name_ib(const char *name)
{
	static const struct {
		const char *key;
		const char *thresh;
		const char *formula;
	} tbl[] = {
		{ "br_miss", "thresh >= .2",
		  "Examine ((BR_MISP_RETIRED.ALL_BRANCHES /(BR_MISP_RETIRED.ALL_BRANCHES +\n"
		  "         MACHINE_CLEAR.COUNT) * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES)\n"
		  "/ (4 * CPU_CLK_UNHALTED.THREAD))))\n" },
		{ "eff1", "thresh < .9",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n" },
		{ "eff2", "thresh > 1.0",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n" },
		{ "cache1", "thresh >= .2",
		  "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n" },
		{ "cache2", "thresh >= .2",
		  "Examine (MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P\n" },
		{ "itlbmiss", "thresh > .05",
		  "Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n" },
		{ "icachemiss", "thresh > .05",
		  "Examine (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n" },
		{ "lcpstall", "thresh > .05",
		  "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n" },
		{ "datashare", "thresh > .05",
		  "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/CPU_CLK_UNHALTED.THREAD_P\n" },
		{ "blockstorefwd", "thresh >= .05",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n" },
		{ "splitload", "thresh >= .1",
		  "Examine  ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) *\n"
		  "         LD_BLOCKS.NO_SR)/CPU_CLK_UNHALTED.THREAD_P\n" },
		{ "splitstore", "thresh >= .01",
		  "Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n" },
		{ "aliasing_4k", "thresh >= .1",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n" },
		{ "dtlbmissload", "thresh >= .1",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n" },
		{ "dtlbmissstore", "thresh >= .05",
		  "Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n" },
		{ "contested", "thresh >= .05",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n" },
		{ "clears", "thresh >= .02",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  "          MACHINE_CLEARS.SMC + \n"
		  "          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n" },
		{ "microassist", "thresh >= .05",
		  "Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n" },
		{ "fpassist", "look for a excessive value",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n" },
		{ "otherassistavx", "look for a excessive value",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n" },
		{ "otherassistsse", "look for a excessive value",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n" },
	};
	const size_t ntbl = sizeof(tbl) / sizeof(tbl[0]);
	const char *mythresh;
	size_t i;

	for (i = 0; i < ntbl; i++) {
		if (strcmp(name, tbl[i].key) == 0)
			break;
	}
	if (i < ntbl) {
		/* The formula text contains no conversions; emit it verbatim. */
		fputs(tbl[i].formula, stdout);
		mythresh = tbl[i].thresh;
	} else {
		printf("Unknown name:%s\n", name);
		mythresh = "unknown entry";
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
366 
367 
/*
 * explain_name_has --
 *	Print to stdout the formula behind the metric called `name',
 *	followed by a line giving the threshold at which the value is
 *	worth looking at.  An unrecognised name prints "Unknown name:"
 *	and uses "unknown entry" as the threshold text.
 */
static void
explain_name_has(const char *name)
{
	static const struct {
		const char *key;
		const char *thresh;
		const char *formula;
	} tbl[] = {
		{ "eff1", "thresh < .75",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n" },
		{ "eff2", "thresh > 1.0",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n" },
		{ "itlbmiss", "thresh > .05",
		  "Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n" },
		{ "icachemiss", "thresh > .05",
		  "Examine (36 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P\n" },
		{ "lcpstall", "thresh > .05",
		  "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n" },
		{ "cache1", "thresh >= .2",
		  "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n" },
		{ "cache2", "thresh >= .2",
		  "Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))\n"
		  "          / CPU_CLK_UNHALTED.THREAD_P\n" },
		{ "contested", "thresh >= .05",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P\n" },
		{ "datashare", "thresh > .05",
		  "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n" },
		{ "blockstorefwd", "thresh >= .05",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n" },
		{ "splitload", "thresh >= .1",
		  "Examine  (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n" },
		{ "splitstore", "thresh >= .01",
		  "Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n" },
		{ "aliasing_4k", "thresh >= .1",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n" },
		{ "dtlbmissload", "thresh >= .1",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n" },
		{ "br_miss", "thresh >= .2",
		  "Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD\n" },
		{ "clears", "thresh >= .02",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  "          MACHINE_CLEARS.SMC + \n"
		  "          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n" },
		{ "microassist", "thresh >= .05",
		  "Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n" },
		{ "fpassist", "look for a excessive value",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n" },
		{ "otherassistavx", "look for a excessive value",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n" },
		{ "otherassistsse", "look for a excessive value",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n" },
	};
	const size_t ntbl = sizeof(tbl) / sizeof(tbl[0]);
	const char *mythresh;
	size_t i;

	for (i = 0; i < ntbl; i++) {
		if (strcmp(name, tbl[i].key) == 0)
			break;
	}
	if (i < ntbl) {
		/* The formula text contains no conversions; emit it verbatim. */
		fputs(tbl[i].formula, stdout);
		mythresh = tbl[i].thresh;
	} else {
		printf("Unknown name:%s\n", name);
		mythresh = "unknown entry";
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
445 
446 
447 
448 static struct counters *
449 find_counter(struct counters *base, const char *name)
450 {
451 	struct counters *at;
452 	int len;
453 
454 	at = base;
455 	len = strlen(name);
456 	while(at) {
457 		if (strncmp(at->counter_name, name, len) == 0) {
458 			return(at);
459 		}
460 		at = at->next_cpu;
461 	}
462 	printf("Can't find counter %s\n", name);
463 	printf("We have:\n");
464 	at = base;
465 	while(at) {
466 		printf("- %s\n", at->counter_name);
467 		at = at->next_cpu;
468 	}
469 	exit(-1);
470 }
471 
472 static int
473 allocstall1(struct counters *cpu, int pos)
474 {
475 /*  1  - PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW/CPU_CLK_UNHALTED.THREAD_P (thresh > .05)*/
476 	int ret;
477 	struct counters *partial;
478 	struct counters *unhalt;
479 	double un, par, res;
480 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
481 	partial = find_counter(cpu, "PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW");
482 	if (pos != -1) {
483 		par = partial->vals[pos] * 1.0;
484 		un = unhalt->vals[pos] * 1.0;
485 	} else {
486 		par = partial->sum * 1.0;
487 		un = unhalt->sum * 1.0;
488 	}
489 	res = par/un;
490 	ret = printf("%1.3f", res);
491 	return(ret);
492 }
493 
494 static int
495 allocstall2(struct counters *cpu, int pos)
496 {
497 /*  2  - PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
498 	int ret;
499 	struct counters *partial;
500 	struct counters *unhalt;
501 	double un, par, res;
502 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
503 	partial = find_counter(cpu, "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP");
504 	if (pos != -1) {
505 		par = partial->vals[pos] * 1.0;
506 		un = unhalt->vals[pos] * 1.0;
507 	} else {
508 		par = partial->sum * 1.0;
509 		un = unhalt->sum * 1.0;
510 	}
511 	res = par/un;
512 	ret = printf("%1.3f", res);
513 	return(ret);
514 }
515 
516 static int
517 br_mispredict(struct counters *cpu, int pos)
518 {
519 	struct counters *brctr;
520 	struct counters *unhalt;
521 	int ret;
522 /*  3  - (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
523 	double br, un, con, res;
524 	con = 20.0;
525 
526 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
527         brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
528 	if (pos != -1) {
529 		br = brctr->vals[pos] * 1.0;
530 		un = unhalt->vals[pos] * 1.0;
531 	} else {
532 		br = brctr->sum * 1.0;
533 		un = unhalt->sum * 1.0;
534 	}
535 	res = (con * br)/un;
536  	ret = printf("%1.3f", res);
537 	return(ret);
538 }
539 
540 static int
541 br_mispredictib(struct counters *cpu, int pos)
542 {
543 	struct counters *brctr;
544 	struct counters *unhalt;
545 	struct counters *clear, *clear2, *clear3;
546 	struct counters *uops;
547 	struct counters *recv;
548 	struct counters *iss;
549 /*	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",*/
550 	int ret;
551         /*
552 	 * (BR_MISP_RETIRED.ALL_BRANCHES /
553 	 *         (BR_MISP_RETIRED.ALL_BRANCHES +
554 	 *          MACHINE_CLEAR.COUNT) *
555 	 *	   ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) / (4 * CPU_CLK_UNHALTED.THREAD)))
556 	 *
557 	 */
558 	double br, cl, cl2, cl3, uo, re, un, con, res, is;
559 	con = 4.0;
560 
561 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
562         brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
563 	clear = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
564 	clear2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
565 	clear3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
566 	uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
567 	iss = find_counter(cpu, "UOPS_ISSUED.ANY");
568 	recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES");
569 	if (pos != -1) {
570 		br = brctr->vals[pos] * 1.0;
571 		cl = clear->vals[pos] * 1.0;
572 		cl2 = clear2->vals[pos] * 1.0;
573 		cl3 = clear3->vals[pos] * 1.0;
574 		uo = uops->vals[pos] * 1.0;
575 		re = recv->vals[pos] * 1.0;
576 		is = iss->vals[pos] * 1.0;
577 		un = unhalt->vals[pos] * 1.0;
578 	} else {
579 		br = brctr->sum * 1.0;
580 		cl = clear->sum * 1.0;
581 		cl2 = clear2->sum * 1.0;
582 		cl3 = clear3->sum * 1.0;
583 		uo = uops->sum * 1.0;
584 		re = recv->sum * 1.0;
585 		is = iss->sum * 1.0;
586 		un = unhalt->sum * 1.0;
587 	}
588 	res = (br/(br + cl + cl2 + cl3) * ((is - uo + con * re) / (con * un)));
589  	ret = printf("%1.3f", res);
590 	return(ret);
591 }
592 
593 
594 static int
595 br_mispredict_broad(struct counters *cpu, int pos)
596 {
597 	struct counters *brctr;
598 	struct counters *unhalt;
599 	struct counters *clear;
600 	struct counters *uops;
601 	struct counters *uops_ret;
602 	struct counters *recv;
603 	int ret;
604 	double br, cl, uo, uo_r, re, con, un, res;
605 
606 	con = 4.0;
607 
608 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
609         brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
610 	clear = find_counter(cpu, "MACHINE_CLEARS.CYCLES");
611 	uops = find_counter(cpu, "UOPS_ISSUED.ANY");
612 	uops_ret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
613 	recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES");
614 
615 	if (pos != -1) {
616 		un = unhalt->vals[pos] * 1.0;
617 		br = brctr->vals[pos] * 1.0;
618 		cl = clear->vals[pos] * 1.0;
619 		uo = uops->vals[pos] * 1.0;
620 		uo_r = uops_ret->vals[pos] * 1.0;
621 		re = recv->vals[pos] * 1.0;
622 	} else {
623 		un = unhalt->sum * 1.0;
624 		br = brctr->sum * 1.0;
625 		cl = clear->sum * 1.0;
626 		uo = uops->sum * 1.0;
627 		uo_r = uops_ret->sum * 1.0;
628 		re = recv->sum * 1.0;
629 	}
630 	res = br / (br + cl) * (uo - uo_r + con * re) / (un * con);
631  	ret = printf("%1.3f", res);
632 	return(ret);
633 }
634 
635 static int
636 splitloadib(struct counters *cpu, int pos)
637 {
638 	int ret;
639 	struct counters *mem;
640 	struct counters *l1d, *ldblock;
641 	struct counters *unhalt;
642 	double un, memd, res, l1, ldb;
643         /*
644 	 * ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) * LD_BLOCKS.NO_SR) / CPU_CLK_UNHALTED.THREAD_P
645 	 * "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
646 	 */
647 
648 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
649 	mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L1_MISS");
650 	l1d = find_counter(cpu, "L1D_PEND_MISS.PENDING");
651 	ldblock = find_counter(cpu, "LD_BLOCKS.NO_SR");
652 	if (pos != -1) {
653 		memd = mem->vals[pos] * 1.0;
654 		l1 = l1d->vals[pos] * 1.0;
655 		ldb = ldblock->vals[pos] * 1.0;
656 		un = unhalt->vals[pos] * 1.0;
657 	} else {
658 		memd = mem->sum * 1.0;
659 		l1 = l1d->sum * 1.0;
660 		ldb = ldblock->sum * 1.0;
661 		un = unhalt->sum * 1.0;
662 	}
663 	res = ((l1 / memd) * ldb)/un;
664 	ret = printf("%1.3f", res);
665 	return(ret);
666 }
667 
668 
669 static int
670 splitload(struct counters *cpu, int pos)
671 {
672 	int ret;
673 	struct counters *mem;
674 	struct counters *unhalt;
675 	double con, un, memd, res;
676 /*  4  - (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/
677 
678 	con = 5.0;
679 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
680 	mem = find_counter(cpu, "MEM_UOPS_RETIRED.SPLIT_LOADS");
681 	if (pos != -1) {
682 		memd = mem->vals[pos] * 1.0;
683 		un = unhalt->vals[pos] * 1.0;
684 	} else {
685 		memd = mem->sum * 1.0;
686 		un = unhalt->sum * 1.0;
687 	}
688 	res = (memd * con)/un;
689 	ret = printf("%1.3f", res);
690 	return(ret);
691 }
692 
693 
694 static int
695 splitload_sb(struct counters *cpu, int pos)
696 {
697 	int ret;
698 	struct counters *mem;
699 	struct counters *unhalt;
700 	double con, un, memd, res;
701 /*  4  - (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/
702 
703 	con = 5.0;
704 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
705 	mem = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_LOADS");
706 	if (pos != -1) {
707 		memd = mem->vals[pos] * 1.0;
708 		un = unhalt->vals[pos] * 1.0;
709 	} else {
710 		memd = mem->sum * 1.0;
711 		un = unhalt->sum * 1.0;
712 	}
713 	res = (memd * con)/un;
714 	ret = printf("%1.3f", res);
715 	return(ret);
716 }
717 
718 
719 static int
720 splitstore_sb(struct counters *cpu, int pos)
721 {
722         /*  5  - MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES (thresh > 0.01) */
723 	int ret;
724 	struct counters *mem_split;
725 	struct counters *mem_stores;
726 	double memsplit, memstore, res;
727 	mem_split = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_STORES");
728 	mem_stores = find_counter(cpu, "MEM_UOP_RETIRED.ALL_STORES");
729 	if (pos != -1) {
730 		memsplit = mem_split->vals[pos] * 1.0;
731 		memstore = mem_stores->vals[pos] * 1.0;
732 	} else {
733 		memsplit = mem_split->sum * 1.0;
734 		memstore = mem_stores->sum * 1.0;
735 	}
736 	res = memsplit/memstore;
737 	ret = printf("%1.3f", res);
738 	return(ret);
739 }
740 
741 
742 
743 static int
744 splitstore(struct counters *cpu, int pos)
745 {
746         /*  5  - MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES (thresh > 0.01) */
747 	int ret;
748 	struct counters *mem_split;
749 	struct counters *mem_stores;
750 	double memsplit, memstore, res;
751 	mem_split = find_counter(cpu, "MEM_UOPS_RETIRED.SPLIT_STORES");
752 	mem_stores = find_counter(cpu, "MEM_UOPS_RETIRED.ALL_STORES");
753 	if (pos != -1) {
754 		memsplit = mem_split->vals[pos] * 1.0;
755 		memstore = mem_stores->vals[pos] * 1.0;
756 	} else {
757 		memsplit = mem_split->sum * 1.0;
758 		memstore = mem_stores->sum * 1.0;
759 	}
760 	res = memsplit/memstore;
761 	ret = printf("%1.3f", res);
762 	return(ret);
763 }
764 
765 
766 static int
767 contested(struct counters *cpu, int pos)
768 {
769         /*  6  - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
770 	int ret;
771 	struct counters *mem;
772 	struct counters *unhalt;
773 	double con, un, memd, res;
774 
775 	con = 60.0;
776 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
777 	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
778 	if (pos != -1) {
779 		memd = mem->vals[pos] * 1.0;
780 		un = unhalt->vals[pos] * 1.0;
781 	} else {
782 		memd = mem->sum * 1.0;
783 		un = unhalt->sum * 1.0;
784 	}
785 	res = (memd * con)/un;
786 	ret = printf("%1.3f", res);
787 	return(ret);
788 }
789 
790 static int
791 contested_has(struct counters *cpu, int pos)
792 {
793         /*  6  - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
794 	int ret;
795 	struct counters *mem;
796 	struct counters *unhalt;
797 	double con, un, memd, res;
798 
799 	con = 84.0;
800 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
801 	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
802 	if (pos != -1) {
803 		memd = mem->vals[pos] * 1.0;
804 		un = unhalt->vals[pos] * 1.0;
805 	} else {
806 		memd = mem->sum * 1.0;
807 		un = unhalt->sum * 1.0;
808 	}
809 	res = (memd * con)/un;
810 	ret = printf("%1.3f", res);
811 	return(ret);
812 }
813 
814 static int
815 contestedbroad(struct counters *cpu, int pos)
816 {
817         /*  6  - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
818 	int ret;
819 	struct counters *mem;
820 	struct counters *mem2;
821 	struct counters *unhalt;
822 	double con, un, memd, memtoo, res;
823 
824 	con = 84.0;
825 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
826 	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
827 	mem2 = find_counter(cpu,"MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS");
828 
829 	if (pos != -1) {
830 		memd = mem->vals[pos] * 1.0;
831 		memtoo = mem2->vals[pos] * 1.0;
832 		un = unhalt->vals[pos] * 1.0;
833 	} else {
834 		memd = mem->sum * 1.0;
835 		memtoo = mem2->sum * 1.0;
836 		un = unhalt->sum * 1.0;
837 	}
838 	res = ((memd * con) + memtoo)/un;
839 	ret = printf("%1.3f", res);
840 	return(ret);
841 }
842 
843 
844 static int
845 blockstoreforward(struct counters *cpu, int pos)
846 {
847         /*  7  - (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .05)*/
848 	int ret;
849 	struct counters *ldb;
850 	struct counters *unhalt;
851 	double con, un, ld, res;
852 
853 	con = 13.0;
854 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
855 	ldb = find_counter(cpu, "LD_BLOCKS_STORE_FORWARD");
856 	if (pos != -1) {
857 		ld = ldb->vals[pos] * 1.0;
858 		un = unhalt->vals[pos] * 1.0;
859 	} else {
860 		ld = ldb->sum * 1.0;
861 		un = unhalt->sum * 1.0;
862 	}
863 	res = (ld * con)/un;
864 	ret = printf("%1.3f", res);
865 	return(ret);
866 }
867 
868 static int
869 cache2(struct counters *cpu, int pos)
870 {
871 	/* ** Suspect ***
872 	 *  8  - ((MEM_LOAD_RETIRED.L3_HIT * 26) + (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) +
873 	 *        (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
874 	 */
875 	int ret;
876 	struct counters *mem1, *mem2, *mem3;
877 	struct counters *unhalt;
878 	double con1, con2, con3, un, me_1, me_2, me_3, res;
879 
880 	con1 = 26.0;
881 	con2 = 43.0;
882 	con3 = 60.0;
883 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
884 /* Call for MEM_LOAD_RETIRED.L3_HIT possibly MEM_LOAD_UOPS_RETIRED.LLC_HIT ?*/
885 	mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
886 	mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
887 	mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
888 	if (pos != -1) {
889 		me_1 = mem1->vals[pos] * 1.0;
890 		me_2 = mem2->vals[pos] * 1.0;
891 		me_3 = mem3->vals[pos] * 1.0;
892 		un = unhalt->vals[pos] * 1.0;
893 	} else {
894 		me_1 = mem1->sum * 1.0;
895 		me_2 = mem2->sum * 1.0;
896 		me_3 = mem3->sum * 1.0;
897 		un = unhalt->sum * 1.0;
898 	}
899 	res = ((me_1 * con1) + (me_2 * con2) + (me_3 * con3))/un;
900 	ret = printf("%1.3f", res);
901 	return(ret);
902 }
903 
904 static int
905 datasharing(struct counters *cpu, int pos)
906 {
907 	/*
908 	 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
909 	 */
910 	int ret;
911 	struct counters *mem;
912 	struct counters *unhalt;
913 	double con, res, me, un;
914 
915 	con = 43.0;
916 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
917 	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
918 	if (pos != -1) {
919 		me = mem->vals[pos] * 1.0;
920 		un = unhalt->vals[pos] * 1.0;
921 	} else {
922 		me = mem->sum * 1.0;
923 		un = unhalt->sum * 1.0;
924 	}
925 	res = (me * con)/un;
926 	ret = printf("%1.3f", res);
927 	return(ret);
928 
929 }
930 
931 
932 static int
933 datasharing_has(struct counters *cpu, int pos)
934 {
935 	/*
936 	 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
937 	 */
938 	int ret;
939 	struct counters *mem;
940 	struct counters *unhalt;
941 	double con, res, me, un;
942 
943 	con = 72.0;
944 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
945 	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
946 	if (pos != -1) {
947 		me = mem->vals[pos] * 1.0;
948 		un = unhalt->vals[pos] * 1.0;
949 	} else {
950 		me = mem->sum * 1.0;
951 		un = unhalt->sum * 1.0;
952 	}
953 	res = (me * con)/un;
954 	ret = printf("%1.3f", res);
955 	return(ret);
956 
957 }
958 
959 
960 static int
961 cache2ib(struct counters *cpu, int pos)
962 {
963         /*
964 	 *  (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
965 	 */
966 	int ret;
967 	struct counters *mem;
968 	struct counters *unhalt;
969 	double con, un, me, res;
970 
971 	con = 29.0;
972 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
973 	mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
974 	if (pos != -1) {
975 		me = mem->vals[pos] * 1.0;
976 		un = unhalt->vals[pos] * 1.0;
977 	} else {
978 		me = mem->sum * 1.0;
979 		un = unhalt->sum * 1.0;
980 	}
981 	res = (con * me)/un;
982 	ret = printf("%1.3f", res);
983 	return(ret);
984 }
985 
986 static int
987 cache2has(struct counters *cpu, int pos)
988 {
989 	/*
990 	 * Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \
991 	 *          (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) +
992 	 *          (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))
993 	 *           / CPU_CLK_UNHALTED.THREAD_P
994 	 */
995 	int ret;
996 	struct counters *mem1, *mem2, *mem3;
997 	struct counters *unhalt;
998 	double con1, con2, con3, un, me1, me2, me3, res;
999 
1000 	con1 = 36.0;
1001 	con2 = 72.0;
1002 	con3 = 84.0;
1003 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1004 	mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
1005 	mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
1006 	mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
1007 	if (pos != -1) {
1008 		me1 = mem1->vals[pos] * 1.0;
1009 		me2 = mem2->vals[pos] * 1.0;
1010 		me3 = mem3->vals[pos] * 1.0;
1011 		un = unhalt->vals[pos] * 1.0;
1012 	} else {
1013 		me1 = mem1->sum * 1.0;
1014 		me2 = mem2->sum * 1.0;
1015 		me3 = mem3->sum * 1.0;
1016 		un = unhalt->sum * 1.0;
1017 	}
1018 	res = ((me1 * con1) + (me2 * con2) + (me3 * con3))/un;
1019 	ret = printf("%1.3f", res);
1020 	return(ret);
1021 }
1022 
1023 
1024 static int
1025 cache2broad(struct counters *cpu, int pos)
1026 {
1027         /*
1028 	 *  (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
1029 	 */
1030 	int ret;
1031 	struct counters *mem;
1032 	struct counters *unhalt;
1033 	double con, un, me, res;
1034 
1035 	con = 36.0;
1036 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1037 	mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L3_HIT");
1038 	if (pos != -1) {
1039 		me = mem->vals[pos] * 1.0;
1040 		un = unhalt->vals[pos] * 1.0;
1041 	} else {
1042 		me = mem->sum * 1.0;
1043 		un = unhalt->sum * 1.0;
1044 	}
1045 	res = (con * me)/un;
1046 	ret = printf("%1.3f", res);
1047 	return(ret);
1048 }
1049 
1050 
1051 static int
1052 cache1(struct counters *cpu, int pos)
1053 {
1054 	/*  9  - (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
1055 	int ret;
1056 	struct counters *mem;
1057 	struct counters *unhalt;
1058 	double con, un, me, res;
1059 
1060 	con = 180.0;
1061 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1062 	mem = find_counter(cpu, "MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS");
1063 	if (pos != -1) {
1064 		me = mem->vals[pos] * 1.0;
1065 		un = unhalt->vals[pos] * 1.0;
1066 	} else {
1067 		me = mem->sum * 1.0;
1068 		un = unhalt->sum * 1.0;
1069 	}
1070 	res = (me * con)/un;
1071 	ret = printf("%1.3f", res);
1072 	return(ret);
1073 }
1074 
1075 static int
1076 cache1ib(struct counters *cpu, int pos)
1077 {
1078 	/*  9  - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
1079 	int ret;
1080 	struct counters *mem;
1081 	struct counters *unhalt;
1082 	double con, un, me, res;
1083 
1084 	con = 180.0;
1085 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1086 	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM");
1087 	if (pos != -1) {
1088 		me = mem->vals[pos] * 1.0;
1089 		un = unhalt->vals[pos] * 1.0;
1090 	} else {
1091 		me = mem->sum * 1.0;
1092 		un = unhalt->sum * 1.0;
1093 	}
1094 	res = (me * con)/un;
1095 	ret = printf("%1.3f", res);
1096 	return(ret);
1097 }
1098 
1099 
1100 static int
1101 cache1broad(struct counters *cpu, int pos)
1102 {
1103 	/*  9  - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
1104 	int ret;
1105 	struct counters *mem;
1106 	struct counters *unhalt;
1107 	double con, un, me, res;
1108 
1109 	con = 180.0;
1110 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1111 	mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L3_MISS");
1112 	if (pos != -1) {
1113 		me = mem->vals[pos] * 1.0;
1114 		un = unhalt->vals[pos] * 1.0;
1115 	} else {
1116 		me = mem->sum * 1.0;
1117 		un = unhalt->sum * 1.0;
1118 	}
1119 	res = (me * con)/un;
1120 	ret = printf("%1.3f", res);
1121 	return(ret);
1122 }
1123 
1124 
1125 static int
1126 dtlb_missload(struct counters *cpu, int pos)
1127 {
1128 	/* 10  - ((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P (t >=.1) */
1129 	int ret;
1130 	struct counters *dtlb_m, *dtlb_d;
1131 	struct counters *unhalt;
1132 	double con, un, d1, d2, res;
1133 
1134 	con = 7.0;
1135 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1136 	dtlb_m = find_counter(cpu, "DTLB_LOAD_MISSES.STLB_HIT");
1137 	dtlb_d = find_counter(cpu, "DTLB_LOAD_MISSES.WALK_DURATION");
1138 	if (pos != -1) {
1139 		d1 = dtlb_m->vals[pos] * 1.0;
1140 		d2 = dtlb_d->vals[pos] * 1.0;
1141 		un = unhalt->vals[pos] * 1.0;
1142 	} else {
1143 		d1 = dtlb_m->sum * 1.0;
1144 		d2 = dtlb_d->sum * 1.0;
1145 		un = unhalt->sum * 1.0;
1146 	}
1147 	res = ((d1 * con) + d2)/un;
1148 	ret = printf("%1.3f", res);
1149 	return(ret);
1150 }
1151 
1152 static int
1153 dtlb_missstore(struct counters *cpu, int pos)
1154 {
1155         /*
1156 	 * ((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION) /
1157 	 * CPU_CLK_UNHALTED.THREAD_P (t >= .1)
1158 	 */
1159         int ret;
1160         struct counters *dtsb_m, *dtsb_d;
1161         struct counters *unhalt;
1162         double con, un, d1, d2, res;
1163 
1164         con = 7.0;
1165         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1166         dtsb_m = find_counter(cpu, "DTLB_STORE_MISSES.STLB_HIT");
1167         dtsb_d = find_counter(cpu, "DTLB_STORE_MISSES.WALK_DURATION");
1168         if (pos != -1) {
1169                 d1 = dtsb_m->vals[pos] * 1.0;
1170                 d2 = dtsb_d->vals[pos] * 1.0;
1171                 un = unhalt->vals[pos] * 1.0;
1172         } else {
1173                 d1 = dtsb_m->sum * 1.0;
1174                 d2 = dtsb_d->sum * 1.0;
1175                 un = unhalt->sum * 1.0;
1176         }
1177         res = ((d1 * con) + d2)/un;
1178         ret = printf("%1.3f", res);
1179         return(ret);
1180 }
1181 
1182 static int
1183 itlb_miss(struct counters *cpu, int pos)
1184 {
1185 	/* ITLB_MISSES.WALK_DURATION / CPU_CLK_UNTHREAD_P  IB */
1186 	int ret;
1187 	struct counters *itlb;
1188 	struct counters *unhalt;
1189 	double un, d1, res;
1190 
1191 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1192 	itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1193 	if (pos != -1) {
1194 		d1 = itlb->vals[pos] * 1.0;
1195 		un = unhalt->vals[pos] * 1.0;
1196 	} else {
1197 		d1 = itlb->sum * 1.0;
1198 		un = unhalt->sum * 1.0;
1199 	}
1200 	res = d1/un;
1201 	ret = printf("%1.3f", res);
1202 	return(ret);
1203 }
1204 
1205 
1206 static int
1207 itlb_miss_broad(struct counters *cpu, int pos)
1208 {
1209 	/* (7 * ITLB_MISSES.STLB_HIT_4K + ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNTHREAD_P   */
1210 	int ret;
1211 	struct counters *itlb;
1212 	struct counters *unhalt;
1213 	struct counters *four_k;
1214 	double un, d1, res, k;
1215 
1216 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1217 	itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1218 	four_k = find_counter(cpu, "ITLB_MISSES.STLB_HIT_4K");
1219 	if (pos != -1) {
1220 		d1 = itlb->vals[pos] * 1.0;
1221 		un = unhalt->vals[pos] * 1.0;
1222 		k = four_k->vals[pos] * 1.0;
1223 	} else {
1224 		d1 = itlb->sum * 1.0;
1225 		un = unhalt->sum * 1.0;
1226 		k = four_k->sum * 1.0;
1227 	}
1228 	res = (7.0 * k + d1)/un;
1229 	ret = printf("%1.3f", res);
1230 	return(ret);
1231 }
1232 
1233 
1234 static int
1235 icache_miss(struct counters *cpu, int pos)
1236 {
1237 	/* (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P IB */
1238 
1239 	int ret;
1240 	struct counters *itlb, *icache;
1241 	struct counters *unhalt;
1242 	double un, d1, ic, res;
1243 
1244 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1245 	itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1246 	icache = find_counter(cpu, "ICACHE.IFETCH_STALL");
1247 	if (pos != -1) {
1248 		d1 = itlb->vals[pos] * 1.0;
1249 		ic = icache->vals[pos] * 1.0;
1250 		un = unhalt->vals[pos] * 1.0;
1251 	} else {
1252 		d1 = itlb->sum * 1.0;
1253 		ic = icache->sum * 1.0;
1254 		un = unhalt->sum * 1.0;
1255 	}
1256 	res = (ic-d1)/un;
1257 	ret = printf("%1.3f", res);
1258 	return(ret);
1259 
1260 }
1261 
1262 static int
1263 icache_miss_has(struct counters *cpu, int pos)
1264 {
1265 	/* (36 * ICACHE.MISSES) / CPU_CLK_UNHALTED.THREAD_P */
1266 
1267 	int ret;
1268 	struct counters *icache;
1269 	struct counters *unhalt;
1270 	double un, con, ic, res;
1271 
1272 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1273 	icache = find_counter(cpu, "ICACHE.MISSES");
1274 	con = 36.0;
1275 	if (pos != -1) {
1276 		ic = icache->vals[pos] * 1.0;
1277 		un = unhalt->vals[pos] * 1.0;
1278 	} else {
1279 		ic = icache->sum * 1.0;
1280 		un = unhalt->sum * 1.0;
1281 	}
1282 	res = (con * ic)/un;
1283 	ret = printf("%1.3f", res);
1284 	return(ret);
1285 
1286 }
1287 
1288 static int
1289 lcp_stall(struct counters *cpu, int pos)
1290 {
1291          /* ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P IB */
1292 	int ret;
1293 	struct counters *ild;
1294 	struct counters *unhalt;
1295 	double un, d1, res;
1296 
1297 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1298 	ild = find_counter(cpu, "ILD_STALL.LCP");
1299 	if (pos != -1) {
1300 		d1 = ild->vals[pos] * 1.0;
1301 		un = unhalt->vals[pos] * 1.0;
1302 	} else {
1303 		d1 = ild->sum * 1.0;
1304 		un = unhalt->sum * 1.0;
1305 	}
1306 	res = d1/un;
1307 	ret = printf("%1.3f", res);
1308 	return(ret);
1309 
1310 }
1311 
1312 
1313 static int
1314 frontendstall(struct counters *cpu, int pos)
1315 {
1316       /* 12  -  IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4) (thresh >= .15) */
1317 	int ret;
1318 	struct counters *idq;
1319 	struct counters *unhalt;
1320 	double con, un, id, res;
1321 
1322 	con = 4.0;
1323 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1324 	idq = find_counter(cpu, "IDQ_UOPS_NOT_DELIVERED.CORE");
1325 	if (pos != -1) {
1326 		id = idq->vals[pos] * 1.0;
1327 		un = unhalt->vals[pos] * 1.0;
1328 	} else {
1329 		id = idq->sum * 1.0;
1330 		un = unhalt->sum * 1.0;
1331 	}
1332 	res = id/(un * con);
1333 	ret = printf("%1.3f", res);
1334 	return(ret);
1335 }
1336 
1337 static int
1338 clears(struct counters *cpu, int pos)
1339 {
1340 	/* 13  - ((MACHINE_CLEARS.MEMORY_ORDERING + MACHINE_CLEARS.SMC + MACHINE_CLEARS.MASKMOV ) * 100 )
1341 	 *         / CPU_CLK_UNHALTED.THREAD_P (thresh  >= .02)*/
1342 
1343 	int ret;
1344 	struct counters *clr1, *clr2, *clr3;
1345 	struct counters *unhalt;
1346 	double con, un, cl1, cl2, cl3, res;
1347 
1348 	con = 100.0;
1349 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1350 	clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
1351 	clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
1352 	clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
1353 
1354 	if (pos != -1) {
1355 		cl1 = clr1->vals[pos] * 1.0;
1356 		cl2 = clr2->vals[pos] * 1.0;
1357 		cl3 = clr3->vals[pos] * 1.0;
1358 		un = unhalt->vals[pos] * 1.0;
1359 	} else {
1360 		cl1 = clr1->sum * 1.0;
1361 		cl2 = clr2->sum * 1.0;
1362 		cl3 = clr3->sum * 1.0;
1363 		un = unhalt->sum * 1.0;
1364 	}
1365 	res = ((cl1 + cl2 + cl3) * con)/un;
1366 	ret = printf("%1.3f", res);
1367 	return(ret);
1368 }
1369 
1370 
1371 
1372 static int
1373 clears_broad(struct counters *cpu, int pos)
1374 {
1375 	int ret;
1376 	struct counters *clr1, *clr2, *clr3, *cyc;
1377 	struct counters *unhalt;
1378 	double con, un, cl1, cl2, cl3, cy, res;
1379 
1380 	con = 100.0;
1381 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1382 	clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
1383 	clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
1384 	clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
1385 	cyc = find_counter(cpu, "MACHINE_CLEARS.CYCLES");
1386 	if (pos != -1) {
1387 		cl1 = clr1->vals[pos] * 1.0;
1388 		cl2 = clr2->vals[pos] * 1.0;
1389 		cl3 = clr3->vals[pos] * 1.0;
1390 		cy = cyc->vals[pos] * 1.0;
1391 		un = unhalt->vals[pos] * 1.0;
1392 	} else {
1393 		cl1 = clr1->sum * 1.0;
1394 		cl2 = clr2->sum * 1.0;
1395 		cl3 = clr3->sum * 1.0;
1396 		cy = cyc->sum * 1.0;
1397 		un = unhalt->sum * 1.0;
1398 	}
1399 	/* Formula not listed but extrapulated to add the cy ?? */
1400 	res = ((cl1 + cl2 + cl3 + cy) * con)/un;
1401 	ret = printf("%1.3f", res);
1402 	return(ret);
1403 }
1404 
1405 
1406 
1407 
1408 
1409 static int
1410 microassist(struct counters *cpu, int pos)
1411 {
1412 	/* 14  - IDQ.MS_CYCLES / CPU_CLK_UNHALTED.THREAD_P (thresh > .05) */
1413 	int ret;
1414 	struct counters *idq;
1415 	struct counters *unhalt;
1416 	double un, id, res, con;
1417 
1418 	con = 4.0;
1419 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1420 	idq = find_counter(cpu, "IDQ.MS_UOPS");
1421 	if (pos != -1) {
1422 		id = idq->vals[pos] * 1.0;
1423 		un = unhalt->vals[pos] * 1.0;
1424 	} else {
1425 		id = idq->sum * 1.0;
1426 		un = unhalt->sum * 1.0;
1427 	}
1428 	res = id/(un * con);
1429 	ret = printf("%1.3f", res);
1430 	return(ret);
1431 }
1432 
1433 
1434 static int
1435 microassist_broad(struct counters *cpu, int pos)
1436 {
1437 	int ret;
1438 	struct counters *idq;
1439 	struct counters *unhalt;
1440 	struct counters *uopiss;
1441 	struct counters *uopret;
1442 	double un, id, res, con, uoi, uor;
1443 
1444 	con = 4.0;
1445 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1446 	idq = find_counter(cpu, "IDQ.MS_UOPS");
1447 	uopiss = find_counter(cpu, "UOPS_ISSUED.ANY");
1448 	uopret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
1449 	if (pos != -1) {
1450 		id = idq->vals[pos] * 1.0;
1451 		un = unhalt->vals[pos] * 1.0;
1452 		uoi = uopiss->vals[pos] * 1.0;
1453 		uor = uopret->vals[pos] * 1.0;
1454 	} else {
1455 		id = idq->sum * 1.0;
1456 		un = unhalt->sum * 1.0;
1457 		uoi = uopiss->sum * 1.0;
1458 		uor = uopret->sum * 1.0;
1459 	}
1460 	res = (uor/uoi) * (id/(un * con));
1461 	ret = printf("%1.3f", res);
1462 	return(ret);
1463 }
1464 
1465 
1466 static int
1467 aliasing(struct counters *cpu, int pos)
1468 {
1469 	/* 15  - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */
1470 	int ret;
1471 	struct counters *ld;
1472 	struct counters *unhalt;
1473 	double un, lds, con, res;
1474 
1475 	con = 5.0;
1476 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1477 	ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS");
1478 	if (pos != -1) {
1479 		lds = ld->vals[pos] * 1.0;
1480 		un = unhalt->vals[pos] * 1.0;
1481 	} else {
1482 		lds = ld->sum * 1.0;
1483 		un = unhalt->sum * 1.0;
1484 	}
1485 	res = (lds * con)/un;
1486 	ret = printf("%1.3f", res);
1487 	return(ret);
1488 }
1489 
1490 static int
1491 aliasing_broad(struct counters *cpu, int pos)
1492 {
1493 	/* 15  - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */
1494 	int ret;
1495 	struct counters *ld;
1496 	struct counters *unhalt;
1497 	double un, lds, con, res;
1498 
1499 	con = 7.0;
1500 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1501 	ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS");
1502 	if (pos != -1) {
1503 		lds = ld->vals[pos] * 1.0;
1504 		un = unhalt->vals[pos] * 1.0;
1505 	} else {
1506 		lds = ld->sum * 1.0;
1507 		un = unhalt->sum * 1.0;
1508 	}
1509 	res = (lds * con)/un;
1510 	ret = printf("%1.3f", res);
1511 	return(ret);
1512 }
1513 
1514 
1515 static int
1516 fpassists(struct counters *cpu, int pos)
1517 {
1518 	/* 16  - FP_ASSIST.ANY/INST_RETIRED.ANY_P */
1519 	int ret;
1520 	struct counters *fp;
1521 	struct counters *inst;
1522 	double un, fpd, res;
1523 
1524 	inst = find_counter(cpu, "INST_RETIRED.ANY_P");
1525 	fp = find_counter(cpu, "FP_ASSIST.ANY");
1526 	if (pos != -1) {
1527 		fpd = fp->vals[pos] * 1.0;
1528 		un = inst->vals[pos] * 1.0;
1529 	} else {
1530 		fpd = fp->sum * 1.0;
1531 		un = inst->sum * 1.0;
1532 	}
1533 	res = fpd/un;
1534 	ret = printf("%1.3f", res);
1535 	return(ret);
1536 }
1537 
1538 static int
1539 otherassistavx(struct counters *cpu, int pos)
1540 {
1541 	/* 17  - (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P thresh  .1*/
1542 	int ret;
1543 	struct counters *oth;
1544 	struct counters *unhalt;
1545 	double un, ot, con, res;
1546 
1547 	con = 75.0;
1548 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1549 	oth = find_counter(cpu, "OTHER_ASSISTS.AVX_TO_SSE");
1550 	if (pos != -1) {
1551 		ot = oth->vals[pos] * 1.0;
1552 		un = unhalt->vals[pos] * 1.0;
1553 	} else {
1554 		ot = oth->sum * 1.0;
1555 		un = unhalt->sum * 1.0;
1556 	}
1557 	res = (ot * con)/un;
1558 	ret = printf("%1.3f", res);
1559 	return(ret);
1560 }
1561 
1562 static int
1563 otherassistsse(struct counters *cpu, int pos)
1564 {
1565 
1566 	int ret;
1567 	struct counters *oth;
1568 	struct counters *unhalt;
1569 	double un, ot, con, res;
1570 
1571 	/* 18     (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P  thresh .1*/
1572 	con = 75.0;
1573 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1574 	oth = find_counter(cpu, "OTHER_ASSISTS.SSE_TO_AVX");
1575 	if (pos != -1) {
1576 		ot = oth->vals[pos] * 1.0;
1577 		un = unhalt->vals[pos] * 1.0;
1578 	} else {
1579 		ot = oth->sum * 1.0;
1580 		un = unhalt->sum * 1.0;
1581 	}
1582 	res = (ot * con)/un;
1583 	ret = printf("%1.3f", res);
1584 	return(ret);
1585 }
1586 
1587 static int
1588 efficiency1(struct counters *cpu, int pos)
1589 {
1590 
1591 	int ret;
1592 	struct counters *uops;
1593 	struct counters *unhalt;
1594 	double un, ot, con, res;
1595 
1596         /* 19 (UOPS_RETIRED.RETIRE_SLOTS/(4*CPU_CLK_UNHALTED.THREAD_P) look if thresh < .9*/
1597 	con = 4.0;
1598 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1599 	uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
1600 	if (pos != -1) {
1601 		ot = uops->vals[pos] * 1.0;
1602 		un = unhalt->vals[pos] * 1.0;
1603 	} else {
1604 		ot = uops->sum * 1.0;
1605 		un = unhalt->sum * 1.0;
1606 	}
1607 	res = ot/(con * un);
1608 	ret = printf("%1.3f", res);
1609 	return(ret);
1610 }
1611 
1612 static int
1613 efficiency2(struct counters *cpu, int pos)
1614 {
1615 
1616 	int ret;
1617 	struct counters *uops;
1618 	struct counters *unhalt;
1619 	double un, ot, res;
1620 
1621         /* 20  - CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P good if > 1. (comp factor)*/
1622 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1623 	uops = find_counter(cpu, "INST_RETIRED.ANY_P");
1624 	if (pos != -1) {
1625 		ot = uops->vals[pos] * 1.0;
1626 		un = unhalt->vals[pos] * 1.0;
1627 	} else {
1628 		ot = uops->sum * 1.0;
1629 		un = unhalt->sum * 1.0;
1630 	}
1631 	res = un/ot;
1632 	ret = printf("%1.3f", res);
1633 	return(ret);
1634 }
1635 
1636 #define SANDY_BRIDGE_COUNT 20
1637 static struct cpu_entry sandy_bridge[SANDY_BRIDGE_COUNT] = {
1638 /*01*/	{ "allocstall1", "thresh > .05",
1639 	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW -w 1",
1640 	  allocstall1, 2 },
1641 /* -- not defined for SB right (partial-rat_stalls) 02*/
1642         { "allocstall2", "thresh > .05",
1643 	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP -w 1",
1644 	  allocstall2, 2 },
1645 /*03*/	{ "br_miss", "thresh >= .2",
1646 	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
1647 	  br_mispredict, 2 },
1648 /*04*/	{ "splitload", "thresh >= .1",
1649 	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1",
1650 	  splitload_sb, 2 },
1651 /* 05*/	{ "splitstore", "thresh >= .01",
1652 	  "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1",
1653 	  splitstore_sb, 2 },
1654 /*06*/	{ "contested", "thresh >= .05",
1655 	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1656 	  contested, 2 },
1657 /*07*/	{ "blockstorefwd", "thresh >= .05",
1658 	  "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1659 	  blockstoreforward, 2 },
1660 /*08*/	{ "cache2", "thresh >= .2",
1661 	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1662 	  cache2, 4 },
1663 /*09*/	{ "cache1", "thresh >= .2",
1664 	  "pmcstat -s MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1665 	  cache1, 2 },
1666 /*10*/	{ "dtlbmissload", "thresh >= .1",
1667 	  "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1668 	  dtlb_missload, 3 },
1669 /*11*/	{ "dtlbmissstore", "thresh >= .05",
1670 	  "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1671 	  dtlb_missstore, 3 },
1672 /*12*/	{ "frontendstall", "thresh >= .15",
1673 	  "pmcstat -s IDQ_UOPS_NOT_DELIVERED.CORE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1674 	  frontendstall, 2 },
1675 /*13*/	{ "clears", "thresh >= .02",
1676 	  "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1677 	  clears, 4 },
1678 /*14*/	{ "microassist", "thresh >= .05",
1679 	  "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1680 	  microassist, 2 },
1681 /*15*/	{ "aliasing_4k", "thresh >= .1",
1682 	  "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1683 	  aliasing, 2 },
1684 /*16*/	{ "fpassist", "look for a excessive value",
1685 	  "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1686 	  fpassists, 2 },
1687 /*17*/	{ "otherassistavx", "look for a excessive value",
1688 	  "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1689 	  otherassistavx, 2},
1690 /*18*/	{ "otherassistsse", "look for a excessive value",
1691 	  "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1692 	  otherassistsse, 2 },
1693 /*19*/	{ "eff1", "thresh < .9",
1694 	  "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1695 	  efficiency1, 2 },
1696 /*20*/	{ "eff2", "thresh > 1.0",
1697 	  "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1698 	  efficiency2, 2 },
1699 };
1700 
1701 
1702 #define IVY_BRIDGE_COUNT 21
1703 static struct cpu_entry ivy_bridge[IVY_BRIDGE_COUNT] = {
1704 /*1*/	{ "eff1", "thresh < .75",
1705 	  "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1706 	  efficiency1, 2 },
1707 /*2*/	{ "eff2", "thresh > 1.0",
1708 	  "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1709 	  efficiency2, 2 },
1710 /*3*/	{ "itlbmiss", "thresh > .05",
1711 	  "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1712 	  itlb_miss, 2 },
1713 /*4*/	{ "icachemiss", "thresh > .05",
1714 	  "pmcstat -s ICACHE.IFETCH_STALL -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1715 	  icache_miss, 3 },
1716 /*5*/	{ "lcpstall", "thresh > .05",
1717 	  "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1718 	  lcp_stall, 2 },
1719 /*6*/	{ "cache1", "thresh >= .2",
1720 	  "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1721 	  cache1ib, 2 },
1722 /*7*/	{ "cache2", "thresh >= .2",
1723 	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1724 	  cache2ib, 2 },
1725 /*8*/	{ "contested", "thresh >= .05",
1726 	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1727 	  contested, 2 },
1728 /*9*/	{ "datashare", "thresh >= .05",
1729 	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1730 	  datasharing, 2 },
1731 /*10*/	{ "blockstorefwd", "thresh >= .05",
1732 	  "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1733 	  blockstoreforward, 2 },
1734 /*11*/	{ "splitload", "thresh >= .1",
1735 	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
1736 	  splitloadib, 4 },
1737 /*12*/	{ "splitstore", "thresh >= .01",
1738 	  "pmcstat -s MEM_UOPS_RETIRED.SPLIT_STORES -s MEM_UOPS_RETIRED.ALL_STORES -w 1",
1739 	  splitstore, 2 },
1740 /*13*/	{ "aliasing_4k", "thresh >= .1",
1741 	  "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1742 	  aliasing, 2 },
1743 /*14*/	{ "dtlbmissload", "thresh >= .1",
1744 	  "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1745 	  dtlb_missload , 3},
1746 /*15*/	{ "dtlbmissstore", "thresh >= .05",
1747 	  "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1748 	  dtlb_missstore, 3 },
1749 /*16*/	{ "br_miss", "thresh >= .2",
1750 	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",
1751 	  br_mispredictib, 8 },
1752 /*17*/	{ "clears", "thresh >= .02",
1753 	  "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1754 	  clears, 4 },
1755 /*18*/	{ "microassist", "thresh >= .05",
1756 	  "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1757 	  microassist, 2 },
1758 /*19*/	{ "fpassist", "look for a excessive value",
1759 	  "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1760 	  fpassists, 2 },
1761 /*20*/	{ "otherassistavx", "look for a excessive value",
1762 	  "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1763 	  otherassistavx , 2},
1764 /*21*/	{ "otherassistsse", "look for a excessive value",
1765 	  "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1766 	  otherassistsse, 2 },
1767 };
1768 
1769 #define HASWELL_COUNT 20
1770 static struct cpu_entry haswell[HASWELL_COUNT] = {
1771 /*1*/	{ "eff1", "thresh < .75",
1772 	  "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1773 	  efficiency1, 2 },
1774 /*2*/	{ "eff2", "thresh > 1.0",
1775 	  "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1776 	  efficiency2, 2 },
1777 /*3*/	{ "itlbmiss", "thresh > .05",
1778 	  "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1779 	  itlb_miss, 2 },
1780 /*4*/	{ "icachemiss", "thresh > .05",
1781 	  "pmcstat -s ICACHE.MISSES -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1782 	  icache_miss_has, 2 },
1783 /*5*/	{ "lcpstall", "thresh > .05",
1784 	  "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1785 	  lcp_stall, 2 },
1786 /*6*/	{ "cache1", "thresh >= .2",
1787 	  "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1788 	  cache1ib, 2 },
1789 /*7*/	{ "cache2", "thresh >= .2",
1790 	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT  -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1791 	  cache2has, 4 },
1792 /*8*/	{ "contested", "thresh >= .05",
1793 	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1794 	  contested_has, 2 },
1795 /*9*/	{ "datashare", "thresh >= .05",
1796 	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1797 	  datasharing_has, 2 },
1798 /*10*/	{ "blockstorefwd", "thresh >= .05",
1799 	  "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1800 	  blockstoreforward, 2 },
1801 /*11*/	{ "splitload", "thresh >= .1",
1802 	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOPS_RETIRED.SPLIT_LOADS -w 1",
1803 	  splitload , 2},
1804 /*12*/	{ "splitstore", "thresh >= .01",
1805 	  "pmcstat -s MEM_UOPS_RETIRED.SPLIT_STORES -s MEM_UOPS_RETIRED.ALL_STORES -w 1",
1806 	  splitstore, 2 },
1807 /*13*/	{ "aliasing_4k", "thresh >= .1",
1808 	  "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1809 	  aliasing, 2 },
1810 /*14*/	{ "dtlbmissload", "thresh >= .1",
1811 	  "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1812 	  dtlb_missload, 3 },
1813 /*15*/	{ "br_miss", "thresh >= .2",
1814 	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
1815 	  br_mispredict, 2 },
1816 /*16*/	{ "clears", "thresh >= .02",
1817 	  "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1818 	  clears, 4 },
1819 /*17*/	{ "microassist", "thresh >= .05",
1820 	  "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1821 	  microassist, 2 },
1822 /*18*/	{ "fpassist", "look for a excessive value",
1823 	  "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1824 	  fpassists, 2 },
1825 /*19*/	{ "otherassistavx", "look for a excessive value",
1826 	  "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1827 	  otherassistavx, 2 },
1828 /*20*/	{ "otherassistsse", "look for a excessive value",
1829 	  "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1830 	  otherassistsse, 2 },
1831 };
1832 
1833 
/*
 * Print the formula behind the canned Broadwell expression called
 * 'name', followed by the threshold hint for it.  A name that is not
 * in the table is reported as unknown, and the closing hint line is
 * still printed with "unknown entry" as the threshold.
 */
static void
explain_name_broad(const char *name)
{
	/* One row per canned expression: lookup key, formula text, threshold. */
	static const struct {
		const char *key;
		const char *formula;
		const char *limit;
	} notes[] = {
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .75" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "itlbmiss",
		  "Examine (7 * ITLB_MISSES_STLB_HIT_4K + ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "icachemiss",
		  "Examine ( 36.0 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P ??? may not be right \n",
		  "thresh > .05" },
		{ "lcpstall",
		  "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "cache2",
		  "Examine (36.0 * MEM_LOAD_UOPS_RETIRED.L3_HIT / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .2" },
		{ "contested",
		  "Examine ((MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) +  MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "datashare",
		  "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 7) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "br_miss",
		  "Examine BR_MISP_RETIRED.ALL_BRANCHS_PS / (BR_MISP_RETIED.ALL_BRANCHES_PS + MACHINE_CLEARS.COUNT) *\n"
		  " (UOPS_ISSUEDF.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) /\n"
		  "CPU_CLK_UNHALTED.THREAD * 4)\n",
		  "thresh >= .2" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  "          MACHINE_CLEARS.SMC + \n"
		  "          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "microassist",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS/UOPS_ISSUED.ANY) * (IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
	};
	const size_t total = sizeof(notes) / sizeof(notes[0]);
	const char *mythresh = "unknown entry";
	size_t idx;

	for (idx = 0; idx < total; idx++) {
		if (strcmp(name, notes[idx].key) == 0)
			break;
	}
	if (idx < total) {
		/* The formula texts contain no conversion specifiers. */
		fputs(notes[idx].formula, stdout);
		mythresh = notes[idx].limit;
	} else {
		printf("Unknown name:%s\n", name);
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
1902 
1903 
1904 #define BROADWELL_COUNT 17
1905 static struct cpu_entry broadwell[BROADWELL_COUNT] = {
1906 /*1*/	{ "eff1", "thresh < .75",
1907 	  "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1908 	  efficiency1, 2 },
1909 /*2*/	{ "eff2", "thresh > 1.0",
1910 	  "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1911 	  efficiency2, 2 },
1912 /*3*/	{ "itlbmiss", "thresh > .05",
1913 	  "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -s ITLB_MISSES.STLB_HIT_4K -w 1",
1914 	  itlb_miss_broad, 3 },
1915 /*4*/	{ "icachemiss", "thresh > .05",
1916 	  "pmcstat -s ICACHE.MISSES -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1917 	  icache_miss_has, 2 },
1918 /*5*/	{ "lcpstall", "thresh > .05",
1919 	  "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1920 	  lcp_stall, 2 },
1921 /*6*/	{ "cache1", "thresh >= .1",
1922 	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_MISS  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1923 	  cache1broad, 2 },
1924 /*7*/	{ "cache2", "thresh >= .2",
1925 	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_HIT  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1926 	  cache2broad, 2 },
1927 /*8*/	{ "contested", "thresh >= .05",
1928 	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P  -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS -w 1",
1929 	  contestedbroad, 2 },
1930 /*9*/	{ "datashare", "thresh >= .05",
1931 	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1932 	  datasharing_has, 2 },
1933 /*10*/	{ "blockstorefwd", "thresh >= .05",
1934 	  "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1935 	  blockstoreforward, 2 },
1936 /*11*/	{ "aliasing_4k", "thresh >= .1",
1937 	  "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1938 	  aliasing_broad, 2 },
1939 /*12*/	{ "dtlbmissload", "thresh >= .1",
1940 	  "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT_4K -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1941 	  dtlb_missload, 3 },
1942 /*13*/	{ "br_miss", "thresh >= .2",
1943 	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.CYCLES -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",
1944 	  br_mispredict_broad, 7 },
1945 /*14*/	{ "clears", "thresh >= .02",
1946 	  "pmcstat -s MACHINE_CLEARS.CYCLES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1947 	  clears_broad, 5 },
1948 /*15*/	{ "fpassist", "look for a excessive value",
1949 	  "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1950 	  fpassists, 2 },
1951 /*16*/	{ "otherassistavx", "look for a excessive value",
1952 	  "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1953 	  otherassistavx, 2 },
1954 /*17*/	{ "microassist", "thresh >= .2",
1955 	  "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS  -w 1",
1956 	  microassist_broad, 4 },
1957 };
1958 
1959 
1960 static void
1961 set_sandybridge(void)
1962 {
1963 	strcpy(the_cpu.cputype, "SandyBridge PMC");
1964 	the_cpu.number = SANDY_BRIDGE_COUNT;
1965 	the_cpu.ents = sandy_bridge;
1966 	the_cpu.explain = explain_name_sb;
1967 }
1968 
1969 static void
1970 set_ivybridge(void)
1971 {
1972 	strcpy(the_cpu.cputype, "IvyBridge PMC");
1973 	the_cpu.number = IVY_BRIDGE_COUNT;
1974 	the_cpu.ents = ivy_bridge;
1975 	the_cpu.explain = explain_name_ib;
1976 }
1977 
1978 
1979 static void
1980 set_haswell(void)
1981 {
1982 	strcpy(the_cpu.cputype, "HASWELL PMC");
1983 	the_cpu.number = HASWELL_COUNT;
1984 	the_cpu.ents = haswell;
1985 	the_cpu.explain = explain_name_has;
1986 }
1987 
1988 
1989 static void
1990 set_broadwell(void)
1991 {
1992 	strcpy(the_cpu.cputype, "HASWELL PMC");
1993 	the_cpu.number = BROADWELL_COUNT;
1994 	the_cpu.ents = broadwell;
1995 	the_cpu.explain = explain_name_broad;
1996 }
1997 
1998 
1999 static int
2000 set_expression(const char *name)
2001 {
2002 	int found = 0, i;
2003 	for(i=0 ; i< the_cpu.number; i++) {
2004 		if (strcmp(name, the_cpu.ents[i].name) == 0) {
2005 			found = 1;
2006 			expression = the_cpu.ents[i].func;
2007 			command = the_cpu.ents[i].command;
2008 			threshold = the_cpu.ents[i].thresh;
2009 			if  (the_cpu.ents[i].counters_required > max_pmc_counters) {
2010 				printf("Test %s requires that the CPU have %d counters and this CPU has only %d\n",
2011 				       the_cpu.ents[i].name,
2012 				       the_cpu.ents[i].counters_required, max_pmc_counters);
2013 				printf("Sorry this test can not be run\n");
2014 				if (run_all == 0) {
2015 					exit(-1);
2016 				} else {
2017 					return(-1);
2018 				}
2019 			}
2020 			break;
2021 		}
2022 	}
2023 	if (!found) {
2024 		printf("For CPU type %s we have no expression:%s\n",
2025 		       the_cpu.cputype, name);
2026 		exit(-1);
2027 	}
2028 	return(0);
2029 }
2030 
2031 
2032 
2033 
2034 
2035 static int
2036 validate_expression(char *name)
2037 {
2038 	int i, found;
2039 
2040 	found = 0;
2041 	for(i=0 ; i< the_cpu.number; i++) {
2042 		if (strcmp(name, the_cpu.ents[i].name) == 0) {
2043 			found = 1;
2044 			break;
2045 		}
2046 	}
2047 	if (!found) {
2048 		return(-1);
2049 	}
2050 	return (0);
2051 }
2052 
2053 static void
2054 do_expression(struct counters *cpu, int pos)
2055 {
2056 	if (expression == NULL)
2057 		return;
2058 	(*expression)(cpu, pos);
2059 }
2060 
2061 static void
2062 process_header(int idx, char *p)
2063 {
2064 	struct counters *up;
2065 	int i, len, nlen;
2066 	/*
2067 	 * Given header element idx, at p in
2068 	 * form 's/NN/nameof'
2069 	 * process the entry to pull out the name and
2070 	 * the CPU number.
2071 	 */
2072 	if (strncmp(p, "s/", 2)) {
2073 		printf("Check -- invalid header no s/ in %s\n",
2074 		       p);
2075 		return;
2076 	}
2077 	up = &cnts[idx];
2078 	up->cpu = strtol(&p[2], NULL, 10);
2079 	len = strlen(p);
2080 	for (i=2; i<len; i++) {
2081 		if (p[i] == '/') {
2082 			nlen = strlen(&p[(i+1)]);
2083 			if (nlen < (MAX_NLEN-1)) {
2084 				strcpy(up->counter_name, &p[(i+1)]);
2085 			} else {
2086 				strncpy(up->counter_name, &p[(i+1)], (MAX_NLEN-1));
2087 			}
2088 		}
2089 	}
2090 }
2091 
2092 static void
2093 build_counters_from_header(FILE *io)
2094 {
2095 	char buffer[8192], *p;
2096 	int i, len, cnt;
2097 	size_t mlen;
2098 
2099 	/* We have a new start, lets
2100 	 * setup our headers and cpus.
2101 	 */
2102 	if (fgets(buffer, sizeof(buffer), io) == NULL) {
2103 		printf("First line can't be read from file err:%d\n", errno);
2104 		return;
2105 	}
2106 	/*
2107 	 * Ok output is an array of counters. Once
2108 	 * we start to read the values in we must
2109 	 * put them in there slot to match there CPU and
2110 	 * counter being updated. We create a mass array
2111 	 * of the counters, filling in the CPU and
2112 	 * counter name.
2113 	 */
2114 	/* How many do we get? */
2115 	len = strlen(buffer);
2116 	for (i=0, cnt=0; i<len; i++) {
2117 		if (strncmp(&buffer[i], "s/", 2) == 0) {
2118 			cnt++;
2119 			for(;i<len;i++) {
2120 				if (buffer[i] == ' ')
2121 					break;
2122 			}
2123 		}
2124 	}
2125 	mlen = sizeof(struct counters) * cnt;
2126 	cnts = malloc(mlen);
2127 	ncnts = cnt;
2128 	if (cnts == NULL) {
2129 		printf("No memory err:%d\n", errno);
2130 		return;
2131 	}
2132 	memset(cnts, 0, mlen);
2133 	for (i=0, cnt=0; i<len; i++) {
2134 		if (strncmp(&buffer[i], "s/", 2) == 0) {
2135 			p = &buffer[i];
2136 			for(;i<len;i++) {
2137 				if (buffer[i] == ' ') {
2138 					buffer[i] = 0;
2139 					break;
2140 				}
2141 			}
2142 			process_header(cnt, p);
2143 			cnt++;
2144 		}
2145 	}
2146 	if (verbose)
2147 		printf("We have %d entries\n", cnt);
2148 }
2149 extern int max_to_collect;
2150 int max_to_collect = MAX_COUNTER_SLOTS;
2151 
2152 static int
2153 read_a_line(FILE *io)
2154 {
2155 	char buffer[8192], *p, *stop;
2156 	int pos, i;
2157 
2158 	if (fgets(buffer, sizeof(buffer), io) == NULL) {
2159 		return(0);
2160 	}
2161 	p = buffer;
2162 	for (i=0; i<ncnts; i++) {
2163 		pos = cnts[i].pos;
2164 		cnts[i].vals[pos] = strtol(p, &stop, 0);
2165 		cnts[i].pos++;
2166 		cnts[i].sum += cnts[i].vals[pos];
2167 		p = stop;
2168 	}
2169 	return (1);
2170 }
2171 
/*
 * Number of CPUs that have counters, as counted by print_header().
 * The output loops use it to decide when a row of per-CPU results
 * is complete.
 */
extern int cpu_count_out;
int cpu_count_out = 0;
2174 
2175 static void
2176 print_header(void)
2177 {
2178 	int i, cnt, printed_cnt;
2179 
2180 	printf("*********************************\n");
2181 	for(i=0, cnt=0; i<MAX_CPU; i++) {
2182 		if (glob_cpu[i]) {
2183 			cnt++;
2184 		}
2185 	}
2186 	cpu_count_out = cnt;
2187 	for(i=0, printed_cnt=0; i<MAX_CPU; i++) {
2188 		if (glob_cpu[i]) {
2189 			printf("CPU%d", i);
2190 			printed_cnt++;
2191 		}
2192 		if (printed_cnt == cnt) {
2193 			printf("\n");
2194 			break;
2195 		} else {
2196 			printf("\t");
2197 		}
2198 	}
2199 }
2200 
2201 static void
2202 lace_cpus_together(void)
2203 {
2204 	int i, j, lace_cpu;
2205 	struct counters *cpat, *at;
2206 
2207 	for(i=0; i<ncnts; i++) {
2208 		cpat = &cnts[i];
2209 		if (cpat->next_cpu) {
2210 			/* Already laced in */
2211 			continue;
2212 		}
2213 		lace_cpu = cpat->cpu;
2214 		if (lace_cpu >= MAX_CPU) {
2215 			printf("CPU %d to big\n", lace_cpu);
2216 			continue;
2217 		}
2218 		if (glob_cpu[lace_cpu] == NULL) {
2219 			glob_cpu[lace_cpu] = cpat;
2220 		} else {
2221 			/* Already processed this cpu */
2222 			continue;
2223 		}
2224 		/* Ok look forward for cpu->cpu and link in */
2225 		for(j=(i+1); j<ncnts; j++) {
2226 			at = &cnts[j];
2227 			if (at->next_cpu) {
2228 				continue;
2229 			}
2230 			if (at->cpu == lace_cpu) {
2231 				/* Found one */
2232 				cpat->next_cpu = at;
2233 				cpat = at;
2234 			}
2235 		}
2236 	}
2237 }
2238 
2239 
2240 static void
2241 process_file(char *filename)
2242 {
2243 	FILE *io;
2244 	int i;
2245 	int line_at, not_done;
2246 	pid_t pid_of_command=0;
2247 
2248 	if (filename ==  NULL) {
2249 		io = my_popen(command, "r", &pid_of_command);
2250 		if (io == NULL) {
2251 			printf("Can't popen the command %s\n", command);
2252 			return;
2253 		}
2254 	} else {
2255 		io = fopen(filename, "r");
2256 		if (io == NULL) {
2257 			printf("Can't process file %s err:%d\n",
2258 			       filename, errno);
2259 			return;
2260 		}
2261 	}
2262 	build_counters_from_header(io);
2263 	if (cnts == NULL) {
2264 		/* Nothing we can do */
2265 		printf("Nothing to do -- no counters built\n");
2266 		if (filename) {
2267 			fclose(io);
2268 		} else {
2269 			my_pclose(io, pid_of_command);
2270 		}
2271 		return;
2272 	}
2273 	lace_cpus_together();
2274 	print_header();
2275 	if (verbose) {
2276 		for (i=0; i<ncnts; i++) {
2277 			printf("Counter:%s cpu:%d index:%d\n",
2278 			       cnts[i].counter_name,
2279 			       cnts[i].cpu, i);
2280 		}
2281 	}
2282 	line_at = 0;
2283 	not_done = 1;
2284 	while(not_done) {
2285 		if (read_a_line(io)) {
2286 			line_at++;
2287 		} else {
2288 			break;
2289 		}
2290 		if (line_at >= max_to_collect) {
2291 			not_done = 0;
2292 		}
2293 		if (filename == NULL) {
2294 			int cnt;
2295 			/* For the ones we dynamically open we print now */
2296 			for(i=0, cnt=0; i<MAX_CPU; i++) {
2297 				do_expression(glob_cpu[i], (line_at-1));
2298 				cnt++;
2299 				if (cnt == cpu_count_out) {
2300 					printf("\n");
2301 					break;
2302 				} else {
2303 					printf("\t");
2304 				}
2305 			}
2306 		}
2307 	}
2308 	if (filename) {
2309 		fclose(io);
2310 	} else {
2311 		my_pclose(io, pid_of_command);
2312 	}
2313 }
2314 #if defined(__amd64__)
2315 #define cpuid(in,a,b,c,d)\
2316   asm("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (in));
2317 
2318 static __inline void
2319 do_cpuid(u_int ax, u_int cx, u_int *p)
2320 {
2321 	__asm __volatile("cpuid"
2322 			 : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3])
2323 			 :  "0" (ax), "c" (cx) );
2324 }
2325 
2326 #else
2327 #define cpuid(in, a, b, c, d)
2328 #define do_cpuid(ax, cx, p)
2329 #endif
2330 
2331 static void
2332 get_cpuid_set(void)
2333 {
2334 	unsigned long eax, ebx, ecx, edx;
2335 	int model;
2336 	pid_t pid_of_command=0;
2337 	size_t sz, len;
2338 	FILE *io;
2339 	char linebuf[1024], *str;
2340 	u_int reg[4];
2341 
2342 	eax = ebx = ecx = edx = 0;
2343 
2344 	cpuid(0, eax, ebx, ecx, edx);
2345 	if (ebx == 0x68747541) {
2346 		printf("AMD processors are not supported by this program\n");
2347 		printf("Sorry\n");
2348 		exit(0);
2349 	} else if (ebx == 0x6972794) {
2350 		printf("Cyrix processors are not supported by this program\n");
2351 		printf("Sorry\n");
2352 		exit(0);
2353 	} else if (ebx == 0x756e6547) {
2354 		printf("Genuine Intel\n");
2355 	} else {
2356 		printf("Unknown processor type 0x%lx Only Intel AMD64 types are supported by this routine!\n", ebx);
2357 		exit(0);
2358 	}
2359 	cpuid(1, eax, ebx, ecx, edx);
2360 	model = (((eax & 0xF0000) >> 12) | ((eax & 0xF0) >> 4));
2361 	printf("CPU model is 0x%x id:0x%lx\n", model, eax);
2362 	switch (eax & 0xF00) {
2363 	case 0x500:		/* Pentium family processors */
2364 		printf("Intel Pentium P5\n");
2365 		goto not_supported;
2366 		break;
2367 	case 0x600:		/* Pentium Pro, Celeron, Pentium II & III */
2368 		switch (model) {
2369 		case 0x1:
2370 			printf("Intel Pentium P6\n");
2371 			goto not_supported;
2372 			break;
2373 		case 0x3:
2374 		case 0x5:
2375 			printf("Intel PII\n");
2376 			goto not_supported;
2377 			break;
2378 		case 0x6: case 0x16:
2379 			printf("Intel CL\n");
2380 			goto not_supported;
2381 			break;
2382 		case 0x7: case 0x8: case 0xA: case 0xB:
2383 			printf("Intel PIII\n");
2384 			goto not_supported;
2385 			break;
2386 		case 0x9: case 0xD:
2387 			printf("Intel PM\n");
2388 			goto not_supported;
2389 			break;
2390 		case 0xE:
2391 			printf("Intel CORE\n");
2392 			goto not_supported;
2393 			break;
2394 		case 0xF:
2395 			printf("Intel CORE2\n");
2396 			goto not_supported;
2397 			break;
2398 		case 0x17:
2399 			printf("Intel CORE2EXTREME\n");
2400 			goto not_supported;
2401 			break;
2402 		case 0x1C:	/* Per Intel document 320047-002. */
2403 			printf("Intel ATOM\n");
2404 			goto not_supported;
2405 			break;
2406 		case 0x1A:
2407 		case 0x1E:	/*
2408 				 * Per Intel document 253669-032 9/2009,
2409 				 * pages A-2 and A-57
2410 				 */
2411 		case 0x1F:	/*
2412 				 * Per Intel document 253669-032 9/2009,
2413 				 * pages A-2 and A-57
2414 				 */
2415 			printf("Intel COREI7\n");
2416 			goto not_supported;
2417 			break;
2418 		case 0x2E:
2419 			printf("Intel NEHALEM\n");
2420 			goto not_supported;
2421 			break;
2422 		case 0x25:	/* Per Intel document 253669-033US 12/2009. */
2423 		case 0x2C:	/* Per Intel document 253669-033US 12/2009. */
2424 			printf("Intel WESTMERE\n");
2425 			goto not_supported;
2426 			break;
2427 		case 0x2F:	/* Westmere-EX, seen in wild */
2428 			printf("Intel WESTMERE\n");
2429 			goto not_supported;
2430 			break;
2431 		case 0x2A:	/* Per Intel document 253669-039US 05/2011. */
2432 			printf("Intel SANDYBRIDGE\n");
2433 			set_sandybridge();
2434 			break;
2435 		case 0x2D:	/* Per Intel document 253669-044US 08/2012. */
2436 			printf("Intel SANDYBRIDGE_XEON\n");
2437 			set_sandybridge();
2438 			break;
2439 		case 0x3A:	/* Per Intel document 253669-043US 05/2012. */
2440 			printf("Intel IVYBRIDGE\n");
2441 			set_ivybridge();
2442 			break;
2443 		case 0x3E:	/* Per Intel document 325462-045US 01/2013. */
2444 			printf("Intel IVYBRIDGE_XEON\n");
2445 			set_ivybridge();
2446 			break;
2447 		case 0x3F:	/* Per Intel document 325462-045US 09/2014. */
2448 			printf("Intel HASWELL (Xeon)\n");
2449 			set_haswell();
2450 			break;
2451 		case 0x3C:	/* Per Intel document 325462-045US 01/2013. */
2452 		case 0x45:
2453 		case 0x46:
2454 			printf("Intel HASWELL\n");
2455 			set_haswell();
2456 			break;
2457 
2458 		case 0x4e:
2459 		case 0x5e:
2460 			printf("Intel SKY-LAKE\n");
2461 			goto not_supported;
2462 			break;
2463 		case 0x3D:
2464 		case 0x47:
2465 			printf("Intel BROADWELL\n");
2466 			set_broadwell();
2467 			break;
2468 		case 0x4f:
2469 		case 0x56:
2470 			printf("Intel BROADWEL (Xeon)\n");
2471 			set_broadwell();
2472 			break;
2473 
2474 		case 0x4D:
2475 			/* Per Intel document 330061-001 01/2014. */
2476 			printf("Intel ATOM_SILVERMONT\n");
2477 			goto not_supported;
2478 			break;
2479 		default:
2480 			printf("Intel model 0x%x is not known -- sorry\n",
2481 			       model);
2482 			goto not_supported;
2483 			break;
2484 		}
2485 		break;
2486 	case 0xF00:		/* P4 */
2487 		printf("Intel unknown model %d\n", model);
2488 		goto not_supported;
2489 		break;
2490 	}
2491 	do_cpuid(0xa, 0, reg);
2492 	max_pmc_counters = (reg[3] & 0x0000000f) + 1;
2493 	printf("We have %d PMC counters to work with\n", max_pmc_counters);
2494 	/* Ok lets load the list of all known PMC's */
2495 	io = my_popen("/usr/sbin/pmccontrol -L", "r", &pid_of_command);
2496 	if (valid_pmcs == NULL) {
2497 		/* Likely */
2498 		pmc_allocated_cnt = PMC_INITIAL_ALLOC;
2499 		sz = sizeof(char *) * pmc_allocated_cnt;
2500 		valid_pmcs = malloc(sz);
2501 		if (valid_pmcs == NULL) {
2502 			printf("No memory allocation fails at startup?\n");
2503 			exit(-1);
2504 		}
2505 		memset(valid_pmcs, 0, sz);
2506 	}
2507 
2508 	while (fgets(linebuf, sizeof(linebuf), io) != NULL) {
2509 		if (linebuf[0] != '\t') {
2510 			/* sometimes headers ;-) */
2511 			continue;
2512 		}
2513 		len = strlen(linebuf);
2514 		if (linebuf[(len-1)] == '\n') {
2515 			/* Likely */
2516 			linebuf[(len-1)] = 0;
2517 		}
2518 		str = &linebuf[1];
2519 		len = strlen(str) + 1;
2520 		valid_pmcs[valid_pmc_cnt] = malloc(len);
2521 		if (valid_pmcs[valid_pmc_cnt] == NULL) {
2522 			printf("No memory2 allocation fails at startup?\n");
2523 			exit(-1);
2524 		}
2525 		memset(valid_pmcs[valid_pmc_cnt], 0, len);
2526 		strcpy(valid_pmcs[valid_pmc_cnt], str);
2527 		valid_pmc_cnt++;
2528 		if (valid_pmc_cnt >= pmc_allocated_cnt) {
2529 			/* Got to expand -- unlikely */
2530 			char **more;
2531 
2532 			sz = sizeof(char *) * (pmc_allocated_cnt * 2);
2533 			more = malloc(sz);
2534 			if (more == NULL) {
2535 				printf("No memory3 allocation fails at startup?\n");
2536 				exit(-1);
2537 			}
2538 			memset(more, 0, sz);
2539 			memcpy(more, valid_pmcs, sz);
2540 			pmc_allocated_cnt *= 2;
2541 			free(valid_pmcs);
2542 			valid_pmcs = more;
2543 		}
2544 	}
2545 	my_pclose(io, pid_of_command);
2546 	return;
2547 not_supported:
2548 	printf("Not supported\n");
2549 	exit(-1);
2550 }
2551 
2552 static void
2553 explain_all(void)
2554 {
2555 	int i;
2556 	printf("For CPU's of type %s the following expressions are available:\n",the_cpu.cputype);
2557 	printf("-------------------------------------------------------------\n");
2558 	for(i=0; i<the_cpu.number; i++){
2559 		printf("For -e %s ", the_cpu.ents[i].name);
2560 		(*the_cpu.explain)(the_cpu.ents[i].name);
2561 		printf("----------------------------\n");
2562 	}
2563 }
2564 
/*
 * Try a single PMC by running pmcstat on CPU 0 for a short interval
 * and report "Pass" with the first value, or "Failed" with a reason.
 *
 * 'pmc' is the PMC name; 'out_so_far' is the number of characters
 * the caller already printed on this line, used to pad the verdict
 * out to column 50.
 *
 * The first output line must mention the PMC name (a line with
 * "ERROR" in it is a failure); the value is then taken from the
 * second line with its leading blanks removed.
 */
static void
test_for_a_pmc(const char *pmc, int out_so_far)
{
	FILE *io;
	pid_t pid_of_command=0;
	char my_command[1024];
	char line[1024];
	int len, llen, i, j, k;

	for (i=out_so_far; i<50; i++) {
		printf(" ");
	}
	/* Bounded formatting: the name comes from an external listing */
	len = snprintf(my_command, sizeof(my_command),
	    "/usr/sbin/pmcstat -w .25 -c 0 -s %s", pmc);
	if (len < 0 || (size_t)len >= sizeof(my_command)) {
		printf("Failed -- PMC name too long\n");
		return;
	}
	io = my_popen(my_command, "r", &pid_of_command);
	if (io == NULL) {
		printf("Failed -- popen fails\n");
		return;
	}
	/* Setup what we expect */
	len = strlen(pmc);
	if (fgets(line, sizeof(line), io) == NULL) {
		printf("Failed -- no output from pmstat\n");
		goto out;
	}
	llen = strlen(line);
	if (line[(llen-1)] == '\n') {
		line[(llen-1)] = 0;
		llen--;
	}
	for(i=2; i<(llen-len); i++) {
		if (strncmp(&line[i], "ERROR", 5) == 0) {
			printf("Failed %s\n", line);
			goto out;
		}
		if (strncmp(&line[i], pmc, len) != 0)
			continue;

		/* Header names our PMC; the next line holds the value */
		if (fgets(line, sizeof(line), io) == NULL) {
			printf("Failed -- no second output from pmstat\n");
			goto out;
		}
		/* Step over every leading blank, one character at a time */
		for (j=0; line[j] == ' '; j++)
			;
		printf("Pass");
		len = strlen(&line[j]);
		for(k=len; k<20; k++) {
			printf(" ");
		}
		if (len) {
			printf("%s", &line[j]);
		} else {
			printf("\n");
		}
		goto out;
	}
	printf("Failed -- '%s' not '%s'\n", line, pmc);
out:
	my_pclose(io, pid_of_command);

}
2637 
/*
 * Add a private copy of 'name' to the string table 'vars' unless one
 * of the first 'cur_cnt' entries already holds the same text.
 *
 * Returns 1 when the name was stored in vars[cur_cnt] and 0 when it
 * was already present.  A slot that is unexpectedly occupied, or an
 * allocation failure, terminates the program.
 */
static int
add_it_to(char **vars, int cur_cnt, char *name)
{
	char *copy;
	size_t bytes;
	int slot = 0;

	while (slot < cur_cnt) {
		if (strcmp(vars[slot], name) == 0) {
			/* Already have */
			return(0);
		}
		slot++;
	}
	if (vars[cur_cnt] != NULL) {
		printf("Cur_cnt:%d filled with %s??\n",
		       cur_cnt, vars[cur_cnt]);
		exit(-1);
	}
	/* Ok its new */
	bytes = strlen(name) + 1;
	copy = malloc(bytes);
	if (copy == NULL) {
		printf("No memory %s\n", __func__);
		exit(-1);
	}
	/* The terminating NUL is part of the copied bytes */
	memcpy(copy, name, bytes);
	vars[cur_cnt] = copy;
	return(1);
}
2665 
2666 static char *
2667 build_command_for_exp(struct expression *exp)
2668 {
2669 	/*
2670 	 * Build the pmcstat command to handle
2671 	 * the passed in expression.
2672 	 * /usr/sbin/pmcstat -w 1 -s NNN -s QQQ
2673 	 * where NNN and QQQ represent the PMC's in the expression
2674 	 * uniquely..
2675 	 */
2676 	char forming[1024];
2677 	int cnt_pmc, alloced_pmcs, i;
2678 	struct expression *at;
2679 	char **vars, *cmd;
2680 	size_t mal;
2681 
2682 	alloced_pmcs = cnt_pmc = 0;
2683 	/* first how many do we have */
2684 	at = exp;
2685 	while (at) {
2686 		if (at->type == TYPE_VALUE_PMC) {
2687 			cnt_pmc++;
2688 		}
2689 		at = at->next;
2690 	}
2691 	if (cnt_pmc == 0) {
2692 		printf("No PMC's in your expression -- nothing to do!!\n");
2693 		exit(0);
2694 	}
2695 	mal = cnt_pmc * sizeof(char *);
2696 	vars = malloc(mal);
2697 	if (vars == NULL) {
2698 		printf("No memory\n");
2699 		exit(-1);
2700 	}
2701 	memset(vars, 0, mal);
2702 	at = exp;
2703 	while (at) {
2704 		if (at->type == TYPE_VALUE_PMC) {
2705 			if(add_it_to(vars, alloced_pmcs, at->name)) {
2706 				alloced_pmcs++;
2707 			}
2708 		}
2709 		at = at->next;
2710 	}
2711 	/* Now we have a unique list in vars so create our command */
2712 	mal = 23; /*	"/usr/sbin/pmcstat -w 1"  + \0 */
2713 	for(i=0; i<alloced_pmcs; i++) {
2714 		mal += strlen(vars[i]) + 4;	/* var + " -s " */
2715 	}
2716 	cmd = malloc((mal+2));
2717 	if (cmd == NULL) {
2718 		printf("%s out of mem\n", __FUNCTION__);
2719 		exit(-1);
2720 	}
2721 	memset(cmd, 0, (mal+2));
2722 	strcpy(cmd, "/usr/sbin/pmcstat -w 1");
2723 	at = exp;
2724 	for(i=0; i<alloced_pmcs; i++) {
2725 		sprintf(forming, " -s %s", vars[i]);
2726 		strcat(cmd, forming);
2727 		free(vars[i]);
2728 		vars[i] = NULL;
2729 	}
2730 	free(vars);
2731 	return(cmd);
2732 }
2733 
2734 static int
2735 user_expr(struct counters *cpu, int pos)
2736 {
2737 	int ret;
2738 	double res;
2739 	struct counters *var;
2740 	struct expression *at;
2741 
2742 	at = master_exp;
2743 	while (at) {
2744 		if (at->type == TYPE_VALUE_PMC) {
2745 			var = find_counter(cpu, at->name);
2746 			if (var == NULL) {
2747 				printf("%s:Can't find counter %s?\n", __FUNCTION__, at->name);
2748 				exit(-1);
2749 			}
2750 			if (pos != -1) {
2751 				at->value = var->vals[pos] * 1.0;
2752 			} else {
2753 				at->value = var->sum * 1.0;
2754 			}
2755 		}
2756 		at = at->next;
2757 	}
2758 	res = run_expr(master_exp, 1, NULL);
2759 	ret = printf("%1.3f", res);
2760 	return(ret);
2761 }
2762 
2763 
2764 static void
2765 set_manual_exp(struct expression *exp)
2766 {
2767 	expression = user_expr;
2768 	command = build_command_for_exp(exp);
2769 	threshold = "User defined threshold";
2770 }
2771 
2772 static void
2773 run_tests(void)
2774 {
2775 	int i, lenout;
2776 	printf("Running tests on %d PMC's this may take some time\n", valid_pmc_cnt);
2777 	printf("------------------------------------------------------------------------\n");
2778 	for(i=0; i<valid_pmc_cnt; i++) {
2779 		lenout = printf("%s", valid_pmcs[i]);
2780 		fflush(stdout);
2781 		test_for_a_pmc(valid_pmcs[i], lenout);
2782 	}
2783 }
2784 static void
2785 list_all(void)
2786 {
2787 	int i, cnt, j;
2788 	printf("PMC                                               Abbreviation\n");
2789 	printf("--------------------------------------------------------------\n");
2790 	for(i=0; i<valid_pmc_cnt; i++) {
2791 		cnt = printf("%s", valid_pmcs[i]);
2792 		for(j=cnt; j<52; j++) {
2793 			printf(" ");
2794 		}
2795 		printf("%%%d\n", i);
2796 	}
2797 }
2798 
2799 
2800 int
2801 main(int argc, char **argv)
2802 {
2803 	int i, j, cnt;
2804 	char *filename=NULL;
2805 	const char *name=NULL;
2806 	int help_only = 0;
2807 	int test_mode = 0;
2808 	int test_at = 0;
2809 
2810 	get_cpuid_set();
2811 	memset(glob_cpu, 0, sizeof(glob_cpu));
2812 	while ((i = getopt(argc, argv, "ALHhvm:i:?e:TE:")) != -1) {
2813 		switch (i) {
2814 		case 'A':
2815 			run_all = 1;
2816 			break;
2817 		case 'L':
2818 			list_all();
2819 			return(0);
2820 		case 'H':
2821 			printf("**********************************\n");
2822 			explain_all();
2823 			printf("**********************************\n");
2824 			return(0);
2825 			break;
2826 		case 'T':
2827 			test_mode = 1;
2828 			break;
2829 		case 'E':
2830 			master_exp = parse_expression(optarg);
2831 			if (master_exp) {
2832 				set_manual_exp(master_exp);
2833 			}
2834 			break;
2835 		case 'e':
2836 			if (validate_expression(optarg)) {
2837 				printf("Unknown expression %s\n", optarg);
2838 				return(0);
2839 			}
2840 			name = optarg;
2841 			set_expression(optarg);
2842 			break;
2843 		case 'm':
2844 			max_to_collect = strtol(optarg, NULL, 0);
2845 			if (max_to_collect > MAX_COUNTER_SLOTS) {
2846 				/* You can't collect more than max in array */
2847 				max_to_collect = MAX_COUNTER_SLOTS;
2848 			}
2849 			break;
2850 		case 'v':
2851 			verbose++;
2852 			break;
2853 		case 'h':
2854 			help_only = 1;
2855 			break;
2856 		case 'i':
2857 			filename = optarg;
2858 			break;
2859 		case '?':
2860 		default:
2861 		use:
2862 			printf("Use %s [ -i inputfile -v -m max_to_collect -e expr -E -h -? -H]\n",
2863 			       argv[0]);
2864 			printf("-i inputfile -- use source as inputfile not stdin (if stdin collect)\n");
2865 			printf("-v -- verbose dump debug type things -- you don't want this\n");
2866 			printf("-m N -- maximum to collect is N measurements\n");
2867 			printf("-e expr-name -- Do expression expr-name\n");
2868 			printf("-E 'your expression' -- Do your expression\n");
2869 			printf("-h -- Don't do the expression I put in -e xxx just explain what it does and exit\n");
2870 			printf("-H -- Don't run anything, just explain all canned expressions\n");
2871 			printf("-T -- Test all PMC's defined by this processor\n");
2872 			printf("-A -- Run all canned tests\n");
2873 			return(0);
2874 			break;
2875 		}
2876 	}
2877 	if ((run_all == 0) && (name == NULL) && (filename == NULL) &&
2878 	    (test_mode == 0) && (master_exp == NULL)) {
2879 		printf("Without setting an expression we cannot dynamically gather information\n");
2880 		printf("you must supply a filename (and you probably want verbosity)\n");
2881 		goto use;
2882 	}
2883 	if (run_all && max_to_collect > 10) {
2884 		max_to_collect = 3;
2885 	}
2886 	if (test_mode) {
2887 		run_tests();
2888 		return(0);
2889 	}
2890 	printf("*********************************\n");
2891 	if ((master_exp == NULL) && name) {
2892 		(*the_cpu.explain)(name);
2893 	} else if (master_exp) {
2894 		printf("Examine your expression ");
2895 		print_exp(master_exp);
2896 		printf("User defined threshold\n");
2897 	}
2898 	if (help_only) {
2899 		return(0);
2900 	}
2901 	if (run_all) {
2902 	more:
2903 		name = the_cpu.ents[test_at].name;
2904 		printf("***Test %s (threshold %s)****\n", name, the_cpu.ents[test_at].thresh);
2905 		test_at++;
2906 		if (set_expression(name) == -1) {
2907 			if (test_at >= the_cpu.number) {
2908 				goto done;
2909 			} else
2910 				goto more;
2911 		}
2912 
2913 	}
2914 	process_file(filename);
2915 	if (verbose >= 2) {
2916 		for (i=0; i<ncnts; i++) {
2917 			printf("Counter:%s cpu:%d index:%d\n",
2918 			       cnts[i].counter_name,
2919 			       cnts[i].cpu, i);
2920 			for(j=0; j<cnts[i].pos; j++) {
2921 				printf(" val - %ld\n", (long int)cnts[i].vals[j]);
2922 			}
2923 			printf(" sum - %ld\n", (long int)cnts[i].sum);
2924 		}
2925 	}
2926 	if (expression == NULL) {
2927 		return(0);
2928 	}
2929 	if (max_to_collect > 1) {
2930 		for(i=0, cnt=0; i<MAX_CPU; i++) {
2931 			if (glob_cpu[i]) {
2932 				do_expression(glob_cpu[i], -1);
2933 				cnt++;
2934 				if (cnt == cpu_count_out) {
2935 					printf("\n");
2936 					break;
2937 				} else {
2938 					printf("\t");
2939 				}
2940 			}
2941 		}
2942 	}
2943 	if (run_all && (test_at < the_cpu.number)) {
2944 		memset(glob_cpu, 0, sizeof(glob_cpu));
2945 		ncnts = 0;
2946 		printf("*********************************\n");
2947 		goto more;
2948 	} else if (run_all) {
2949 	done:
2950 		printf("*********************************\n");
2951 	}
2952 	return(0);
2953 }
2954