xref: /freebsd/usr.sbin/pmcstudy/pmcstudy.c (revision cc426dd31990b8b50b210efc450e404596548ca1)
1 /*-
2  * Copyright (c) 2014, 2015 Netflix Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer,
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 #include <sys/types.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <unistd.h>
32 #include <string.h>
33 #include <strings.h>
34 #include <sys/errno.h>
35 #include <signal.h>
36 #include <sys/wait.h>
37 #include <getopt.h>
38 #include "eval_expr.h"
39 __FBSDID("$FreeBSD$");
40 
/*
 * NOTE(review): the code that reads/writes these two flags is outside this
 * view -- presumably they are set from the command line; confirm.
 */
static int max_pmc_counters = 1;
static int run_all = 0;

#define MAX_COUNTER_SLOTS 1024	/* Element count of counters.vals[] */
#define MAX_NLEN 64		/* Size of counters.counter_name[] */
#define MAX_CPU 64		/* Element count of glob_cpu[] */
static int verbose = 0;

extern char **environ;		/* Handed to execve() by my_popen() */
/*
 * Each non-static global below is declared extern immediately before its
 * definition (presumably to keep missing-declaration warnings quiet).
 */
extern struct expression *master_exp;
struct expression *master_exp=NULL;

/* NOTE(review): the user of PMC_INITIAL_ALLOC is outside this view. */
#define PMC_INITIAL_ALLOC 512
/*
 * NOTE(review): presumably a growable array of PMC names together with its
 * used and allocated element counts -- confirm against the allocation site.
 */
extern char **valid_pmcs;
char **valid_pmcs = NULL;
extern int valid_pmc_cnt;
int valid_pmc_cnt=0;
extern int pmc_allocated_cnt;
int pmc_allocated_cnt=0;
60 
/*
 * The following two variants of popen and pclose come with
 * the caveat that they hand you the PID, so that you
 * can supply it to pclose, which then sends a SIGTERM
 * to the process.
 */
/*
 * my_popen --
 *	popen(3)-like helper that runs "command" through "/bin/sh -c" and
 *	stores the child's PID in *p_pid so the caller can later pass it to
 *	my_pclose().
 *
 *	dir must be "r" (the returned stream reads the child's stdout and
 *	stderr) or "w" (the returned stream writes to the child's stdin).
 *	The pipe for the unused direction is closed in the parent.
 *
 *	Returns the stream, or NULL with errno set: EINVAL for a NULL
 *	argument or an unknown dir, E2BIG when command does not fit the
 *	local argument buffer, otherwise the error left by pipe(2), fork(2)
 *	or fdopen(3).  If fdopen() fails the child is terminated and reaped
 *	before returning.
 */
static FILE *
my_popen(const char *command, const char *dir, pid_t *p_pid)
{
	extern char **environ;
	FILE *stream;
	int pdesin[2], pdesout[2];
	int keep_fd, reading, saved_errno;
	char *argv[4];
	pid_t pid;
	char cmd[] = "sh";
	char arg1[] = "-c";
	char cmd2[1024];
	size_t cmdlen;

	if (command == NULL || dir == NULL || p_pid == NULL) {
		errno = EINVAL;
		return (NULL);
	}
	reading = (strcmp(dir, "r") == 0);
	if (!reading && strcmp(dir, "w") != 0) {
		errno = EINVAL;
		return (NULL);
	}
	/* Refuse, rather than overflow, when the command is too long. */
	cmdlen = strlen(command);
	if (cmdlen >= sizeof(cmd2)) {
		errno = E2BIG;
		return (NULL);
	}
	if (pipe(pdesin) < 0)
		return (NULL);

	if (pipe(pdesout) < 0) {
		saved_errno = errno;
		(void)close(pdesin[0]);
		(void)close(pdesin[1]);
		errno = saved_errno;
		return (NULL);
	}
	/* execve() wants writable argv strings, hence the local copies. */
	memcpy(cmd2, command, cmdlen + 1);
	argv[0] = cmd;
	argv[1] = arg1;
	argv[2] = cmd2;
	argv[3] = NULL;

	switch (pid = fork()) {
	case -1:			/* Error. */
		saved_errno = errno;
		(void)close(pdesin[0]);
		(void)close(pdesin[1]);
		(void)close(pdesout[0]);
		(void)close(pdesout[1]);
		errno = saved_errno;
		return (NULL);
		/* NOTREACHED */
	case 0:				/* Child. */
		/* Close out un-used sides */
		(void)close(pdesin[1]);
		(void)close(pdesout[0]);
		/* Now prepare the stdin of the process */
		if (pdesin[0] != STDIN_FILENO) {
			if (dup2(pdesin[0], STDIN_FILENO) == -1)
				_exit(127);
			(void)close(pdesin[0]);
		}
		/* Now stdout, and stderr just in case */
		if (dup2(pdesout[1], STDOUT_FILENO) == -1 ||
		    dup2(pdesout[1], STDERR_FILENO) == -1)
			_exit(127);
		if (pdesout[1] != STDOUT_FILENO &&
		    pdesout[1] != STDERR_FILENO)
			(void)close(pdesout[1]);
		/* Now run it */
		execve("/bin/sh", argv, environ);
		/*
		 * _exit(), not exit(): the child must not flush stdio
		 * buffers it inherited from the parent.
		 */
		_exit(127);
		/* NOTREACHED */
	}
	/* Parent. Store the pid */
	*p_pid = pid;
	if (reading) {
		/* Keep only the read side of the child's output pipe. */
		keep_fd = pdesout[0];
		(void)close(pdesout[1]);
		(void)close(pdesin[0]);
		(void)close(pdesin[1]);
		stream = fdopen(keep_fd, "r");
	} else {
		/* Keep only the write side of the child's input pipe. */
		keep_fd = pdesin[1];
		(void)close(pdesin[0]);
		(void)close(pdesout[0]);
		(void)close(pdesout[1]);
		stream = fdopen(keep_fd, "w");
	}
	if (stream == NULL) {
		/* Do not leave a descriptor or a child process behind. */
		saved_errno = errno;
		(void)close(keep_fd);
		(void)kill(pid, SIGTERM);
		while (waitpid(pid, NULL, 0) == -1 && errno == EINTR)
			continue;
		errno = saved_errno;
		return (NULL);
	}
	return (stream);
}
145 
/*
 * my_pclose --
 *	Counterpart of my_popen(): close the stream, send SIGTERM to the
 *	child identified by the_pid and wait until it has been reaped.
 *	Nothing is returned; a failing kill() or wait is not reported.
 *
 *	A NULL stream is skipped.  A pid of zero or less is never
 *	signalled, because kill(2) would treat it as a process group (or
 *	as "every process") rather than as a single child.
 */
static void
my_pclose(FILE *io, pid_t the_pid)
{
	pid_t reaped;

	if (io != NULL)
		(void)fclose(io);
	if (the_pid <= 0)
		return;
	/* Die if you are not dead! */
	(void)kill(the_pid, SIGTERM);
	/* Retry the wait when it is interrupted by a signal. */
	do {
		reaped = waitpid(the_pid, NULL, 0);
	} while (reaped == -1 && errno == EINTR);
}
167 
/*
 * Samples collected for one named counter.  Entries are chained through
 * next_cpu; find_counter() walks that chain to look a counter up by name.
 */
struct counters {
	struct counters *next_cpu;		/* Next entry in the chain */
	char counter_name[MAX_NLEN];		/* Name of counter */
	int cpu;				/* CPU we are on */
	int pos;				/* Index we are filling to. */
	uint64_t vals[MAX_COUNTER_SLOTS];	/* Sample slots (MAX_COUNTER_SLOTS entries) */
	uint64_t sum;				/* Summary of entries */
};
176 
/*
 * NOTE(review): presumably one counter-chain head per CPU; the code that
 * fills this array is outside this view.
 */
extern struct counters *glob_cpu[MAX_CPU];
struct counters *glob_cpu[MAX_CPU];

/*
 * NOTE(review): presumably the backing array of all counters and its
 * element count -- confirm against the allocation site.
 */
extern struct counters *cnts;
struct counters *cnts=NULL;

extern int ncnts;
int ncnts=0;

/*
 * Function pointer with the same signature as the metric routines in this
 * file (e.g. allocstall1) and as cpu_entry.func.
 */
extern int (*expression)(struct counters *, int);
int (*expression)(struct counters *, int);

/* NOTE(review): presumably copied from the selected cpu_entry -- confirm. */
static const char *threshold=NULL;
static const char *command;
191 
/*
 * Description of one named metric.
 * NOTE(review): the tables built from this struct are outside this view;
 * the field notes below are assumptions to be confirmed.
 */
struct cpu_entry {
	const char *name;	/* presumably the name passed to the explain_name_*() helpers */
	const char *thresh;	/* presumably the threshold text shown to the user */
	const char *command;	/* presumably the pmcstat command line to run */
	int (*func)(struct counters *, int);	/* same signature as the metric routines */
	int counters_required;	/* presumably the number of PMCs the metric needs */
};
199 
/*
 * Description of one CPU family.
 * NOTE(review): initialisation is outside this view; "number" is presumably
 * the element count of "ents" -- confirm.
 */
struct cpu_type {
	char cputype[32];
	int number;
	struct cpu_entry *ents;
	void (*explain)(const char *name);	/* same signature as explain_name_sb/_ib/_has */
};
extern struct cpu_type the_cpu;
struct cpu_type the_cpu;
208 
/*
 * explain_name_sb --
 *	Print to stdout the formula behind the metric called "name" from the
 *	table below, followed by a line quoting the threshold at which the
 *	value becomes interesting.  An unrecognised name is reported as
 *	"Unknown name:" and the threshold text becomes "unknown entry".
 *
 *	The "splitload" text used to lack its opening parenthesis; it now
 *	reads "(MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / ...".
 */
static void
explain_name_sb(const char *name)
{
	static const struct {
		const char *key;	/* metric name to match */
		const char *text;	/* formula description, printed verbatim */
		const char *thresh;	/* threshold wording */
	} help[] = {
		{ "allocstall1",
		  "Examine PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "allocstall2",
		  "Examine PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "br_miss",
		  "Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "splitload",
		  "Examine (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "cache2",
		  "Examine ((MEM_LOAD_RETIRED.L3_HIT * 26) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P\n"
		  "**Note we have it labeled MEM_LOAD_UOPS_RETIRED.LLC_HIT not MEM_LOAD_RETIRED.L3_HIT\n",
		  "thresh >= .2" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "frontendstall",
		  "Examine IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4)\n",
		  "thresh >= .15" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  "          MACHINE_CLEARS.SMC + \n"
		  "          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (CPU_CLK_UNHALTED.THREAD_P * 4)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .9" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "dtlbmissstore",
		  "Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .05" },
	};
	const size_t nhelp = sizeof(help) / sizeof(help[0]);
	const char *mythresh;
	size_t i;

	for (i = 0; i < nhelp; i++) {
		if (strcmp(name, help[i].key) == 0)
			break;
	}
	if (i < nhelp) {
		fputs(help[i].text, stdout);
		mythresh = help[i].thresh;
	} else {
		printf("Unknown name:%s\n", name);
		mythresh = "unknown entry";
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
287 
/*
 * explain_name_ib --
 *	Print to stdout the formula behind the metric called "name" from the
 *	table below, followed by a line quoting the threshold at which the
 *	value becomes interesting.  An unrecognised name is reported as
 *	"Unknown name:" and the threshold text becomes "unknown entry".
 */
static void
explain_name_ib(const char *name)
{
	static const struct {
		const char *key;	/* metric name to match */
		const char *text;	/* formula description, printed verbatim */
		const char *thresh;	/* threshold wording */
	} help[] = {
		{ "br_miss",
		  "Examine ((BR_MISP_RETIRED.ALL_BRANCHES /(BR_MISP_RETIRED.ALL_BRANCHES +\n"
		  "         MACHINE_CLEAR.COUNT) * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES)\n"
		  "/ (4 * CPU_CLK_UNHALTED.THREAD))))\n",
		  "thresh >= .2" },
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .9" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "cache2",
		  "Examine (MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "itlbmiss",
		  "Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "icachemiss",
		  "Examine (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "lcpstall",
		  "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "datashare",
		  "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "splitload",
		  "Examine  ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) *\n"
		  "         LD_BLOCKS.NO_SR)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "dtlbmissstore",
		  "Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .05" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  "          MACHINE_CLEARS.SMC + \n"
		  "          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
	};
	const size_t nhelp = sizeof(help) / sizeof(help[0]);
	const char *mythresh;
	size_t i;

	for (i = 0; i < nhelp; i++) {
		if (strcmp(name, help[i].key) == 0)
			break;
	}
	if (i < nhelp) {
		fputs(help[i].text, stdout);
		mythresh = help[i].thresh;
	} else {
		printf("Unknown name:%s\n", name);
		mythresh = "unknown entry";
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
369 
370 
/*
 * explain_name_has --
 *	Print to stdout the formula behind the metric called "name" from the
 *	table below, followed by a line quoting the threshold at which the
 *	value becomes interesting.  An unrecognised name is reported as
 *	"Unknown name:" and the threshold text becomes "unknown entry".
 */
static void
explain_name_has(const char *name)
{
	static const struct {
		const char *key;	/* metric name to match */
		const char *text;	/* formula description, printed verbatim */
		const char *thresh;	/* threshold wording */
	} help[] = {
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .75" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "itlbmiss",
		  "Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "icachemiss",
		  "Examine (36 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "lcpstall",
		  "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "cache2",
		  "Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))\n"
		  "          / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "datashare",
		  "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "splitload",
		  "Examine  (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "br_miss",
		  "Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD\n",
		  "thresh >= .2" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  "          MACHINE_CLEARS.SMC + \n"
		  "          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
	};
	const size_t nhelp = sizeof(help) / sizeof(help[0]);
	const char *mythresh;
	size_t i;

	for (i = 0; i < nhelp; i++) {
		if (strcmp(name, help[i].key) == 0)
			break;
	}
	if (i < nhelp) {
		fputs(help[i].text, stdout);
		mythresh = help[i].thresh;
	} else {
		printf("Unknown name:%s\n", name);
		mythresh = "unknown entry";
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
448 
449 
450 
451 static struct counters *
452 find_counter(struct counters *base, const char *name)
453 {
454 	struct counters *at;
455 	int len;
456 
457 	at = base;
458 	len = strlen(name);
459 	while(at) {
460 		if (strncmp(at->counter_name, name, len) == 0) {
461 			return(at);
462 		}
463 		at = at->next_cpu;
464 	}
465 	printf("Can't find counter %s\n", name);
466 	printf("We have:\n");
467 	at = base;
468 	while(at) {
469 		printf("- %s\n", at->counter_name);
470 		at = at->next_cpu;
471 	}
472 	exit(-1);
473 }
474 
475 static int
476 allocstall1(struct counters *cpu, int pos)
477 {
478 /*  1  - PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW/CPU_CLK_UNHALTED.THREAD_P (thresh > .05)*/
479 	int ret;
480 	struct counters *partial;
481 	struct counters *unhalt;
482 	double un, par, res;
483 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
484 	partial = find_counter(cpu, "PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW");
485 	if (pos != -1) {
486 		par = partial->vals[pos] * 1.0;
487 		un = unhalt->vals[pos] * 1.0;
488 	} else {
489 		par = partial->sum * 1.0;
490 		un = unhalt->sum * 1.0;
491 	}
492 	res = par/un;
493 	ret = printf("%1.3f", res);
494 	return(ret);
495 }
496 
497 static int
498 allocstall2(struct counters *cpu, int pos)
499 {
500 /*  2  - PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
501 	int ret;
502 	struct counters *partial;
503 	struct counters *unhalt;
504 	double un, par, res;
505 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
506 	partial = find_counter(cpu, "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP");
507 	if (pos != -1) {
508 		par = partial->vals[pos] * 1.0;
509 		un = unhalt->vals[pos] * 1.0;
510 	} else {
511 		par = partial->sum * 1.0;
512 		un = unhalt->sum * 1.0;
513 	}
514 	res = par/un;
515 	ret = printf("%1.3f", res);
516 	return(ret);
517 }
518 
519 static int
520 br_mispredict(struct counters *cpu, int pos)
521 {
522 	struct counters *brctr;
523 	struct counters *unhalt;
524 	int ret;
525 /*  3  - (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
526 	double br, un, con, res;
527 	con = 20.0;
528 
529 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
530         brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
531 	if (pos != -1) {
532 		br = brctr->vals[pos] * 1.0;
533 		un = unhalt->vals[pos] * 1.0;
534 	} else {
535 		br = brctr->sum * 1.0;
536 		un = unhalt->sum * 1.0;
537 	}
538 	res = (con * br)/un;
539  	ret = printf("%1.3f", res);
540 	return(ret);
541 }
542 
543 static int
544 br_mispredictib(struct counters *cpu, int pos)
545 {
546 	struct counters *brctr;
547 	struct counters *unhalt;
548 	struct counters *clear, *clear2, *clear3;
549 	struct counters *uops;
550 	struct counters *recv;
551 	struct counters *iss;
552 /*	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",*/
553 	int ret;
554         /*
555 	 * (BR_MISP_RETIRED.ALL_BRANCHES /
556 	 *         (BR_MISP_RETIRED.ALL_BRANCHES +
557 	 *          MACHINE_CLEAR.COUNT) *
558 	 *	   ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) / (4 * CPU_CLK_UNHALTED.THREAD)))
559 	 *
560 	 */
561 	double br, cl, cl2, cl3, uo, re, un, con, res, is;
562 	con = 4.0;
563 
564 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
565         brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
566 	clear = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
567 	clear2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
568 	clear3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
569 	uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
570 	iss = find_counter(cpu, "UOPS_ISSUED.ANY");
571 	recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES");
572 	if (pos != -1) {
573 		br = brctr->vals[pos] * 1.0;
574 		cl = clear->vals[pos] * 1.0;
575 		cl2 = clear2->vals[pos] * 1.0;
576 		cl3 = clear3->vals[pos] * 1.0;
577 		uo = uops->vals[pos] * 1.0;
578 		re = recv->vals[pos] * 1.0;
579 		is = iss->vals[pos] * 1.0;
580 		un = unhalt->vals[pos] * 1.0;
581 	} else {
582 		br = brctr->sum * 1.0;
583 		cl = clear->sum * 1.0;
584 		cl2 = clear2->sum * 1.0;
585 		cl3 = clear3->sum * 1.0;
586 		uo = uops->sum * 1.0;
587 		re = recv->sum * 1.0;
588 		is = iss->sum * 1.0;
589 		un = unhalt->sum * 1.0;
590 	}
591 	res = (br/(br + cl + cl2 + cl3) * ((is - uo + con * re) / (con * un)));
592  	ret = printf("%1.3f", res);
593 	return(ret);
594 }
595 
596 
597 static int
598 br_mispredict_broad(struct counters *cpu, int pos)
599 {
600 	struct counters *brctr;
601 	struct counters *unhalt;
602 	struct counters *clear;
603 	struct counters *uops;
604 	struct counters *uops_ret;
605 	struct counters *recv;
606 	int ret;
607 	double br, cl, uo, uo_r, re, con, un, res;
608 
609 	con = 4.0;
610 
611 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
612         brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
613 	clear = find_counter(cpu, "MACHINE_CLEARS.CYCLES");
614 	uops = find_counter(cpu, "UOPS_ISSUED.ANY");
615 	uops_ret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
616 	recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES");
617 
618 	if (pos != -1) {
619 		un = unhalt->vals[pos] * 1.0;
620 		br = brctr->vals[pos] * 1.0;
621 		cl = clear->vals[pos] * 1.0;
622 		uo = uops->vals[pos] * 1.0;
623 		uo_r = uops_ret->vals[pos] * 1.0;
624 		re = recv->vals[pos] * 1.0;
625 	} else {
626 		un = unhalt->sum * 1.0;
627 		br = brctr->sum * 1.0;
628 		cl = clear->sum * 1.0;
629 		uo = uops->sum * 1.0;
630 		uo_r = uops_ret->sum * 1.0;
631 		re = recv->sum * 1.0;
632 	}
633 	res = br / (br + cl) * (uo - uo_r + con * re) / (un * con);
634  	ret = printf("%1.3f", res);
635 	return(ret);
636 }
637 
638 static int
639 splitloadib(struct counters *cpu, int pos)
640 {
641 	int ret;
642 	struct counters *mem;
643 	struct counters *l1d, *ldblock;
644 	struct counters *unhalt;
645 	double un, memd, res, l1, ldb;
646         /*
647 	 * ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) * LD_BLOCKS.NO_SR) / CPU_CLK_UNHALTED.THREAD_P
648 	 * "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
649 	 */
650 
651 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
652 	mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L1_MISS");
653 	l1d = find_counter(cpu, "L1D_PEND_MISS.PENDING");
654 	ldblock = find_counter(cpu, "LD_BLOCKS.NO_SR");
655 	if (pos != -1) {
656 		memd = mem->vals[pos] * 1.0;
657 		l1 = l1d->vals[pos] * 1.0;
658 		ldb = ldblock->vals[pos] * 1.0;
659 		un = unhalt->vals[pos] * 1.0;
660 	} else {
661 		memd = mem->sum * 1.0;
662 		l1 = l1d->sum * 1.0;
663 		ldb = ldblock->sum * 1.0;
664 		un = unhalt->sum * 1.0;
665 	}
666 	res = ((l1 / memd) * ldb)/un;
667 	ret = printf("%1.3f", res);
668 	return(ret);
669 }
670 
671 
672 static int
673 splitload(struct counters *cpu, int pos)
674 {
675 	int ret;
676 	struct counters *mem;
677 	struct counters *unhalt;
678 	double con, un, memd, res;
679 /*  4  - (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/
680 
681 	con = 5.0;
682 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
683 	mem = find_counter(cpu, "MEM_UOPS_RETIRED.SPLIT_LOADS");
684 	if (pos != -1) {
685 		memd = mem->vals[pos] * 1.0;
686 		un = unhalt->vals[pos] * 1.0;
687 	} else {
688 		memd = mem->sum * 1.0;
689 		un = unhalt->sum * 1.0;
690 	}
691 	res = (memd * con)/un;
692 	ret = printf("%1.3f", res);
693 	return(ret);
694 }
695 
696 
697 static int
698 splitload_sb(struct counters *cpu, int pos)
699 {
700 	int ret;
701 	struct counters *mem;
702 	struct counters *unhalt;
703 	double con, un, memd, res;
704 /*  4  - (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/
705 
706 	con = 5.0;
707 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
708 	mem = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_LOADS");
709 	if (pos != -1) {
710 		memd = mem->vals[pos] * 1.0;
711 		un = unhalt->vals[pos] * 1.0;
712 	} else {
713 		memd = mem->sum * 1.0;
714 		un = unhalt->sum * 1.0;
715 	}
716 	res = (memd * con)/un;
717 	ret = printf("%1.3f", res);
718 	return(ret);
719 }
720 
721 
722 static int
723 splitstore_sb(struct counters *cpu, int pos)
724 {
725         /*  5  - MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES (thresh > 0.01) */
726 	int ret;
727 	struct counters *mem_split;
728 	struct counters *mem_stores;
729 	double memsplit, memstore, res;
730 	mem_split = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_STORES");
731 	mem_stores = find_counter(cpu, "MEM_UOP_RETIRED.ALL_STORES");
732 	if (pos != -1) {
733 		memsplit = mem_split->vals[pos] * 1.0;
734 		memstore = mem_stores->vals[pos] * 1.0;
735 	} else {
736 		memsplit = mem_split->sum * 1.0;
737 		memstore = mem_stores->sum * 1.0;
738 	}
739 	res = memsplit/memstore;
740 	ret = printf("%1.3f", res);
741 	return(ret);
742 }
743 
744 
745 
746 static int
747 splitstore(struct counters *cpu, int pos)
748 {
749         /*  5  - MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES (thresh > 0.01) */
750 	int ret;
751 	struct counters *mem_split;
752 	struct counters *mem_stores;
753 	double memsplit, memstore, res;
754 	mem_split = find_counter(cpu, "MEM_UOPS_RETIRED.SPLIT_STORES");
755 	mem_stores = find_counter(cpu, "MEM_UOPS_RETIRED.ALL_STORES");
756 	if (pos != -1) {
757 		memsplit = mem_split->vals[pos] * 1.0;
758 		memstore = mem_stores->vals[pos] * 1.0;
759 	} else {
760 		memsplit = mem_split->sum * 1.0;
761 		memstore = mem_stores->sum * 1.0;
762 	}
763 	res = memsplit/memstore;
764 	ret = printf("%1.3f", res);
765 	return(ret);
766 }
767 
768 
769 static int
770 contested(struct counters *cpu, int pos)
771 {
772         /*  6  - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
773 	int ret;
774 	struct counters *mem;
775 	struct counters *unhalt;
776 	double con, un, memd, res;
777 
778 	con = 60.0;
779 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
780 	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
781 	if (pos != -1) {
782 		memd = mem->vals[pos] * 1.0;
783 		un = unhalt->vals[pos] * 1.0;
784 	} else {
785 		memd = mem->sum * 1.0;
786 		un = unhalt->sum * 1.0;
787 	}
788 	res = (memd * con)/un;
789 	ret = printf("%1.3f", res);
790 	return(ret);
791 }
792 
793 static int
794 contested_has(struct counters *cpu, int pos)
795 {
796         /*  6  - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
797 	int ret;
798 	struct counters *mem;
799 	struct counters *unhalt;
800 	double con, un, memd, res;
801 
802 	con = 84.0;
803 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
804 	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
805 	if (pos != -1) {
806 		memd = mem->vals[pos] * 1.0;
807 		un = unhalt->vals[pos] * 1.0;
808 	} else {
809 		memd = mem->sum * 1.0;
810 		un = unhalt->sum * 1.0;
811 	}
812 	res = (memd * con)/un;
813 	ret = printf("%1.3f", res);
814 	return(ret);
815 }
816 
817 static int
818 contestedbroad(struct counters *cpu, int pos)
819 {
820         /*  6  - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
821 	int ret;
822 	struct counters *mem;
823 	struct counters *mem2;
824 	struct counters *unhalt;
825 	double con, un, memd, memtoo, res;
826 
827 	con = 84.0;
828 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
829 	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
830 	mem2 = find_counter(cpu,"MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS");
831 
832 	if (pos != -1) {
833 		memd = mem->vals[pos] * 1.0;
834 		memtoo = mem2->vals[pos] * 1.0;
835 		un = unhalt->vals[pos] * 1.0;
836 	} else {
837 		memd = mem->sum * 1.0;
838 		memtoo = mem2->sum * 1.0;
839 		un = unhalt->sum * 1.0;
840 	}
841 	res = ((memd * con) + memtoo)/un;
842 	ret = printf("%1.3f", res);
843 	return(ret);
844 }
845 
846 
847 static int
848 blockstoreforward(struct counters *cpu, int pos)
849 {
850         /*  7  - (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .05)*/
851 	int ret;
852 	struct counters *ldb;
853 	struct counters *unhalt;
854 	double con, un, ld, res;
855 
856 	con = 13.0;
857 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
858 	ldb = find_counter(cpu, "LD_BLOCKS_STORE_FORWARD");
859 	if (pos != -1) {
860 		ld = ldb->vals[pos] * 1.0;
861 		un = unhalt->vals[pos] * 1.0;
862 	} else {
863 		ld = ldb->sum * 1.0;
864 		un = unhalt->sum * 1.0;
865 	}
866 	res = (ld * con)/un;
867 	ret = printf("%1.3f", res);
868 	return(ret);
869 }
870 
871 static int
872 cache2(struct counters *cpu, int pos)
873 {
874 	/* ** Suspect ***
875 	 *  8  - ((MEM_LOAD_RETIRED.L3_HIT * 26) + (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) +
876 	 *        (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
877 	 */
878 	int ret;
879 	struct counters *mem1, *mem2, *mem3;
880 	struct counters *unhalt;
881 	double con1, con2, con3, un, me_1, me_2, me_3, res;
882 
883 	con1 = 26.0;
884 	con2 = 43.0;
885 	con3 = 60.0;
886 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
887 /* Call for MEM_LOAD_RETIRED.L3_HIT possibly MEM_LOAD_UOPS_RETIRED.LLC_HIT ?*/
888 	mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
889 	mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
890 	mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
891 	if (pos != -1) {
892 		me_1 = mem1->vals[pos] * 1.0;
893 		me_2 = mem2->vals[pos] * 1.0;
894 		me_3 = mem3->vals[pos] * 1.0;
895 		un = unhalt->vals[pos] * 1.0;
896 	} else {
897 		me_1 = mem1->sum * 1.0;
898 		me_2 = mem2->sum * 1.0;
899 		me_3 = mem3->sum * 1.0;
900 		un = unhalt->sum * 1.0;
901 	}
902 	res = ((me_1 * con1) + (me_2 * con2) + (me_3 * con3))/un;
903 	ret = printf("%1.3f", res);
904 	return(ret);
905 }
906 
907 static int
908 datasharing(struct counters *cpu, int pos)
909 {
910 	/*
911 	 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
912 	 */
913 	int ret;
914 	struct counters *mem;
915 	struct counters *unhalt;
916 	double con, res, me, un;
917 
918 	con = 43.0;
919 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
920 	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
921 	if (pos != -1) {
922 		me = mem->vals[pos] * 1.0;
923 		un = unhalt->vals[pos] * 1.0;
924 	} else {
925 		me = mem->sum * 1.0;
926 		un = unhalt->sum * 1.0;
927 	}
928 	res = (me * con)/un;
929 	ret = printf("%1.3f", res);
930 	return(ret);
931 
932 }
933 
934 
935 static int
936 datasharing_has(struct counters *cpu, int pos)
937 {
938 	/*
939 	 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
940 	 */
941 	int ret;
942 	struct counters *mem;
943 	struct counters *unhalt;
944 	double con, res, me, un;
945 
946 	con = 72.0;
947 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
948 	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
949 	if (pos != -1) {
950 		me = mem->vals[pos] * 1.0;
951 		un = unhalt->vals[pos] * 1.0;
952 	} else {
953 		me = mem->sum * 1.0;
954 		un = unhalt->sum * 1.0;
955 	}
956 	res = (me * con)/un;
957 	ret = printf("%1.3f", res);
958 	return(ret);
959 
960 }
961 
962 
963 static int
964 cache2ib(struct counters *cpu, int pos)
965 {
966         /*
967 	 *  (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
968 	 */
969 	int ret;
970 	struct counters *mem;
971 	struct counters *unhalt;
972 	double con, un, me, res;
973 
974 	con = 29.0;
975 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
976 	mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
977 	if (pos != -1) {
978 		me = mem->vals[pos] * 1.0;
979 		un = unhalt->vals[pos] * 1.0;
980 	} else {
981 		me = mem->sum * 1.0;
982 		un = unhalt->sum * 1.0;
983 	}
984 	res = (con * me)/un;
985 	ret = printf("%1.3f", res);
986 	return(ret);
987 }
988 
989 static int
990 cache2has(struct counters *cpu, int pos)
991 {
992 	/*
993 	 * Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \
994 	 *          (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) +
995 	 *          (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))
996 	 *           / CPU_CLK_UNHALTED.THREAD_P
997 	 */
998 	int ret;
999 	struct counters *mem1, *mem2, *mem3;
1000 	struct counters *unhalt;
1001 	double con1, con2, con3, un, me1, me2, me3, res;
1002 
1003 	con1 = 36.0;
1004 	con2 = 72.0;
1005 	con3 = 84.0;
1006 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1007 	mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
1008 	mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
1009 	mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
1010 	if (pos != -1) {
1011 		me1 = mem1->vals[pos] * 1.0;
1012 		me2 = mem2->vals[pos] * 1.0;
1013 		me3 = mem3->vals[pos] * 1.0;
1014 		un = unhalt->vals[pos] * 1.0;
1015 	} else {
1016 		me1 = mem1->sum * 1.0;
1017 		me2 = mem2->sum * 1.0;
1018 		me3 = mem3->sum * 1.0;
1019 		un = unhalt->sum * 1.0;
1020 	}
1021 	res = ((me1 * con1) + (me2 * con2) + (me3 * con3))/un;
1022 	ret = printf("%1.3f", res);
1023 	return(ret);
1024 }
1025 
1026 
1027 static int
1028 cache2broad(struct counters *cpu, int pos)
1029 {
1030         /*
1031 	 *  (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
1032 	 */
1033 	int ret;
1034 	struct counters *mem;
1035 	struct counters *unhalt;
1036 	double con, un, me, res;
1037 
1038 	con = 36.0;
1039 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1040 	mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L3_HIT");
1041 	if (pos != -1) {
1042 		me = mem->vals[pos] * 1.0;
1043 		un = unhalt->vals[pos] * 1.0;
1044 	} else {
1045 		me = mem->sum * 1.0;
1046 		un = unhalt->sum * 1.0;
1047 	}
1048 	res = (con * me)/un;
1049 	ret = printf("%1.3f", res);
1050 	return(ret);
1051 }
1052 
1053 
1054 static int
1055 cache1(struct counters *cpu, int pos)
1056 {
1057 	/*  9  - (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
1058 	int ret;
1059 	struct counters *mem;
1060 	struct counters *unhalt;
1061 	double con, un, me, res;
1062 
1063 	con = 180.0;
1064 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1065 	mem = find_counter(cpu, "MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS");
1066 	if (pos != -1) {
1067 		me = mem->vals[pos] * 1.0;
1068 		un = unhalt->vals[pos] * 1.0;
1069 	} else {
1070 		me = mem->sum * 1.0;
1071 		un = unhalt->sum * 1.0;
1072 	}
1073 	res = (me * con)/un;
1074 	ret = printf("%1.3f", res);
1075 	return(ret);
1076 }
1077 
1078 static int
1079 cache1ib(struct counters *cpu, int pos)
1080 {
1081 	/*  9  - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
1082 	int ret;
1083 	struct counters *mem;
1084 	struct counters *unhalt;
1085 	double con, un, me, res;
1086 
1087 	con = 180.0;
1088 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1089 	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM");
1090 	if (pos != -1) {
1091 		me = mem->vals[pos] * 1.0;
1092 		un = unhalt->vals[pos] * 1.0;
1093 	} else {
1094 		me = mem->sum * 1.0;
1095 		un = unhalt->sum * 1.0;
1096 	}
1097 	res = (me * con)/un;
1098 	ret = printf("%1.3f", res);
1099 	return(ret);
1100 }
1101 
1102 
1103 static int
1104 cache1broad(struct counters *cpu, int pos)
1105 {
1106 	/*  9  - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
1107 	int ret;
1108 	struct counters *mem;
1109 	struct counters *unhalt;
1110 	double con, un, me, res;
1111 
1112 	con = 180.0;
1113 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1114 	mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L3_MISS");
1115 	if (pos != -1) {
1116 		me = mem->vals[pos] * 1.0;
1117 		un = unhalt->vals[pos] * 1.0;
1118 	} else {
1119 		me = mem->sum * 1.0;
1120 		un = unhalt->sum * 1.0;
1121 	}
1122 	res = (me * con)/un;
1123 	ret = printf("%1.3f", res);
1124 	return(ret);
1125 }
1126 
1127 
1128 static int
1129 dtlb_missload(struct counters *cpu, int pos)
1130 {
1131 	/* 10  - ((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P (t >=.1) */
1132 	int ret;
1133 	struct counters *dtlb_m, *dtlb_d;
1134 	struct counters *unhalt;
1135 	double con, un, d1, d2, res;
1136 
1137 	con = 7.0;
1138 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1139 	dtlb_m = find_counter(cpu, "DTLB_LOAD_MISSES.STLB_HIT");
1140 	dtlb_d = find_counter(cpu, "DTLB_LOAD_MISSES.WALK_DURATION");
1141 	if (pos != -1) {
1142 		d1 = dtlb_m->vals[pos] * 1.0;
1143 		d2 = dtlb_d->vals[pos] * 1.0;
1144 		un = unhalt->vals[pos] * 1.0;
1145 	} else {
1146 		d1 = dtlb_m->sum * 1.0;
1147 		d2 = dtlb_d->sum * 1.0;
1148 		un = unhalt->sum * 1.0;
1149 	}
1150 	res = ((d1 * con) + d2)/un;
1151 	ret = printf("%1.3f", res);
1152 	return(ret);
1153 }
1154 
1155 static int
1156 dtlb_missstore(struct counters *cpu, int pos)
1157 {
1158         /*
1159 	 * ((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION) /
1160 	 * CPU_CLK_UNHALTED.THREAD_P (t >= .1)
1161 	 */
1162         int ret;
1163         struct counters *dtsb_m, *dtsb_d;
1164         struct counters *unhalt;
1165         double con, un, d1, d2, res;
1166 
1167         con = 7.0;
1168         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1169         dtsb_m = find_counter(cpu, "DTLB_STORE_MISSES.STLB_HIT");
1170         dtsb_d = find_counter(cpu, "DTLB_STORE_MISSES.WALK_DURATION");
1171         if (pos != -1) {
1172                 d1 = dtsb_m->vals[pos] * 1.0;
1173                 d2 = dtsb_d->vals[pos] * 1.0;
1174                 un = unhalt->vals[pos] * 1.0;
1175         } else {
1176                 d1 = dtsb_m->sum * 1.0;
1177                 d2 = dtsb_d->sum * 1.0;
1178                 un = unhalt->sum * 1.0;
1179         }
1180         res = ((d1 * con) + d2)/un;
1181         ret = printf("%1.3f", res);
1182         return(ret);
1183 }
1184 
1185 static int
1186 itlb_miss(struct counters *cpu, int pos)
1187 {
1188 	/* ITLB_MISSES.WALK_DURATION / CPU_CLK_UNTHREAD_P  IB */
1189 	int ret;
1190 	struct counters *itlb;
1191 	struct counters *unhalt;
1192 	double un, d1, res;
1193 
1194 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1195 	itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1196 	if (pos != -1) {
1197 		d1 = itlb->vals[pos] * 1.0;
1198 		un = unhalt->vals[pos] * 1.0;
1199 	} else {
1200 		d1 = itlb->sum * 1.0;
1201 		un = unhalt->sum * 1.0;
1202 	}
1203 	res = d1/un;
1204 	ret = printf("%1.3f", res);
1205 	return(ret);
1206 }
1207 
1208 
1209 static int
1210 itlb_miss_broad(struct counters *cpu, int pos)
1211 {
1212 	/* (7 * ITLB_MISSES.STLB_HIT_4K + ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNTHREAD_P   */
1213 	int ret;
1214 	struct counters *itlb;
1215 	struct counters *unhalt;
1216 	struct counters *four_k;
1217 	double un, d1, res, k;
1218 
1219 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1220 	itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1221 	four_k = find_counter(cpu, "ITLB_MISSES.STLB_HIT_4K");
1222 	if (pos != -1) {
1223 		d1 = itlb->vals[pos] * 1.0;
1224 		un = unhalt->vals[pos] * 1.0;
1225 		k = four_k->vals[pos] * 1.0;
1226 	} else {
1227 		d1 = itlb->sum * 1.0;
1228 		un = unhalt->sum * 1.0;
1229 		k = four_k->sum * 1.0;
1230 	}
1231 	res = (7.0 * k + d1)/un;
1232 	ret = printf("%1.3f", res);
1233 	return(ret);
1234 }
1235 
1236 
1237 static int
1238 icache_miss(struct counters *cpu, int pos)
1239 {
1240 	/* (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P IB */
1241 
1242 	int ret;
1243 	struct counters *itlb, *icache;
1244 	struct counters *unhalt;
1245 	double un, d1, ic, res;
1246 
1247 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1248 	itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1249 	icache = find_counter(cpu, "ICACHE.IFETCH_STALL");
1250 	if (pos != -1) {
1251 		d1 = itlb->vals[pos] * 1.0;
1252 		ic = icache->vals[pos] * 1.0;
1253 		un = unhalt->vals[pos] * 1.0;
1254 	} else {
1255 		d1 = itlb->sum * 1.0;
1256 		ic = icache->sum * 1.0;
1257 		un = unhalt->sum * 1.0;
1258 	}
1259 	res = (ic-d1)/un;
1260 	ret = printf("%1.3f", res);
1261 	return(ret);
1262 
1263 }
1264 
1265 static int
1266 icache_miss_has(struct counters *cpu, int pos)
1267 {
1268 	/* (36 * ICACHE.MISSES) / CPU_CLK_UNHALTED.THREAD_P */
1269 
1270 	int ret;
1271 	struct counters *icache;
1272 	struct counters *unhalt;
1273 	double un, con, ic, res;
1274 
1275 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1276 	icache = find_counter(cpu, "ICACHE.MISSES");
1277 	con = 36.0;
1278 	if (pos != -1) {
1279 		ic = icache->vals[pos] * 1.0;
1280 		un = unhalt->vals[pos] * 1.0;
1281 	} else {
1282 		ic = icache->sum * 1.0;
1283 		un = unhalt->sum * 1.0;
1284 	}
1285 	res = (con * ic)/un;
1286 	ret = printf("%1.3f", res);
1287 	return(ret);
1288 
1289 }
1290 
1291 static int
1292 lcp_stall(struct counters *cpu, int pos)
1293 {
1294          /* ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P IB */
1295 	int ret;
1296 	struct counters *ild;
1297 	struct counters *unhalt;
1298 	double un, d1, res;
1299 
1300 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1301 	ild = find_counter(cpu, "ILD_STALL.LCP");
1302 	if (pos != -1) {
1303 		d1 = ild->vals[pos] * 1.0;
1304 		un = unhalt->vals[pos] * 1.0;
1305 	} else {
1306 		d1 = ild->sum * 1.0;
1307 		un = unhalt->sum * 1.0;
1308 	}
1309 	res = d1/un;
1310 	ret = printf("%1.3f", res);
1311 	return(ret);
1312 
1313 }
1314 
1315 
1316 static int
1317 frontendstall(struct counters *cpu, int pos)
1318 {
1319       /* 12  -  IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4) (thresh >= .15) */
1320 	int ret;
1321 	struct counters *idq;
1322 	struct counters *unhalt;
1323 	double con, un, id, res;
1324 
1325 	con = 4.0;
1326 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1327 	idq = find_counter(cpu, "IDQ_UOPS_NOT_DELIVERED.CORE");
1328 	if (pos != -1) {
1329 		id = idq->vals[pos] * 1.0;
1330 		un = unhalt->vals[pos] * 1.0;
1331 	} else {
1332 		id = idq->sum * 1.0;
1333 		un = unhalt->sum * 1.0;
1334 	}
1335 	res = id/(un * con);
1336 	ret = printf("%1.3f", res);
1337 	return(ret);
1338 }
1339 
1340 static int
1341 clears(struct counters *cpu, int pos)
1342 {
1343 	/* 13  - ((MACHINE_CLEARS.MEMORY_ORDERING + MACHINE_CLEARS.SMC + MACHINE_CLEARS.MASKMOV ) * 100 )
1344 	 *         / CPU_CLK_UNHALTED.THREAD_P (thresh  >= .02)*/
1345 
1346 	int ret;
1347 	struct counters *clr1, *clr2, *clr3;
1348 	struct counters *unhalt;
1349 	double con, un, cl1, cl2, cl3, res;
1350 
1351 	con = 100.0;
1352 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1353 	clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
1354 	clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
1355 	clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
1356 
1357 	if (pos != -1) {
1358 		cl1 = clr1->vals[pos] * 1.0;
1359 		cl2 = clr2->vals[pos] * 1.0;
1360 		cl3 = clr3->vals[pos] * 1.0;
1361 		un = unhalt->vals[pos] * 1.0;
1362 	} else {
1363 		cl1 = clr1->sum * 1.0;
1364 		cl2 = clr2->sum * 1.0;
1365 		cl3 = clr3->sum * 1.0;
1366 		un = unhalt->sum * 1.0;
1367 	}
1368 	res = ((cl1 + cl2 + cl3) * con)/un;
1369 	ret = printf("%1.3f", res);
1370 	return(ret);
1371 }
1372 
1373 
1374 
1375 static int
1376 clears_broad(struct counters *cpu, int pos)
1377 {
1378 	int ret;
1379 	struct counters *clr1, *clr2, *clr3, *cyc;
1380 	struct counters *unhalt;
1381 	double con, un, cl1, cl2, cl3, cy, res;
1382 
1383 	con = 100.0;
1384 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1385 	clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
1386 	clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
1387 	clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
1388 	cyc = find_counter(cpu, "MACHINE_CLEARS.CYCLES");
1389 	if (pos != -1) {
1390 		cl1 = clr1->vals[pos] * 1.0;
1391 		cl2 = clr2->vals[pos] * 1.0;
1392 		cl3 = clr3->vals[pos] * 1.0;
1393 		cy = cyc->vals[pos] * 1.0;
1394 		un = unhalt->vals[pos] * 1.0;
1395 	} else {
1396 		cl1 = clr1->sum * 1.0;
1397 		cl2 = clr2->sum * 1.0;
1398 		cl3 = clr3->sum * 1.0;
1399 		cy = cyc->sum * 1.0;
1400 		un = unhalt->sum * 1.0;
1401 	}
1402 	/* Formula not listed but extrapulated to add the cy ?? */
1403 	res = ((cl1 + cl2 + cl3 + cy) * con)/un;
1404 	ret = printf("%1.3f", res);
1405 	return(ret);
1406 }
1407 
1408 
1409 
1410 
1411 
1412 static int
1413 microassist(struct counters *cpu, int pos)
1414 {
1415 	/* 14  - IDQ.MS_CYCLES / CPU_CLK_UNHALTED.THREAD_P (thresh > .05) */
1416 	int ret;
1417 	struct counters *idq;
1418 	struct counters *unhalt;
1419 	double un, id, res, con;
1420 
1421 	con = 4.0;
1422 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1423 	idq = find_counter(cpu, "IDQ.MS_UOPS");
1424 	if (pos != -1) {
1425 		id = idq->vals[pos] * 1.0;
1426 		un = unhalt->vals[pos] * 1.0;
1427 	} else {
1428 		id = idq->sum * 1.0;
1429 		un = unhalt->sum * 1.0;
1430 	}
1431 	res = id/(un * con);
1432 	ret = printf("%1.3f", res);
1433 	return(ret);
1434 }
1435 
1436 
1437 static int
1438 microassist_broad(struct counters *cpu, int pos)
1439 {
1440 	int ret;
1441 	struct counters *idq;
1442 	struct counters *unhalt;
1443 	struct counters *uopiss;
1444 	struct counters *uopret;
1445 	double un, id, res, con, uoi, uor;
1446 
1447 	con = 4.0;
1448 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1449 	idq = find_counter(cpu, "IDQ.MS_UOPS");
1450 	uopiss = find_counter(cpu, "UOPS_ISSUED.ANY");
1451 	uopret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
1452 	if (pos != -1) {
1453 		id = idq->vals[pos] * 1.0;
1454 		un = unhalt->vals[pos] * 1.0;
1455 		uoi = uopiss->vals[pos] * 1.0;
1456 		uor = uopret->vals[pos] * 1.0;
1457 	} else {
1458 		id = idq->sum * 1.0;
1459 		un = unhalt->sum * 1.0;
1460 		uoi = uopiss->sum * 1.0;
1461 		uor = uopret->sum * 1.0;
1462 	}
1463 	res = (uor/uoi) * (id/(un * con));
1464 	ret = printf("%1.3f", res);
1465 	return(ret);
1466 }
1467 
1468 
1469 static int
1470 aliasing(struct counters *cpu, int pos)
1471 {
1472 	/* 15  - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */
1473 	int ret;
1474 	struct counters *ld;
1475 	struct counters *unhalt;
1476 	double un, lds, con, res;
1477 
1478 	con = 5.0;
1479 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1480 	ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS");
1481 	if (pos != -1) {
1482 		lds = ld->vals[pos] * 1.0;
1483 		un = unhalt->vals[pos] * 1.0;
1484 	} else {
1485 		lds = ld->sum * 1.0;
1486 		un = unhalt->sum * 1.0;
1487 	}
1488 	res = (lds * con)/un;
1489 	ret = printf("%1.3f", res);
1490 	return(ret);
1491 }
1492 
1493 static int
1494 aliasing_broad(struct counters *cpu, int pos)
1495 {
1496 	/* 15  - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */
1497 	int ret;
1498 	struct counters *ld;
1499 	struct counters *unhalt;
1500 	double un, lds, con, res;
1501 
1502 	con = 7.0;
1503 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1504 	ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS");
1505 	if (pos != -1) {
1506 		lds = ld->vals[pos] * 1.0;
1507 		un = unhalt->vals[pos] * 1.0;
1508 	} else {
1509 		lds = ld->sum * 1.0;
1510 		un = unhalt->sum * 1.0;
1511 	}
1512 	res = (lds * con)/un;
1513 	ret = printf("%1.3f", res);
1514 	return(ret);
1515 }
1516 
1517 
1518 static int
1519 fpassists(struct counters *cpu, int pos)
1520 {
1521 	/* 16  - FP_ASSIST.ANY/INST_RETIRED.ANY_P */
1522 	int ret;
1523 	struct counters *fp;
1524 	struct counters *inst;
1525 	double un, fpd, res;
1526 
1527 	inst = find_counter(cpu, "INST_RETIRED.ANY_P");
1528 	fp = find_counter(cpu, "FP_ASSIST.ANY");
1529 	if (pos != -1) {
1530 		fpd = fp->vals[pos] * 1.0;
1531 		un = inst->vals[pos] * 1.0;
1532 	} else {
1533 		fpd = fp->sum * 1.0;
1534 		un = inst->sum * 1.0;
1535 	}
1536 	res = fpd/un;
1537 	ret = printf("%1.3f", res);
1538 	return(ret);
1539 }
1540 
1541 static int
1542 otherassistavx(struct counters *cpu, int pos)
1543 {
1544 	/* 17  - (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P thresh  .1*/
1545 	int ret;
1546 	struct counters *oth;
1547 	struct counters *unhalt;
1548 	double un, ot, con, res;
1549 
1550 	con = 75.0;
1551 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1552 	oth = find_counter(cpu, "OTHER_ASSISTS.AVX_TO_SSE");
1553 	if (pos != -1) {
1554 		ot = oth->vals[pos] * 1.0;
1555 		un = unhalt->vals[pos] * 1.0;
1556 	} else {
1557 		ot = oth->sum * 1.0;
1558 		un = unhalt->sum * 1.0;
1559 	}
1560 	res = (ot * con)/un;
1561 	ret = printf("%1.3f", res);
1562 	return(ret);
1563 }
1564 
1565 static int
1566 otherassistsse(struct counters *cpu, int pos)
1567 {
1568 
1569 	int ret;
1570 	struct counters *oth;
1571 	struct counters *unhalt;
1572 	double un, ot, con, res;
1573 
1574 	/* 18     (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P  thresh .1*/
1575 	con = 75.0;
1576 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1577 	oth = find_counter(cpu, "OTHER_ASSISTS.SSE_TO_AVX");
1578 	if (pos != -1) {
1579 		ot = oth->vals[pos] * 1.0;
1580 		un = unhalt->vals[pos] * 1.0;
1581 	} else {
1582 		ot = oth->sum * 1.0;
1583 		un = unhalt->sum * 1.0;
1584 	}
1585 	res = (ot * con)/un;
1586 	ret = printf("%1.3f", res);
1587 	return(ret);
1588 }
1589 
1590 static int
1591 efficiency1(struct counters *cpu, int pos)
1592 {
1593 
1594 	int ret;
1595 	struct counters *uops;
1596 	struct counters *unhalt;
1597 	double un, ot, con, res;
1598 
1599         /* 19 (UOPS_RETIRED.RETIRE_SLOTS/(4*CPU_CLK_UNHALTED.THREAD_P) look if thresh < .9*/
1600 	con = 4.0;
1601 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1602 	uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
1603 	if (pos != -1) {
1604 		ot = uops->vals[pos] * 1.0;
1605 		un = unhalt->vals[pos] * 1.0;
1606 	} else {
1607 		ot = uops->sum * 1.0;
1608 		un = unhalt->sum * 1.0;
1609 	}
1610 	res = ot/(con * un);
1611 	ret = printf("%1.3f", res);
1612 	return(ret);
1613 }
1614 
1615 static int
1616 efficiency2(struct counters *cpu, int pos)
1617 {
1618 
1619 	int ret;
1620 	struct counters *uops;
1621 	struct counters *unhalt;
1622 	double un, ot, res;
1623 
1624         /* 20  - CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P good if > 1. (comp factor)*/
1625 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1626 	uops = find_counter(cpu, "INST_RETIRED.ANY_P");
1627 	if (pos != -1) {
1628 		ot = uops->vals[pos] * 1.0;
1629 		un = unhalt->vals[pos] * 1.0;
1630 	} else {
1631 		ot = uops->sum * 1.0;
1632 		un = unhalt->sum * 1.0;
1633 	}
1634 	res = un/ot;
1635 	ret = printf("%1.3f", res);
1636 	return(ret);
1637 }
1638 
1639 #define SANDY_BRIDGE_COUNT 20
1640 static struct cpu_entry sandy_bridge[SANDY_BRIDGE_COUNT] = {
1641 /*01*/	{ "allocstall1", "thresh > .05",
1642 	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW -w 1",
1643 	  allocstall1, 2 },
1644 /* -- not defined for SB right (partial-rat_stalls) 02*/
1645         { "allocstall2", "thresh > .05",
1646 	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP -w 1",
1647 	  allocstall2, 2 },
1648 /*03*/	{ "br_miss", "thresh >= .2",
1649 	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
1650 	  br_mispredict, 2 },
1651 /*04*/	{ "splitload", "thresh >= .1",
1652 	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1",
1653 	  splitload_sb, 2 },
1654 /* 05*/	{ "splitstore", "thresh >= .01",
1655 	  "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1",
1656 	  splitstore_sb, 2 },
1657 /*06*/	{ "contested", "thresh >= .05",
1658 	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1659 	  contested, 2 },
1660 /*07*/	{ "blockstorefwd", "thresh >= .05",
1661 	  "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1662 	  blockstoreforward, 2 },
1663 /*08*/	{ "cache2", "thresh >= .2",
1664 	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1665 	  cache2, 4 },
1666 /*09*/	{ "cache1", "thresh >= .2",
1667 	  "pmcstat -s MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1668 	  cache1, 2 },
1669 /*10*/	{ "dtlbmissload", "thresh >= .1",
1670 	  "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1671 	  dtlb_missload, 3 },
1672 /*11*/	{ "dtlbmissstore", "thresh >= .05",
1673 	  "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1674 	  dtlb_missstore, 3 },
1675 /*12*/	{ "frontendstall", "thresh >= .15",
1676 	  "pmcstat -s IDQ_UOPS_NOT_DELIVERED.CORE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1677 	  frontendstall, 2 },
1678 /*13*/	{ "clears", "thresh >= .02",
1679 	  "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1680 	  clears, 4 },
1681 /*14*/	{ "microassist", "thresh >= .05",
1682 	  "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1683 	  microassist, 2 },
1684 /*15*/	{ "aliasing_4k", "thresh >= .1",
1685 	  "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1686 	  aliasing, 2 },
1687 /*16*/	{ "fpassist", "look for a excessive value",
1688 	  "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1689 	  fpassists, 2 },
1690 /*17*/	{ "otherassistavx", "look for a excessive value",
1691 	  "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1692 	  otherassistavx, 2},
1693 /*18*/	{ "otherassistsse", "look for a excessive value",
1694 	  "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1695 	  otherassistsse, 2 },
1696 /*19*/	{ "eff1", "thresh < .9",
1697 	  "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1698 	  efficiency1, 2 },
1699 /*20*/	{ "eff2", "thresh > 1.0",
1700 	  "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1701 	  efficiency2, 2 },
1702 };
1703 
1704 
1705 #define IVY_BRIDGE_COUNT 21
1706 static struct cpu_entry ivy_bridge[IVY_BRIDGE_COUNT] = {
1707 /*1*/	{ "eff1", "thresh < .75",
1708 	  "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1709 	  efficiency1, 2 },
1710 /*2*/	{ "eff2", "thresh > 1.0",
1711 	  "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1712 	  efficiency2, 2 },
1713 /*3*/	{ "itlbmiss", "thresh > .05",
1714 	  "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1715 	  itlb_miss, 2 },
1716 /*4*/	{ "icachemiss", "thresh > .05",
1717 	  "pmcstat -s ICACHE.IFETCH_STALL -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1718 	  icache_miss, 3 },
1719 /*5*/	{ "lcpstall", "thresh > .05",
1720 	  "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1721 	  lcp_stall, 2 },
1722 /*6*/	{ "cache1", "thresh >= .2",
1723 	  "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1724 	  cache1ib, 2 },
1725 /*7*/	{ "cache2", "thresh >= .2",
1726 	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1727 	  cache2ib, 2 },
1728 /*8*/	{ "contested", "thresh >= .05",
1729 	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1730 	  contested, 2 },
1731 /*9*/	{ "datashare", "thresh >= .05",
1732 	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1733 	  datasharing, 2 },
1734 /*10*/	{ "blockstorefwd", "thresh >= .05",
1735 	  "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1736 	  blockstoreforward, 2 },
1737 /*11*/	{ "splitload", "thresh >= .1",
1738 	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
1739 	  splitloadib, 4 },
1740 /*12*/	{ "splitstore", "thresh >= .01",
1741 	  "pmcstat -s MEM_UOPS_RETIRED.SPLIT_STORES -s MEM_UOPS_RETIRED.ALL_STORES -w 1",
1742 	  splitstore, 2 },
1743 /*13*/	{ "aliasing_4k", "thresh >= .1",
1744 	  "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1745 	  aliasing, 2 },
1746 /*14*/	{ "dtlbmissload", "thresh >= .1",
1747 	  "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1748 	  dtlb_missload , 3},
1749 /*15*/	{ "dtlbmissstore", "thresh >= .05",
1750 	  "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1751 	  dtlb_missstore, 3 },
1752 /*16*/	{ "br_miss", "thresh >= .2",
1753 	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",
1754 	  br_mispredictib, 8 },
1755 /*17*/	{ "clears", "thresh >= .02",
1756 	  "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1757 	  clears, 4 },
1758 /*18*/	{ "microassist", "thresh >= .05",
1759 	  "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1760 	  microassist, 2 },
1761 /*19*/	{ "fpassist", "look for a excessive value",
1762 	  "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1763 	  fpassists, 2 },
1764 /*20*/	{ "otherassistavx", "look for a excessive value",
1765 	  "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1766 	  otherassistavx , 2},
1767 /*21*/	{ "otherassistsse", "look for a excessive value",
1768 	  "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1769 	  otherassistsse, 2 },
1770 };
1771 
1772 #define HASWELL_COUNT 20
1773 static struct cpu_entry haswell[HASWELL_COUNT] = {
1774 /*1*/	{ "eff1", "thresh < .75",
1775 	  "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1776 	  efficiency1, 2 },
1777 /*2*/	{ "eff2", "thresh > 1.0",
1778 	  "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1779 	  efficiency2, 2 },
1780 /*3*/	{ "itlbmiss", "thresh > .05",
1781 	  "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1782 	  itlb_miss, 2 },
1783 /*4*/	{ "icachemiss", "thresh > .05",
1784 	  "pmcstat -s ICACHE.MISSES -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1785 	  icache_miss_has, 2 },
1786 /*5*/	{ "lcpstall", "thresh > .05",
1787 	  "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1788 	  lcp_stall, 2 },
1789 /*6*/	{ "cache1", "thresh >= .2",
1790 	  "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1791 	  cache1ib, 2 },
1792 /*7*/	{ "cache2", "thresh >= .2",
1793 	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT  -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1794 	  cache2has, 4 },
1795 /*8*/	{ "contested", "thresh >= .05",
1796 	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1797 	  contested_has, 2 },
1798 /*9*/	{ "datashare", "thresh >= .05",
1799 	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1800 	  datasharing_has, 2 },
1801 /*10*/	{ "blockstorefwd", "thresh >= .05",
1802 	  "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1803 	  blockstoreforward, 2 },
1804 /*11*/	{ "splitload", "thresh >= .1",
1805 	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOPS_RETIRED.SPLIT_LOADS -w 1",
1806 	  splitload , 2},
1807 /*12*/	{ "splitstore", "thresh >= .01",
1808 	  "pmcstat -s MEM_UOPS_RETIRED.SPLIT_STORES -s MEM_UOPS_RETIRED.ALL_STORES -w 1",
1809 	  splitstore, 2 },
1810 /*13*/	{ "aliasing_4k", "thresh >= .1",
1811 	  "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1812 	  aliasing, 2 },
1813 /*14*/	{ "dtlbmissload", "thresh >= .1",
1814 	  "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1815 	  dtlb_missload, 3 },
1816 /*15*/	{ "br_miss", "thresh >= .2",
1817 	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
1818 	  br_mispredict, 2 },
1819 /*16*/	{ "clears", "thresh >= .02",
1820 	  "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1821 	  clears, 4 },
1822 /*17*/	{ "microassist", "thresh >= .05",
1823 	  "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1824 	  microassist, 2 },
1825 /*18*/	{ "fpassist", "look for a excessive value",
1826 	  "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1827 	  fpassists, 2 },
1828 /*19*/	{ "otherassistavx", "look for a excessive value",
1829 	  "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1830 	  otherassistavx, 2 },
1831 /*20*/	{ "otherassistsse", "look for a excessive value",
1832 	  "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1833 	  otherassistsse, 2 },
1834 };
1835 
1836 
/*
 * Explain one canned Broadwell expression.
 *
 * name - the expression name as given to -e (e.g. "eff1").
 *
 * Prints the formula that the expression evaluates followed by a line
 * telling the user which threshold suggests room for improvement.  An
 * unrecognized name prints "Unknown name:<name>" and reports the
 * threshold as "unknown entry".
 */
static void
explain_name_broad(const char *name)
{
	/* One row per expression: lookup key, formula text, threshold text. */
	static const struct {
		const char *key;
		const char *formula;
		const char *thresh;
	} notes[] = {
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .75" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "itlbmiss",
		  "Examine (7 * ITLB_MISSES.STLB_HIT_4K + ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "icachemiss",
		  "Examine ( 36.0 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P ??? may not be right \n",
		  "thresh > .05" },
		{ "lcpstall",
		  "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "cache2",
		  "Examine (36.0 * MEM_LOAD_UOPS_RETIRED.L3_HIT / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .2" },
		{ "contested",
		  "Examine ((MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) +  MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "datashare",
		  "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 7) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		/* Event names below were misspelled in the original text. */
		{ "br_miss",
		  "Examine BR_MISP_RETIRED.ALL_BRANCHES_PS / (BR_MISP_RETIRED.ALL_BRANCHES_PS + MACHINE_CLEARS.COUNT) *\n"
		  " (UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) /\n"
		  "CPU_CLK_UNHALTED.THREAD * 4)\n",
		  "thresh >= .2" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  "          MACHINE_CLEARS.SMC + \n"
		  "          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "microassist",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS/UOPS_ISSUED.ANY) * (IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
	};
	const size_t nnotes = sizeof(notes) / sizeof(notes[0]);
	const char *mythresh = "unknown entry";
	size_t i;

	for (i = 0; i < nnotes; i++) {
		if (strcmp(name, notes[i].key) == 0)
			break;
	}
	if (i < nnotes) {
		/* The text is data, not a format string, so emit it verbatim. */
		fputs(notes[i].formula, stdout);
		mythresh = notes[i].thresh;
	} else {
		printf("Unknown name:%s\n", name);
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
1905 
1906 
1907 #define BROADWELL_COUNT 17
1908 static struct cpu_entry broadwell[BROADWELL_COUNT] = {
1909 /*1*/	{ "eff1", "thresh < .75",
1910 	  "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1911 	  efficiency1, 2 },
1912 /*2*/	{ "eff2", "thresh > 1.0",
1913 	  "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1914 	  efficiency2, 2 },
1915 /*3*/	{ "itlbmiss", "thresh > .05",
1916 	  "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -s ITLB_MISSES.STLB_HIT_4K -w 1",
1917 	  itlb_miss_broad, 3 },
1918 /*4*/	{ "icachemiss", "thresh > .05",
1919 	  "pmcstat -s ICACHE.MISSES -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1920 	  icache_miss_has, 2 },
1921 /*5*/	{ "lcpstall", "thresh > .05",
1922 	  "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1923 	  lcp_stall, 2 },
1924 /*6*/	{ "cache1", "thresh >= .1",
1925 	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_MISS  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1926 	  cache1broad, 2 },
1927 /*7*/	{ "cache2", "thresh >= .2",
1928 	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_HIT  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1929 	  cache2broad, 2 },
1930 /*8*/	{ "contested", "thresh >= .05",
1931 	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P  -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS -w 1",
1932 	  contestedbroad, 2 },
1933 /*9*/	{ "datashare", "thresh >= .05",
1934 	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1935 	  datasharing_has, 2 },
1936 /*10*/	{ "blockstorefwd", "thresh >= .05",
1937 	  "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1938 	  blockstoreforward, 2 },
1939 /*11*/	{ "aliasing_4k", "thresh >= .1",
1940 	  "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1941 	  aliasing_broad, 2 },
1942 /*12*/	{ "dtlbmissload", "thresh >= .1",
1943 	  "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT_4K -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1944 	  dtlb_missload, 3 },
1945 /*13*/	{ "br_miss", "thresh >= .2",
1946 	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.CYCLES -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",
1947 	  br_mispredict_broad, 7 },
1948 /*14*/	{ "clears", "thresh >= .02",
1949 	  "pmcstat -s MACHINE_CLEARS.CYCLES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1950 	  clears_broad, 5 },
1951 /*15*/	{ "fpassist", "look for a excessive value",
1952 	  "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1953 	  fpassists, 2 },
1954 /*16*/	{ "otherassistavx", "look for a excessive value",
1955 	  "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1956 	  otherassistavx, 2 },
1957 /*17*/	{ "microassist", "thresh >= .2",
1958 	  "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS  -w 1",
1959 	  microassist_broad, 4 },
1960 };
1961 
1962 
1963 static void
1964 set_sandybridge(void)
1965 {
1966 	strcpy(the_cpu.cputype, "SandyBridge PMC");
1967 	the_cpu.number = SANDY_BRIDGE_COUNT;
1968 	the_cpu.ents = sandy_bridge;
1969 	the_cpu.explain = explain_name_sb;
1970 }
1971 
1972 static void
1973 set_ivybridge(void)
1974 {
1975 	strcpy(the_cpu.cputype, "IvyBridge PMC");
1976 	the_cpu.number = IVY_BRIDGE_COUNT;
1977 	the_cpu.ents = ivy_bridge;
1978 	the_cpu.explain = explain_name_ib;
1979 }
1980 
1981 
1982 static void
1983 set_haswell(void)
1984 {
1985 	strcpy(the_cpu.cputype, "HASWELL PMC");
1986 	the_cpu.number = HASWELL_COUNT;
1987 	the_cpu.ents = haswell;
1988 	the_cpu.explain = explain_name_has;
1989 }
1990 
1991 
1992 static void
1993 set_broadwell(void)
1994 {
1995 	strcpy(the_cpu.cputype, "HASWELL PMC");
1996 	the_cpu.number = BROADWELL_COUNT;
1997 	the_cpu.ents = broadwell;
1998 	the_cpu.explain = explain_name_broad;
1999 }
2000 
2001 
2002 static int
2003 set_expression(const char *name)
2004 {
2005 	int found = 0, i;
2006 	for(i=0 ; i< the_cpu.number; i++) {
2007 		if (strcmp(name, the_cpu.ents[i].name) == 0) {
2008 			found = 1;
2009 			expression = the_cpu.ents[i].func;
2010 			command = the_cpu.ents[i].command;
2011 			threshold = the_cpu.ents[i].thresh;
2012 			if  (the_cpu.ents[i].counters_required > max_pmc_counters) {
2013 				printf("Test %s requires that the CPU have %d counters and this CPU has only %d\n",
2014 				       the_cpu.ents[i].name,
2015 				       the_cpu.ents[i].counters_required, max_pmc_counters);
2016 				printf("Sorry this test can not be run\n");
2017 				if (run_all == 0) {
2018 					exit(-1);
2019 				} else {
2020 					return(-1);
2021 				}
2022 			}
2023 			break;
2024 		}
2025 	}
2026 	if (!found) {
2027 		printf("For CPU type %s we have no expression:%s\n",
2028 		       the_cpu.cputype, name);
2029 		exit(-1);
2030 	}
2031 	return(0);
2032 }
2033 
2034 
2035 
2036 
2037 
2038 static int
2039 validate_expression(char *name)
2040 {
2041 	int i, found;
2042 
2043 	found = 0;
2044 	for(i=0 ; i< the_cpu.number; i++) {
2045 		if (strcmp(name, the_cpu.ents[i].name) == 0) {
2046 			found = 1;
2047 			break;
2048 		}
2049 	}
2050 	if (!found) {
2051 		return(-1);
2052 	}
2053 	return (0);
2054 }
2055 
2056 static void
2057 do_expression(struct counters *cpu, int pos)
2058 {
2059 	if (expression == NULL)
2060 		return;
2061 	(*expression)(cpu, pos);
2062 }
2063 
2064 static void
2065 process_header(int idx, char *p)
2066 {
2067 	struct counters *up;
2068 	int i, len, nlen;
2069 	/*
2070 	 * Given header element idx, at p in
2071 	 * form 's/NN/nameof'
2072 	 * process the entry to pull out the name and
2073 	 * the CPU number.
2074 	 */
2075 	if (strncmp(p, "s/", 2)) {
2076 		printf("Check -- invalid header no s/ in %s\n",
2077 		       p);
2078 		return;
2079 	}
2080 	up = &cnts[idx];
2081 	up->cpu = strtol(&p[2], NULL, 10);
2082 	len = strlen(p);
2083 	for (i=2; i<len; i++) {
2084 		if (p[i] == '/') {
2085 			nlen = strlen(&p[(i+1)]);
2086 			if (nlen < (MAX_NLEN-1)) {
2087 				strcpy(up->counter_name, &p[(i+1)]);
2088 			} else {
2089 				strncpy(up->counter_name, &p[(i+1)], (MAX_NLEN-1));
2090 			}
2091 		}
2092 	}
2093 }
2094 
2095 static void
2096 build_counters_from_header(FILE *io)
2097 {
2098 	char buffer[8192], *p;
2099 	int i, len, cnt;
2100 	size_t mlen;
2101 
2102 	/* We have a new start, lets
2103 	 * setup our headers and cpus.
2104 	 */
2105 	if (fgets(buffer, sizeof(buffer), io) == NULL) {
2106 		printf("First line can't be read from file err:%d\n", errno);
2107 		return;
2108 	}
2109 	/*
2110 	 * Ok output is an array of counters. Once
2111 	 * we start to read the values in we must
2112 	 * put them in there slot to match there CPU and
2113 	 * counter being updated. We create a mass array
2114 	 * of the counters, filling in the CPU and
2115 	 * counter name.
2116 	 */
2117 	/* How many do we get? */
2118 	len = strlen(buffer);
2119 	for (i=0, cnt=0; i<len; i++) {
2120 		if (strncmp(&buffer[i], "s/", 2) == 0) {
2121 			cnt++;
2122 			for(;i<len;i++) {
2123 				if (buffer[i] == ' ')
2124 					break;
2125 			}
2126 		}
2127 	}
2128 	mlen = sizeof(struct counters) * cnt;
2129 	cnts = malloc(mlen);
2130 	ncnts = cnt;
2131 	if (cnts == NULL) {
2132 		printf("No memory err:%d\n", errno);
2133 		return;
2134 	}
2135 	memset(cnts, 0, mlen);
2136 	for (i=0, cnt=0; i<len; i++) {
2137 		if (strncmp(&buffer[i], "s/", 2) == 0) {
2138 			p = &buffer[i];
2139 			for(;i<len;i++) {
2140 				if (buffer[i] == ' ') {
2141 					buffer[i] = 0;
2142 					break;
2143 				}
2144 			}
2145 			process_header(cnt, p);
2146 			cnt++;
2147 		}
2148 	}
2149 	if (verbose)
2150 		printf("We have %d entries\n", cnt);
2151 }
2152 extern int max_to_collect;
2153 int max_to_collect = MAX_COUNTER_SLOTS;
2154 
2155 static int
2156 read_a_line(FILE *io)
2157 {
2158 	char buffer[8192], *p, *stop;
2159 	int pos, i;
2160 
2161 	if (fgets(buffer, sizeof(buffer), io) == NULL) {
2162 		return(0);
2163 	}
2164 	p = buffer;
2165 	for (i=0; i<ncnts; i++) {
2166 		pos = cnts[i].pos;
2167 		cnts[i].vals[pos] = strtol(p, &stop, 0);
2168 		cnts[i].pos++;
2169 		cnts[i].sum += cnts[i].vals[pos];
2170 		p = stop;
2171 	}
2172 	return (1);
2173 }
2174 
2175 extern int cpu_count_out;
2176 int cpu_count_out=0;
2177 
2178 static void
2179 print_header(void)
2180 {
2181 	int i, cnt, printed_cnt;
2182 
2183 	printf("*********************************\n");
2184 	for(i=0, cnt=0; i<MAX_CPU; i++) {
2185 		if (glob_cpu[i]) {
2186 			cnt++;
2187 		}
2188 	}
2189 	cpu_count_out = cnt;
2190 	for(i=0, printed_cnt=0; i<MAX_CPU; i++) {
2191 		if (glob_cpu[i]) {
2192 			printf("CPU%d", i);
2193 			printed_cnt++;
2194 		}
2195 		if (printed_cnt == cnt) {
2196 			printf("\n");
2197 			break;
2198 		} else {
2199 			printf("\t");
2200 		}
2201 	}
2202 }
2203 
2204 static void
2205 lace_cpus_together(void)
2206 {
2207 	int i, j, lace_cpu;
2208 	struct counters *cpat, *at;
2209 
2210 	for(i=0; i<ncnts; i++) {
2211 		cpat = &cnts[i];
2212 		if (cpat->next_cpu) {
2213 			/* Already laced in */
2214 			continue;
2215 		}
2216 		lace_cpu = cpat->cpu;
2217 		if (lace_cpu >= MAX_CPU) {
2218 			printf("CPU %d to big\n", lace_cpu);
2219 			continue;
2220 		}
2221 		if (glob_cpu[lace_cpu] == NULL) {
2222 			glob_cpu[lace_cpu] = cpat;
2223 		} else {
2224 			/* Already processed this cpu */
2225 			continue;
2226 		}
2227 		/* Ok look forward for cpu->cpu and link in */
2228 		for(j=(i+1); j<ncnts; j++) {
2229 			at = &cnts[j];
2230 			if (at->next_cpu) {
2231 				continue;
2232 			}
2233 			if (at->cpu == lace_cpu) {
2234 				/* Found one */
2235 				cpat->next_cpu = at;
2236 				cpat = at;
2237 			}
2238 		}
2239 	}
2240 }
2241 
2242 
2243 static void
2244 process_file(char *filename)
2245 {
2246 	FILE *io;
2247 	int i;
2248 	int line_at, not_done;
2249 	pid_t pid_of_command=0;
2250 
2251 	if (filename ==  NULL) {
2252 		io = my_popen(command, "r", &pid_of_command);
2253 	} else {
2254 		io = fopen(filename, "r");
2255 		if (io == NULL) {
2256 			printf("Can't process file %s err:%d\n",
2257 			       filename, errno);
2258 			return;
2259 		}
2260 	}
2261 	build_counters_from_header(io);
2262 	if (cnts == NULL) {
2263 		/* Nothing we can do */
2264 		printf("Nothing to do -- no counters built\n");
2265 		if (io) {
2266 			fclose(io);
2267 		}
2268 		return;
2269 	}
2270 	lace_cpus_together();
2271 	print_header();
2272 	if (verbose) {
2273 		for (i=0; i<ncnts; i++) {
2274 			printf("Counter:%s cpu:%d index:%d\n",
2275 			       cnts[i].counter_name,
2276 			       cnts[i].cpu, i);
2277 		}
2278 	}
2279 	line_at = 0;
2280 	not_done = 1;
2281 	while(not_done) {
2282 		if (read_a_line(io)) {
2283 			line_at++;
2284 		} else {
2285 			break;
2286 		}
2287 		if (line_at >= max_to_collect) {
2288 			not_done = 0;
2289 		}
2290 		if (filename == NULL) {
2291 			int cnt;
2292 			/* For the ones we dynamically open we print now */
2293 			for(i=0, cnt=0; i<MAX_CPU; i++) {
2294 				do_expression(glob_cpu[i], (line_at-1));
2295 				cnt++;
2296 				if (cnt == cpu_count_out) {
2297 					printf("\n");
2298 					break;
2299 				} else {
2300 					printf("\t");
2301 				}
2302 			}
2303 		}
2304 	}
2305 	if (filename) {
2306 		fclose(io);
2307 	} else {
2308 		my_pclose(io, pid_of_command);
2309 	}
2310 }
2311 #if defined(__amd64__)
2312 #define cpuid(in,a,b,c,d)\
2313   asm("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (in));
2314 
2315 static __inline void
2316 do_cpuid(u_int ax, u_int cx, u_int *p)
2317 {
2318 	__asm __volatile("cpuid"
2319 			 : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3])
2320 			 :  "0" (ax), "c" (cx) );
2321 }
2322 
2323 #else
2324 #define cpuid(in, a, b, c, d)
2325 #define do_cpuid(ax, cx, p)
2326 #endif
2327 
2328 static void
2329 get_cpuid_set(void)
2330 {
2331 	unsigned long eax, ebx, ecx, edx;
2332 	int model;
2333 	pid_t pid_of_command=0;
2334 	size_t sz, len;
2335 	FILE *io;
2336 	char linebuf[1024], *str;
2337 	u_int reg[4];
2338 
2339 	eax = ebx = ecx = edx = 0;
2340 
2341 	cpuid(0, eax, ebx, ecx, edx);
2342 	if (ebx == 0x68747541) {
2343 		printf("AMD processors are not supported by this program\n");
2344 		printf("Sorry\n");
2345 		exit(0);
2346 	} else if (ebx == 0x6972794) {
2347 		printf("Cyrix processors are not supported by this program\n");
2348 		printf("Sorry\n");
2349 		exit(0);
2350 	} else if (ebx == 0x756e6547) {
2351 		printf("Genuine Intel\n");
2352 	} else {
2353 		printf("Unknown processor type 0x%lx Only Intel AMD64 types are supported by this routine!\n", ebx);
2354 		exit(0);
2355 	}
2356 	cpuid(1, eax, ebx, ecx, edx);
2357 	model = (((eax & 0xF0000) >> 12) | ((eax & 0xF0) >> 4));
2358 	printf("CPU model is 0x%x id:0x%lx\n", model, eax);
2359 	switch (eax & 0xF00) {
2360 	case 0x500:		/* Pentium family processors */
2361 		printf("Intel Pentium P5\n");
2362 		goto not_supported;
2363 		break;
2364 	case 0x600:		/* Pentium Pro, Celeron, Pentium II & III */
2365 		switch (model) {
2366 		case 0x1:
2367 			printf("Intel Pentium P6\n");
2368 			goto not_supported;
2369 			break;
2370 		case 0x3:
2371 		case 0x5:
2372 			printf("Intel PII\n");
2373 			goto not_supported;
2374 			break;
2375 		case 0x6: case 0x16:
2376 			printf("Intel CL\n");
2377 			goto not_supported;
2378 			break;
2379 		case 0x7: case 0x8: case 0xA: case 0xB:
2380 			printf("Intel PIII\n");
2381 			goto not_supported;
2382 			break;
2383 		case 0x9: case 0xD:
2384 			printf("Intel PM\n");
2385 			goto not_supported;
2386 			break;
2387 		case 0xE:
2388 			printf("Intel CORE\n");
2389 			goto not_supported;
2390 			break;
2391 		case 0xF:
2392 			printf("Intel CORE2\n");
2393 			goto not_supported;
2394 			break;
2395 		case 0x17:
2396 			printf("Intel CORE2EXTREME\n");
2397 			goto not_supported;
2398 			break;
2399 		case 0x1C:	/* Per Intel document 320047-002. */
2400 			printf("Intel ATOM\n");
2401 			goto not_supported;
2402 			break;
2403 		case 0x1A:
2404 		case 0x1E:	/*
2405 				 * Per Intel document 253669-032 9/2009,
2406 				 * pages A-2 and A-57
2407 				 */
2408 		case 0x1F:	/*
2409 				 * Per Intel document 253669-032 9/2009,
2410 				 * pages A-2 and A-57
2411 				 */
2412 			printf("Intel COREI7\n");
2413 			goto not_supported;
2414 			break;
2415 		case 0x2E:
2416 			printf("Intel NEHALEM\n");
2417 			goto not_supported;
2418 			break;
2419 		case 0x25:	/* Per Intel document 253669-033US 12/2009. */
2420 		case 0x2C:	/* Per Intel document 253669-033US 12/2009. */
2421 			printf("Intel WESTMERE\n");
2422 			goto not_supported;
2423 			break;
2424 		case 0x2F:	/* Westmere-EX, seen in wild */
2425 			printf("Intel WESTMERE\n");
2426 			goto not_supported;
2427 			break;
2428 		case 0x2A:	/* Per Intel document 253669-039US 05/2011. */
2429 			printf("Intel SANDYBRIDGE\n");
2430 			set_sandybridge();
2431 			break;
2432 		case 0x2D:	/* Per Intel document 253669-044US 08/2012. */
2433 			printf("Intel SANDYBRIDGE_XEON\n");
2434 			set_sandybridge();
2435 			break;
2436 		case 0x3A:	/* Per Intel document 253669-043US 05/2012. */
2437 			printf("Intel IVYBRIDGE\n");
2438 			set_ivybridge();
2439 			break;
2440 		case 0x3E:	/* Per Intel document 325462-045US 01/2013. */
2441 			printf("Intel IVYBRIDGE_XEON\n");
2442 			set_ivybridge();
2443 			break;
2444 		case 0x3F:	/* Per Intel document 325462-045US 09/2014. */
2445 			printf("Intel HASWELL (Xeon)\n");
2446 			set_haswell();
2447 			break;
2448 		case 0x3C:	/* Per Intel document 325462-045US 01/2013. */
2449 		case 0x45:
2450 		case 0x46:
2451 			printf("Intel HASWELL\n");
2452 			set_haswell();
2453 			break;
2454 
2455 		case 0x4e:
2456 		case 0x5e:
2457 			printf("Intel SKY-LAKE\n");
2458 			goto not_supported;
2459 			break;
2460 		case 0x3D:
2461 		case 0x47:
2462 			printf("Intel BROADWELL\n");
2463 			set_broadwell();
2464 			break;
2465 		case 0x4f:
2466 		case 0x56:
2467 			printf("Intel BROADWEL (Xeon)\n");
2468 			set_broadwell();
2469 			break;
2470 
2471 		case 0x4D:
2472 			/* Per Intel document 330061-001 01/2014. */
2473 			printf("Intel ATOM_SILVERMONT\n");
2474 			goto not_supported;
2475 			break;
2476 		default:
2477 			printf("Intel model 0x%x is not known -- sorry\n",
2478 			       model);
2479 			goto not_supported;
2480 			break;
2481 		}
2482 		break;
2483 	case 0xF00:		/* P4 */
2484 		printf("Intel unknown model %d\n", model);
2485 		goto not_supported;
2486 		break;
2487 	}
2488 	do_cpuid(0xa, 0, reg);
2489 	max_pmc_counters = (reg[3] & 0x0000000f) + 1;
2490 	printf("We have %d PMC counters to work with\n", max_pmc_counters);
2491 	/* Ok lets load the list of all known PMC's */
2492 	io = my_popen("/usr/sbin/pmccontrol -L", "r", &pid_of_command);
2493 	if (valid_pmcs == NULL) {
2494 		/* Likely */
2495 		pmc_allocated_cnt = PMC_INITIAL_ALLOC;
2496 		sz = sizeof(char *) * pmc_allocated_cnt;
2497 		valid_pmcs = malloc(sz);
2498 		if (valid_pmcs == NULL) {
2499 			printf("No memory allocation fails at startup?\n");
2500 			exit(-1);
2501 		}
2502 		memset(valid_pmcs, 0, sz);
2503 	}
2504 
2505 	while (fgets(linebuf, sizeof(linebuf), io) != NULL) {
2506 		if (linebuf[0] != '\t') {
2507 			/* sometimes headers ;-) */
2508 			continue;
2509 		}
2510 		len = strlen(linebuf);
2511 		if (linebuf[(len-1)] == '\n') {
2512 			/* Likely */
2513 			linebuf[(len-1)] = 0;
2514 		}
2515 		str = &linebuf[1];
2516 		len = strlen(str) + 1;
2517 		valid_pmcs[valid_pmc_cnt] = malloc(len);
2518 		if (valid_pmcs[valid_pmc_cnt] == NULL) {
2519 			printf("No memory2 allocation fails at startup?\n");
2520 			exit(-1);
2521 		}
2522 		memset(valid_pmcs[valid_pmc_cnt], 0, len);
2523 		strcpy(valid_pmcs[valid_pmc_cnt], str);
2524 		valid_pmc_cnt++;
2525 		if (valid_pmc_cnt >= pmc_allocated_cnt) {
2526 			/* Got to expand -- unlikely */
2527 			char **more;
2528 
2529 			sz = sizeof(char *) * (pmc_allocated_cnt * 2);
2530 			more = malloc(sz);
2531 			if (more == NULL) {
2532 				printf("No memory3 allocation fails at startup?\n");
2533 				exit(-1);
2534 			}
2535 			memset(more, 0, sz);
2536 			memcpy(more, valid_pmcs, sz);
2537 			pmc_allocated_cnt *= 2;
2538 			free(valid_pmcs);
2539 			valid_pmcs = more;
2540 		}
2541 	}
2542 	my_pclose(io, pid_of_command);
2543 	return;
2544 not_supported:
2545 	printf("Not supported\n");
2546 	exit(-1);
2547 }
2548 
2549 static void
2550 explain_all(void)
2551 {
2552 	int i;
2553 	printf("For CPU's of type %s the following expressions are available:\n",the_cpu.cputype);
2554 	printf("-------------------------------------------------------------\n");
2555 	for(i=0; i<the_cpu.number; i++){
2556 		printf("For -e %s ", the_cpu.ents[i].name);
2557 		(*the_cpu.explain)(the_cpu.ents[i].name);
2558 		printf("----------------------------\n");
2559 	}
2560 }
2561 
/*
 * Try one PMC by running pmcstat on it briefly and print the verdict
 * on the current output line.
 *
 * pmc        - name of the PMC to try.
 * out_so_far - number of characters already printed on this line;
 *              the verdict is padded out to start at column 50.
 *
 * Prints "Pass" followed by the first sample when the PMC name shows
 * up in the pmcstat header, otherwise a "Failed" message.
 */
static void
test_for_a_pmc(const char *pmc, int out_so_far)
{
	FILE *io;
	pid_t pid_of_command=0;
	char my_command[1024];
	char line[1024];
	char resp[1024];
	int len, llen, i;

	if (out_so_far < 50) {
		len = 50 - out_so_far;
		for(i=0; i<len; i++) {
			printf(" ");
		}
	}
	len = snprintf(my_command, sizeof(my_command),
	    "/usr/sbin/pmcstat -w .25 -c 0 -s %s", pmc);
	if (len < 0 || (size_t)len >= sizeof(my_command)) {
		/* Never run a truncated command line. */
		printf("Failed -- PMC name too long\n");
		return;
	}
	io = my_popen(my_command, "r", &pid_of_command);
	if (io == NULL) {
		printf("Failed -- popen fails\n");
		return;
	}
	/* Setup what we expect; it fits since the command above did. */
	len = snprintf(resp, sizeof(resp), "%s", pmc);
	if (fgets(line, sizeof(line), io) == NULL) {
		printf("Failed -- no output from pmstat\n");
		goto out;
	}
	llen = strlen(line);
	if (llen > 0 && line[(llen-1)] == '\n') {
		line[(llen-1)] = 0;
		llen--;
	}
	for(i=2; i<(llen-len); i++) {
		if (strncmp(&line[i], "ERROR", 5) == 0) {
			printf("Failed %s\n", line);
			goto out;
		} else if (strncmp(&line[i], resp, len) == 0) {
			int j, k;

			if (fgets(line, sizeof(line), io) == NULL) {
				printf("Failed -- no second output from pmstat\n");
				goto out;
			}
			/*
			 * Step over the leading blanks one at a time so
			 * the first character of the value is kept.
			 */
			j = 0;
			while (line[j] == ' ') {
				j++;
			}
			printf("Pass");
			len = strlen(&line[j]);
			if (len < 20) {
				for(k=0; k<(20-len); k++) {
					printf(" ");
				}
			}
			if (len) {
				printf("%s", &line[j]);
			} else {
				printf("\n");
			}
			goto out;
		}
	}
	printf("Failed -- '%s' not '%s'\n", line, resp);
out:
	my_pclose(io, pid_of_command);

}
2634 
/*
 * Add a private copy of 'name' to vars[cur_cnt] unless one of the
 * first cur_cnt entries already holds that string.
 *
 * Returns 1 when the name was added and 0 when it was already there.
 * Exits when the target slot is unexpectedly occupied or memory runs
 * out.
 */
static int
add_it_to(char **vars, int cur_cnt, char *name)
{
	char **slot;
	size_t need;
	int idx = 0;

	while (idx < cur_cnt) {
		if (strcmp(vars[idx], name) == 0) {
			/* Already have */
			return(0);
		}
		idx++;
	}
	slot = &vars[cur_cnt];
	if (*slot != NULL) {
		printf("Cur_cnt:%d filled with %s??\n",
		       cur_cnt, *slot);
		exit(-1);
	}
	/* Ok its new */
	need = strlen(name) + 1;
	*slot = malloc(need);
	if (*slot == NULL) {
		printf("No memory %s\n", __FUNCTION__);
		exit(-1);
	}
	/* Copies the terminating NUL along with the text. */
	memcpy(*slot, name, need);
	return(1);
}
2662 
2663 static char *
2664 build_command_for_exp(struct expression *exp)
2665 {
2666 	/*
2667 	 * Build the pmcstat command to handle
2668 	 * the passed in expression.
2669 	 * /usr/sbin/pmcstat -w 1 -s NNN -s QQQ
2670 	 * where NNN and QQQ represent the PMC's in the expression
2671 	 * uniquely..
2672 	 */
2673 	char forming[1024];
2674 	int cnt_pmc, alloced_pmcs, i;
2675 	struct expression *at;
2676 	char **vars, *cmd;
2677 	size_t mal;
2678 
2679 	alloced_pmcs = cnt_pmc = 0;
2680 	/* first how many do we have */
2681 	at = exp;
2682 	while (at) {
2683 		if (at->type == TYPE_VALUE_PMC) {
2684 			cnt_pmc++;
2685 		}
2686 		at = at->next;
2687 	}
2688 	if (cnt_pmc == 0) {
2689 		printf("No PMC's in your expression -- nothing to do!!\n");
2690 		exit(0);
2691 	}
2692 	mal = cnt_pmc * sizeof(char *);
2693 	vars = malloc(mal);
2694 	if (vars == NULL) {
2695 		printf("No memory\n");
2696 		exit(-1);
2697 	}
2698 	memset(vars, 0, mal);
2699 	at = exp;
2700 	while (at) {
2701 		if (at->type == TYPE_VALUE_PMC) {
2702 			if(add_it_to(vars, alloced_pmcs, at->name)) {
2703 				alloced_pmcs++;
2704 			}
2705 		}
2706 		at = at->next;
2707 	}
2708 	/* Now we have a unique list in vars so create our command */
2709 	mal = 23; /*	"/usr/sbin/pmcstat -w 1"  + \0 */
2710 	for(i=0; i<alloced_pmcs; i++) {
2711 		mal += strlen(vars[i]) + 4;	/* var + " -s " */
2712 	}
2713 	cmd = malloc((mal+2));
2714 	if (cmd == NULL) {
2715 		printf("%s out of mem\n", __FUNCTION__);
2716 		exit(-1);
2717 	}
2718 	memset(cmd, 0, (mal+2));
2719 	strcpy(cmd, "/usr/sbin/pmcstat -w 1");
2720 	at = exp;
2721 	for(i=0; i<alloced_pmcs; i++) {
2722 		sprintf(forming, " -s %s", vars[i]);
2723 		strcat(cmd, forming);
2724 		free(vars[i]);
2725 		vars[i] = NULL;
2726 	}
2727 	free(vars);
2728 	return(cmd);
2729 }
2730 
2731 static int
2732 user_expr(struct counters *cpu, int pos)
2733 {
2734 	int ret;
2735 	double res;
2736 	struct counters *var;
2737 	struct expression *at;
2738 
2739 	at = master_exp;
2740 	while (at) {
2741 		if (at->type == TYPE_VALUE_PMC) {
2742 			var = find_counter(cpu, at->name);
2743 			if (var == NULL) {
2744 				printf("%s:Can't find counter %s?\n", __FUNCTION__, at->name);
2745 				exit(-1);
2746 			}
2747 			if (pos != -1) {
2748 				at->value = var->vals[pos] * 1.0;
2749 			} else {
2750 				at->value = var->sum * 1.0;
2751 			}
2752 		}
2753 		at = at->next;
2754 	}
2755 	res = run_expr(master_exp, 1, NULL);
2756 	ret = printf("%1.3f", res);
2757 	return(ret);
2758 }
2759 
2760 
2761 static void
2762 set_manual_exp(struct expression *exp)
2763 {
2764 	expression = user_expr;
2765 	command = build_command_for_exp(exp);
2766 	threshold = "User defined threshold";
2767 }
2768 
2769 static void
2770 run_tests(void)
2771 {
2772 	int i, lenout;
2773 	printf("Running tests on %d PMC's this may take some time\n", valid_pmc_cnt);
2774 	printf("------------------------------------------------------------------------\n");
2775 	for(i=0; i<valid_pmc_cnt; i++) {
2776 		lenout = printf("%s", valid_pmcs[i]);
2777 		fflush(stdout);
2778 		test_for_a_pmc(valid_pmcs[i], lenout);
2779 	}
2780 }
2781 static void
2782 list_all(void)
2783 {
2784 	int i, cnt, j;
2785 	printf("PMC                                               Abbreviation\n");
2786 	printf("--------------------------------------------------------------\n");
2787 	for(i=0; i<valid_pmc_cnt; i++) {
2788 		cnt = printf("%s", valid_pmcs[i]);
2789 		for(j=cnt; j<52; j++) {
2790 			printf(" ");
2791 		}
2792 		printf("%%%d\n", i);
2793 	}
2794 }
2795 
2796 
2797 int
2798 main(int argc, char **argv)
2799 {
2800 	int i, j, cnt;
2801 	char *filename=NULL;
2802 	const char *name=NULL;
2803 	int help_only = 0;
2804 	int test_mode = 0;
2805 	int test_at = 0;
2806 
2807 	get_cpuid_set();
2808 	memset(glob_cpu, 0, sizeof(glob_cpu));
2809 	while ((i = getopt(argc, argv, "ALHhvm:i:?e:TE:")) != -1) {
2810 		switch (i) {
2811 		case 'A':
2812 			run_all = 1;
2813 			break;
2814 		case 'L':
2815 			list_all();
2816 			return(0);
2817 		case 'H':
2818 			printf("**********************************\n");
2819 			explain_all();
2820 			printf("**********************************\n");
2821 			return(0);
2822 			break;
2823 		case 'T':
2824 			test_mode = 1;
2825 			break;
2826 		case 'E':
2827 			master_exp = parse_expression(optarg);
2828 			if (master_exp) {
2829 				set_manual_exp(master_exp);
2830 			}
2831 			break;
2832 		case 'e':
2833 			if (validate_expression(optarg)) {
2834 				printf("Unknown expression %s\n", optarg);
2835 				return(0);
2836 			}
2837 			name = optarg;
2838 			set_expression(optarg);
2839 			break;
2840 		case 'm':
2841 			max_to_collect = strtol(optarg, NULL, 0);
2842 			if (max_to_collect > MAX_COUNTER_SLOTS) {
2843 				/* You can't collect more than max in array */
2844 				max_to_collect = MAX_COUNTER_SLOTS;
2845 			}
2846 			break;
2847 		case 'v':
2848 			verbose++;
2849 			break;
2850 		case 'h':
2851 			help_only = 1;
2852 			break;
2853 		case 'i':
2854 			filename = optarg;
2855 			break;
2856 		case '?':
2857 		default:
2858 		use:
2859 			printf("Use %s [ -i inputfile -v -m max_to_collect -e expr -E -h -? -H]\n",
2860 			       argv[0]);
2861 			printf("-i inputfile -- use source as inputfile not stdin (if stdin collect)\n");
2862 			printf("-v -- verbose dump debug type things -- you don't want this\n");
2863 			printf("-m N -- maximum to collect is N measurments\n");
2864 			printf("-e expr-name -- Do expression expr-name\n");
2865 			printf("-E 'your expression' -- Do your expression\n");
2866 			printf("-h -- Don't do the expression I put in -e xxx just explain what it does and exit\n");
2867 			printf("-H -- Don't run anything, just explain all canned expressions\n");
2868 			printf("-T -- Test all PMC's defined by this processor\n");
2869 			printf("-A -- Run all canned tests\n");
2870 			return(0);
2871 			break;
2872 		}
2873 	}
2874 	if ((run_all == 0) && (name == NULL) && (filename == NULL) &&
2875 	    (test_mode == 0) && (master_exp == NULL)) {
2876 		printf("Without setting an expression we cannot dynamically gather information\n");
2877 		printf("you must supply a filename (and you probably want verbosity)\n");
2878 		goto use;
2879 	}
2880 	if (run_all && max_to_collect > 10) {
2881 		max_to_collect = 3;
2882 	}
2883 	if (test_mode) {
2884 		run_tests();
2885 		return(0);
2886 	}
2887 	printf("*********************************\n");
2888 	if ((master_exp == NULL) && name) {
2889 		(*the_cpu.explain)(name);
2890 	} else if (master_exp) {
2891 		printf("Examine your expression ");
2892 		print_exp(master_exp);
2893 		printf("User defined threshold\n");
2894 	}
2895 	if (help_only) {
2896 		return(0);
2897 	}
2898 	if (run_all) {
2899 	more:
2900 		name = the_cpu.ents[test_at].name;
2901 		printf("***Test %s (threshold %s)****\n", name, the_cpu.ents[test_at].thresh);
2902 		test_at++;
2903 		if (set_expression(name) == -1) {
2904 			if (test_at >= the_cpu.number) {
2905 				goto done;
2906 			} else
2907 				goto more;
2908 		}
2909 
2910 	}
2911 	process_file(filename);
2912 	if (verbose >= 2) {
2913 		for (i=0; i<ncnts; i++) {
2914 			printf("Counter:%s cpu:%d index:%d\n",
2915 			       cnts[i].counter_name,
2916 			       cnts[i].cpu, i);
2917 			for(j=0; j<cnts[i].pos; j++) {
2918 				printf(" val - %ld\n", (long int)cnts[i].vals[j]);
2919 			}
2920 			printf(" sum - %ld\n", (long int)cnts[i].sum);
2921 		}
2922 	}
2923 	if (expression == NULL) {
2924 		return(0);
2925 	}
2926 	if (max_to_collect > 1) {
2927 		for(i=0, cnt=0; i<MAX_CPU; i++) {
2928 			if (glob_cpu[i]) {
2929 				do_expression(glob_cpu[i], -1);
2930 				cnt++;
2931 				if (cnt == cpu_count_out) {
2932 					printf("\n");
2933 					break;
2934 				} else {
2935 					printf("\t");
2936 				}
2937 			}
2938 		}
2939 	}
2940 	if (run_all && (test_at < the_cpu.number)) {
2941 		memset(glob_cpu, 0, sizeof(glob_cpu));
2942 		ncnts = 0;
2943 		printf("*********************************\n");
2944 		goto more;
2945 	} else if (run_all) {
2946 	done:
2947 		printf("*********************************\n");
2948 	}
2949 	return(0);
2950 }
2951