xref: /freebsd/usr.sbin/pmcstudy/pmcstudy.c (revision ade40db3a2731d88f3c10a811f982c98495f522a)
1 /*-
2  * Copyright (c) 2014, 2015 Netflix Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer,
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 #include <sys/types.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <unistd.h>
32 #include <string.h>
33 #include <strings.h>
34 #include <sys/errno.h>
35 #include <signal.h>
36 #include <sys/wait.h>
37 #include <getopt.h>
38 #include "eval_expr.h"
39 __FBSDID("$FreeBSD$");
40 
/* Number of sample slots per counter; sizes the vals[] array in struct counters. */
#define MAX_COUNTER_SLOTS 1024
/* Size in bytes of the counter_name[] buffer in struct counters. */
#define MAX_NLEN 64
/* Number of entries in the glob_cpu[] array. */
#define MAX_CPU 64
/* NOTE(review): presumably a verbosity switch; its users are outside this chunk. */
static int verbose = 0;

/* Process environment; my_popen() passes it to execve(). */
extern char **environ;
/*
 * NOTE(review): struct expression is declared in eval_expr.h; how this
 * pointer is used is not visible here -- confirm before relying on it.
 */
extern struct expression *master_exp;
struct expression *master_exp=NULL;

/*
 * NOTE(review): the three variables below look like a growable table of
 * PMC names (array, used count, allocated count) with PMC_INITIAL_ALLOC
 * as the starting size -- confirm against the code that fills them.
 */
#define PMC_INITIAL_ALLOC 512
extern char **valid_pmcs;
char **valid_pmcs = NULL;
extern int valid_pmc_cnt;
int valid_pmc_cnt=0;
extern int pmc_allocated_cnt;
int pmc_allocated_cnt=0;
57 
/*
 * The following two functions are variants of popen(3) and pclose(3),
 * with the caveat that my_popen() also hands back the PID of the child
 * so that it can be supplied to my_pclose(), which sends a SIGTERM
 * to the process.
 */

/*
 * Run `command' through "/bin/sh -c" with its stdio attached to pipes.
 *
 * command - shell command line; together with its terminating NUL it
 *           must fit in the local 1024-byte argument buffer.
 * dir     - "r" for a stream that reads the child's stdout and stderr,
 *           "w" for a stream that writes to the child's stdin.
 * p_pid   - on success receives the PID of the child.
 *
 * Returns the stdio stream, or NULL with errno set: EINVAL for a bad
 * `dir', E2BIG for an over-long command, otherwise whatever pipe(2),
 * fork(2) or fdopen(3) reported.  *p_pid is written only on success.
 */
static FILE *
my_popen(const char *command, const char *dir, pid_t *p_pid)
{
	FILE *io;
	int pdesin[2], pdesout[2];
	int keep_fd, reading, saved_errno;
	size_t cmdlen;
	char *argv[4];
	pid_t pid;
	char cmd[4];
	char cmd2[1024];
	char arg1[4];

	reading = (strcmp(dir, "r") == 0);
	if (!reading && strcmp(dir, "w") != 0) {
		errno = EINVAL;
		return (NULL);
	}
	/* Refuse commands that would overflow cmd2 rather than smash the stack. */
	cmdlen = strlen(command);
	if (cmdlen >= sizeof(cmd2)) {
		errno = E2BIG;
		return (NULL);
	}
	if (pipe(pdesin) < 0)
		return (NULL);

	if (pipe(pdesout) < 0) {
		saved_errno = errno;
		(void)close(pdesin[0]);
		(void)close(pdesin[1]);
		errno = saved_errno;
		return (NULL);
	}
	/* execve() takes non-const argv strings, so build private copies. */
	strcpy(cmd, "sh");
	strcpy(arg1, "-c");
	memcpy(cmd2, command, cmdlen + 1);
	argv[0] = cmd;
	argv[1] = arg1;
	argv[2] = cmd2;
	argv[3] = NULL;

	pid = fork();
	if (pid == -1) {		/* Error. */
		saved_errno = errno;
		(void)close(pdesin[0]);
		(void)close(pdesin[1]);
		(void)close(pdesout[0]);
		(void)close(pdesout[1]);
		errno = saved_errno;
		return (NULL);
	}
	if (pid == 0) {			/* Child. */
		/* Close out un-used sides */
		(void)close(pdesin[1]);
		(void)close(pdesout[0]);
		/* Now prepare the stdin of the process */
		if (pdesin[0] != STDIN_FILENO) {
			if (dup2(pdesin[0], STDIN_FILENO) < 0)
				_exit(127);
			(void)close(pdesin[0]);
		}
		/* Now prepare the stdout of the process, and stderr just in case */
		if (dup2(pdesout[1], STDOUT_FILENO) < 0 ||
		    dup2(pdesout[1], STDERR_FILENO) < 0)
			_exit(127);
		if (pdesout[1] != STDOUT_FILENO && pdesout[1] != STDERR_FILENO)
			(void)close(pdesout[1]);
		/* Now run it */
		execve("/bin/sh", argv, environ);
		/* _exit() so the parent's buffered stdio is not flushed twice. */
		_exit(127);
		/* NOTREACHED */
	}
	/* Parent: keep only the pipe end matching the requested direction. */
	if (reading) {
		keep_fd = pdesout[0];
		(void)close(pdesout[1]);
		(void)close(pdesin[0]);
		(void)close(pdesin[1]);
		io = fdopen(keep_fd, "r");
	} else {
		keep_fd = pdesin[1];
		(void)close(pdesin[0]);
		(void)close(pdesout[0]);
		(void)close(pdesout[1]);
		io = fdopen(keep_fd, "w");
	}
	if (io == NULL) {
		/* No stream to hand back: drop the descriptor and reap the child. */
		saved_errno = errno;
		(void)close(keep_fd);
		(void)kill(pid, SIGTERM);
		while (waitpid(pid, NULL, 0) == -1 && errno == EINTR)
			continue;
		errno = saved_errno;
		return (NULL);
	}
	/* Store the pid */
	*p_pid = pid;
	return (io);
}
142 
/*
 * my_pclose --
 *	Close a stream obtained from my_popen(), send SIGTERM to the
 *	child identified by the_pid and wait for it, retrying the wait
 *	when it is interrupted by a signal.  Nothing is returned.
 */
static void
my_pclose(FILE *io, pid_t the_pid)
{
	int status;

	(void)fclose(io);
	/* Die if you are not dead! */
	kill(the_pid, SIGTERM);
	/* waitpid(pid, &st, 0) is wait4(pid, &st, 0, NULL). */
	while (waitpid(the_pid, &status, 0) == -1 && errno == EINTR)
		continue;
}
164 
/*
 * Samples collected for one named counter.  Entries are chained through
 * next_cpu, which is the link find_counter() follows when searching by name.
 */
struct counters {
	struct counters *next_cpu;		/* Next entry in the chain */
	char counter_name[MAX_NLEN];		/* Name of counter */
	int cpu;				/* CPU we are on */
	int pos;				/* Index we are filling to. */
	uint64_t vals[MAX_COUNTER_SLOTS];	/* Sample values, up to MAX_COUNTER_SLOTS of them */
	uint64_t sum;				/* Summary of entries; used when pos == -1 */
};
173 
/* NOTE(review): presumably one counter chain per CPU, indexed by CPU id -- confirm. */
extern struct counters *glob_cpu[MAX_CPU];
struct counters *glob_cpu[MAX_CPU];

/* NOTE(review): presumably the storage backing the counter entries; allocation is outside this chunk. */
extern struct counters *cnts;
struct counters *cnts=NULL;

/* NOTE(review): presumably the number of entries in cnts -- confirm. */
extern int ncnts;
int ncnts=0;

/*
 * Pointer to a function with the same signature as the metric routines
 * in this file (counter chain, sample index); where it is assigned is
 * not visible in this chunk.
 */
extern int (*expression)(struct counters *, int);
int (*expression)(struct counters *, int);

/* NOTE(review): threshold text and command line; presumably copied from a struct cpu_entry -- confirm. */
static const char *threshold=NULL;
static const char *command;
188 
/*
 * One named analysis.  func has the same signature as the metric routines
 * in this file: it takes a counter chain and a sample index.
 */
struct cpu_entry {
	const char *name;	/* Analysis name (presumably the names the explain_name_*() routines accept -- confirm) */
	const char *thresh;	/* Threshold description text */
	const char *command;	/* NOTE(review): presumably the pmcstat command line to run -- confirm */
	int (*func)(struct counters *, int);	/* Routine that computes and prints the value */
};
195 
196 
/*
 * Description of a CPU model and the analyses available for it.
 * explain has the same signature as explain_name_sb(), explain_name_ib()
 * and explain_name_has().
 */
struct cpu_type {
	char cputype[32];		/* CPU type name */
	int number;			/* NOTE(review): presumably the number of entries in ents -- confirm */
	struct cpu_entry *ents;		/* Table of analyses */
	void (*explain)(const char *name);	/* Prints the explanation for one analysis name */
};
/* NOTE(review): presumably the description selected for the running machine -- set outside this chunk. */
extern struct cpu_type the_cpu;
struct cpu_type the_cpu;
205 
/*
 * Print the formula behind the analysis called `name' and the threshold
 * at which its value becomes interesting (first of the three
 * explain_name_*() variants in this file).
 *
 * name - analysis name; an unrecognised name prints "Unknown name:" and
 *        the threshold text "unknown entry".
 */
static void
explain_name_sb(const char *name)
{
	static const struct {
		const char *name;	/* analysis name to match */
		const char *formula;	/* text printed for it */
		const char *thresh;	/* threshold description */
	} explanations[] = {
		{ "allocstall1",
		  "Examine PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "allocstall2",
		  "Examine PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "br_miss",
		  "Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		/* The opening parenthesis was missing from this text. */
		{ "splitload",
		  "Examine (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "cache2",
		  "Examine ((MEM_LOAD_RETIRED.L3_HIT * 26) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P\n"
		  "**Note we have it labeled MEM_LOAD_UOPS_RETIRED.LLC_HIT not MEM_LOAD_RETIRED.L3_HIT\n",
		  "thresh >= .2" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "frontendstall",
		  "Examine IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4)\n",
		  "thresh >= .15" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  "          MACHINE_CLEARS.SMC + \n"
		  "          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (CPU_CLK_UNHALTED.THREAD_P * 4)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .9" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "dtlbmissstore",
		  "Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .05" },
	};
	const size_t count = sizeof(explanations) / sizeof(explanations[0]);
	const char *mythresh = "unknown entry";
	size_t i;

	for (i = 0; i < count; i++) {
		if (strcmp(name, explanations[i].name) == 0)
			break;
	}
	if (i < count) {
		fputs(explanations[i].formula, stdout);
		mythresh = explanations[i].thresh;
	} else {
		printf("Unknown name:%s\n", name);
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
284 
/*
 * Print the formula behind the analysis called `name' and the threshold
 * at which its value becomes interesting (second of the three
 * explain_name_*() variants in this file).
 *
 * name - analysis name; an unrecognised name prints "Unknown name:" and
 *        the threshold text "unknown entry".
 */
static void
explain_name_ib(const char *name)
{
	static const struct {
		const char *name;	/* analysis name to match */
		const char *formula;	/* text printed for it */
		const char *thresh;	/* threshold description */
	} explanations[] = {
		{ "br_miss",
		  "Examine ((BR_MISP_RETIRED.ALL_BRANCHES /(BR_MISP_RETIRED.ALL_BRANCHES +\n"
		  "         MACHINE_CLEAR.COUNT) * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES)\n"
		  "/ (4 * CPU_CLK_UNHALTED.THREAD))))\n",
		  "thresh >= .2" },
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .9" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		/*
		 * This text had an unbalanced parenthesis and no multiplier;
		 * it now matches the 29 * LLC_HIT formula computed by
		 * cache2ib().
		 */
		{ "cache2",
		  "Examine (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "itlbmiss",
		  "Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "icachemiss",
		  "Examine (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "lcpstall",
		  "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "datashare",
		  "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "splitload",
		  "Examine  ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) *\n"
		  "         LD_BLOCKS.NO_SR)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "dtlbmissstore",
		  "Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .05" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  "          MACHINE_CLEARS.SMC + \n"
		  "          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
	};
	const size_t count = sizeof(explanations) / sizeof(explanations[0]);
	const char *mythresh = "unknown entry";
	size_t i;

	for (i = 0; i < count; i++) {
		if (strcmp(name, explanations[i].name) == 0)
			break;
	}
	if (i < count) {
		fputs(explanations[i].formula, stdout);
		mythresh = explanations[i].thresh;
	} else {
		printf("Unknown name:%s\n", name);
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
366 
367 
/*
 * Print the formula behind the analysis called `name' and the threshold
 * at which its value becomes interesting (third of the three
 * explain_name_*() variants in this file).
 *
 * name - analysis name; an unrecognised name prints "Unknown name:" and
 *        the threshold text "unknown entry".
 */
static void
explain_name_has(const char *name)
{
	static const struct {
		const char *key;	/* analysis name to match */
		const char *text;	/* text printed for it */
		const char *limit;	/* threshold description */
	} table[] = {
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .75" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "itlbmiss",
		  "Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "icachemiss",
		  "Examine (36 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "lcpstall",
		  "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "cache2",
		  "Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))\n"
		  "          / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "datashare",
		  "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "splitload",
		  "Examine  (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "br_miss",
		  "Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD\n",
		  "thresh >= .2" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  "          MACHINE_CLEARS.SMC + \n"
		  "          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
	};
	const size_t entries = sizeof(table) / sizeof(table[0]);
	const char *mythresh = NULL;
	size_t idx;

	/* Names are unique, so the first hit is the only hit. */
	for (idx = 0; idx < entries && mythresh == NULL; idx++) {
		if (strcmp(name, table[idx].key) == 0) {
			fputs(table[idx].text, stdout);
			mythresh = table[idx].limit;
		}
	}
	if (mythresh == NULL) {
		printf("Unknown name:%s\n", name);
		mythresh = "unknown entry";
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
445 
446 static struct counters *
447 find_counter(struct counters *base, const char *name)
448 {
449 	struct counters *at;
450 	int len;
451 
452 	at = base;
453 	len = strlen(name);
454 	while(at) {
455 		if (strncmp(at->counter_name, name, len) == 0) {
456 			return(at);
457 		}
458 		at = at->next_cpu;
459 	}
460 	printf("Can't find counter %s\n", name);
461 	printf("We have:\n");
462 	at = base;
463 	while(at) {
464 		printf("- %s\n", at->counter_name);
465 		at = at->next_cpu;
466 	}
467 	exit(-1);
468 }
469 
470 static int
471 allocstall1(struct counters *cpu, int pos)
472 {
473 /*  1  - PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW/CPU_CLK_UNHALTED.THREAD_P (thresh > .05)*/
474 	int ret;
475 	struct counters *partial;
476 	struct counters *unhalt;
477 	double un, par, res;
478 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
479 	partial = find_counter(cpu, "PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW");
480 	if (pos != -1) {
481 		par = partial->vals[pos] * 1.0;
482 		un = unhalt->vals[pos] * 1.0;
483 	} else {
484 		par = partial->sum * 1.0;
485 		un = unhalt->sum * 1.0;
486 	}
487 	res = par/un;
488 	ret = printf("%1.3f", res);
489 	return(ret);
490 }
491 
492 static int
493 allocstall2(struct counters *cpu, int pos)
494 {
495 /*  2  - PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
496 	int ret;
497 	struct counters *partial;
498 	struct counters *unhalt;
499 	double un, par, res;
500 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
501 	partial = find_counter(cpu, "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP");
502 	if (pos != -1) {
503 		par = partial->vals[pos] * 1.0;
504 		un = unhalt->vals[pos] * 1.0;
505 	} else {
506 		par = partial->sum * 1.0;
507 		un = unhalt->sum * 1.0;
508 	}
509 	res = par/un;
510 	ret = printf("%1.3f", res);
511 	return(ret);
512 }
513 
514 static int
515 br_mispredict(struct counters *cpu, int pos)
516 {
517 	struct counters *brctr;
518 	struct counters *unhalt;
519 	int ret;
520 /*  3  - (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
521 	double br, un, con, res;
522 	con = 20.0;
523 
524 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
525         brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
526 	if (pos != -1) {
527 		br = brctr->vals[pos] * 1.0;
528 		un = unhalt->vals[pos] * 1.0;
529 	} else {
530 		br = brctr->sum * 1.0;
531 		un = unhalt->sum * 1.0;
532 	}
533 	res = (con * br)/un;
534  	ret = printf("%1.3f", res);
535 	return(ret);
536 }
537 
538 static int
539 br_mispredictib(struct counters *cpu, int pos)
540 {
541 	struct counters *brctr;
542 	struct counters *unhalt;
543 	struct counters *clear, *clear2, *clear3;
544 	struct counters *uops;
545 	struct counters *recv;
546 	struct counters *iss;
547 /*	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",*/
548 	int ret;
549         /*
550 	 * (BR_MISP_RETIRED.ALL_BRANCHES /
551 	 *         (BR_MISP_RETIRED.ALL_BRANCHES +
552 	 *          MACHINE_CLEAR.COUNT) *
553 	 *	   ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) / (4 * CPU_CLK_UNHALTED.THREAD)))
554 	 *
555 	 */
556 	double br, cl, cl2, cl3, uo, re, un, con, res, is;
557 	con = 4.0;
558 
559 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
560         brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
561 	clear = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
562 	clear2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
563 	clear3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
564 	uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
565 	iss = find_counter(cpu, "UOPS_ISSUED.ANY");
566 	recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES");
567 	if (pos != -1) {
568 		br = brctr->vals[pos] * 1.0;
569 		cl = clear->vals[pos] * 1.0;
570 		cl2 = clear2->vals[pos] * 1.0;
571 		cl3 = clear3->vals[pos] * 1.0;
572 		uo = uops->vals[pos] * 1.0;
573 		re = recv->vals[pos] * 1.0;
574 		is = iss->vals[pos] * 1.0;
575 		un = unhalt->vals[pos] * 1.0;
576 	} else {
577 		br = brctr->sum * 1.0;
578 		cl = clear->sum * 1.0;
579 		cl2 = clear2->sum * 1.0;
580 		cl3 = clear3->sum * 1.0;
581 		uo = uops->sum * 1.0;
582 		re = recv->sum * 1.0;
583 		is = iss->sum * 1.0;
584 		un = unhalt->sum * 1.0;
585 	}
586 	res = (br/(br + cl + cl2 + cl3) * ((is - uo + con * re) / (con * un)));
587  	ret = printf("%1.3f", res);
588 	return(ret);
589 }
590 
591 static int
592 br_mispredict_broad(struct counters *cpu, int pos)
593 {
594 	struct counters *brctr;
595 	struct counters *unhalt;
596 	struct counters *clear;
597 	struct counters *uops;
598 	struct counters *uops_ret;
599 	struct counters *recv;
600 	int ret;
601 	double br, cl, uo, uo_r, re, con, un, res;
602 
603 	con = 4.0;
604 
605 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
606         brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
607 	clear = find_counter(cpu, "MACHINE_CLEARS.CYCLES");
608 	uops = find_counter(cpu, "UOPS_ISSUED.ANY");
609 	uops_ret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
610 	recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES");
611 
612 	if (pos != -1) {
613 		un = unhalt->vals[pos] * 1.0;
614 		br = brctr->vals[pos] * 1.0;
615 		cl = clear->vals[pos] * 1.0;
616 		uo = uops->vals[pos] * 1.0;
617 		uo_r = uops_ret->vals[pos] * 1.0;
618 		re = recv->vals[pos] * 1.0;
619 	} else {
620 		un = unhalt->sum * 1.0;
621 		br = brctr->sum * 1.0;
622 		cl = clear->sum * 1.0;
623 		uo = uops->sum * 1.0;
624 		uo_r = uops_ret->sum * 1.0;
625 		re = recv->sum * 1.0;
626 	}
627 	res = br / (br + cl) * (uo - uo_r + con * re) / (un * con);
628  	ret = printf("%1.3f", res);
629 	return(ret);
630 }
631 
632 static int
633 splitloadib(struct counters *cpu, int pos)
634 {
635 	int ret;
636 	struct counters *mem;
637 	struct counters *l1d, *ldblock;
638 	struct counters *unhalt;
639 	double un, memd, res, l1, ldb;
640         /*
641 	 * ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) * LD_BLOCKS.NO_SR) / CPU_CLK_UNHALTED.THREAD_P
642 	 * "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
643 	 */
644 
645 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
646 	mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L1_MISS");
647 	l1d = find_counter(cpu, "L1D_PEND_MISS.PENDING");
648 	ldblock = find_counter(cpu, "LD_BLOCKS.NO_SR");
649 	if (pos != -1) {
650 		memd = mem->vals[pos] * 1.0;
651 		l1 = l1d->vals[pos] * 1.0;
652 		ldb = ldblock->vals[pos] * 1.0;
653 		un = unhalt->vals[pos] * 1.0;
654 	} else {
655 		memd = mem->sum * 1.0;
656 		l1 = l1d->sum * 1.0;
657 		ldb = ldblock->sum * 1.0;
658 		un = unhalt->sum * 1.0;
659 	}
660 	res = ((l1 / memd) * ldb)/un;
661 	ret = printf("%1.3f", res);
662 	return(ret);
663 }
664 
665 static int
666 splitload(struct counters *cpu, int pos)
667 {
668 	int ret;
669 	struct counters *mem;
670 	struct counters *unhalt;
671 	double con, un, memd, res;
672 /*  4  - (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/
673 
674 	con = 5.0;
675 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
676 	mem = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_LOADS");
677 	if (pos != -1) {
678 		memd = mem->vals[pos] * 1.0;
679 		un = unhalt->vals[pos] * 1.0;
680 	} else {
681 		memd = mem->sum * 1.0;
682 		un = unhalt->sum * 1.0;
683 	}
684 	res = (memd * con)/un;
685 	ret = printf("%1.3f", res);
686 	return(ret);
687 }
688 
689 static int
690 splitstore(struct counters *cpu, int pos)
691 {
692         /*  5  - MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES (thresh > 0.01) */
693 	int ret;
694 	struct counters *mem_split;
695 	struct counters *mem_stores;
696 	double memsplit, memstore, res;
697 	mem_split = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_STORES");
698 	mem_stores = find_counter(cpu, "MEM_UOP_RETIRED.ALL_STORES");
699 	if (pos != -1) {
700 		memsplit = mem_split->vals[pos] * 1.0;
701 		memstore = mem_stores->vals[pos] * 1.0;
702 	} else {
703 		memsplit = mem_split->sum * 1.0;
704 		memstore = mem_stores->sum * 1.0;
705 	}
706 	res = memsplit/memstore;
707 	ret = printf("%1.3f", res);
708 	return(ret);
709 }
710 
711 
712 static int
713 contested(struct counters *cpu, int pos)
714 {
715         /*  6  - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
716 	int ret;
717 	struct counters *mem;
718 	struct counters *unhalt;
719 	double con, un, memd, res;
720 
721 	con = 60.0;
722 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
723 	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
724 	if (pos != -1) {
725 		memd = mem->vals[pos] * 1.0;
726 		un = unhalt->vals[pos] * 1.0;
727 	} else {
728 		memd = mem->sum * 1.0;
729 		un = unhalt->sum * 1.0;
730 	}
731 	res = (memd * con)/un;
732 	ret = printf("%1.3f", res);
733 	return(ret);
734 }
735 
736 static int
737 contested_has(struct counters *cpu, int pos)
738 {
739         /*  6  - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
740 	int ret;
741 	struct counters *mem;
742 	struct counters *unhalt;
743 	double con, un, memd, res;
744 
745 	con = 84.0;
746 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
747 	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
748 	if (pos != -1) {
749 		memd = mem->vals[pos] * 1.0;
750 		un = unhalt->vals[pos] * 1.0;
751 	} else {
752 		memd = mem->sum * 1.0;
753 		un = unhalt->sum * 1.0;
754 	}
755 	res = (memd * con)/un;
756 	ret = printf("%1.3f", res);
757 	return(ret);
758 }
759 
760 static int
761 contestedbroad(struct counters *cpu, int pos)
762 {
763         /*  6  - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
764 	int ret;
765 	struct counters *mem;
766 	struct counters *mem2;
767 	struct counters *unhalt;
768 	double con, un, memd, memtoo, res;
769 
770 	con = 84.0;
771 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
772 	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
773 	mem2 = find_counter(cpu,"MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS");
774 
775 	if (pos != -1) {
776 		memd = mem->vals[pos] * 1.0;
777 		memtoo = mem2->vals[pos] * 1.0;
778 		un = unhalt->vals[pos] * 1.0;
779 	} else {
780 		memd = mem->sum * 1.0;
781 		memtoo = mem2->sum * 1.0;
782 		un = unhalt->sum * 1.0;
783 	}
784 	res = ((memd * con) + memtoo)/un;
785 	ret = printf("%1.3f", res);
786 	return(ret);
787 }
788 
789 
790 static int
791 blockstoreforward(struct counters *cpu, int pos)
792 {
793         /*  7  - (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .05)*/
794 	int ret;
795 	struct counters *ldb;
796 	struct counters *unhalt;
797 	double con, un, ld, res;
798 
799 	con = 13.0;
800 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
801 	ldb = find_counter(cpu, "LD_BLOCKS_STORE_FORWARD");
802 	if (pos != -1) {
803 		ld = ldb->vals[pos] * 1.0;
804 		un = unhalt->vals[pos] * 1.0;
805 	} else {
806 		ld = ldb->sum * 1.0;
807 		un = unhalt->sum * 1.0;
808 	}
809 	res = (ld * con)/un;
810 	ret = printf("%1.3f", res);
811 	return(ret);
812 }
813 
814 static int
815 cache2(struct counters *cpu, int pos)
816 {
817 	/* ** Suspect ***
818 	 *  8  - ((MEM_LOAD_RETIRED.L3_HIT * 26) + (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) +
819 	 *        (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
820 	 */
821 	int ret;
822 	struct counters *mem1, *mem2, *mem3;
823 	struct counters *unhalt;
824 	double con1, con2, con3, un, me_1, me_2, me_3, res;
825 
826 	con1 = 26.0;
827 	con2 = 43.0;
828 	con3 = 60.0;
829 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
830 /* Call for MEM_LOAD_RETIRED.L3_HIT possibly MEM_LOAD_UOPS_RETIRED.LLC_HIT ?*/
831 	mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
832 	mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
833 	mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
834 	if (pos != -1) {
835 		me_1 = mem1->vals[pos] * 1.0;
836 		me_2 = mem2->vals[pos] * 1.0;
837 		me_3 = mem3->vals[pos] * 1.0;
838 		un = unhalt->vals[pos] * 1.0;
839 	} else {
840 		me_1 = mem1->sum * 1.0;
841 		me_2 = mem2->sum * 1.0;
842 		me_3 = mem3->sum * 1.0;
843 		un = unhalt->sum * 1.0;
844 	}
845 	res = ((me_1 * con1) + (me_2 * con2) + (me_3 * con3))/un;
846 	ret = printf("%1.3f", res);
847 	return(ret);
848 }
849 
850 static int
851 datasharing(struct counters *cpu, int pos)
852 {
853 	/*
854 	 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
855 	 */
856 	int ret;
857 	struct counters *mem;
858 	struct counters *unhalt;
859 	double con, res, me, un;
860 
861 	con = 43.0;
862 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
863 	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
864 	if (pos != -1) {
865 		me = mem->vals[pos] * 1.0;
866 		un = unhalt->vals[pos] * 1.0;
867 	} else {
868 		me = mem->sum * 1.0;
869 		un = unhalt->sum * 1.0;
870 	}
871 	res = (me * con)/un;
872 	ret = printf("%1.3f", res);
873 	return(ret);
874 
875 }
876 
877 
878 static int
879 datasharing_has(struct counters *cpu, int pos)
880 {
881 	/*
882 	 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
883 	 */
884 	int ret;
885 	struct counters *mem;
886 	struct counters *unhalt;
887 	double con, res, me, un;
888 
889 	con = 72.0;
890 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
891 	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
892 	if (pos != -1) {
893 		me = mem->vals[pos] * 1.0;
894 		un = unhalt->vals[pos] * 1.0;
895 	} else {
896 		me = mem->sum * 1.0;
897 		un = unhalt->sum * 1.0;
898 	}
899 	res = (me * con)/un;
900 	ret = printf("%1.3f", res);
901 	return(ret);
902 
903 }
904 
905 
906 static int
907 cache2ib(struct counters *cpu, int pos)
908 {
909         /*
910 	 *  (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
911 	 */
912 	int ret;
913 	struct counters *mem;
914 	struct counters *unhalt;
915 	double con, un, me, res;
916 
917 	con = 29.0;
918 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
919 	mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
920 	if (pos != -1) {
921 		me = mem->vals[pos] * 1.0;
922 		un = unhalt->vals[pos] * 1.0;
923 	} else {
924 		me = mem->sum * 1.0;
925 		un = unhalt->sum * 1.0;
926 	}
927 	res = (con * me)/un;
928 	ret = printf("%1.3f", res);
929 	return(ret);
930 }
931 
932 static int
933 cache2has(struct counters *cpu, int pos)
934 {
935 	/*
936 	 * Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \
937 	 *          (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) +
938 	 *          (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))
939 	 *           / CPU_CLK_UNHALTED.THREAD_P
940 	 */
941 	int ret;
942 	struct counters *mem1, *mem2, *mem3;
943 	struct counters *unhalt;
944 	double con1, con2, con3, un, me1, me2, me3, res;
945 
946 	con1 = 36.0;
947 	con2 = 72.0;
948 	con3 = 84.0;
949 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
950 	mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
951 	mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
952 	mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
953 	if (pos != -1) {
954 		me1 = mem1->vals[pos] * 1.0;
955 		me2 = mem2->vals[pos] * 1.0;
956 		me3 = mem3->vals[pos] * 1.0;
957 		un = unhalt->vals[pos] * 1.0;
958 	} else {
959 		me1 = mem1->sum * 1.0;
960 		me2 = mem2->sum * 1.0;
961 		me3 = mem3->sum * 1.0;
962 		un = unhalt->sum * 1.0;
963 	}
964 	res = ((me1 * con1) + (me2 * con2) + (me3 * con3))/un;
965 	ret = printf("%1.3f", res);
966 	return(ret);
967 }
968 
969 static int
970 cache2broad(struct counters *cpu, int pos)
971 {
972         /*
973 	 *  (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
974 	 */
975 	int ret;
976 	struct counters *mem;
977 	struct counters *unhalt;
978 	double con, un, me, res;
979 
980 	con = 36.0;
981 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
982 	mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L3_HIT");
983 	if (pos != -1) {
984 		me = mem->vals[pos] * 1.0;
985 		un = unhalt->vals[pos] * 1.0;
986 	} else {
987 		me = mem->sum * 1.0;
988 		un = unhalt->sum * 1.0;
989 	}
990 	res = (con * me)/un;
991 	ret = printf("%1.3f", res);
992 	return(ret);
993 }
994 
995 
996 static int
997 cache1(struct counters *cpu, int pos)
998 {
999 	/*  9  - (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
1000 	int ret;
1001 	struct counters *mem;
1002 	struct counters *unhalt;
1003 	double con, un, me, res;
1004 
1005 	con = 180.0;
1006 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1007 	mem = find_counter(cpu, "MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS");
1008 	if (pos != -1) {
1009 		me = mem->vals[pos] * 1.0;
1010 		un = unhalt->vals[pos] * 1.0;
1011 	} else {
1012 		me = mem->sum * 1.0;
1013 		un = unhalt->sum * 1.0;
1014 	}
1015 	res = (me * con)/un;
1016 	ret = printf("%1.3f", res);
1017 	return(ret);
1018 }
1019 
1020 static int
1021 cache1ib(struct counters *cpu, int pos)
1022 {
1023 	/*  9  - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
1024 	int ret;
1025 	struct counters *mem;
1026 	struct counters *unhalt;
1027 	double con, un, me, res;
1028 
1029 	con = 180.0;
1030 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1031 	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM");
1032 	if (pos != -1) {
1033 		me = mem->vals[pos] * 1.0;
1034 		un = unhalt->vals[pos] * 1.0;
1035 	} else {
1036 		me = mem->sum * 1.0;
1037 		un = unhalt->sum * 1.0;
1038 	}
1039 	res = (me * con)/un;
1040 	ret = printf("%1.3f", res);
1041 	return(ret);
1042 }
1043 
1044 
1045 static int
1046 cache1broad(struct counters *cpu, int pos)
1047 {
1048 	/*  9  - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
1049 	int ret;
1050 	struct counters *mem;
1051 	struct counters *unhalt;
1052 	double con, un, me, res;
1053 
1054 	con = 180.0;
1055 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1056 	mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L3_MISS");
1057 	if (pos != -1) {
1058 		me = mem->vals[pos] * 1.0;
1059 		un = unhalt->vals[pos] * 1.0;
1060 	} else {
1061 		me = mem->sum * 1.0;
1062 		un = unhalt->sum * 1.0;
1063 	}
1064 	res = (me * con)/un;
1065 	ret = printf("%1.3f", res);
1066 	return(ret);
1067 }
1068 
1069 
1070 static int
1071 dtlb_missload(struct counters *cpu, int pos)
1072 {
1073 	/* 10  - ((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P (t >=.1) */
1074 	int ret;
1075 	struct counters *dtlb_m, *dtlb_d;
1076 	struct counters *unhalt;
1077 	double con, un, d1, d2, res;
1078 
1079 	con = 7.0;
1080 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1081 	dtlb_m = find_counter(cpu, "DTLB_LOAD_MISSES.STLB_HIT");
1082 	dtlb_d = find_counter(cpu, "DTLB_LOAD_MISSES.WALK_DURATION");
1083 	if (pos != -1) {
1084 		d1 = dtlb_m->vals[pos] * 1.0;
1085 		d2 = dtlb_d->vals[pos] * 1.0;
1086 		un = unhalt->vals[pos] * 1.0;
1087 	} else {
1088 		d1 = dtlb_m->sum * 1.0;
1089 		d2 = dtlb_d->sum * 1.0;
1090 		un = unhalt->sum * 1.0;
1091 	}
1092 	res = ((d1 * con) + d2)/un;
1093 	ret = printf("%1.3f", res);
1094 	return(ret);
1095 }
1096 
1097 static int
1098 dtlb_missstore(struct counters *cpu, int pos)
1099 {
1100         /*
1101 	 * ((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION) /
1102 	 * CPU_CLK_UNHALTED.THREAD_P (t >= .1)
1103 	 */
1104         int ret;
1105         struct counters *dtsb_m, *dtsb_d;
1106         struct counters *unhalt;
1107         double con, un, d1, d2, res;
1108 
1109         con = 7.0;
1110         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1111         dtsb_m = find_counter(cpu, "DTLB_STORE_MISSES.STLB_HIT");
1112         dtsb_d = find_counter(cpu, "DTLB_STORE_MISSES.WALK_DURATION");
1113         if (pos != -1) {
1114                 d1 = dtsb_m->vals[pos] * 1.0;
1115                 d2 = dtsb_d->vals[pos] * 1.0;
1116                 un = unhalt->vals[pos] * 1.0;
1117         } else {
1118                 d1 = dtsb_m->sum * 1.0;
1119                 d2 = dtsb_d->sum * 1.0;
1120                 un = unhalt->sum * 1.0;
1121         }
1122         res = ((d1 * con) + d2)/un;
1123         ret = printf("%1.3f", res);
1124         return(ret);
1125 }
1126 
1127 static int
1128 itlb_miss(struct counters *cpu, int pos)
1129 {
1130 	/* ITLB_MISSES.WALK_DURATION / CPU_CLK_UNTHREAD_P  IB */
1131 	int ret;
1132 	struct counters *itlb;
1133 	struct counters *unhalt;
1134 	double un, d1, res;
1135 
1136 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1137 	itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1138 	if (pos != -1) {
1139 		d1 = itlb->vals[pos] * 1.0;
1140 		un = unhalt->vals[pos] * 1.0;
1141 	} else {
1142 		d1 = itlb->sum * 1.0;
1143 		un = unhalt->sum * 1.0;
1144 	}
1145 	res = d1/un;
1146 	ret = printf("%1.3f", res);
1147 	return(ret);
1148 }
1149 
1150 
1151 static int
1152 itlb_miss_broad(struct counters *cpu, int pos)
1153 {
1154 	/* (7 * ITLB_MISSES.STLB_HIT_4K + ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNTHREAD_P   */
1155 	int ret;
1156 	struct counters *itlb;
1157 	struct counters *unhalt;
1158 	struct counters *four_k;
1159 	double un, d1, res, k;
1160 
1161 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1162 	itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1163 	four_k = find_counter(cpu, "ITLB_MISSES.STLB_HIT_4K");
1164 	if (pos != -1) {
1165 		d1 = itlb->vals[pos] * 1.0;
1166 		un = unhalt->vals[pos] * 1.0;
1167 		k = four_k->vals[pos] * 1.0;
1168 	} else {
1169 		d1 = itlb->sum * 1.0;
1170 		un = unhalt->sum * 1.0;
1171 		k = four_k->sum * 1.0;
1172 	}
1173 	res = (7.0 * k + d1)/un;
1174 	ret = printf("%1.3f", res);
1175 	return(ret);
1176 }
1177 
1178 
1179 static int
1180 icache_miss(struct counters *cpu, int pos)
1181 {
1182 	/* (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P IB */
1183 
1184 	int ret;
1185 	struct counters *itlb, *icache;
1186 	struct counters *unhalt;
1187 	double un, d1, ic, res;
1188 
1189 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1190 	itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1191 	icache = find_counter(cpu, "ICACHE.IFETCH_STALL");
1192 	if (pos != -1) {
1193 		d1 = itlb->vals[pos] * 1.0;
1194 		ic = icache->vals[pos] * 1.0;
1195 		un = unhalt->vals[pos] * 1.0;
1196 	} else {
1197 		d1 = itlb->sum * 1.0;
1198 		ic = icache->sum * 1.0;
1199 		un = unhalt->sum * 1.0;
1200 	}
1201 	res = (ic-d1)/un;
1202 	ret = printf("%1.3f", res);
1203 	return(ret);
1204 
1205 }
1206 
1207 static int
1208 icache_miss_has(struct counters *cpu, int pos)
1209 {
1210 	/* (36 * ICACHE.MISSES) / CPU_CLK_UNHALTED.THREAD_P */
1211 
1212 	int ret;
1213 	struct counters *icache;
1214 	struct counters *unhalt;
1215 	double un, con, ic, res;
1216 
1217 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1218 	icache = find_counter(cpu, "ICACHE.MISSES");
1219 	con = 36.0;
1220 	if (pos != -1) {
1221 		ic = icache->vals[pos] * 1.0;
1222 		un = unhalt->vals[pos] * 1.0;
1223 	} else {
1224 		ic = icache->sum * 1.0;
1225 		un = unhalt->sum * 1.0;
1226 	}
1227 	res = (con * ic)/un;
1228 	ret = printf("%1.3f", res);
1229 	return(ret);
1230 
1231 }
1232 
1233 static int
1234 lcp_stall(struct counters *cpu, int pos)
1235 {
1236          /* ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P IB */
1237 	int ret;
1238 	struct counters *ild;
1239 	struct counters *unhalt;
1240 	double un, d1, res;
1241 
1242 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1243 	ild = find_counter(cpu, "ILD_STALL.LCP");
1244 	if (pos != -1) {
1245 		d1 = ild->vals[pos] * 1.0;
1246 		un = unhalt->vals[pos] * 1.0;
1247 	} else {
1248 		d1 = ild->sum * 1.0;
1249 		un = unhalt->sum * 1.0;
1250 	}
1251 	res = d1/un;
1252 	ret = printf("%1.3f", res);
1253 	return(ret);
1254 
1255 }
1256 
1257 
1258 static int
1259 frontendstall(struct counters *cpu, int pos)
1260 {
1261       /* 12  -  IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4) (thresh >= .15) */
1262 	int ret;
1263 	struct counters *idq;
1264 	struct counters *unhalt;
1265 	double con, un, id, res;
1266 
1267 	con = 4.0;
1268 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1269 	idq = find_counter(cpu, "IDQ_UOPS_NOT_DELIVERED.CORE");
1270 	if (pos != -1) {
1271 		id = idq->vals[pos] * 1.0;
1272 		un = unhalt->vals[pos] * 1.0;
1273 	} else {
1274 		id = idq->sum * 1.0;
1275 		un = unhalt->sum * 1.0;
1276 	}
1277 	res = id/(un * con);
1278 	ret = printf("%1.3f", res);
1279 	return(ret);
1280 }
1281 
1282 static int
1283 clears(struct counters *cpu, int pos)
1284 {
1285 	/* 13  - ((MACHINE_CLEARS.MEMORY_ORDERING + MACHINE_CLEARS.SMC + MACHINE_CLEARS.MASKMOV ) * 100 )
1286 	 *         / CPU_CLK_UNHALTED.THREAD_P (thresh  >= .02)*/
1287 
1288 	int ret;
1289 	struct counters *clr1, *clr2, *clr3;
1290 	struct counters *unhalt;
1291 	double con, un, cl1, cl2, cl3, res;
1292 
1293 	con = 100.0;
1294 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1295 	clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
1296 	clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
1297 	clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
1298 
1299 	if (pos != -1) {
1300 		cl1 = clr1->vals[pos] * 1.0;
1301 		cl2 = clr2->vals[pos] * 1.0;
1302 		cl3 = clr3->vals[pos] * 1.0;
1303 		un = unhalt->vals[pos] * 1.0;
1304 	} else {
1305 		cl1 = clr1->sum * 1.0;
1306 		cl2 = clr2->sum * 1.0;
1307 		cl3 = clr3->sum * 1.0;
1308 		un = unhalt->sum * 1.0;
1309 	}
1310 	res = ((cl1 + cl2 + cl3) * con)/un;
1311 	ret = printf("%1.3f", res);
1312 	return(ret);
1313 }
1314 
1315 static int
1316 clears_broad(struct counters *cpu, int pos)
1317 {
1318 	int ret;
1319 	struct counters *clr1, *clr2, *clr3, *cyc;
1320 	struct counters *unhalt;
1321 	double con, un, cl1, cl2, cl3, cy, res;
1322 
1323 	con = 100.0;
1324 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1325 	clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
1326 	clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
1327 	clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
1328 	cyc = find_counter(cpu, "MACHINE_CLEARS.CYCLES");
1329 	if (pos != -1) {
1330 		cl1 = clr1->vals[pos] * 1.0;
1331 		cl2 = clr2->vals[pos] * 1.0;
1332 		cl3 = clr3->vals[pos] * 1.0;
1333 		cy = cyc->vals[pos] * 1.0;
1334 		un = unhalt->vals[pos] * 1.0;
1335 	} else {
1336 		cl1 = clr1->sum * 1.0;
1337 		cl2 = clr2->sum * 1.0;
1338 		cl3 = clr3->sum * 1.0;
1339 		cy = cyc->sum * 1.0;
1340 		un = unhalt->sum * 1.0;
1341 	}
1342 	/* Formula not listed but extrapulated to add the cy ?? */
1343 	res = ((cl1 + cl2 + cl3 + cy) * con)/un;
1344 	ret = printf("%1.3f", res);
1345 	return(ret);
1346 }
1347 
1348 static int
1349 microassist(struct counters *cpu, int pos)
1350 {
1351 	/* 14  - IDQ.MS_CYCLES / CPU_CLK_UNHALTED.THREAD_P (thresh > .05) */
1352 	int ret;
1353 	struct counters *idq;
1354 	struct counters *unhalt;
1355 	double un, id, res, con;
1356 
1357 	con = 4.0;
1358 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1359 	idq = find_counter(cpu, "IDQ.MS_UOPS");
1360 	if (pos != -1) {
1361 		id = idq->vals[pos] * 1.0;
1362 		un = unhalt->vals[pos] * 1.0;
1363 	} else {
1364 		id = idq->sum * 1.0;
1365 		un = unhalt->sum * 1.0;
1366 	}
1367 	res = id/(un * con);
1368 	ret = printf("%1.3f", res);
1369 	return(ret);
1370 }
1371 
1372 static int
1373 microassist_broad(struct counters *cpu, int pos)
1374 {
1375 	int ret;
1376 	struct counters *idq;
1377 	struct counters *unhalt;
1378 	struct counters *uopiss;
1379 	struct counters *uopret;
1380 	double un, id, res, con, uoi, uor;
1381 
1382 	con = 4.0;
1383 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1384 	idq = find_counter(cpu, "IDQ.MS_UOPS");
1385 	uopiss = find_counter(cpu, "UOPS_ISSUED.ANY");
1386 	uopret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
1387 	if (pos != -1) {
1388 		id = idq->vals[pos] * 1.0;
1389 		un = unhalt->vals[pos] * 1.0;
1390 		uoi = uopiss->vals[pos] * 1.0;
1391 		uor = uopret->vals[pos] * 1.0;
1392 	} else {
1393 		id = idq->sum * 1.0;
1394 		un = unhalt->sum * 1.0;
1395 		uoi = uopiss->sum * 1.0;
1396 		uor = uopret->sum * 1.0;
1397 	}
1398 	res = (uor/uoi) * (id/(un * con));
1399 	ret = printf("%1.3f", res);
1400 	return(ret);
1401 }
1402 
1403 static int
1404 aliasing(struct counters *cpu, int pos)
1405 {
1406 	/* 15  - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */
1407 	int ret;
1408 	struct counters *ld;
1409 	struct counters *unhalt;
1410 	double un, lds, con, res;
1411 
1412 	con = 5.0;
1413 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1414 	ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS");
1415 	if (pos != -1) {
1416 		lds = ld->vals[pos] * 1.0;
1417 		un = unhalt->vals[pos] * 1.0;
1418 	} else {
1419 		lds = ld->sum * 1.0;
1420 		un = unhalt->sum * 1.0;
1421 	}
1422 	res = (lds * con)/un;
1423 	ret = printf("%1.3f", res);
1424 	return(ret);
1425 }
1426 
1427 static int
1428 aliasing_broad(struct counters *cpu, int pos)
1429 {
1430 	/* 15  - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */
1431 	int ret;
1432 	struct counters *ld;
1433 	struct counters *unhalt;
1434 	double un, lds, con, res;
1435 
1436 	con = 7.0;
1437 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1438 	ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS");
1439 	if (pos != -1) {
1440 		lds = ld->vals[pos] * 1.0;
1441 		un = unhalt->vals[pos] * 1.0;
1442 	} else {
1443 		lds = ld->sum * 1.0;
1444 		un = unhalt->sum * 1.0;
1445 	}
1446 	res = (lds * con)/un;
1447 	ret = printf("%1.3f", res);
1448 	return(ret);
1449 }
1450 
1451 
1452 static int
1453 fpassists(struct counters *cpu, int pos)
1454 {
1455 	/* 16  - FP_ASSIST.ANY/INST_RETIRED.ANY_P */
1456 	int ret;
1457 	struct counters *fp;
1458 	struct counters *inst;
1459 	double un, fpd, res;
1460 
1461 	inst = find_counter(cpu, "INST_RETIRED.ANY_P");
1462 	fp = find_counter(cpu, "FP_ASSIST.ANY");
1463 	if (pos != -1) {
1464 		fpd = fp->vals[pos] * 1.0;
1465 		un = inst->vals[pos] * 1.0;
1466 	} else {
1467 		fpd = fp->sum * 1.0;
1468 		un = inst->sum * 1.0;
1469 	}
1470 	res = fpd/un;
1471 	ret = printf("%1.3f", res);
1472 	return(ret);
1473 }
1474 
1475 static int
1476 otherassistavx(struct counters *cpu, int pos)
1477 {
1478 	/* 17  - (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P thresh  .1*/
1479 	int ret;
1480 	struct counters *oth;
1481 	struct counters *unhalt;
1482 	double un, ot, con, res;
1483 
1484 	con = 75.0;
1485 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1486 	oth = find_counter(cpu, "OTHER_ASSISTS.AVX_TO_SSE");
1487 	if (pos != -1) {
1488 		ot = oth->vals[pos] * 1.0;
1489 		un = unhalt->vals[pos] * 1.0;
1490 	} else {
1491 		ot = oth->sum * 1.0;
1492 		un = unhalt->sum * 1.0;
1493 	}
1494 	res = (ot * con)/un;
1495 	ret = printf("%1.3f", res);
1496 	return(ret);
1497 }
1498 
1499 static int
1500 otherassistsse(struct counters *cpu, int pos)
1501 {
1502 
1503 	int ret;
1504 	struct counters *oth;
1505 	struct counters *unhalt;
1506 	double un, ot, con, res;
1507 
1508 	/* 18     (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P  thresh .1*/
1509 	con = 75.0;
1510 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1511 	oth = find_counter(cpu, "OTHER_ASSISTS.SSE_TO_AVX");
1512 	if (pos != -1) {
1513 		ot = oth->vals[pos] * 1.0;
1514 		un = unhalt->vals[pos] * 1.0;
1515 	} else {
1516 		ot = oth->sum * 1.0;
1517 		un = unhalt->sum * 1.0;
1518 	}
1519 	res = (ot * con)/un;
1520 	ret = printf("%1.3f", res);
1521 	return(ret);
1522 }
1523 
1524 static int
1525 efficiency1(struct counters *cpu, int pos)
1526 {
1527 
1528 	int ret;
1529 	struct counters *uops;
1530 	struct counters *unhalt;
1531 	double un, ot, con, res;
1532 
1533         /* 19 (UOPS_RETIRED.RETIRE_SLOTS/(4*CPU_CLK_UNHALTED.THREAD_P) look if thresh < .9*/
1534 	con = 4.0;
1535 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1536 	uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
1537 	if (pos != -1) {
1538 		ot = uops->vals[pos] * 1.0;
1539 		un = unhalt->vals[pos] * 1.0;
1540 	} else {
1541 		ot = uops->sum * 1.0;
1542 		un = unhalt->sum * 1.0;
1543 	}
1544 	res = ot/(con * un);
1545 	ret = printf("%1.3f", res);
1546 	return(ret);
1547 }
1548 
1549 static int
1550 efficiency2(struct counters *cpu, int pos)
1551 {
1552 
1553 	int ret;
1554 	struct counters *uops;
1555 	struct counters *unhalt;
1556 	double un, ot, res;
1557 
1558         /* 20  - CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P good if > 1. (comp factor)*/
1559 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1560 	uops = find_counter(cpu, "INST_RETIRED.ANY_P");
1561 	if (pos != -1) {
1562 		ot = uops->vals[pos] * 1.0;
1563 		un = unhalt->vals[pos] * 1.0;
1564 	} else {
1565 		ot = uops->sum * 1.0;
1566 		un = unhalt->sum * 1.0;
1567 	}
1568 	res = un/ot;
1569 	ret = printf("%1.3f", res);
1570 	return(ret);
1571 }
1572 
1573 #define SANDY_BRIDGE_COUNT 20
1574 static struct cpu_entry sandy_bridge[SANDY_BRIDGE_COUNT] = {
1575 /*01*/	{ "allocstall1", "thresh > .05",
1576 	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW -w 1",
1577 	  allocstall1 },
1578 /*02*/	{ "allocstall2", "thresh > .05",
1579 	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES -w 1",
1580 	  allocstall2 },
1581 /*03*/	{ "br_miss", "thresh >= .2",
1582 	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
1583 	  br_mispredict },
1584 /*04*/	{ "splitload", "thresh >= .1",
1585 	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1",
1586 	  splitload },
1587 /*05*/	{ "splitstore", "thresh >= .01",
1588 	  "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1",
1589 	  splitstore },
1590 /*06*/	{ "contested", "thresh >= .05",
1591 	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1592 	  contested },
1593 /*07*/	{ "blockstorefwd", "thresh >= .05",
1594 	  "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1595 	  blockstoreforward },
1596 /*08*/	{ "cache2", "thresh >= .2",
1597 	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1598 	  cache2 },
1599 /*09*/	{ "cache1", "thresh >= .2",
1600 	  "pmcstat -s MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1601 	  cache1 },
1602 /*10*/	{ "dtlbmissload", "thresh >= .1",
1603 	  "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1604 	  dtlb_missload },
1605 /*11*/	{ "dtlbmissstore", "thresh >= .05",
1606 	  "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1607 	  dtlb_missstore },
1608 /*12*/	{ "frontendstall", "thresh >= .15",
1609 	  "pmcstat -s IDQ_UOPS_NOT_DELIVERED.CORE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1610 	  frontendstall },
1611 /*13*/	{ "clears", "thresh >= .02",
1612 	  "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1613 	  clears },
1614 /*14*/	{ "microassist", "thresh >= .05",
1615 	  "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1616 	  microassist },
1617 /*15*/	{ "aliasing_4k", "thresh >= .1",
1618 	  "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1619 	  aliasing },
1620 /*16*/	{ "fpassist", "look for a excessive value",
1621 	  "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1622 	  fpassists },
1623 /*17*/	{ "otherassistavx", "look for a excessive value",
1624 	  "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1625 	  otherassistavx },
1626 /*18*/	{ "otherassistsse", "look for a excessive value",
1627 	  "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1628 	  otherassistsse },
1629 /*19*/	{ "eff1", "thresh < .9",
1630 	  "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1631 	  efficiency1 },
1632 /*20*/	{ "eff2", "thresh > 1.0",
1633 	  "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1634 	  efficiency2 },
1635 };
1636 
1637 
1638 #define IVY_BRIDGE_COUNT 21
1639 static struct cpu_entry ivy_bridge[IVY_BRIDGE_COUNT] = {
1640 /*1*/	{ "eff1", "thresh < .75",
1641 	  "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1642 	  efficiency1 },
1643 /*2*/	{ "eff2", "thresh > 1.0",
1644 	  "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1645 	  efficiency2 },
1646 /*3*/	{ "itlbmiss", "thresh > .05",
1647 	  "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1648 	  itlb_miss },
1649 /*4*/	{ "icachemiss", "thresh > .05",
1650 	  "pmcstat -s ICACHE.IFETCH_STALL -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1651 	  icache_miss },
1652 /*5*/	{ "lcpstall", "thresh > .05",
1653 	  "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1654 	  lcp_stall },
1655 /*6*/	{ "cache1", "thresh >= .2",
1656 	  "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1657 	  cache1ib },
1658 /*7*/	{ "cache2", "thresh >= .2",
1659 	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1660 	  cache2ib },
1661 /*8*/	{ "contested", "thresh >= .05",
1662 	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1663 	  contested },
1664 /*9*/	{ "datashare", "thresh >= .05",
1665 	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1666 	  datasharing },
1667 /*10*/	{ "blockstorefwd", "thresh >= .05",
1668 	  "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1669 	  blockstoreforward },
1670 /*11*/	{ "splitload", "thresh >= .1",
1671 	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
1672 	  splitloadib },
1673 /*12*/	{ "splitstore", "thresh >= .01",
1674 	  "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1",
1675 	  splitstore },
1676 /*13*/	{ "aliasing_4k", "thresh >= .1",
1677 	  "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1678 	  aliasing },
1679 /*14*/	{ "dtlbmissload", "thresh >= .1",
1680 	  "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1681 	  dtlb_missload },
1682 /*15*/	{ "dtlbmissstore", "thresh >= .05",
1683 	  "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1684 	  dtlb_missstore },
1685 /*16*/	{ "br_miss", "thresh >= .2",
1686 	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",
1687 	  br_mispredictib },
1688 /*17*/	{ "clears", "thresh >= .02",
1689 	  "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1690 	  clears },
1691 /*18*/	{ "microassist", "thresh >= .05",
1692 	  "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1693 	  microassist },
1694 /*19*/	{ "fpassist", "look for a excessive value",
1695 	  "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1696 	  fpassists },
1697 /*20*/	{ "otherassistavx", "look for a excessive value",
1698 	  "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1699 	  otherassistavx },
1700 /*21*/	{ "otherassistsse", "look for a excessive value",
1701 	  "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1702 	  otherassistsse },
1703 };
1704 
1705 #define HASWELL_COUNT 20
1706 static struct cpu_entry haswell[HASWELL_COUNT] = {
1707 /*1*/	{ "eff1", "thresh < .75",
1708 	  "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1709 	  efficiency1 },
1710 /*2*/	{ "eff2", "thresh > 1.0",
1711 	  "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1712 	  efficiency2 },
1713 /*3*/	{ "itlbmiss", "thresh > .05",
1714 	  "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1715 	  itlb_miss },
1716 /*4*/	{ "icachemiss", "thresh > .05",
1717 	  "pmcstat -s ICACHE.MISSES --s CPU_CLK_UNHALTED.THREAD_P -w 1",
1718 	  icache_miss_has },
1719 /*5*/	{ "lcpstall", "thresh > .05",
1720 	  "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1721 	  lcp_stall },
1722 /*6*/	{ "cache1", "thresh >= .2",
1723 	  "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1724 	  cache1ib },
1725 /*7*/	{ "cache2", "thresh >= .2",
1726 	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT  -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1727 	  cache2has },
1728 /*8*/	{ "contested", "thresh >= .05",
1729 	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1730 	  contested_has },
1731 /*9*/	{ "datashare", "thresh >= .05",
1732 	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1733 	  datasharing_has },
1734 /*10*/	{ "blockstorefwd", "thresh >= .05",
1735 	  "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1736 	  blockstoreforward },
1737 /*11*/	{ "splitload", "thresh >= .1",
1738 	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1",
1739 	  splitload },
1740 /*12*/	{ "splitstore", "thresh >= .01",
1741 	  "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1",
1742 	  splitstore },
1743 /*13*/	{ "aliasing_4k", "thresh >= .1",
1744 	  "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1745 	  aliasing },
1746 /*14*/	{ "dtlbmissload", "thresh >= .1",
1747 	  "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1748 	  dtlb_missload },
1749 /*15*/	{ "br_miss", "thresh >= .2",
1750 	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
1751 	  br_mispredict },
1752 /*16*/	{ "clears", "thresh >= .02",
1753 	  "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1754 	  clears },
1755 /*17*/	{ "microassist", "thresh >= .05",
1756 	  "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1757 	  microassist },
1758 /*18*/	{ "fpassist", "look for a excessive value",
1759 	  "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1760 	  fpassists },
1761 /*19*/	{ "otherassistavx", "look for a excessive value",
1762 	  "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1763 	  otherassistavx },
1764 /*20*/	{ "otherassistsse", "look for a excessive value",
1765 	  "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1766 	  otherassistsse },
1767 };
1768 
1769 
/*
 * Describe the canned Broadwell expression called `name`: print the
 * formula that is examined, then a closing sentence telling the user
 * which range of results hints at a possible performance improvement.
 * A name with no entry is reported as unknown and the closing sentence
 * uses "unknown entry" as its hint.
 */
static void
explain_name_broad(const char *name)
{
	/* One row per expression: up to three formula lines plus the hint. */
	static const struct {
		const char *ename;
		const char *text[3];
		const char *hint;
	} notes[] = {
		{ "eff1",
		  { "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n" },
		  "thresh < .75" },
		{ "eff2",
		  { "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n" },
		  "thresh > 1.0" },
		{ "itlbmiss",
		  { "Examine (7 * ITLB_MISSES_STLB_HIT_4K + ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n" },
		  "thresh > .05" },
		{ "icachemiss",
		  { "Examine ( 36.0 * CACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P ??? may not be right \n" },
		  "thresh > .05" },
		{ "lcpstall",
		  { "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n" },
		  "thresh > .05" },
		{ "cache1",
		  { "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n" },
		  "thresh >= .1" },
		{ "cache2",
		  { "Examine (36.0 * MEM_LOAD_UOPS_RETIRED.L3_HIT / CPU_CLK_UNHALTED.THREAD_P)\n" },
		  "thresh >= .2" },
		{ "contested",
		  { "Examine ((MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) +  MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS)/ CPU_CLK_UNHALTED.THREAD_P\n" },
		  "thresh >= .05" },
		{ "datashare",
		  { "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n" },
		  "thresh > .05" },
		{ "blockstorefwd",
		  { "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n" },
		  "thresh >= .05" },
		{ "aliasing_4k",
		  { "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 7) / CPU_CLK_UNHALTED.THREAD_P\n" },
		  "thresh >= .1" },
		{ "dtlbmissload",
		  { "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n",
		    "         / CPU_CLK_UNHALTED.THREAD_P)\n" },
		  "thresh >= .1" },
		{ "br_miss",
		  { "Examine BR_MISP_RETIRED.ALL_BRANCHS_PS / (BR_MISP_RETIED.ALL_BRANCHES_PS + MACHINE_CLEARS.COUNT) *\n",
		    " (UOPS_ISSUEDF.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) /\n",
		    "CPU_CLK_UNHALTED.THREAD * 4)\n" },
		  "thresh >= .2" },
		{ "clears",
		  { "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n",
		    "          MACHINE_CLEARS.SMC + \n",
		    "          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n" },
		  "thresh >= .02" },
		{ "fpassist",
		  { "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n" },
		  "look for a excessive value" },
		{ "otherassistavx",
		  { "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n" },
		  "look for a excessive value" },
		{ "microassist",
		  { "Examine (UOPS_RETIRED.RETIRE_SLOTS/UOPS_ISSUED.ANY) * (IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n",
		    "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n" },
		  "thresh >= .05" },
	};
	const char *hint = NULL;
	size_t row, ln;

	for (row = 0; row < sizeof(notes) / sizeof(notes[0]); row++) {
		if (strcmp(name, notes[row].ename) != 0)
			continue;
		/* Unused trailing text slots are NULL and end the output. */
		for (ln = 0; ln < 3 && notes[row].text[ln] != NULL; ln++)
			fputs(notes[row].text[ln], stdout);
		hint = notes[row].hint;
		break;
	}
	if (hint == NULL) {
		printf("Unknown name:%s\n", name);
		hint = "unknown entry";
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", hint);
}
1838 
1839 
1840 #define BROADWELL_COUNT 17
1841 static struct cpu_entry broadwell[BROADWELL_COUNT] = {
1842 /*1*/	{ "eff1", "thresh < .75",
1843 	  "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1844 	  efficiency1 },
1845 /*2*/	{ "eff2", "thresh > 1.0",
1846 	  "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1847 	  efficiency2 },
1848 /*3*/	{ "itlbmiss", "thresh > .05",
1849 	  "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -s ITLB_MISSES.STLB_HIT_4K -w 1",
1850 	  itlb_miss_broad },
1851 /*4*/	{ "icachemiss", "thresh > .05",
1852 	  "pmcstat -s ICACHE.MISSES --s CPU_CLK_UNHALTED.THREAD_P -w 1",
1853 	  icache_miss_has },
1854 /*5*/	{ "lcpstall", "thresh > .05",
1855 	  "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1856 	  lcp_stall },
1857 /*6*/	{ "cache1", "thresh >= .1",
1858 	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_MISS  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1859 	  cache1broad },
1860 /*7*/	{ "cache2", "thresh >= .2",
1861 	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_HIT  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1862 	  cache2broad },
1863 /*8*/	{ "contested", "thresh >= .05",
1864 	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P  -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS -w 1",
1865 	  contestedbroad },
1866 /*9*/	{ "datashare", "thresh >= .05",
1867 	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1868 	  datasharing_has },
1869 /*10*/	{ "blockstorefwd", "thresh >= .05",
1870 	  "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1871 	  blockstoreforward },
1872 /*11*/	{ "aliasing_4k", "thresh >= .1",
1873 	  "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1874 	  aliasing_broad },
1875 /*12*/	{ "dtlbmissload", "thresh >= .1",
1876 	  "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1877 	  dtlb_missload },
1878 /*13*/	{ "br_miss", "thresh >= .2",
1879 	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.CYCLES -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",
1880 	  br_mispredict_broad },
1881 /*14*/	{ "clears", "thresh >= .02",
1882 	  "pmcstat -s MACHINE_CLEARS.CYCLES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1883 	  clears_broad },
1884 /*15*/	{ "fpassist", "look for a excessive value",
1885 	  "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1886 	  fpassists },
1887 /*16*/	{ "otherassistavx", "look for a excessive value",
1888 	  "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1889 	  otherassistavx },
1890 /*17*/	{ "microassist", "thresh >= .2",
1891 	  "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS  -w 1",
1892 	  microassist_broad },
1893 };
1894 
1895 static void
1896 set_sandybridge(void)
1897 {
1898 	strcpy(the_cpu.cputype, "SandyBridge PMC");
1899 	the_cpu.number = SANDY_BRIDGE_COUNT;
1900 	the_cpu.ents = sandy_bridge;
1901 	the_cpu.explain = explain_name_sb;
1902 }
1903 
1904 static void
1905 set_ivybridge(void)
1906 {
1907 	strcpy(the_cpu.cputype, "IvyBridge PMC");
1908 	the_cpu.number = IVY_BRIDGE_COUNT;
1909 	the_cpu.ents = ivy_bridge;
1910 	the_cpu.explain = explain_name_ib;
1911 }
1912 
1913 
1914 static void
1915 set_haswell(void)
1916 {
1917 	strcpy(the_cpu.cputype, "HASWELL PMC");
1918 	the_cpu.number = HASWELL_COUNT;
1919 	the_cpu.ents = haswell;
1920 	the_cpu.explain = explain_name_has;
1921 }
1922 
1923 static void
1924 set_broadwell(void)
1925 {
1926 	strcpy(the_cpu.cputype, "HASWELL PMC");
1927 	the_cpu.number = BROADWELL_COUNT;
1928 	the_cpu.ents = broadwell;
1929 	the_cpu.explain = explain_name_broad;
1930 }
1931 
1932 static void
1933 set_expression(char *name)
1934 {
1935 	int found = 0, i;
1936 	for(i=0 ; i< the_cpu.number; i++) {
1937 		if (strcmp(name, the_cpu.ents[i].name) == 0) {
1938 			found = 1;
1939 			expression = the_cpu.ents[i].func;
1940 			command = the_cpu.ents[i].command;
1941 			threshold = the_cpu.ents[i].thresh;
1942 			break;
1943 		}
1944 	}
1945 	if (!found) {
1946 		printf("For CPU type %s we have no expression:%s\n",
1947 		       the_cpu.cputype, name);
1948 		exit(-1);
1949 	}
1950 }
1951 
1952 static int
1953 validate_expression(char *name)
1954 {
1955 	int i, found;
1956 
1957 	found = 0;
1958 	for(i=0 ; i< the_cpu.number; i++) {
1959 		if (strcmp(name, the_cpu.ents[i].name) == 0) {
1960 			found = 1;
1961 			break;
1962 		}
1963 	}
1964 	if (!found) {
1965 		return(-1);
1966 	}
1967 	return (0);
1968 }
1969 
1970 static void
1971 do_expression(struct counters *cpu, int pos)
1972 {
1973 	if (expression == NULL)
1974 		return;
1975 	(*expression)(cpu, pos);
1976 }
1977 
1978 static void
1979 process_header(int idx, char *p)
1980 {
1981 	struct counters *up;
1982 	int i, len, nlen;
1983 	/*
1984 	 * Given header element idx, at p in
1985 	 * form 's/NN/nameof'
1986 	 * process the entry to pull out the name and
1987 	 * the CPU number.
1988 	 */
1989 	if (strncmp(p, "s/", 2)) {
1990 		printf("Check -- invalid header no s/ in %s\n",
1991 		       p);
1992 		return;
1993 	}
1994 	up = &cnts[idx];
1995 	up->cpu = strtol(&p[2], NULL, 10);
1996 	len = strlen(p);
1997 	for (i=2; i<len; i++) {
1998 		if (p[i] == '/') {
1999 			nlen = strlen(&p[(i+1)]);
2000 			if (nlen < (MAX_NLEN-1)) {
2001 				strcpy(up->counter_name, &p[(i+1)]);
2002 			} else {
2003 				strncpy(up->counter_name, &p[(i+1)], (MAX_NLEN-1));
2004 			}
2005 		}
2006 	}
2007 }
2008 
2009 static void
2010 build_counters_from_header(FILE *io)
2011 {
2012 	char buffer[8192], *p;
2013 	int i, len, cnt;
2014 	size_t mlen;
2015 
2016 	/* We have a new start, lets
2017 	 * setup our headers and cpus.
2018 	 */
2019 	if (fgets(buffer, sizeof(buffer), io) == NULL) {
2020 		printf("First line can't be read from file err:%d\n", errno);
2021 		return;
2022 	}
2023 	/*
2024 	 * Ok output is an array of counters. Once
2025 	 * we start to read the values in we must
2026 	 * put them in there slot to match there CPU and
2027 	 * counter being updated. We create a mass array
2028 	 * of the counters, filling in the CPU and
2029 	 * counter name.
2030 	 */
2031 	/* How many do we get? */
2032 	len = strlen(buffer);
2033 	for (i=0, cnt=0; i<len; i++) {
2034 		if (strncmp(&buffer[i], "s/", 2) == 0) {
2035 			cnt++;
2036 			for(;i<len;i++) {
2037 				if (buffer[i] == ' ')
2038 					break;
2039 			}
2040 		}
2041 	}
2042 	mlen = sizeof(struct counters) * cnt;
2043 	cnts = malloc(mlen);
2044 	ncnts = cnt;
2045 	if (cnts == NULL) {
2046 		printf("No memory err:%d\n", errno);
2047 		return;
2048 	}
2049 	memset(cnts, 0, mlen);
2050 	for (i=0, cnt=0; i<len; i++) {
2051 		if (strncmp(&buffer[i], "s/", 2) == 0) {
2052 			p = &buffer[i];
2053 			for(;i<len;i++) {
2054 				if (buffer[i] == ' ') {
2055 					buffer[i] = 0;
2056 					break;
2057 				}
2058 			}
2059 			process_header(cnt, p);
2060 			cnt++;
2061 		}
2062 	}
2063 	if (verbose)
2064 		printf("We have %d entries\n", cnt);
2065 }
2066 extern int max_to_collect;
2067 int max_to_collect = MAX_COUNTER_SLOTS;
2068 
2069 static int
2070 read_a_line(FILE *io)
2071 {
2072 	char buffer[8192], *p, *stop;
2073 	int pos, i;
2074 
2075 	if (fgets(buffer, sizeof(buffer), io) == NULL) {
2076 		return(0);
2077 	}
2078 	p = buffer;
2079 	for (i=0; i<ncnts; i++) {
2080 		pos = cnts[i].pos;
2081 		cnts[i].vals[pos] = strtol(p, &stop, 0);
2082 		cnts[i].pos++;
2083 		cnts[i].sum += cnts[i].vals[pos];
2084 		p = stop;
2085 	}
2086 	return (1);
2087 }
2088 
2089 extern int cpu_count_out;
2090 int cpu_count_out=0;
2091 
2092 static void
2093 print_header(void)
2094 {
2095 	int i, cnt, printed_cnt;
2096 
2097 	printf("*********************************\n");
2098 	for(i=0, cnt=0; i<MAX_CPU; i++) {
2099 		if (glob_cpu[i]) {
2100 			cnt++;
2101 		}
2102 	}
2103 	cpu_count_out = cnt;
2104 	for(i=0, printed_cnt=0; i<MAX_CPU; i++) {
2105 		if (glob_cpu[i]) {
2106 			printf("CPU%d", i);
2107 			printed_cnt++;
2108 		}
2109 		if (printed_cnt == cnt) {
2110 			printf("\n");
2111 			break;
2112 		} else {
2113 			printf("\t");
2114 		}
2115 	}
2116 }
2117 
2118 static void
2119 lace_cpus_together(void)
2120 {
2121 	int i, j, lace_cpu;
2122 	struct counters *cpat, *at;
2123 
2124 	for(i=0; i<ncnts; i++) {
2125 		cpat = &cnts[i];
2126 		if (cpat->next_cpu) {
2127 			/* Already laced in */
2128 			continue;
2129 		}
2130 		lace_cpu = cpat->cpu;
2131 		if (lace_cpu >= MAX_CPU) {
2132 			printf("CPU %d to big\n", lace_cpu);
2133 			continue;
2134 		}
2135 		if (glob_cpu[lace_cpu] == NULL) {
2136 			glob_cpu[lace_cpu] = cpat;
2137 		} else {
2138 			/* Already processed this cpu */
2139 			continue;
2140 		}
2141 		/* Ok look forward for cpu->cpu and link in */
2142 		for(j=(i+1); j<ncnts; j++) {
2143 			at = &cnts[j];
2144 			if (at->next_cpu) {
2145 				continue;
2146 			}
2147 			if (at->cpu == lace_cpu) {
2148 				/* Found one */
2149 				cpat->next_cpu = at;
2150 				cpat = at;
2151 			}
2152 		}
2153 	}
2154 }
2155 
2156 
2157 static void
2158 process_file(char *filename)
2159 {
2160 	FILE *io;
2161 	int i;
2162 	int line_at, not_done;
2163 	pid_t pid_of_command=0;
2164 
2165 	if (filename ==  NULL) {
2166 		io = my_popen(command, "r", &pid_of_command);
2167 		if (io == NULL) {
2168 			printf("Can't popen the command %s\n", command);
2169 			return;
2170 		}
2171 	} else {
2172 		io = fopen(filename, "r");
2173 		if (io == NULL) {
2174 			printf("Can't process file %s err:%d\n",
2175 			       filename, errno);
2176 			return;
2177 		}
2178 	}
2179 	build_counters_from_header(io);
2180 	if (cnts == NULL) {
2181 		/* Nothing we can do */
2182 		printf("Nothing to do -- no counters built\n");
2183 		if (filename) {
2184 			fclose(io);
2185 		} else {
2186 			my_pclose(io, pid_of_command);
2187 		}
2188 		return;
2189 	}
2190 	lace_cpus_together();
2191 	print_header();
2192 	if (verbose) {
2193 		for (i=0; i<ncnts; i++) {
2194 			printf("Counter:%s cpu:%d index:%d\n",
2195 			       cnts[i].counter_name,
2196 			       cnts[i].cpu, i);
2197 		}
2198 	}
2199 	line_at = 0;
2200 	not_done = 1;
2201 	while(not_done) {
2202 		if (read_a_line(io)) {
2203 			line_at++;
2204 		} else {
2205 			break;
2206 		}
2207 		if (line_at >= max_to_collect) {
2208 			not_done = 0;
2209 		}
2210 		if (filename == NULL) {
2211 			int cnt;
2212 			/* For the ones we dynamically open we print now */
2213 			for(i=0, cnt=0; i<MAX_CPU; i++) {
2214 				do_expression(glob_cpu[i], (line_at-1));
2215 				cnt++;
2216 				if (cnt == cpu_count_out) {
2217 					printf("\n");
2218 					break;
2219 				} else {
2220 					printf("\t");
2221 				}
2222 			}
2223 		}
2224 	}
2225 	if (filename) {
2226 		fclose(io);
2227 	} else {
2228 		my_pclose(io, pid_of_command);
2229 	}
2230 }
2231 #if defined(__amd64__)
2232 #define cpuid(in,a,b,c,d)\
2233   asm("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (in));
2234 #else
2235 #define cpuid(in, a, b, c, d)
2236 #endif
2237 
2238 static void
2239 get_cpuid_set(void)
2240 {
2241 	unsigned long eax, ebx, ecx, edx;
2242 	int model;
2243 	pid_t pid_of_command=0;
2244 	size_t sz, len;
2245 	FILE *io;
2246 	char linebuf[1024], *str;
2247 
2248 	eax = ebx = ecx = edx = 0;
2249 
2250 	cpuid(0, eax, ebx, ecx, edx);
2251 	if (ebx == 0x68747541) {
2252 		printf("AMD processors are not supported by this program\n");
2253 		printf("Sorry\n");
2254 		exit(0);
2255 	} else if (ebx == 0x6972794) {
2256 		printf("Cyrix processors are not supported by this program\n");
2257 		printf("Sorry\n");
2258 		exit(0);
2259 	} else if (ebx == 0x756e6547) {
2260 		printf("Genuine Intel\n");
2261 	} else {
2262 		printf("Unknown processor type 0x%lx Only Intel AMD64 types are supported by this routine!\n", ebx);
2263 		exit(0);
2264 	}
2265 	cpuid(1, eax, ebx, ecx, edx);
2266 	model = (((eax & 0xF0000) >> 12) | ((eax & 0xF0) >> 4));
2267 	printf("CPU model is 0x%x id:0x%lx\n", model, eax);
2268 	switch (eax & 0xF00) {
2269 	case 0x500:		/* Pentium family processors */
2270 		printf("Intel Pentium P5\n");
2271 		goto not_supported;
2272 		break;
2273 	case 0x600:		/* Pentium Pro, Celeron, Pentium II & III */
2274 		switch (model) {
2275 		case 0x1:
2276 			printf("Intel Pentium P6\n");
2277 			goto not_supported;
2278 			break;
2279 		case 0x3:
2280 		case 0x5:
2281 			printf("Intel PII\n");
2282 			goto not_supported;
2283 			break;
2284 		case 0x6: case 0x16:
2285 			printf("Intel CL\n");
2286 			goto not_supported;
2287 			break;
2288 		case 0x7: case 0x8: case 0xA: case 0xB:
2289 			printf("Intel PIII\n");
2290 			goto not_supported;
2291 			break;
2292 		case 0x9: case 0xD:
2293 			printf("Intel PM\n");
2294 			goto not_supported;
2295 			break;
2296 		case 0xE:
2297 			printf("Intel CORE\n");
2298 			goto not_supported;
2299 			break;
2300 		case 0xF:
2301 			printf("Intel CORE2\n");
2302 			goto not_supported;
2303 			break;
2304 		case 0x17:
2305 			printf("Intel CORE2EXTREME\n");
2306 			goto not_supported;
2307 			break;
2308 		case 0x1C:	/* Per Intel document 320047-002. */
2309 			printf("Intel ATOM\n");
2310 			goto not_supported;
2311 			break;
2312 		case 0x1A:
2313 		case 0x1E:	/*
2314 				 * Per Intel document 253669-032 9/2009,
2315 				 * pages A-2 and A-57
2316 				 */
2317 		case 0x1F:	/*
2318 				 * Per Intel document 253669-032 9/2009,
2319 				 * pages A-2 and A-57
2320 				 */
2321 			printf("Intel COREI7\n");
2322 			goto not_supported;
2323 			break;
2324 		case 0x2E:
2325 			printf("Intel NEHALEM\n");
2326 			goto not_supported;
2327 			break;
2328 		case 0x25:	/* Per Intel document 253669-033US 12/2009. */
2329 		case 0x2C:	/* Per Intel document 253669-033US 12/2009. */
2330 			printf("Intel WESTMERE\n");
2331 			goto not_supported;
2332 			break;
2333 		case 0x2F:	/* Westmere-EX, seen in wild */
2334 			printf("Intel WESTMERE\n");
2335 			goto not_supported;
2336 			break;
2337 		case 0x2A:	/* Per Intel document 253669-039US 05/2011. */
2338 			printf("Intel SANDYBRIDGE\n");
2339 			set_sandybridge();
2340 			break;
2341 		case 0x2D:	/* Per Intel document 253669-044US 08/2012. */
2342 			printf("Intel SANDYBRIDGE_XEON\n");
2343 			set_sandybridge();
2344 			break;
2345 		case 0x3A:	/* Per Intel document 253669-043US 05/2012. */
2346 			printf("Intel IVYBRIDGE\n");
2347 			set_ivybridge();
2348 			break;
2349 		case 0x3E:	/* Per Intel document 325462-045US 01/2013. */
2350 			printf("Intel IVYBRIDGE_XEON\n");
2351 			set_ivybridge();
2352 			break;
2353 		case 0x3F:	/* Per Intel document 325462-045US 09/2014. */
2354 			printf("Intel HASWELL (Xeon)\n");
2355 			set_haswell();
2356 			break;
2357 		case 0x3C:	/* Per Intel document 325462-045US 01/2013. */
2358 		case 0x45:
2359 		case 0x46:
2360 			printf("Intel HASWELL\n");
2361 			set_haswell();
2362 			break;
2363 
2364 		case 0x4e:
2365 		case 0x5e:
2366 			printf("Intel SKY-LAKE\n");
2367 			goto not_supported;
2368 			break;
2369 		case 0x3D:
2370 		case 0x47:
2371 			printf("Intel BROADWELL\n");
2372 			set_broadwell();
2373 			break;
2374 		case 0x4f:
2375 		case 0x56:
2376 			printf("Intel BROADWEL (Xeon)L\n");
2377 			set_broadwell();
2378 			break;
2379 
2380 		case 0x4D:
2381 			/* Per Intel document 330061-001 01/2014. */
2382 			printf("Intel ATOM_SILVERMONT\n");
2383 			goto not_supported;
2384 			break;
2385 		default:
2386 			printf("Intel model 0x%x is not known -- sorry\n",
2387 			       model);
2388 			goto not_supported;
2389 			break;
2390 		}
2391 		break;
2392 	case 0xF00:		/* P4 */
2393 		printf("Intel unknown model %d\n", model);
2394 		goto not_supported;
2395 		break;
2396 	}
2397 	/* Ok lets load the list of all known PMC's */
2398 	io = my_popen("/usr/sbin/pmccontrol -L", "r", &pid_of_command);
2399 	if (valid_pmcs == NULL) {
2400 		/* Likely */
2401 		pmc_allocated_cnt = PMC_INITIAL_ALLOC;
2402 		sz = sizeof(char *) * pmc_allocated_cnt;
2403 		valid_pmcs = malloc(sz);
2404 		if (valid_pmcs == NULL) {
2405 			printf("No memory allocation fails at startup?\n");
2406 			exit(-1);
2407 		}
2408 		memset(valid_pmcs, 0, sz);
2409 	}
2410 
2411 	while (fgets(linebuf, sizeof(linebuf), io) != NULL) {
2412 		if (linebuf[0] != '\t') {
2413 			/* sometimes headers ;-) */
2414 			continue;
2415 		}
2416 		len = strlen(linebuf);
2417 		if (linebuf[(len-1)] == '\n') {
2418 			/* Likely */
2419 			linebuf[(len-1)] = 0;
2420 		}
2421 		str = &linebuf[1];
2422 		len = strlen(str) + 1;
2423 		valid_pmcs[valid_pmc_cnt] = malloc(len);
2424 		if (valid_pmcs[valid_pmc_cnt] == NULL) {
2425 			printf("No memory2 allocation fails at startup?\n");
2426 			exit(-1);
2427 		}
2428 		memset(valid_pmcs[valid_pmc_cnt], 0, len);
2429 		strcpy(valid_pmcs[valid_pmc_cnt], str);
2430 		valid_pmc_cnt++;
2431 		if (valid_pmc_cnt >= pmc_allocated_cnt) {
2432 			/* Got to expand -- unlikely */
2433 			char **more;
2434 
2435 			sz = sizeof(char *) * (pmc_allocated_cnt * 2);
2436 			more = malloc(sz);
2437 			if (more == NULL) {
2438 				printf("No memory3 allocation fails at startup?\n");
2439 				exit(-1);
2440 			}
2441 			memset(more, 0, sz);
2442 			memcpy(more, valid_pmcs, sz);
2443 			pmc_allocated_cnt *= 2;
2444 			free(valid_pmcs);
2445 			valid_pmcs = more;
2446 		}
2447 	}
2448 	my_pclose(io, pid_of_command);
2449 	return;
2450 not_supported:
2451 	printf("Not supported\n");
2452 	exit(-1);
2453 }
2454 
2455 static void
2456 explain_all(void)
2457 {
2458 	int i;
2459 	printf("For CPU's of type %s the following expressions are available:\n",the_cpu.cputype);
2460 	printf("-------------------------------------------------------------\n");
2461 	for(i=0; i<the_cpu.number; i++){
2462 		printf("For -e %s ", the_cpu.ents[i].name);
2463 		(*the_cpu.explain)(the_cpu.ents[i].name);
2464 		printf("----------------------------\n");
2465 	}
2466 }
2467 
/*
 * Check that pmcstat can count the PMC called `pmc` and print the
 * verdict on the current output line.
 *
 * `out_so_far` is the number of characters already printed on the
 * line; the verdict is padded so that it starts at column 50.  The
 * test passes when the first line pmcstat prints contains the PMC
 * name and a second line with a value follows; that value is printed
 * after "Pass".  Any other outcome prints a "Failed" message.
 */
static void
test_for_a_pmc(const char *pmc, int out_so_far)
{
	FILE *io;
	pid_t pid_of_command=0;
	char my_command[1024];
	char line[1024];
	int len, llen, i;

	if (out_so_far < 50) {
		len = 50 - out_so_far;
		for(i=0; i<len; i++) {
			printf(" ");
		}
	}
	/* Bound the write: the name comes from external tool output */
	len = snprintf(my_command, sizeof(my_command),
	    "/usr/sbin/pmcstat -w .25 -c 0 -s %s", pmc);
	if (len < 0 || (size_t)len >= sizeof(my_command)) {
		printf("Failed -- PMC name too long\n");
		return;
	}
	io = my_popen(my_command, "r", &pid_of_command);
	if (io == NULL) {
		printf("Failed -- popen fails\n");
		return;
	}
	/* What we expect to find in the header line is the name itself */
	len = strlen(pmc);
	if (fgets(line, sizeof(line), io) == NULL) {
		printf("Failed -- no output from pmstat\n");
		goto out;
	}
	llen = strlen(line);
	if (llen > 0 && line[(llen-1)] == '\n') {
		line[(llen-1)] = 0;
		llen--;
	}
	for(i=2; i<(llen-len); i++) {
		if (strncmp(&line[i], "ERROR", 5) == 0) {
			printf("Failed %s\n", line);
			goto out;
		} else if (strncmp(&line[i], pmc, len) == 0) {
			const char *val;
			int k;

			if (fgets(line, sizeof(line), io) == NULL) {
				printf("Failed -- no second output from pmstat\n");
				goto out;
			}
			/*
			 * Skip every leading blank.  Stepping twice per
			 * blank could jump over the first digit.
			 */
			val = line;
			while (*val == ' ') {
				val++;
			}
			printf("Pass");
			len = strlen(val);
			if (len < 20) {
				for(k=0; k<(20-len); k++) {
					printf(" ");
				}
			}
			if (len) {
				printf("%s", val);
			} else {
				printf("\n");
			}
			goto out;
		}
	}
	printf("Failed -- '%s' not '%s'\n", line, pmc);
out:
	my_pclose(io, pid_of_command);

}
2540 
/*
 * Add `name` to the list `vars`, which currently holds `cur_cnt`
 * entries, unless it is already there.
 *
 * Returns 1 when a private copy of the name was stored in
 * vars[cur_cnt] and 0 when the name was already present.  The program
 * exits when the target slot is unexpectedly occupied or when memory
 * runs out.
 */
static int
add_it_to(char **vars, int cur_cnt, char *name)
{
	char *copy;
	size_t bytes;
	int n;

	for (n = 0; n < cur_cnt; n++) {
		if (strcmp(vars[n], name) == 0)
			return (0);	/* Already have */
	}
	if (vars[cur_cnt] != NULL) {
		printf("Cur_cnt:%d filled with %s??\n",
		       cur_cnt, vars[cur_cnt]);
		exit(-1);
	}
	/* Ok its new, keep our own copy including the terminator */
	bytes = strlen(name) + 1;
	copy = malloc(bytes);
	if (copy == NULL) {
		printf("No memory %s\n", __func__);
		exit(-1);
	}
	memcpy(copy, name, bytes);
	vars[cur_cnt] = copy;
	return (1);
}
2568 
2569 static char *
2570 build_command_for_exp(struct expression *exp)
2571 {
2572 	/*
2573 	 * Build the pmcstat command to handle
2574 	 * the passed in expression.
2575 	 * /usr/sbin/pmcstat -w 1 -s NNN -s QQQ
2576 	 * where NNN and QQQ represent the PMC's in the expression
2577 	 * uniquely..
2578 	 */
2579 	char forming[1024];
2580 	int cnt_pmc, alloced_pmcs, i;
2581 	struct expression *at;
2582 	char **vars, *cmd;
2583 	size_t mal;
2584 
2585 	alloced_pmcs = cnt_pmc = 0;
2586 	/* first how many do we have */
2587 	at = exp;
2588 	while (at) {
2589 		if (at->type == TYPE_VALUE_PMC) {
2590 			cnt_pmc++;
2591 		}
2592 		at = at->next;
2593 	}
2594 	if (cnt_pmc == 0) {
2595 		printf("No PMC's in your expression -- nothing to do!!\n");
2596 		exit(0);
2597 	}
2598 	mal = cnt_pmc * sizeof(char *);
2599 	vars = malloc(mal);
2600 	if (vars == NULL) {
2601 		printf("No memory\n");
2602 		exit(-1);
2603 	}
2604 	memset(vars, 0, mal);
2605 	at = exp;
2606 	while (at) {
2607 		if (at->type == TYPE_VALUE_PMC) {
2608 			if(add_it_to(vars, alloced_pmcs, at->name)) {
2609 				alloced_pmcs++;
2610 			}
2611 		}
2612 		at = at->next;
2613 	}
2614 	/* Now we have a unique list in vars so create our command */
2615 	mal = 23; /*	"/usr/sbin/pmcstat -w 1"  + \0 */
2616 	for(i=0; i<alloced_pmcs; i++) {
2617 		mal += strlen(vars[i]) + 4;	/* var + " -s " */
2618 	}
2619 	cmd = malloc((mal+2));
2620 	if (cmd == NULL) {
2621 		printf("%s out of mem\n", __FUNCTION__);
2622 		exit(-1);
2623 	}
2624 	memset(cmd, 0, (mal+2));
2625 	strcpy(cmd, "/usr/sbin/pmcstat -w 1");
2626 	at = exp;
2627 	for(i=0; i<alloced_pmcs; i++) {
2628 		sprintf(forming, " -s %s", vars[i]);
2629 		strcat(cmd, forming);
2630 		free(vars[i]);
2631 		vars[i] = NULL;
2632 	}
2633 	free(vars);
2634 	return(cmd);
2635 }
2636 
2637 static int
2638 user_expr(struct counters *cpu, int pos)
2639 {
2640 	int ret;
2641 	double res;
2642 	struct counters *var;
2643 	struct expression *at;
2644 
2645 	at = master_exp;
2646 	while (at) {
2647 		if (at->type == TYPE_VALUE_PMC) {
2648 			var = find_counter(cpu, at->name);
2649 			if (var == NULL) {
2650 				printf("%s:Can't find counter %s?\n", __FUNCTION__, at->name);
2651 				exit(-1);
2652 			}
2653 			if (pos != -1) {
2654 				at->value = var->vals[pos] * 1.0;
2655 			} else {
2656 				at->value = var->sum * 1.0;
2657 			}
2658 		}
2659 		at = at->next;
2660 	}
2661 	res = run_expr(master_exp, 1, NULL);
2662 	ret = printf("%1.3f", res);
2663 	return(ret);
2664 }
2665 
2666 
2667 static void
2668 set_manual_exp(struct expression *exp)
2669 {
2670 	expression = user_expr;
2671 	command = build_command_for_exp(exp);
2672 	threshold = "User defined threshold";
2673 }
2674 
2675 static void
2676 run_tests(void)
2677 {
2678 	int i, lenout;
2679 	printf("Running tests on %d PMC's this may take some time\n", valid_pmc_cnt);
2680 	printf("------------------------------------------------------------------------\n");
2681 	for(i=0; i<valid_pmc_cnt; i++) {
2682 		lenout = printf("%s", valid_pmcs[i]);
2683 		fflush(stdout);
2684 		test_for_a_pmc(valid_pmcs[i], lenout);
2685 	}
2686 }
2687 static void
2688 list_all(void)
2689 {
2690 	int i, cnt, j;
2691 	printf("PMC                                               Abbreviation\n");
2692 	printf("--------------------------------------------------------------\n");
2693 	for(i=0; i<valid_pmc_cnt; i++) {
2694 		cnt = printf("%s", valid_pmcs[i]);
2695 		for(j=cnt; j<52; j++) {
2696 			printf(" ");
2697 		}
2698 		printf("%%%d\n", i);
2699 	}
2700 }
2701 
2702 
2703 int
2704 main(int argc, char **argv)
2705 {
2706 	int i, j, cnt;
2707 	char *filename=NULL;
2708 	char *name=NULL;
2709 	int help_only = 0;
2710 	int test_mode = 0;
2711 
2712 	get_cpuid_set();
2713 	memset(glob_cpu, 0, sizeof(glob_cpu));
2714 	while ((i = getopt(argc, argv, "LHhvm:i:?e:TE:")) != -1) {
2715 		switch (i) {
2716 		case 'L':
2717 			list_all();
2718 			return(0);
2719 		case 'H':
2720 			printf("**********************************\n");
2721 			explain_all();
2722 			printf("**********************************\n");
2723 			return(0);
2724 			break;
2725 		case 'T':
2726 			test_mode = 1;
2727 			break;
2728 		case 'E':
2729 			master_exp = parse_expression(optarg);
2730 			if (master_exp) {
2731 				set_manual_exp(master_exp);
2732 			}
2733 			break;
2734 		case 'e':
2735 			if (validate_expression(optarg)) {
2736 				printf("Unknown expression %s\n", optarg);
2737 				return(0);
2738 			}
2739 			name = optarg;
2740 			set_expression(optarg);
2741 			break;
2742 		case 'm':
2743 			max_to_collect = strtol(optarg, NULL, 0);
2744 			if (max_to_collect > MAX_COUNTER_SLOTS) {
2745 				/* You can't collect more than max in array */
2746 				max_to_collect = MAX_COUNTER_SLOTS;
2747 			}
2748 			break;
2749 		case 'v':
2750 			verbose++;
2751 			break;
2752 		case 'h':
2753 			help_only = 1;
2754 			break;
2755 		case 'i':
2756 			filename = optarg;
2757 			break;
2758 		case '?':
2759 		default:
2760 		use:
2761 			printf("Use %s [ -i inputfile -v -m max_to_collect -e expr -E -h -? -H]\n",
2762 			       argv[0]);
2763 			printf("-i inputfile -- use source as inputfile not stdin (if stdin collect)\n");
2764 			printf("-v -- verbose dump debug type things -- you don't want this\n");
2765 			printf("-m N -- maximum to collect is N measurments\n");
2766 			printf("-e expr-name -- Do expression expr-name\n");
2767 			printf("-E 'your expression' -- Do your expression\n");
2768 			printf("-h -- Don't do the expression I put in -e xxx just explain what it does and exit\n");
2769 			printf("-H -- Don't run anything, just explain all canned expressions\n");
2770 			printf("-T -- Test all PMC's defined by this processor\n");
2771 			return(0);
2772 			break;
2773 		};
2774 	}
2775 	if ((name == NULL) && (filename == NULL) && (test_mode == 0) && (master_exp == NULL)) {
2776 		printf("Without setting an expression we cannot dynamically gather information\n");
2777 		printf("you must supply a filename (and you probably want verbosity)\n");
2778 		goto use;
2779 	}
2780 	if (test_mode) {
2781 		run_tests();
2782 		return(0);
2783 	}
2784 	printf("*********************************\n");
2785 	if (master_exp == NULL) {
2786 		(*the_cpu.explain)(name);
2787 	} else {
2788 		printf("Examine your expression ");
2789 		print_exp(master_exp);
2790 		printf("User defined threshold\n");
2791 	}
2792 	if (help_only) {
2793 		return(0);
2794 	}
2795 	process_file(filename);
2796 	if (verbose >= 2) {
2797 		for (i=0; i<ncnts; i++) {
2798 			printf("Counter:%s cpu:%d index:%d\n",
2799 			       cnts[i].counter_name,
2800 			       cnts[i].cpu, i);
2801 			for(j=0; j<cnts[i].pos; j++) {
2802 				printf(" val - %ld\n", (long int)cnts[i].vals[j]);
2803 			}
2804 			printf(" sum - %ld\n", (long int)cnts[i].sum);
2805 		}
2806 	}
2807 	if (expression == NULL) {
2808 		return(0);
2809 	}
2810 	for(i=0, cnt=0; i<MAX_CPU; i++) {
2811 		if (glob_cpu[i]) {
2812 			do_expression(glob_cpu[i], -1);
2813 			cnt++;
2814 			if (cnt == cpu_count_out) {
2815 				printf("\n");
2816 				break;
2817 			} else {
2818 				printf("\t");
2819 			}
2820 		}
2821 	}
2822 	return(0);
2823 }
2824