xref: /freebsd/usr.sbin/pmcstudy/pmcstudy.c (revision 4436b51dff5736e74da464946049ea6899a88938)
1 /*-
2  * Copyright (c) 2014, 2015 Netflix Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer,
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 #include <sys/types.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <unistd.h>
32 #include <string.h>
33 #include <strings.h>
34 #include <sys/errno.h>
35 #include <signal.h>
36 #include <sys/wait.h>
37 #include <getopt.h>
38 #include "eval_expr.h"
39 __FBSDID("$FreeBSD$");
40 
/* Number of sample slots in struct counters.vals[]. */
#define MAX_COUNTER_SLOTS 1024
/* Size in bytes of the struct counters.counter_name[] buffer. */
#define MAX_NLEN 64
/* Number of entries in the glob_cpu[] array. */
#define MAX_CPU 64
/* Verbosity flag; starts out as 0. */
static int verbose = 0;

/* Process environment; my_popen() hands it to execve(). */
extern char **environ;
/*
 * The globals below are each declared extern and then defined on the
 * following line.
 * NOTE(review): presumably done so that every definition has a prior
 * declaration under strict warning flags -- confirm.
 */
extern struct expression *master_exp;
struct expression *master_exp=NULL;

/*
 * NOTE(review): looks like the initial capacity used when valid_pmcs is
 * first allocated; the allocation code is not in this part of the file --
 * confirm.
 */
#define PMC_INITIAL_ALLOC 512
/* Array of strings, initially NULL. */
extern char **valid_pmcs;
char **valid_pmcs = NULL;
/* NOTE(review): presumably the number of entries in use in valid_pmcs -- confirm. */
extern int valid_pmc_cnt;
int valid_pmc_cnt=0;
/* NOTE(review): presumably the number of entries allocated for valid_pmcs -- confirm. */
extern int pmc_allocated_cnt;
int pmc_allocated_cnt=0;
57 
58 /*
59  * The following two varients on popen and pclose with
60  * the cavet that they get you the PID so that you
61  * can supply it to pclose so it can send a SIGTERM
62  *  to the process.
63  */
64 static FILE *
65 my_popen(const char *command, const char *dir, pid_t *p_pid)
66 {
67 	FILE *io_out, *io_in;
68 	int pdesin[2], pdesout[2];
69 	char *argv[4];
70 	pid_t pid;
71 	char cmd[4];
72 	char cmd2[1024];
73 	char arg1[4];
74 
75 	if ((strcmp(dir, "r") != 0) &&
76 	    (strcmp(dir, "w") != 0)) {
77 		errno = EINVAL;
78 		return(NULL);
79 	}
80 	if (pipe(pdesin) < 0)
81 		return (NULL);
82 
83 	if (pipe(pdesout) < 0) {
84 		(void)close(pdesin[0]);
85 		(void)close(pdesin[1]);
86 		return (NULL);
87 	}
88 	strcpy(cmd, "sh");
89 	strcpy(arg1, "-c");
90 	strcpy(cmd2, command);
91 	argv[0] = cmd;
92 	argv[1] = arg1;
93 	argv[2] = cmd2;
94 	argv[3] = NULL;
95 
96 	switch (pid = fork()) {
97 	case -1:			/* Error. */
98 		(void)close(pdesin[0]);
99 		(void)close(pdesin[1]);
100 		(void)close(pdesout[0]);
101 		(void)close(pdesout[1]);
102 		return (NULL);
103 		/* NOTREACHED */
104 	case 0:				/* Child. */
105 		/* Close out un-used sides */
106 		(void)close(pdesin[1]);
107 		(void)close(pdesout[0]);
108 		/* Now prepare the stdin of the process */
109 		close(0);
110 		(void)dup(pdesin[0]);
111 		(void)close(pdesin[0]);
112 		/* Now prepare the stdout of the process */
113 		close(1);
114 		(void)dup(pdesout[1]);
115 		/* And lets do stderr just in case */
116 		close(2);
117 		(void)dup(pdesout[1]);
118 		(void)close(pdesout[1]);
119 		/* Now run it */
120 		execve("/bin/sh", argv, environ);
121 		exit(127);
122 		/* NOTREACHED */
123 	}
124 	/* Parent; assume fdopen can't fail. */
125 	/* Store the pid */
126 	*p_pid = pid;
127 	if (strcmp(dir, "r") != 0) {
128 		io_out = fdopen(pdesin[1], "w");
129 		(void)close(pdesin[0]);
130 		(void)close(pdesout[0]);
131 		(void)close(pdesout[1]);
132 		return(io_out);
133  	} else {
134 		/* Prepare the input stream */
135 		io_in = fdopen(pdesout[0], "r");
136 		(void)close(pdesout[1]);
137 		(void)close(pdesin[0]);
138 		(void)close(pdesin[1]);
139 		return (io_in);
140 	}
141 }
142 
/*
 * my_pclose --
 *	Counterpart of my_popen().  Closes the stream, sends SIGTERM to
 *	the_pid and then waits for that process, retrying the wait for as
 *	long as it fails with EINTR.  Nothing is returned: the results of
 *	fclose() and kill() are ignored and the collected exit status is
 *	discarded.
 */
static void
my_pclose(FILE *io, pid_t the_pid)
{
	int status;
	pid_t reaped;

	(void)fclose(io);
	/* Die if you are not dead! */
	kill(the_pid, SIGTERM);
	for (;;) {
		reaped = wait4(the_pid, &status, 0, (struct rusage *)0);
		if (reaped != -1 || errno != EINTR)
			break;
	}
}
164 
/*
 * Samples collected for one named counter.  Records are chained through
 * next_cpu; find_counter() walks that chain to locate a counter by name.
 * The metric functions read either vals[pos] or, when asked for pos -1,
 * the sum field.
 */
struct counters {
	struct counters *next_cpu;		/* Next record on the chain searched by find_counter() */
	char counter_name[MAX_NLEN];		/* Name of counter */
	int cpu;				/* CPU we are on */
	int pos;				/* Index we are filling to. */
	uint64_t vals[MAX_COUNTER_SLOTS];	/* Sample slots (MAX_COUNTER_SLOTS of them) */
	uint64_t sum;				/* Summary of entries */
};
173 
/*
 * Table of MAX_CPU counter-record pointers.
 * NOTE(review): presumably one chain head per CPU, indexed by CPU number;
 * the code that fills it is not in this part of the file -- confirm.
 */
extern struct counters *glob_cpu[MAX_CPU];
struct counters *glob_cpu[MAX_CPU];

/* NOTE(review): presumably the storage holding all counter records -- confirm. */
extern struct counters *cnts;
struct counters *cnts=NULL;

/* NOTE(review): presumably the number of records in cnts -- confirm. */
extern int ncnts;
int ncnts=0;

/*
 * Pointer to a function with the same signature as the metric functions
 * in this file (struct counters *, int) -> int.
 * NOTE(review): presumably set to the selected cpu_entry.func -- confirm.
 */
extern int (*expression)(struct counters *, int);
int (*expression)(struct counters *, int);

/* NOTE(review): presumably copied from the selected cpu_entry.thresh / .command -- confirm. */
static const char *threshold=NULL;
static const char *command;
188 
/*
 * One named expression.  func has the same signature as the metric
 * functions in this file, e.g. allocstall1().
 * NOTE(review): name presumably matches the strings tested by the
 * explain_name_*() functions, thresh is presumably a human readable
 * threshold hint and command the command line used to collect the
 * counters -- the tables that fill this in are not in this part of the
 * file, confirm.
 */
struct cpu_entry {
	const char *name;
	const char *thresh;
	const char *command;
	int (*func)(struct counters *, int);
};
195 
196 
/*
 * Description of one CPU family.  explain has the same signature as
 * explain_name_sb(), explain_name_ib() and explain_name_has().
 * NOTE(review): number is presumably the count of elements in ents --
 * confirm against the code that initializes the_cpu.
 */
struct cpu_type {
	char cputype[32];
	int number;
	struct cpu_entry *ents;
	void (*explain)(const char *name);
};
/* The single global cpu_type instance; declared extern, then defined. */
extern struct cpu_type the_cpu;
struct cpu_type the_cpu;
205 
/*
 * explain_name_sb --
 *	Print to stdout the formula behind the expression called "name",
 *	followed by a line giving the threshold at which the value becomes
 *	interesting.  A name that is not in the table below produces an
 *	"Unknown name" line and the threshold text "unknown entry".
 *
 *	The "splitload" text now carries the opening parenthesis that was
 *	missing from "(MEM_UOP_RETIRED.SPLIT_LOADS * 5)".
 */
static void
explain_name_sb(const char *name)
{
	static const struct {
		const char *name;	/* expression name */
		const char *formula;	/* formula text, newline terminated */
		const char *thresh;	/* threshold hint */
	} notes[] = {
		{ "allocstall1",
		  "Examine PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "allocstall2",
		  "Examine PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "br_miss",
		  "Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "splitload",
		  "Examine (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "cache2",
		  "Examine ((MEM_LOAD_RETIRED.L3_HIT * 26) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P\n"
		  "**Note we have it labeled MEM_LOAD_UOPS_RETIRED.LLC_HIT not MEM_LOAD_RETIRED.L3_HIT\n",
		  "thresh >= .2" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "frontendstall",
		  "Examine IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4)\n",
		  "thresh >= .15" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  "          MACHINE_CLEARS.SMC + \n"
		  "          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (CPU_CLK_UNHALTED.THREAD_P * 4)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .9" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "dtlbmissstore",
		  "Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .05" },
	};
	const size_t nnotes = sizeof(notes) / sizeof(notes[0]);
	const char *mythresh = "unknown entry";
	size_t i;

	/* Names are unique in the table, so the first hit is the only hit. */
	for (i = 0; i < nnotes; i++) {
		if (strcmp(name, notes[i].name) == 0)
			break;
	}
	if (i < nnotes) {
		printf("%s", notes[i].formula);
		mythresh = notes[i].thresh;
	} else {
		printf("Unknown name:%s\n", name);
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
284 
/*
 * explain_name_ib --
 *	Print to stdout the formula behind the expression called "name",
 *	followed by a line giving the threshold at which the value becomes
 *	interesting.  A name that is not in the table below produces an
 *	"Unknown name" line and the threshold text "unknown entry".
 *
 *	The "cache2" text previously had an unbalanced parenthesis and left
 *	out the factor of 29 that cache2ib() multiplies the LLC hit count by.
 */
static void
explain_name_ib(const char *name)
{
	static const struct {
		const char *name;	/* expression name */
		const char *formula;	/* formula text, newline terminated */
		const char *thresh;	/* threshold hint */
	} notes[] = {
		{ "br_miss",
		  "Examine ((BR_MISP_RETIRED.ALL_BRANCHES /(BR_MISP_RETIRED.ALL_BRANCHES +\n"
		  "         MACHINE_CLEAR.COUNT) * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES)\n"
		  "/ (4 * CPU_CLK_UNHALTED.THREAD))))\n",
		  "thresh >= .2" },
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .9" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "cache2",
		  "Examine (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "itlbmiss",
		  "Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "icachemiss",
		  "Examine (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "lcpstall",
		  "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "datashare",
		  "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "splitload",
		  "Examine  ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) *\n"
		  "         LD_BLOCKS.NO_SR)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "dtlbmissstore",
		  "Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .05" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  "          MACHINE_CLEARS.SMC + \n"
		  "          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
	};
	const size_t nnotes = sizeof(notes) / sizeof(notes[0]);
	const char *mythresh = "unknown entry";
	size_t i;

	/* Names are unique in the table, so the first hit is the only hit. */
	for (i = 0; i < nnotes; i++) {
		if (strcmp(name, notes[i].name) == 0)
			break;
	}
	if (i < nnotes) {
		printf("%s", notes[i].formula);
		mythresh = notes[i].thresh;
	} else {
		printf("Unknown name:%s\n", name);
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
366 
367 
/*
 * explain_name_has --
 *	Print to stdout the formula behind the expression called "name",
 *	followed by a line giving the threshold at which the value becomes
 *	interesting.  A name that is not in the table below produces an
 *	"Unknown name" line and the threshold text "unknown entry".
 */
static void
explain_name_has(const char *name)
{
	static const struct {
		const char *name;	/* expression name */
		const char *formula;	/* formula text, newline terminated */
		const char *thresh;	/* threshold hint */
	} notes[] = {
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .75" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "itlbmiss",
		  "Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "icachemiss",
		  "Examine (36 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "lcpstall",
		  "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "cache2",
		  "Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))\n"
		  "          / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "datashare",
		  "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "splitload",
		  "Examine  (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "br_miss",
		  "Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD\n",
		  "thresh >= .2" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  "          MACHINE_CLEARS.SMC + \n"
		  "          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
	};
	const size_t nnotes = sizeof(notes) / sizeof(notes[0]);
	const char *mythresh = "unknown entry";
	size_t i;

	/* Names are unique in the table, so the first hit is the only hit. */
	for (i = 0; i < nnotes; i++) {
		if (strcmp(name, notes[i].name) == 0)
			break;
	}
	if (i < nnotes) {
		printf("%s", notes[i].formula);
		mythresh = notes[i].thresh;
	} else {
		printf("Unknown name:%s\n", name);
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
445 
446 
447 static struct counters *
448 find_counter(struct counters *base, const char *name)
449 {
450 	struct counters *at;
451 	int len;
452 
453 	at = base;
454 	len = strlen(name);
455 	while(at) {
456 		if (strncmp(at->counter_name, name, len) == 0) {
457 			return(at);
458 		}
459 		at = at->next_cpu;
460 	}
461 	printf("Can't find counter %s\n", name);
462 	printf("We have:\n");
463 	at = base;
464 	while(at) {
465 		printf("- %s\n", at->counter_name);
466 		at = at->next_cpu;
467 	}
468 	exit(-1);
469 }
470 
471 static int
472 allocstall1(struct counters *cpu, int pos)
473 {
474 /*  1  - PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW/CPU_CLK_UNHALTED.THREAD_P (thresh > .05)*/
475 	int ret;
476 	struct counters *partial;
477 	struct counters *unhalt;
478 	double un, par, res;
479 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
480 	partial = find_counter(cpu, "PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW");
481 	if (pos != -1) {
482 		par = partial->vals[pos] * 1.0;
483 		un = unhalt->vals[pos] * 1.0;
484 	} else {
485 		par = partial->sum * 1.0;
486 		un = unhalt->sum * 1.0;
487 	}
488 	res = par/un;
489 	ret = printf("%1.3f", res);
490 	return(ret);
491 }
492 
493 static int
494 allocstall2(struct counters *cpu, int pos)
495 {
496 /*  2  - PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
497 	int ret;
498 	struct counters *partial;
499 	struct counters *unhalt;
500 	double un, par, res;
501 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
502 	partial = find_counter(cpu, "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP");
503 	if (pos != -1) {
504 		par = partial->vals[pos] * 1.0;
505 		un = unhalt->vals[pos] * 1.0;
506 	} else {
507 		par = partial->sum * 1.0;
508 		un = unhalt->sum * 1.0;
509 	}
510 	res = par/un;
511 	ret = printf("%1.3f", res);
512 	return(ret);
513 }
514 
515 static int
516 br_mispredict(struct counters *cpu, int pos)
517 {
518 	struct counters *brctr;
519 	struct counters *unhalt;
520 	int ret;
521 /*  3  - (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
522 	double br, un, con, res;
523 	con = 20.0;
524 
525 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
526         brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
527 	if (pos != -1) {
528 		br = brctr->vals[pos] * 1.0;
529 		un = unhalt->vals[pos] * 1.0;
530 	} else {
531 		br = brctr->sum * 1.0;
532 		un = unhalt->sum * 1.0;
533 	}
534 	res = (con * br)/un;
535  	ret = printf("%1.3f", res);
536 	return(ret);
537 }
538 
539 static int
540 br_mispredictib(struct counters *cpu, int pos)
541 {
542 	struct counters *brctr;
543 	struct counters *unhalt;
544 	struct counters *clear, *clear2, *clear3;
545 	struct counters *uops;
546 	struct counters *recv;
547 	struct counters *iss;
548 /*	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",*/
549 	int ret;
550         /*
551 	 * (BR_MISP_RETIRED.ALL_BRANCHES /
552 	 *         (BR_MISP_RETIRED.ALL_BRANCHES +
553 	 *          MACHINE_CLEAR.COUNT) *
554 	 *	   ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) / (4 * CPU_CLK_UNHALTED.THREAD)))
555 	 *
556 	 */
557 	double br, cl, cl2, cl3, uo, re, un, con, res, is;
558 	con = 4.0;
559 
560 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
561         brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
562 	clear = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
563 	clear2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
564 	clear3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
565 	uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
566 	iss = find_counter(cpu, "UOPS_ISSUED.ANY");
567 	recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES");
568 	if (pos != -1) {
569 		br = brctr->vals[pos] * 1.0;
570 		cl = clear->vals[pos] * 1.0;
571 		cl2 = clear2->vals[pos] * 1.0;
572 		cl3 = clear3->vals[pos] * 1.0;
573 		uo = uops->vals[pos] * 1.0;
574 		re = recv->vals[pos] * 1.0;
575 		is = iss->vals[pos] * 1.0;
576 		un = unhalt->vals[pos] * 1.0;
577 	} else {
578 		br = brctr->sum * 1.0;
579 		cl = clear->sum * 1.0;
580 		cl2 = clear2->sum * 1.0;
581 		cl3 = clear3->sum * 1.0;
582 		uo = uops->sum * 1.0;
583 		re = recv->sum * 1.0;
584 		is = iss->sum * 1.0;
585 		un = unhalt->sum * 1.0;
586 	}
587 	res = (br/(br + cl + cl2 + cl3) * ((is - uo + con * re) / (con * un)));
588  	ret = printf("%1.3f", res);
589 	return(ret);
590 }
591 
592 static int
593 splitloadib(struct counters *cpu, int pos)
594 {
595 	int ret;
596 	struct counters *mem;
597 	struct counters *l1d, *ldblock;
598 	struct counters *unhalt;
599 	double un, memd, res, l1, ldb;
600         /*
601 	 * ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) * LD_BLOCKS.NO_SR) / CPU_CLK_UNHALTED.THREAD_P
602 	 * "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
603 	 */
604 
605 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
606 	mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L1_MISS");
607 	l1d = find_counter(cpu, "L1D_PEND_MISS.PENDING");
608 	ldblock = find_counter(cpu, "LD_BLOCKS.NO_SR");
609 	if (pos != -1) {
610 		memd = mem->vals[pos] * 1.0;
611 		l1 = l1d->vals[pos] * 1.0;
612 		ldb = ldblock->vals[pos] * 1.0;
613 		un = unhalt->vals[pos] * 1.0;
614 	} else {
615 		memd = mem->sum * 1.0;
616 		l1 = l1d->sum * 1.0;
617 		ldb = ldblock->sum * 1.0;
618 		un = unhalt->sum * 1.0;
619 	}
620 	res = ((l1 / memd) * ldb)/un;
621 	ret = printf("%1.3f", res);
622 	return(ret);
623 }
624 
625 static int
626 splitload(struct counters *cpu, int pos)
627 {
628 	int ret;
629 	struct counters *mem;
630 	struct counters *unhalt;
631 	double con, un, memd, res;
632 /*  4  - (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/
633 
634 	con = 5.0;
635 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
636 	mem = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_LOADS");
637 	if (pos != -1) {
638 		memd = mem->vals[pos] * 1.0;
639 		un = unhalt->vals[pos] * 1.0;
640 	} else {
641 		memd = mem->sum * 1.0;
642 		un = unhalt->sum * 1.0;
643 	}
644 	res = (memd * con)/un;
645 	ret = printf("%1.3f", res);
646 	return(ret);
647 }
648 
649 static int
650 splitstore(struct counters *cpu, int pos)
651 {
652         /*  5  - MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES (thresh > 0.01) */
653 	int ret;
654 	struct counters *mem_split;
655 	struct counters *mem_stores;
656 	double memsplit, memstore, res;
657 	mem_split = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_STORES");
658 	mem_stores = find_counter(cpu, "MEM_UOP_RETIRED.ALL_STORES");
659 	if (pos != -1) {
660 		memsplit = mem_split->vals[pos] * 1.0;
661 		memstore = mem_stores->vals[pos] * 1.0;
662 	} else {
663 		memsplit = mem_split->sum * 1.0;
664 		memstore = mem_stores->sum * 1.0;
665 	}
666 	res = memsplit/memstore;
667 	ret = printf("%1.3f", res);
668 	return(ret);
669 }
670 
671 
672 static int
673 contested(struct counters *cpu, int pos)
674 {
675         /*  6  - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
676 	int ret;
677 	struct counters *mem;
678 	struct counters *unhalt;
679 	double con, un, memd, res;
680 
681 	con = 60.0;
682 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
683 	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
684 	if (pos != -1) {
685 		memd = mem->vals[pos] * 1.0;
686 		un = unhalt->vals[pos] * 1.0;
687 	} else {
688 		memd = mem->sum * 1.0;
689 		un = unhalt->sum * 1.0;
690 	}
691 	res = (memd * con)/un;
692 	ret = printf("%1.3f", res);
693 	return(ret);
694 }
695 
696 static int
697 contested_has(struct counters *cpu, int pos)
698 {
699         /*  6  - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
700 	int ret;
701 	struct counters *mem;
702 	struct counters *unhalt;
703 	double con, un, memd, res;
704 
705 	con = 84.0;
706 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
707 	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
708 	if (pos != -1) {
709 		memd = mem->vals[pos] * 1.0;
710 		un = unhalt->vals[pos] * 1.0;
711 	} else {
712 		memd = mem->sum * 1.0;
713 		un = unhalt->sum * 1.0;
714 	}
715 	res = (memd * con)/un;
716 	ret = printf("%1.3f", res);
717 	return(ret);
718 }
719 
720 
721 static int
722 blockstoreforward(struct counters *cpu, int pos)
723 {
724         /*  7  - (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .05)*/
725 	int ret;
726 	struct counters *ldb;
727 	struct counters *unhalt;
728 	double con, un, ld, res;
729 
730 	con = 13.0;
731 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
732 	ldb = find_counter(cpu, "LD_BLOCKS_STORE_FORWARD");
733 	if (pos != -1) {
734 		ld = ldb->vals[pos] * 1.0;
735 		un = unhalt->vals[pos] * 1.0;
736 	} else {
737 		ld = ldb->sum * 1.0;
738 		un = unhalt->sum * 1.0;
739 	}
740 	res = (ld * con)/un;
741 	ret = printf("%1.3f", res);
742 	return(ret);
743 }
744 
745 static int
746 cache2(struct counters *cpu, int pos)
747 {
748 	/* ** Suspect ***
749 	 *  8  - ((MEM_LOAD_RETIRED.L3_HIT * 26) + (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) +
750 	 *        (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
751 	 */
752 	int ret;
753 	struct counters *mem1, *mem2, *mem3;
754 	struct counters *unhalt;
755 	double con1, con2, con3, un, me_1, me_2, me_3, res;
756 
757 	con1 = 26.0;
758 	con2 = 43.0;
759 	con3 = 60.0;
760 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
761 /* Call for MEM_LOAD_RETIRED.L3_HIT possibly MEM_LOAD_UOPS_RETIRED.LLC_HIT ?*/
762 	mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
763 	mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
764 	mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
765 	if (pos != -1) {
766 		me_1 = mem1->vals[pos] * 1.0;
767 		me_2 = mem2->vals[pos] * 1.0;
768 		me_3 = mem3->vals[pos] * 1.0;
769 		un = unhalt->vals[pos] * 1.0;
770 	} else {
771 		me_1 = mem1->sum * 1.0;
772 		me_2 = mem2->sum * 1.0;
773 		me_3 = mem3->sum * 1.0;
774 		un = unhalt->sum * 1.0;
775 	}
776 	res = ((me_1 * con1) + (me_2 * con2) + (me_3 * con3))/un;
777 	ret = printf("%1.3f", res);
778 	return(ret);
779 }
780 
781 static int
782 datasharing(struct counters *cpu, int pos)
783 {
784 	/*
785 	 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
786 	 */
787 	int ret;
788 	struct counters *mem;
789 	struct counters *unhalt;
790 	double con, res, me, un;
791 
792 	con = 43.0;
793 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
794 	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
795 	if (pos != -1) {
796 		me = mem->vals[pos] * 1.0;
797 		un = unhalt->vals[pos] * 1.0;
798 	} else {
799 		me = mem->sum * 1.0;
800 		un = unhalt->sum * 1.0;
801 	}
802 	res = (me * con)/un;
803 	ret = printf("%1.3f", res);
804 	return(ret);
805 
806 }
807 
808 
809 static int
810 datasharing_has(struct counters *cpu, int pos)
811 {
812 	/*
813 	 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
814 	 */
815 	int ret;
816 	struct counters *mem;
817 	struct counters *unhalt;
818 	double con, res, me, un;
819 
820 	con = 72.0;
821 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
822 	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
823 	if (pos != -1) {
824 		me = mem->vals[pos] * 1.0;
825 		un = unhalt->vals[pos] * 1.0;
826 	} else {
827 		me = mem->sum * 1.0;
828 		un = unhalt->sum * 1.0;
829 	}
830 	res = (me * con)/un;
831 	ret = printf("%1.3f", res);
832 	return(ret);
833 
834 }
835 
836 
837 static int
838 cache2ib(struct counters *cpu, int pos)
839 {
840         /*
841 	 *  (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
842 	 */
843 	int ret;
844 	struct counters *mem;
845 	struct counters *unhalt;
846 	double con, un, me, res;
847 
848 	con = 29.0;
849 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
850 	mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
851 	if (pos != -1) {
852 		me = mem->vals[pos] * 1.0;
853 		un = unhalt->vals[pos] * 1.0;
854 	} else {
855 		me = mem->sum * 1.0;
856 		un = unhalt->sum * 1.0;
857 	}
858 	res = (con * me)/un;
859 	ret = printf("%1.3f", res);
860 	return(ret);
861 }
862 
863 static int
864 cache2has(struct counters *cpu, int pos)
865 {
866 	/*
867 	 * Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \
868 	 *          (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) +
869 	 *          (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))
870 	 *           / CPU_CLK_UNHALTED.THREAD_P
871 	 */
872 	int ret;
873 	struct counters *mem1, *mem2, *mem3;
874 	struct counters *unhalt;
875 	double con1, con2, con3, un, me1, me2, me3, res;
876 
877 	con1 = 36.0;
878 	con2 = 72.0;
879 	con3 = 84.0;
880 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
881 	mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
882 	mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
883 	mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
884 	if (pos != -1) {
885 		me1 = mem1->vals[pos] * 1.0;
886 		me2 = mem2->vals[pos] * 1.0;
887 		me3 = mem3->vals[pos] * 1.0;
888 		un = unhalt->vals[pos] * 1.0;
889 	} else {
890 		me1 = mem1->sum * 1.0;
891 		me2 = mem2->sum * 1.0;
892 		me3 = mem3->sum * 1.0;
893 		un = unhalt->sum * 1.0;
894 	}
895 	res = ((me1 * con1) + (me2 * con2) + (me3 * con3))/un;
896 	ret = printf("%1.3f", res);
897 	return(ret);
898 }
899 
900 static int
901 cache1(struct counters *cpu, int pos)
902 {
903 	/*  9  - (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
904 	int ret;
905 	struct counters *mem;
906 	struct counters *unhalt;
907 	double con, un, me, res;
908 
909 	con = 180.0;
910 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
911 	mem = find_counter(cpu, "MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS");
912 	if (pos != -1) {
913 		me = mem->vals[pos] * 1.0;
914 		un = unhalt->vals[pos] * 1.0;
915 	} else {
916 		me = mem->sum * 1.0;
917 		un = unhalt->sum * 1.0;
918 	}
919 	res = (me * con)/un;
920 	ret = printf("%1.3f", res);
921 	return(ret);
922 }
923 
924 static int
925 cache1ib(struct counters *cpu, int pos)
926 {
927 	/*  9  - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
928 	int ret;
929 	struct counters *mem;
930 	struct counters *unhalt;
931 	double con, un, me, res;
932 
933 	con = 180.0;
934 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
935 	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM");
936 	if (pos != -1) {
937 		me = mem->vals[pos] * 1.0;
938 		un = unhalt->vals[pos] * 1.0;
939 	} else {
940 		me = mem->sum * 1.0;
941 		un = unhalt->sum * 1.0;
942 	}
943 	res = (me * con)/un;
944 	ret = printf("%1.3f", res);
945 	return(ret);
946 }
947 
948 
949 static int
950 dtlb_missload(struct counters *cpu, int pos)
951 {
952 	/* 10  - ((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P (t >=.1) */
953 	int ret;
954 	struct counters *dtlb_m, *dtlb_d;
955 	struct counters *unhalt;
956 	double con, un, d1, d2, res;
957 
958 	con = 7.0;
959 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
960 	dtlb_m = find_counter(cpu, "DTLB_LOAD_MISSES.STLB_HIT");
961 	dtlb_d = find_counter(cpu, "DTLB_LOAD_MISSES.WALK_DURATION");
962 	if (pos != -1) {
963 		d1 = dtlb_m->vals[pos] * 1.0;
964 		d2 = dtlb_d->vals[pos] * 1.0;
965 		un = unhalt->vals[pos] * 1.0;
966 	} else {
967 		d1 = dtlb_m->sum * 1.0;
968 		d2 = dtlb_d->sum * 1.0;
969 		un = unhalt->sum * 1.0;
970 	}
971 	res = ((d1 * con) + d2)/un;
972 	ret = printf("%1.3f", res);
973 	return(ret);
974 }
975 
976 static int
977 dtlb_missstore(struct counters *cpu, int pos)
978 {
979         /*
980 	 * ((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION) /
981 	 * CPU_CLK_UNHALTED.THREAD_P (t >= .1)
982 	 */
983         int ret;
984         struct counters *dtsb_m, *dtsb_d;
985         struct counters *unhalt;
986         double con, un, d1, d2, res;
987 
988         con = 7.0;
989         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
990         dtsb_m = find_counter(cpu, "DTLB_STORE_MISSES.STLB_HIT");
991         dtsb_d = find_counter(cpu, "DTLB_STORE_MISSES.WALK_DURATION");
992         if (pos != -1) {
993                 d1 = dtsb_m->vals[pos] * 1.0;
994                 d2 = dtsb_d->vals[pos] * 1.0;
995                 un = unhalt->vals[pos] * 1.0;
996         } else {
997                 d1 = dtsb_m->sum * 1.0;
998                 d2 = dtsb_d->sum * 1.0;
999                 un = unhalt->sum * 1.0;
1000         }
1001         res = ((d1 * con) + d2)/un;
1002         ret = printf("%1.3f", res);
1003         return(ret);
1004 }
1005 
1006 static int
1007 itlb_miss(struct counters *cpu, int pos)
1008 {
1009 	/* ITLB_MISSES.WALK_DURATION / CPU_CLK_UNTHREAD_P  IB */
1010 	int ret;
1011 	struct counters *itlb;
1012 	struct counters *unhalt;
1013 	double un, d1, res;
1014 
1015 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1016 	itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1017 	if (pos != -1) {
1018 		d1 = itlb->vals[pos] * 1.0;
1019 		un = unhalt->vals[pos] * 1.0;
1020 	} else {
1021 		d1 = itlb->sum * 1.0;
1022 		un = unhalt->sum * 1.0;
1023 	}
1024 	res = d1/un;
1025 	ret = printf("%1.3f", res);
1026 	return(ret);
1027 }
1028 
1029 static int
1030 icache_miss(struct counters *cpu, int pos)
1031 {
1032 	/* (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P IB */
1033 
1034 	int ret;
1035 	struct counters *itlb, *icache;
1036 	struct counters *unhalt;
1037 	double un, d1, ic, res;
1038 
1039 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1040 	itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1041 	icache = find_counter(cpu, "ICACHE.IFETCH_STALL");
1042 	if (pos != -1) {
1043 		d1 = itlb->vals[pos] * 1.0;
1044 		ic = icache->vals[pos] * 1.0;
1045 		un = unhalt->vals[pos] * 1.0;
1046 	} else {
1047 		d1 = itlb->sum * 1.0;
1048 		ic = icache->sum * 1.0;
1049 		un = unhalt->sum * 1.0;
1050 	}
1051 	res = (ic-d1)/un;
1052 	ret = printf("%1.3f", res);
1053 	return(ret);
1054 
1055 }
1056 
1057 static int
1058 icache_miss_has(struct counters *cpu, int pos)
1059 {
1060 	/* (36 * ICACHE.MISSES) / CPU_CLK_UNHALTED.THREAD_P */
1061 
1062 	int ret;
1063 	struct counters *icache;
1064 	struct counters *unhalt;
1065 	double un, con, ic, res;
1066 
1067 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1068 	icache = find_counter(cpu, "ICACHE.MISSES");
1069 	con = 36.0;
1070 	if (pos != -1) {
1071 		ic = icache->vals[pos] * 1.0;
1072 		un = unhalt->vals[pos] * 1.0;
1073 	} else {
1074 		ic = icache->sum * 1.0;
1075 		un = unhalt->sum * 1.0;
1076 	}
1077 	res = (con * ic)/un;
1078 	ret = printf("%1.3f", res);
1079 	return(ret);
1080 
1081 }
1082 
1083 static int
1084 lcp_stall(struct counters *cpu, int pos)
1085 {
1086          /* ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P IB */
1087 	int ret;
1088 	struct counters *ild;
1089 	struct counters *unhalt;
1090 	double un, d1, res;
1091 
1092 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1093 	ild = find_counter(cpu, "ILD_STALL.LCP");
1094 	if (pos != -1) {
1095 		d1 = ild->vals[pos] * 1.0;
1096 		un = unhalt->vals[pos] * 1.0;
1097 	} else {
1098 		d1 = ild->sum * 1.0;
1099 		un = unhalt->sum * 1.0;
1100 	}
1101 	res = d1/un;
1102 	ret = printf("%1.3f", res);
1103 	return(ret);
1104 
1105 }
1106 
1107 
1108 static int
1109 frontendstall(struct counters *cpu, int pos)
1110 {
1111       /* 12  -  IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4) (thresh >= .15) */
1112 	int ret;
1113 	struct counters *idq;
1114 	struct counters *unhalt;
1115 	double con, un, id, res;
1116 
1117 	con = 4.0;
1118 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1119 	idq = find_counter(cpu, "IDQ_UOPS_NOT_DELIVERED.CORE");
1120 	if (pos != -1) {
1121 		id = idq->vals[pos] * 1.0;
1122 		un = unhalt->vals[pos] * 1.0;
1123 	} else {
1124 		id = idq->sum * 1.0;
1125 		un = unhalt->sum * 1.0;
1126 	}
1127 	res = id/(un * con);
1128 	ret = printf("%1.3f", res);
1129 	return(ret);
1130 }
1131 
1132 static int
1133 clears(struct counters *cpu, int pos)
1134 {
1135 	/* 13  - ((MACHINE_CLEARS.MEMORY_ORDERING + MACHINE_CLEARS.SMC + MACHINE_CLEARS.MASKMOV ) * 100 )
1136 	 *         / CPU_CLK_UNHALTED.THREAD_P (thresh  >= .02)*/
1137 
1138 	int ret;
1139 	struct counters *clr1, *clr2, *clr3;
1140 	struct counters *unhalt;
1141 	double con, un, cl1, cl2, cl3, res;
1142 
1143 	con = 100.0;
1144 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1145 	clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
1146 	clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
1147 	clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
1148 
1149 	if (pos != -1) {
1150 		cl1 = clr1->vals[pos] * 1.0;
1151 		cl2 = clr2->vals[pos] * 1.0;
1152 		cl3 = clr3->vals[pos] * 1.0;
1153 		un = unhalt->vals[pos] * 1.0;
1154 	} else {
1155 		cl1 = clr1->sum * 1.0;
1156 		cl2 = clr2->sum * 1.0;
1157 		cl3 = clr3->sum * 1.0;
1158 		un = unhalt->sum * 1.0;
1159 	}
1160 	res = ((cl1 + cl2 + cl3) * con)/un;
1161 	ret = printf("%1.3f", res);
1162 	return(ret);
1163 }
1164 
1165 static int
1166 microassist(struct counters *cpu, int pos)
1167 {
1168 	/* 14  - IDQ.MS_CYCLES / CPU_CLK_UNHALTED.THREAD_P (thresh > .05) */
1169 	int ret;
1170 	struct counters *idq;
1171 	struct counters *unhalt;
1172 	double un, id, res, con;
1173 
1174 	con = 4.0;
1175 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1176 	idq = find_counter(cpu, "IDQ.MS_UOPS");
1177 	if (pos != -1) {
1178 		id = idq->vals[pos] * 1.0;
1179 		un = unhalt->vals[pos] * 1.0;
1180 	} else {
1181 		id = idq->sum * 1.0;
1182 		un = unhalt->sum * 1.0;
1183 	}
1184 	res = id/(un * con);
1185 	ret = printf("%1.3f", res);
1186 	return(ret);
1187 }
1188 
1189 
1190 static int
1191 aliasing(struct counters *cpu, int pos)
1192 {
1193 	/* 15  - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */
1194 	int ret;
1195 	struct counters *ld;
1196 	struct counters *unhalt;
1197 	double un, lds, con, res;
1198 
1199 	con = 5.0;
1200 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1201 	ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS");
1202 	if (pos != -1) {
1203 		lds = ld->vals[pos] * 1.0;
1204 		un = unhalt->vals[pos] * 1.0;
1205 	} else {
1206 		lds = ld->sum * 1.0;
1207 		un = unhalt->sum * 1.0;
1208 	}
1209 	res = (lds * con)/un;
1210 	ret = printf("%1.3f", res);
1211 	return(ret);
1212 }
1213 
1214 static int
1215 fpassists(struct counters *cpu, int pos)
1216 {
1217 	/* 16  - FP_ASSIST.ANY/INST_RETIRED.ANY_P */
1218 	int ret;
1219 	struct counters *fp;
1220 	struct counters *inst;
1221 	double un, fpd, res;
1222 
1223 	inst = find_counter(cpu, "INST_RETIRED.ANY_P");
1224 	fp = find_counter(cpu, "FP_ASSIST.ANY");
1225 	if (pos != -1) {
1226 		fpd = fp->vals[pos] * 1.0;
1227 		un = inst->vals[pos] * 1.0;
1228 	} else {
1229 		fpd = fp->sum * 1.0;
1230 		un = inst->sum * 1.0;
1231 	}
1232 	res = fpd/un;
1233 	ret = printf("%1.3f", res);
1234 	return(ret);
1235 }
1236 
1237 static int
1238 otherassistavx(struct counters *cpu, int pos)
1239 {
1240 	/* 17  - (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P thresh  .1*/
1241 	int ret;
1242 	struct counters *oth;
1243 	struct counters *unhalt;
1244 	double un, ot, con, res;
1245 
1246 	con = 75.0;
1247 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1248 	oth = find_counter(cpu, "OTHER_ASSISTS.AVX_TO_SSE");
1249 	if (pos != -1) {
1250 		ot = oth->vals[pos] * 1.0;
1251 		un = unhalt->vals[pos] * 1.0;
1252 	} else {
1253 		ot = oth->sum * 1.0;
1254 		un = unhalt->sum * 1.0;
1255 	}
1256 	res = (ot * con)/un;
1257 	ret = printf("%1.3f", res);
1258 	return(ret);
1259 }
1260 
1261 static int
1262 otherassistsse(struct counters *cpu, int pos)
1263 {
1264 
1265 	int ret;
1266 	struct counters *oth;
1267 	struct counters *unhalt;
1268 	double un, ot, con, res;
1269 
1270 	/* 18     (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P  thresh .1*/
1271 	con = 75.0;
1272 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1273 	oth = find_counter(cpu, "OTHER_ASSISTS.SSE_TO_AVX");
1274 	if (pos != -1) {
1275 		ot = oth->vals[pos] * 1.0;
1276 		un = unhalt->vals[pos] * 1.0;
1277 	} else {
1278 		ot = oth->sum * 1.0;
1279 		un = unhalt->sum * 1.0;
1280 	}
1281 	res = (ot * con)/un;
1282 	ret = printf("%1.3f", res);
1283 	return(ret);
1284 }
1285 
1286 static int
1287 efficiency1(struct counters *cpu, int pos)
1288 {
1289 
1290 	int ret;
1291 	struct counters *uops;
1292 	struct counters *unhalt;
1293 	double un, ot, con, res;
1294 
1295         /* 19 (UOPS_RETIRED.RETIRE_SLOTS/(4*CPU_CLK_UNHALTED.THREAD_P) look if thresh < .9*/
1296 	con = 4.0;
1297 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1298 	uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
1299 	if (pos != -1) {
1300 		ot = uops->vals[pos] * 1.0;
1301 		un = unhalt->vals[pos] * 1.0;
1302 	} else {
1303 		ot = uops->sum * 1.0;
1304 		un = unhalt->sum * 1.0;
1305 	}
1306 	res = ot/(con * un);
1307 	ret = printf("%1.3f", res);
1308 	return(ret);
1309 }
1310 
1311 static int
1312 efficiency2(struct counters *cpu, int pos)
1313 {
1314 
1315 	int ret;
1316 	struct counters *uops;
1317 	struct counters *unhalt;
1318 	double un, ot, res;
1319 
1320         /* 20  - CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P good if > 1. (comp factor)*/
1321 	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1322 	uops = find_counter(cpu, "INST_RETIRED.ANY_P");
1323 	if (pos != -1) {
1324 		ot = uops->vals[pos] * 1.0;
1325 		un = unhalt->vals[pos] * 1.0;
1326 	} else {
1327 		ot = uops->sum * 1.0;
1328 		un = unhalt->sum * 1.0;
1329 	}
1330 	res = un/ot;
1331 	ret = printf("%1.3f", res);
1332 	return(ret);
1333 }
1334 
/*
 * Expression table installed by set_sandybridge().
 *
 * Each entry supplies, in order: the expression name that
 * set_expression()/validate_expression() compare against, a threshold
 * hint string, the pmcstat command line that collects the needed
 * counters, and the function that prints the computed value.
 * (struct cpu_entry is declared outside this section; presumably its
 * members are name, thresh, command, func in that order -- confirm.)
 *
 * SANDY_BRIDGE_COUNT must match the number of initializers below.
 */
#define SANDY_BRIDGE_COUNT 20
static struct cpu_entry sandy_bridge[SANDY_BRIDGE_COUNT] = {
/*01*/	{ "allocstall1", "thresh > .05",
	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW -w 1",
	  allocstall1 },
/*02*/	{ "allocstall2", "thresh > .05",
	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES -w 1",
	  allocstall2 },
/*03*/	{ "br_miss", "thresh >= .2",
	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
	  br_mispredict },
/*04*/	{ "splitload", "thresh >= .1",
	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1",
	  splitload },
/*05*/	{ "splitstore", "thresh >= .01",
	  "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1",
	  splitstore },
/*06*/	{ "contested", "thresh >= .05",
	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  contested },
/*07*/	{ "blockstorefwd", "thresh >= .05",
	  "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  blockstoreforward },
/*08*/	{ "cache2", "thresh >= .2",
	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  cache2 },
/*09*/	{ "cache1", "thresh >= .2",
	  "pmcstat -s MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  cache1 },
/*10*/	{ "dtlbmissload", "thresh >= .1",
	  "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  dtlb_missload },
/*11*/	{ "dtlbmissstore", "thresh >= .05",
	  "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  dtlb_missstore },
/*12*/	{ "frontendstall", "thresh >= .15",
	  "pmcstat -s IDQ_UOPS_NOT_DELIVERED.CORE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  frontendstall },
/*13*/	{ "clears", "thresh >= .02",
	  "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  clears },
/*14*/	{ "microassist", "thresh >= .05",
	  "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  microassist },
/*15*/	{ "aliasing_4k", "thresh >= .1",
	  "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  aliasing },
/*16*/	{ "fpassist", "look for a excessive value",
	  "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
	  fpassists },
/*17*/	{ "otherassistavx", "look for a excessive value",
	  "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  otherassistavx },
/*18*/	{ "otherassistsse", "look for a excessive value",
	  "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  otherassistsse },
/*19*/	{ "eff1", "thresh < .9",
	  "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  efficiency1 },
/*20*/	{ "eff2", "thresh > 1.0",
	  "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  efficiency2 },
};
1398 
1399 
/*
 * Expression table installed by set_ivybridge().
 *
 * Each entry supplies, in order: the expression name that
 * set_expression()/validate_expression() compare against, a threshold
 * hint string, the pmcstat command line that collects the needed
 * counters, and the function that prints the computed value.
 * (struct cpu_entry is declared outside this section; presumably its
 * members are name, thresh, command, func in that order -- confirm.)
 *
 * IVY_BRIDGE_COUNT must match the number of initializers below.
 */
#define IVY_BRIDGE_COUNT 21
static struct cpu_entry ivy_bridge[IVY_BRIDGE_COUNT] = {
/*1*/	{ "eff1", "thresh < .75",
	  "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  efficiency1 },
/*2*/	{ "eff2", "thresh > 1.0",
	  "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  efficiency2 },
/*3*/	{ "itlbmiss", "thresh > .05",
	  "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  itlb_miss },
/*4*/	{ "icachemiss", "thresh > .05",
	  "pmcstat -s ICACHE.IFETCH_STALL -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  icache_miss },
/*5*/	{ "lcpstall", "thresh > .05",
	  "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  lcp_stall },
/*6*/	{ "cache1", "thresh >= .2",
	  "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  cache1ib },
/*7*/	{ "cache2", "thresh >= .2",
	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  cache2ib },
/*8*/	{ "contested", "thresh >= .05",
	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  contested },
/*9*/	{ "datashare", "thresh >= .05",
	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  datasharing },
/*10*/	{ "blockstorefwd", "thresh >= .05",
	  "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  blockstoreforward },
/*11*/	{ "splitload", "thresh >= .1",
	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
	  splitloadib },
/*12*/	{ "splitstore", "thresh >= .01",
	  "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1",
	  splitstore },
/*13*/	{ "aliasing_4k", "thresh >= .1",
	  "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  aliasing },
/*14*/	{ "dtlbmissload", "thresh >= .1",
	  "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  dtlb_missload },
/*15*/	{ "dtlbmissstore", "thresh >= .05",
	  "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  dtlb_missstore },
/*16*/	{ "br_miss", "thresh >= .2",
	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",
	  br_mispredictib },
/*17*/	{ "clears", "thresh >= .02",
	  "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  clears },
/*18*/	{ "microassist", "thresh >= .05",
	  "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  microassist },
/*19*/	{ "fpassist", "look for a excessive value",
	  "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
	  fpassists },
/*20*/	{ "otherassistavx", "look for a excessive value",
	  "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  otherassistavx },
/*21*/	{ "otherassistsse", "look for a excessive value",
	  "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  otherassistsse },
};
1466 
1467 #define HASWELL_COUNT 20
1468 static struct cpu_entry haswell[HASWELL_COUNT] = {
1469 /*1*/	{ "eff1", "thresh < .75",
1470 	  "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1471 	  efficiency1 },
1472 /*2*/	{ "eff2", "thresh > 1.0",
1473 	  "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1474 	  efficiency2 },
1475 /*3*/	{ "itlbmiss", "thresh > .05",
1476 	  "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1477 	  itlb_miss },
1478 /*4*/	{ "icachemiss", "thresh > .05",
1479 	  "pmcstat -s ICACHE.MISSES --s CPU_CLK_UNHALTED.THREAD_P -w 1",
1480 	  icache_miss_has },
1481 /*5*/	{ "lcpstall", "thresh > .05",
1482 	  "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1483 	  lcp_stall },
1484 /*6*/	{ "cache1", "thresh >= .2",
1485 	  "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1486 	  cache1ib },
1487 /*7*/	{ "cache2", "thresh >= .2",
1488 	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT  -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1489 	  cache2has },
1490 /*8*/	{ "contested", "thresh >= .05",
1491 	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1492 	  contested_has },
1493 /*9*/	{ "datashare", "thresh >= .05",
1494 	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1495 	  datasharing_has },
1496 /*10*/	{ "blockstorefwd", "thresh >= .05",
1497 	  "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1498 	  blockstoreforward },
1499 /*11*/	{ "splitload", "thresh >= .1",
1500 	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1",
1501 	  splitload },
1502 /*12*/	{ "splitstore", "thresh >= .01",
1503 	  "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1",
1504 	  splitstore },
1505 /*13*/	{ "aliasing_4k", "thresh >= .1",
1506 	  "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1507 	  aliasing },
1508 /*14*/	{ "dtlbmissload", "thresh >= .1",
1509 	  "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1510 	  dtlb_missload },
1511 /*15*/	{ "br_miss", "thresh >= .2",
1512 	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
1513 	  br_mispredict },
1514 /*16*/	{ "clears", "thresh >= .02",
1515 	  "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1516 	  clears },
1517 /*17*/	{ "microassist", "thresh >= .05",
1518 	  "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1519 	  microassist },
1520 /*18*/	{ "fpassist", "look for a excessive value",
1521 	  "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1522 	  fpassists },
1523 /*19*/	{ "otherassistavx", "look for a excessive value",
1524 	  "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1525 	  otherassistavx },
1526 /*20*/	{ "otherassistsse", "look for a excessive value",
1527 	  "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1528 	  otherassistsse },
1529 };
1530 
1531 
1532 static void
1533 set_sandybridge(void)
1534 {
1535 	strcpy(the_cpu.cputype, "SandyBridge PMC");
1536 	the_cpu.number = SANDY_BRIDGE_COUNT;
1537 	the_cpu.ents = sandy_bridge;
1538 	the_cpu.explain = explain_name_sb;
1539 }
1540 
1541 static void
1542 set_ivybridge(void)
1543 {
1544 	strcpy(the_cpu.cputype, "IvyBridge PMC");
1545 	the_cpu.number = IVY_BRIDGE_COUNT;
1546 	the_cpu.ents = ivy_bridge;
1547 	the_cpu.explain = explain_name_ib;
1548 }
1549 
1550 
1551 static void
1552 set_haswell(void)
1553 {
1554 	strcpy(the_cpu.cputype, "HASWELL PMC");
1555 	the_cpu.number = HASWELL_COUNT;
1556 	the_cpu.ents = haswell;
1557 	the_cpu.explain = explain_name_has;
1558 }
1559 
1560 static void
1561 set_expression(char *name)
1562 {
1563 	int found = 0, i;
1564 	for(i=0 ; i< the_cpu.number; i++) {
1565 		if (strcmp(name, the_cpu.ents[i].name) == 0) {
1566 			found = 1;
1567 			expression = the_cpu.ents[i].func;
1568 			command = the_cpu.ents[i].command;
1569 			threshold = the_cpu.ents[i].thresh;
1570 			break;
1571 		}
1572 	}
1573 	if (!found) {
1574 		printf("For CPU type %s we have no expression:%s\n",
1575 		       the_cpu.cputype, name);
1576 		exit(-1);
1577 	}
1578 }
1579 
1580 
1581 
1582 
1583 
1584 static int
1585 validate_expression(char *name)
1586 {
1587 	int i, found;
1588 
1589 	found = 0;
1590 	for(i=0 ; i< the_cpu.number; i++) {
1591 		if (strcmp(name, the_cpu.ents[i].name) == 0) {
1592 			found = 1;
1593 			break;
1594 		}
1595 	}
1596 	if (!found) {
1597 		return(-1);
1598 	}
1599 	return (0);
1600 }
1601 
1602 static void
1603 do_expression(struct counters *cpu, int pos)
1604 {
1605 	if (expression == NULL)
1606 		return;
1607 	(*expression)(cpu, pos);
1608 }
1609 
1610 static void
1611 process_header(int idx, char *p)
1612 {
1613 	struct counters *up;
1614 	int i, len, nlen;
1615 	/*
1616 	 * Given header element idx, at p in
1617 	 * form 's/NN/nameof'
1618 	 * process the entry to pull out the name and
1619 	 * the CPU number.
1620 	 */
1621 	if (strncmp(p, "s/", 2)) {
1622 		printf("Check -- invalid header no s/ in %s\n",
1623 		       p);
1624 		return;
1625 	}
1626 	up = &cnts[idx];
1627 	up->cpu = strtol(&p[2], NULL, 10);
1628 	len = strlen(p);
1629 	for (i=2; i<len; i++) {
1630 		if (p[i] == '/') {
1631 			nlen = strlen(&p[(i+1)]);
1632 			if (nlen < (MAX_NLEN-1)) {
1633 				strcpy(up->counter_name, &p[(i+1)]);
1634 			} else {
1635 				strncpy(up->counter_name, &p[(i+1)], (MAX_NLEN-1));
1636 			}
1637 		}
1638 	}
1639 }
1640 
1641 static void
1642 build_counters_from_header(FILE *io)
1643 {
1644 	char buffer[8192], *p;
1645 	int i, len, cnt;
1646 	size_t mlen;
1647 
1648 	/* We have a new start, lets
1649 	 * setup our headers and cpus.
1650 	 */
1651 	if (fgets(buffer, sizeof(buffer), io) == NULL) {
1652 		printf("First line can't be read from file err:%d\n", errno);
1653 		return;
1654 	}
1655 	/*
1656 	 * Ok output is an array of counters. Once
1657 	 * we start to read the values in we must
1658 	 * put them in there slot to match there CPU and
1659 	 * counter being updated. We create a mass array
1660 	 * of the counters, filling in the CPU and
1661 	 * counter name.
1662 	 */
1663 	/* How many do we get? */
1664 	len = strlen(buffer);
1665 	for (i=0, cnt=0; i<len; i++) {
1666 		if (strncmp(&buffer[i], "s/", 2) == 0) {
1667 			cnt++;
1668 			for(;i<len;i++) {
1669 				if (buffer[i] == ' ')
1670 					break;
1671 			}
1672 		}
1673 	}
1674 	mlen = sizeof(struct counters) * cnt;
1675 	cnts = malloc(mlen);
1676 	ncnts = cnt;
1677 	if (cnts == NULL) {
1678 		printf("No memory err:%d\n", errno);
1679 		return;
1680 	}
1681 	memset(cnts, 0, mlen);
1682 	for (i=0, cnt=0; i<len; i++) {
1683 		if (strncmp(&buffer[i], "s/", 2) == 0) {
1684 			p = &buffer[i];
1685 			for(;i<len;i++) {
1686 				if (buffer[i] == ' ') {
1687 					buffer[i] = 0;
1688 					break;
1689 				}
1690 			}
1691 			process_header(cnt, p);
1692 			cnt++;
1693 		}
1694 	}
1695 	if (verbose)
1696 		printf("We have %d entries\n", cnt);
1697 }
/*
 * Upper bound on the number of sample lines process_file() reads before
 * it stops; defaults to MAX_COUNTER_SLOTS.
 */
extern int max_to_collect;
int max_to_collect = MAX_COUNTER_SLOTS;
1700 
1701 static int
1702 read_a_line(FILE *io)
1703 {
1704 	char buffer[8192], *p, *stop;
1705 	int pos, i;
1706 
1707 	if (fgets(buffer, sizeof(buffer), io) == NULL) {
1708 		return(0);
1709 	}
1710 	p = buffer;
1711 	for (i=0; i<ncnts; i++) {
1712 		pos = cnts[i].pos;
1713 		cnts[i].vals[pos] = strtol(p, &stop, 0);
1714 		cnts[i].pos++;
1715 		cnts[i].sum += cnts[i].vals[pos];
1716 		p = stop;
1717 	}
1718 	return (1);
1719 }
1720 
/*
 * Number of CPUs with a counter list in glob_cpu[]; set by print_header()
 * and used by process_file() to know when a row of output is complete.
 */
extern int cpu_count_out;
int cpu_count_out=0;
1723 
1724 static void
1725 print_header(void)
1726 {
1727 	int i, cnt, printed_cnt;
1728 
1729 	printf("*********************************\n");
1730 	for(i=0, cnt=0; i<MAX_CPU; i++) {
1731 		if (glob_cpu[i]) {
1732 			cnt++;
1733 		}
1734 	}
1735 	cpu_count_out = cnt;
1736 	for(i=0, printed_cnt=0; i<MAX_CPU; i++) {
1737 		if (glob_cpu[i]) {
1738 			printf("CPU%d", i);
1739 			printed_cnt++;
1740 		}
1741 		if (printed_cnt == cnt) {
1742 			printf("\n");
1743 			break;
1744 		} else {
1745 			printf("\t");
1746 		}
1747 	}
1748 }
1749 
1750 static void
1751 lace_cpus_together(void)
1752 {
1753 	int i, j, lace_cpu;
1754 	struct counters *cpat, *at;
1755 
1756 	for(i=0; i<ncnts; i++) {
1757 		cpat = &cnts[i];
1758 		if (cpat->next_cpu) {
1759 			/* Already laced in */
1760 			continue;
1761 		}
1762 		lace_cpu = cpat->cpu;
1763 		if (lace_cpu >= MAX_CPU) {
1764 			printf("CPU %d to big\n", lace_cpu);
1765 			continue;
1766 		}
1767 		if (glob_cpu[lace_cpu] == NULL) {
1768 			glob_cpu[lace_cpu] = cpat;
1769 		} else {
1770 			/* Already processed this cpu */
1771 			continue;
1772 		}
1773 		/* Ok look forward for cpu->cpu and link in */
1774 		for(j=(i+1); j<ncnts; j++) {
1775 			at = &cnts[j];
1776 			if (at->next_cpu) {
1777 				continue;
1778 			}
1779 			if (at->cpu == lace_cpu) {
1780 				/* Found one */
1781 				cpat->next_cpu = at;
1782 				cpat = at;
1783 			}
1784 		}
1785 	}
1786 }
1787 
1788 
1789 static void
1790 process_file(char *filename)
1791 {
1792 	FILE *io;
1793 	int i;
1794 	int line_at, not_done;
1795 	pid_t pid_of_command=0;
1796 
1797 	if (filename ==  NULL) {
1798 		io = my_popen(command, "r", &pid_of_command);
1799 		if (io == NULL) {
1800 			printf("Can't popen the command %s\n", command);
1801 			return;
1802 		}
1803 	} else {
1804 		io = fopen(filename, "r");
1805 		if (io == NULL) {
1806 			printf("Can't process file %s err:%d\n",
1807 			       filename, errno);
1808 			return;
1809 		}
1810 	}
1811 	build_counters_from_header(io);
1812 	if (cnts == NULL) {
1813 		/* Nothing we can do */
1814 		printf("Nothing to do -- no counters built\n");
1815 		if (filename) {
1816 			fclose(io);
1817 		} else {
1818 			my_pclose(io, pid_of_command);
1819 		}
1820 		return;
1821 	}
1822 	lace_cpus_together();
1823 	print_header();
1824 	if (verbose) {
1825 		for (i=0; i<ncnts; i++) {
1826 			printf("Counter:%s cpu:%d index:%d\n",
1827 			       cnts[i].counter_name,
1828 			       cnts[i].cpu, i);
1829 		}
1830 	}
1831 	line_at = 0;
1832 	not_done = 1;
1833 	while(not_done) {
1834 		if (read_a_line(io)) {
1835 			line_at++;
1836 		} else {
1837 			break;
1838 		}
1839 		if (line_at >= max_to_collect) {
1840 			not_done = 0;
1841 		}
1842 		if (filename == NULL) {
1843 			int cnt;
1844 			/* For the ones we dynamically open we print now */
1845 			for(i=0, cnt=0; i<MAX_CPU; i++) {
1846 				do_expression(glob_cpu[i], (line_at-1));
1847 				cnt++;
1848 				if (cnt == cpu_count_out) {
1849 					printf("\n");
1850 					break;
1851 				} else {
1852 					printf("\t");
1853 				}
1854 			}
1855 		}
1856 	}
1857 	if (filename) {
1858 		fclose(io);
1859 	} else {
1860 		my_pclose(io, pid_of_command);
1861 	}
1862 }
/*
 * cpuid(in, a, b, c, d): on amd64, execute the CPUID instruction with
 * `in` loaded through the "a" register constraint and store the results
 * of the "a", "b", "c" and "d" register constraints into a, b, c and d.
 * The expansion already ends in a semicolon.
 *
 * On every other architecture the macro expands to nothing, so the
 * output arguments keep whatever values they had before the call.
 */
#if defined(__amd64__)
#define cpuid(in,a,b,c,d)\
  asm("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (in));
#else
#define cpuid(in, a, b, c, d)
#endif
1869 
1870 static void
1871 get_cpuid_set(void)
1872 {
1873 	unsigned long eax, ebx, ecx, edx;
1874 	int model;
1875 	pid_t pid_of_command=0;
1876 	size_t sz, len;
1877 	FILE *io;
1878 	char linebuf[1024], *str;
1879 
1880 	eax = ebx = ecx = edx = 0;
1881 
1882 	cpuid(0, eax, ebx, ecx, edx);
1883 	if (ebx == 0x68747541) {
1884 		printf("AMD processors are not supported by this program\n");
1885 		printf("Sorry\n");
1886 		exit(0);
1887 	} else if (ebx == 0x6972794) {
1888 		printf("Cyrix processors are not supported by this program\n");
1889 		printf("Sorry\n");
1890 		exit(0);
1891 	} else if (ebx == 0x756e6547) {
1892 		printf("Genuine Intel\n");
1893 	} else {
1894 		printf("Unknown processor type 0x%lx Only Intel AMD64 types are supported by this routine!\n", ebx);
1895 		exit(0);
1896 	}
1897 	cpuid(1, eax, ebx, ecx, edx);
1898 	model = (((eax & 0xF0000) >> 12) | ((eax & 0xF0) >> 4));
1899 	printf("CPU model is 0x%x id:0x%lx\n", model, eax);
1900 	switch (eax & 0xF00) {
1901 	case 0x500:		/* Pentium family processors */
1902 		printf("Intel Pentium P5\n");
1903 		goto not_supported;
1904 		break;
1905 	case 0x600:		/* Pentium Pro, Celeron, Pentium II & III */
1906 		switch (model) {
1907 		case 0x1:
1908 			printf("Intel Pentium P6\n");
1909 			goto not_supported;
1910 			break;
1911 		case 0x3:
1912 		case 0x5:
1913 			printf("Intel PII\n");
1914 			goto not_supported;
1915 			break;
1916 		case 0x6: case 0x16:
1917 			printf("Intel CL\n");
1918 			goto not_supported;
1919 			break;
1920 		case 0x7: case 0x8: case 0xA: case 0xB:
1921 			printf("Intel PIII\n");
1922 			goto not_supported;
1923 			break;
1924 		case 0x9: case 0xD:
1925 			printf("Intel PM\n");
1926 			goto not_supported;
1927 			break;
1928 		case 0xE:
1929 			printf("Intel CORE\n");
1930 			goto not_supported;
1931 			break;
1932 		case 0xF:
1933 			printf("Intel CORE2\n");
1934 			goto not_supported;
1935 			break;
1936 		case 0x17:
1937 			printf("Intel CORE2EXTREME\n");
1938 			goto not_supported;
1939 			break;
1940 		case 0x1C:	/* Per Intel document 320047-002. */
1941 			printf("Intel ATOM\n");
1942 			goto not_supported;
1943 			break;
1944 		case 0x1A:
1945 		case 0x1E:	/*
1946 				 * Per Intel document 253669-032 9/2009,
1947 				 * pages A-2 and A-57
1948 				 */
1949 		case 0x1F:	/*
1950 				 * Per Intel document 253669-032 9/2009,
1951 				 * pages A-2 and A-57
1952 				 */
1953 			printf("Intel COREI7\n");
1954 			goto not_supported;
1955 			break;
1956 		case 0x2E:
1957 			printf("Intel NEHALEM\n");
1958 			goto not_supported;
1959 			break;
1960 		case 0x25:	/* Per Intel document 253669-033US 12/2009. */
1961 		case 0x2C:	/* Per Intel document 253669-033US 12/2009. */
1962 			printf("Intel WESTMERE\n");
1963 			goto not_supported;
1964 			break;
1965 		case 0x2F:	/* Westmere-EX, seen in wild */
1966 			printf("Intel WESTMERE\n");
1967 			goto not_supported;
1968 			break;
1969 		case 0x2A:	/* Per Intel document 253669-039US 05/2011. */
1970 			printf("Intel SANDYBRIDGE\n");
1971 			set_sandybridge();
1972 			break;
1973 		case 0x2D:	/* Per Intel document 253669-044US 08/2012. */
1974 			printf("Intel SANDYBRIDGE_XEON\n");
1975 			set_sandybridge();
1976 			break;
1977 		case 0x3A:	/* Per Intel document 253669-043US 05/2012. */
1978 			printf("Intel IVYBRIDGE\n");
1979 			set_ivybridge();
1980 			break;
1981 		case 0x3E:	/* Per Intel document 325462-045US 01/2013. */
1982 			printf("Intel IVYBRIDGE_XEON\n");
1983 			set_ivybridge();
1984 			break;
1985 		case 0x3F:	/* Per Intel document 325462-045US 09/2014. */
1986 			printf("Intel HASWELL (Xeon)\n");
1987 			set_haswell();
1988 			break;
1989 		case 0x3C:	/* Per Intel document 325462-045US 01/2013. */
1990 		case 0x45:
1991 		case 0x46:
1992 			printf("Intel HASWELL\n");
1993 			set_haswell();
1994 			break;
1995 		case 0x4D:
1996 			/* Per Intel document 330061-001 01/2014. */
1997 			printf("Intel ATOM_SILVERMONT\n");
1998 			goto not_supported;
1999 			break;
2000 		default:
2001 			printf("Intel model 0x%x is not known -- sorry\n",
2002 			       model);
2003 			goto not_supported;
2004 			break;
2005 		}
2006 		break;
2007 	case 0xF00:		/* P4 */
2008 		printf("Intel unknown model %d\n", model);
2009 		goto not_supported;
2010 		break;
2011 	}
2012 	/* Ok lets load the list of all known PMC's */
2013 	io = my_popen("/usr/sbin/pmccontrol -L", "r", &pid_of_command);
2014 	if (valid_pmcs == NULL) {
2015 		/* Likely */
2016 		pmc_allocated_cnt = PMC_INITIAL_ALLOC;
2017 		sz = sizeof(char *) * pmc_allocated_cnt;
2018 		valid_pmcs = malloc(sz);
2019 		if (valid_pmcs == NULL) {
2020 			printf("No memory allocation fails at startup?\n");
2021 			exit(-1);
2022 		}
2023 		memset(valid_pmcs, 0, sz);
2024 	}
2025 
2026 	while (fgets(linebuf, sizeof(linebuf), io) != NULL) {
2027 		if (linebuf[0] != '\t') {
2028 			/* sometimes headers ;-) */
2029 			continue;
2030 		}
2031 		len = strlen(linebuf);
2032 		if (linebuf[(len-1)] == '\n') {
2033 			/* Likely */
2034 			linebuf[(len-1)] = 0;
2035 		}
2036 		str = &linebuf[1];
2037 		len = strlen(str) + 1;
2038 		valid_pmcs[valid_pmc_cnt] = malloc(len);
2039 		if (valid_pmcs[valid_pmc_cnt] == NULL) {
2040 			printf("No memory2 allocation fails at startup?\n");
2041 			exit(-1);
2042 		}
2043 		memset(valid_pmcs[valid_pmc_cnt], 0, len);
2044 		strcpy(valid_pmcs[valid_pmc_cnt], str);
2045 		valid_pmc_cnt++;
2046 		if (valid_pmc_cnt >= pmc_allocated_cnt) {
2047 			/* Got to expand -- unlikely */
2048 			char **more;
2049 
2050 			sz = sizeof(char *) * (pmc_allocated_cnt * 2);
2051 			more = malloc(sz);
2052 			if (more == NULL) {
2053 				printf("No memory3 allocation fails at startup?\n");
2054 				exit(-1);
2055 			}
2056 			memset(more, 0, sz);
2057 			memcpy(more, valid_pmcs, sz);
2058 			pmc_allocated_cnt *= 2;
2059 			free(valid_pmcs);
2060 			valid_pmcs = more;
2061 		}
2062 	}
2063 	my_pclose(io, pid_of_command);
2064 	return;
2065 not_supported:
2066 	printf("Not supported\n");
2067 	exit(-1);
2068 }
2069 
2070 static void
2071 explain_all(void)
2072 {
2073 	int i;
2074 	printf("For CPU's of type %s the following expressions are available:\n",the_cpu.cputype);
2075 	printf("-------------------------------------------------------------\n");
2076 	for(i=0; i<the_cpu.number; i++){
2077 		printf("For -e %s ", the_cpu.ents[i].name);
2078 		(*the_cpu.explain)(the_cpu.ents[i].name);
2079 		printf("----------------------------\n");
2080 	}
2081 }
2082 
/*
 * Check that a single PMC can be sampled by running
 * "/usr/sbin/pmcstat -w .25 -c 0 -s <pmc>" and inspecting its output.
 *
 * pmc        - name of the counter to try.
 * out_so_far - number of characters the caller already printed on the
 *              current line; the result is padded out to column 50.
 *
 * The first output line is searched (from offset 2) for either the text
 * "ERROR" or the PMC name.  When the name is found the second line is
 * read and printed, right aligned in a 20 character field, after the
 * word "Pass".  Every other outcome prints a line starting with "Failed".
 */
static void
test_for_a_pmc(const char *pmc, int out_so_far)
{
	FILE *io;
	pid_t pid_of_command=0;
	char my_command[1024];
	char line[1024];
	int len, llen, i;

	if (out_so_far < 50) {
		len = 50 - out_so_far;
		for(i=0; i<len; i++) {
			printf(" ");
		}
	}
	/* Bounded formatting: a long PMC name must not overrun my_command. */
	len = snprintf(my_command, sizeof(my_command),
	    "/usr/sbin/pmcstat -w .25 -c 0 -s %s", pmc);
	if (len < 0 || (size_t)len >= sizeof(my_command)) {
		printf("Failed -- PMC name too long\n");
		return;
	}
	io = my_popen(my_command, "r", &pid_of_command);
	if (io == NULL) {
		printf("Failed -- popen fails\n");
		return;
	}
	/* Setup what we expect: the PMC name itself */
	len = (int)strlen(pmc);
	if (fgets(line, sizeof(line), io) == NULL) {
		printf("Failed -- no output from pmstat\n");
		goto out;
	}
	llen = (int)strlen(line);
	if (llen > 0 && line[(llen-1)] == '\n') {
		line[(llen-1)] = 0;
		llen--;
	}
	for(i=2; i<(llen-len); i++) {
		if (strncmp(&line[i], "ERROR", 5) == 0) {
			printf("Failed %s\n", line);
			goto out;
		} else if (strncmp(&line[i], pmc, len) == 0) {
			int j, k;

			if (fgets(line, sizeof(line), io) == NULL) {
				printf("Failed -- no second output from pmstat\n");
				goto out;
			}
			len = (int)strlen(line);
			/*
			 * Skip the leading blanks one character at a time
			 * and never step past the terminator.
			 */
			j = 0;
			while (j < len && line[j] == ' ') {
				j++;
			}
			printf("Pass");
			len = (int)strlen(&line[j]);
			if (len < 20) {
				for(k=0; k<(20-len); k++) {
					printf(" ");
				}
			}
			if (len) {
				/* The line normally still carries its newline. */
				printf("%s", &line[j]);
			} else {
				printf("\n");
			}
			goto out;
		}
	}
	printf("Failed -- '%s' not '%s'\n", line, pmc);
out:
	my_pclose(io, pid_of_command);

}
2155 
/*
 * Add a private copy of name to the string table vars unless an equal
 * string already sits in one of the first cur_cnt slots.
 *
 * vars    - table of string pointers; slot cur_cnt must be NULL.
 * cur_cnt - number of slots currently in use.
 * name    - NUL terminated string to add.
 *
 * Returns 1 when a copy was stored in vars[cur_cnt] and 0 when the name
 * was already present.  The program exits with -1 if the target slot is
 * unexpectedly occupied or the copy cannot be allocated.
 */
static int
add_it_to(char **vars, int cur_cnt, char *name)
{
	char *copy;
	size_t nbytes;
	int slot;

	for (slot = 0; slot < cur_cnt; slot++) {
		if (strcmp(vars[slot], name) == 0) {
			/* Duplicate, nothing to add */
			return (0);
		}
	}
	if (vars[cur_cnt] != NULL) {
		printf("Cur_cnt:%d filled with %s??\n",
		       cur_cnt, vars[cur_cnt]);
		exit(-1);
	}
	/* First time we see this name: store a copy including the NUL */
	nbytes = strlen(name) + 1;
	copy = malloc(nbytes);
	if (copy == NULL) {
		printf("No memory %s\n", __FUNCTION__);
		exit(-1);
	}
	memcpy(copy, name, nbytes);
	vars[cur_cnt] = copy;
	return (1);
}
2183 
2184 static char *
2185 build_command_for_exp(struct expression *exp)
2186 {
2187 	/*
2188 	 * Build the pmcstat command to handle
2189 	 * the passed in expression.
2190 	 * /usr/sbin/pmcstat -w 1 -s NNN -s QQQ
2191 	 * where NNN and QQQ represent the PMC's in the expression
2192 	 * uniquely..
2193 	 */
2194 	char forming[1024];
2195 	int cnt_pmc, alloced_pmcs, i;
2196 	struct expression *at;
2197 	char **vars, *cmd;
2198 	size_t mal;
2199 
2200 	alloced_pmcs = cnt_pmc = 0;
2201 	/* first how many do we have */
2202 	at = exp;
2203 	while (at) {
2204 		if (at->type == TYPE_VALUE_PMC) {
2205 			cnt_pmc++;
2206 		}
2207 		at = at->next;
2208 	}
2209 	if (cnt_pmc == 0) {
2210 		printf("No PMC's in your expression -- nothing to do!!\n");
2211 		exit(0);
2212 	}
2213 	mal = cnt_pmc * sizeof(char *);
2214 	vars = malloc(mal);
2215 	if (vars == NULL) {
2216 		printf("No memory\n");
2217 		exit(-1);
2218 	}
2219 	memset(vars, 0, mal);
2220 	at = exp;
2221 	while (at) {
2222 		if (at->type == TYPE_VALUE_PMC) {
2223 			if(add_it_to(vars, alloced_pmcs, at->name)) {
2224 				alloced_pmcs++;
2225 			}
2226 		}
2227 		at = at->next;
2228 	}
2229 	/* Now we have a unique list in vars so create our command */
2230 	mal = 23; /*	"/usr/sbin/pmcstat -w 1"  + \0 */
2231 	for(i=0; i<alloced_pmcs; i++) {
2232 		mal += strlen(vars[i]) + 4;	/* var + " -s " */
2233 	}
2234 	cmd = malloc((mal+2));
2235 	if (cmd == NULL) {
2236 		printf("%s out of mem\n", __FUNCTION__);
2237 		exit(-1);
2238 	}
2239 	memset(cmd, 0, (mal+2));
2240 	strcpy(cmd, "/usr/sbin/pmcstat -w 1");
2241 	at = exp;
2242 	for(i=0; i<alloced_pmcs; i++) {
2243 		sprintf(forming, " -s %s", vars[i]);
2244 		strcat(cmd, forming);
2245 		free(vars[i]);
2246 		vars[i] = NULL;
2247 	}
2248 	free(vars);
2249 	return(cmd);
2250 }
2251 
2252 static int
2253 user_expr(struct counters *cpu, int pos)
2254 {
2255 	int ret;
2256 	double res;
2257 	struct counters *var;
2258 	struct expression *at;
2259 
2260 	at = master_exp;
2261 	while (at) {
2262 		if (at->type == TYPE_VALUE_PMC) {
2263 			var = find_counter(cpu, at->name);
2264 			if (var == NULL) {
2265 				printf("%s:Can't find counter %s?\n", __FUNCTION__, at->name);
2266 				exit(-1);
2267 			}
2268 			if (pos != -1) {
2269 				at->value = var->vals[pos] * 1.0;
2270 			} else {
2271 				at->value = var->sum * 1.0;
2272 			}
2273 		}
2274 		at = at->next;
2275 	}
2276 	res = run_expr(master_exp, 1, NULL);
2277 	ret = printf("%1.3f", res);
2278 	return(ret);
2279 }
2280 
2281 
2282 static void
2283 set_manual_exp(struct expression *exp)
2284 {
2285 	expression = user_expr;
2286 	command = build_command_for_exp(exp);
2287 	threshold = "User defined threshold";
2288 }
2289 
2290 static void
2291 run_tests(void)
2292 {
2293 	int i, lenout;
2294 	printf("Running tests on %d PMC's this may take some time\n", valid_pmc_cnt);
2295 	printf("------------------------------------------------------------------------\n");
2296 	for(i=0; i<valid_pmc_cnt; i++) {
2297 		lenout = printf("%s", valid_pmcs[i]);
2298 		fflush(stdout);
2299 		test_for_a_pmc(valid_pmcs[i], lenout);
2300 	}
2301 }
2302 static void
2303 list_all(void)
2304 {
2305 	int i, cnt, j;
2306 	printf("PMC                                               Abbreviation\n");
2307 	printf("--------------------------------------------------------------\n");
2308 	for(i=0; i<valid_pmc_cnt; i++) {
2309 		cnt = printf("%s", valid_pmcs[i]);
2310 		for(j=cnt; j<52; j++) {
2311 			printf(" ");
2312 		}
2313 		printf("%%%d\n", i);
2314 	}
2315 }
2316 
2317 
2318 int
2319 main(int argc, char **argv)
2320 {
2321 	int i, j, cnt;
2322 	char *filename=NULL;
2323 	char *name=NULL;
2324 	int help_only = 0;
2325 	int test_mode = 0;
2326 
2327 	get_cpuid_set();
2328 	memset(glob_cpu, 0, sizeof(glob_cpu));
2329 	while ((i = getopt(argc, argv, "LHhvm:i:?e:TE:")) != -1) {
2330 		switch (i) {
2331 		case 'L':
2332 			list_all();
2333 			return(0);
2334 		case 'H':
2335 			printf("**********************************\n");
2336 			explain_all();
2337 			printf("**********************************\n");
2338 			return(0);
2339 			break;
2340 		case 'T':
2341 			test_mode = 1;
2342 			break;
2343 		case 'E':
2344 			master_exp = parse_expression(optarg);
2345 			if (master_exp) {
2346 				set_manual_exp(master_exp);
2347 			}
2348 			break;
2349 		case 'e':
2350 			if (validate_expression(optarg)) {
2351 				printf("Unknown expression %s\n", optarg);
2352 				return(0);
2353 			}
2354 			name = optarg;
2355 			set_expression(optarg);
2356 			break;
2357 		case 'm':
2358 			max_to_collect = strtol(optarg, NULL, 0);
2359 			if (max_to_collect > MAX_COUNTER_SLOTS) {
2360 				/* You can't collect more than max in array */
2361 				max_to_collect = MAX_COUNTER_SLOTS;
2362 			}
2363 			break;
2364 		case 'v':
2365 			verbose++;
2366 			break;
2367 		case 'h':
2368 			help_only = 1;
2369 			break;
2370 		case 'i':
2371 			filename = optarg;
2372 			break;
2373 		case '?':
2374 		default:
2375 		use:
2376 			printf("Use %s [ -i inputfile -v -m max_to_collect -e expr -E -h -? -H]\n",
2377 			       argv[0]);
2378 			printf("-i inputfile -- use source as inputfile not stdin (if stdin collect)\n");
2379 			printf("-v -- verbose dump debug type things -- you don't want this\n");
2380 			printf("-m N -- maximum to collect is N measurments\n");
2381 			printf("-e expr-name -- Do expression expr-name\n");
2382 			printf("-E 'your expression' -- Do your expression\n");
2383 			printf("-h -- Don't do the expression I put in -e xxx just explain what it does and exit\n");
2384 			printf("-H -- Don't run anything, just explain all canned expressions\n");
2385 			printf("-T -- Test all PMC's defined by this processor\n");
2386 			return(0);
2387 			break;
2388 		};
2389 	}
2390 	if ((name == NULL) && (filename == NULL) && (test_mode == 0) && (master_exp == NULL)) {
2391 		printf("Without setting an expression we cannot dynamically gather information\n");
2392 		printf("you must supply a filename (and you probably want verbosity)\n");
2393 		goto use;
2394 	}
2395 	if (test_mode) {
2396 		run_tests();
2397 		return(0);
2398 	}
2399 	printf("*********************************\n");
2400 	if (master_exp == NULL) {
2401 		(*the_cpu.explain)(name);
2402 	} else {
2403 		printf("Examine your expression ");
2404 		print_exp(master_exp);
2405 		printf("User defined threshold\n");
2406 	}
2407 	if (help_only) {
2408 		return(0);
2409 	}
2410 	process_file(filename);
2411 	if (verbose >= 2) {
2412 		for (i=0; i<ncnts; i++) {
2413 			printf("Counter:%s cpu:%d index:%d\n",
2414 			       cnts[i].counter_name,
2415 			       cnts[i].cpu, i);
2416 			for(j=0; j<cnts[i].pos; j++) {
2417 				printf(" val - %ld\n", (long int)cnts[i].vals[j]);
2418 			}
2419 			printf(" sum - %ld\n", (long int)cnts[i].sum);
2420 		}
2421 	}
2422 	if (expression == NULL) {
2423 		return(0);
2424 	}
2425 	for(i=0, cnt=0; i<MAX_CPU; i++) {
2426 		if (glob_cpu[i]) {
2427 			do_expression(glob_cpu[i], -1);
2428 			cnt++;
2429 			if (cnt == cpu_count_out) {
2430 				printf("\n");
2431 				break;
2432 			} else {
2433 				printf("\t");
2434 			}
2435 		}
2436 	}
2437 	return(0);
2438 }
2439