1 /*-
2 * Copyright (c) 2014-2015 Netflix, Inc.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer,
9 * in this position and unchanged.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. The name of the author may not be used to endorse or promote products
14 * derived from this software without specific prior written permission
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27 #include <sys/types.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <unistd.h>
31 #include <string.h>
32 #include <strings.h>
33 #include <sys/errno.h>
34 #include <signal.h>
35 #include <sys/wait.h>
36 #include <getopt.h>
37 #include "eval_expr.h"
/*
 * File-scope tunables and shared state.
 * NOTE(review): the users of most of these are not visible in this part of
 * the file; descriptions below that are marked "presumably" need confirming.
 */
static int max_pmc_counters = 1;	/* presumably the number of PMCs usable at once -- TODO confirm */
static int run_all = 0;			/* presumably a "run every test" flag -- TODO confirm */

#define MAX_COUNTER_SLOTS 1024	/* Size of the vals[] array in struct counters */
#define MAX_NLEN 64		/* Size of the counter_name[] buffer in struct counters */
#define MAX_CPU 64		/* Number of entries in glob_cpu[] */
static int verbose = 0;

/* Process environment; handed to execve() by my_popen(). */
extern char **environ;
/*
 * Each global below is declared extern and then defined right away.
 * NOTE(review): presumably done to keep missing-declaration warnings quiet.
 */
extern struct expression *master_exp;
struct expression *master_exp=NULL;

#define PMC_INITIAL_ALLOC 512
extern char **valid_pmcs;
char **valid_pmcs = NULL;
extern int valid_pmc_cnt;
int valid_pmc_cnt=0;
extern int pmc_allocated_cnt;
int pmc_allocated_cnt=0;
57
58 /*
59 * The following two varients on popen and pclose with
60 * the cavet that they get you the PID so that you
61 * can supply it to pclose so it can send a SIGTERM
62 * to the process.
63 */
64 static FILE *
my_popen(const char * command,const char * dir,pid_t * p_pid)65 my_popen(const char *command, const char *dir, pid_t *p_pid)
66 {
67 FILE *io_out, *io_in;
68 int pdesin[2], pdesout[2];
69 char *argv[4];
70 pid_t pid;
71 char cmd[4];
72 char cmd2[1024];
73 char arg1[4];
74
75 if ((strcmp(dir, "r") != 0) &&
76 (strcmp(dir, "w") != 0)) {
77 errno = EINVAL;
78 return(NULL);
79 }
80 if (pipe(pdesin) < 0)
81 return (NULL);
82
83 if (pipe(pdesout) < 0) {
84 (void)close(pdesin[0]);
85 (void)close(pdesin[1]);
86 return (NULL);
87 }
88 strcpy(cmd, "sh");
89 strcpy(arg1, "-c");
90 strcpy(cmd2, command);
91 argv[0] = cmd;
92 argv[1] = arg1;
93 argv[2] = cmd2;
94 argv[3] = NULL;
95
96 switch (pid = fork()) {
97 case -1: /* Error. */
98 (void)close(pdesin[0]);
99 (void)close(pdesin[1]);
100 (void)close(pdesout[0]);
101 (void)close(pdesout[1]);
102 return (NULL);
103 /* NOTREACHED */
104 case 0: /* Child. */
105 /* Close out un-used sides */
106 (void)close(pdesin[1]);
107 (void)close(pdesout[0]);
108 /* Now prepare the stdin of the process */
109 close(0);
110 (void)dup(pdesin[0]);
111 (void)close(pdesin[0]);
112 /* Now prepare the stdout of the process */
113 close(1);
114 (void)dup(pdesout[1]);
115 /* And lets do stderr just in case */
116 close(2);
117 (void)dup(pdesout[1]);
118 (void)close(pdesout[1]);
119 /* Now run it */
120 execve("/bin/sh", argv, environ);
121 exit(127);
122 /* NOTREACHED */
123 }
124 /* Parent; assume fdopen can't fail. */
125 /* Store the pid */
126 *p_pid = pid;
127 if (strcmp(dir, "r") != 0) {
128 io_out = fdopen(pdesin[1], "w");
129 (void)close(pdesin[0]);
130 (void)close(pdesout[0]);
131 (void)close(pdesout[1]);
132 return(io_out);
133 } else {
134 /* Prepare the input stream */
135 io_in = fdopen(pdesout[0], "r");
136 (void)close(pdesout[1]);
137 (void)close(pdesin[0]);
138 (void)close(pdesin[1]);
139 return (io_in);
140 }
141 }
142
/*
 * my_pclose --
 *	Counterpart of my_popen(): close the stream, send SIGTERM to the
 *	child identified by the_pid, then wait for that child.  Nothing is
 *	returned; the collected exit status is discarded.
 */
static void
my_pclose(FILE *io, pid_t the_pid)
{
	int status;

	(void)fclose(io);
	/* Die if you are not dead! */
	kill(the_pid, SIGTERM);
	/* Reap the child, retrying whenever the wait is interrupted by a signal. */
	while (wait4(the_pid, &status, 0, NULL) == -1 && errno == EINTR)
		continue;
}
164
/*
 * Record for one named counter.  find_counter() walks these records through
 * next_cpu and matches on counter_name; the metric functions read either
 * vals[pos] or sum.
 */
struct counters {
	struct counters *next_cpu;	/* Next record in the list walked by find_counter() */
	char counter_name[MAX_NLEN]; /* Name of counter */
	int cpu; /* CPU we are on */
	int pos; /* Index we are filling to. */
	uint64_t vals[MAX_COUNTER_SLOTS]; /* Up to MAX_COUNTER_SLOTS stored values */
	uint64_t sum; /* Summary of entries */
};
173
/*
 * Global counter bookkeeping.  Each object is declared extern and then
 * defined right away.
 * NOTE(review): the code that fills these in is not visible here; the
 * per-item notes marked "presumably" need confirming.
 */
extern struct counters *glob_cpu[MAX_CPU];
struct counters *glob_cpu[MAX_CPU];	/* presumably one counter list per CPU -- TODO confirm */

extern struct counters *cnts;
struct counters *cnts=NULL;

extern int ncnts;
int ncnts=0;

/* Pointer to a function with the same signature as the metric functions below. */
extern int (*expression)(struct counters *, int);
int (*expression)(struct counters *, int);

static const char *threshold=NULL;
static const char *command;
188
/*
 * Describes one metric.
 * NOTE(review): the tables built from this struct are not visible here; the
 * field notes marked "presumably" are inferred from names and need confirming.
 */
struct cpu_entry {
	const char *name;	/* presumably the metric name passed to the explain_name_*() functions */
	const char *thresh;	/* presumably the threshold text for the metric */
	const char *command;	/* presumably the command line that collects the counters */
	int (*func)(struct counters *, int);	/* same signature as the metric functions in this file */
	int counters_required;
};
196
/*
 * Describes one CPU family: a label, an array of cpu_entry records and the
 * function that explains a metric name (explain_name_sb/ib/has all have the
 * required signature).
 */
struct cpu_type {
	char cputype[32];
	int number;		/* presumably the number of elements in ents -- TODO confirm */
	struct cpu_entry *ents;
	void (*explain)(const char *name);
};
/* The single global cpu_type instance; declared extern, then defined. */
extern struct cpu_type the_cpu;
struct cpu_type the_cpu;
205
/*
 * explain_name_sb --
 *	Print the formula behind the metric called "name", then a line giving
 *	the threshold at which the value is worth a look.  An unrecognized
 *	name prints "Unknown name:<name>" and uses "unknown entry" as the
 *	threshold text.
 *	NOTE(review): "sb" presumably stands for Sandy Bridge -- confirm.
 */
static void
explain_name_sb(const char *name)
{
	/* One row per known metric: lookup key, formula text, threshold text. */
	static const struct {
		const char *metric;
		const char *formula;
		const char *guidance;
	} help[] = {
		{ "allocstall1",
		  "Examine PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "allocstall2",
		  "Examine PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "br_miss",
		  "Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		/* The opening parenthesis was missing from this message. */
		{ "splitload",
		  "Examine (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "cache2",
		  "Examine ((MEM_LOAD_RETIRED.L3_HIT * 26) + \n"
		  " (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) + \n"
		  " (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P\n"
		  "**Note we have it labeled MEM_LOAD_UOPS_RETIRED.LLC_HIT not MEM_LOAD_RETIRED.L3_HIT\n",
		  "thresh >= .2" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  " / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "frontendstall",
		  "Examine IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4)\n",
		  "thresh >= .15" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  " MACHINE_CLEARS.SMC + \n"
		  " MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (CPU_CLK_UNHALTED.THREAD_P * 4)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .9" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "dtlbmissstore",
		  "Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"
		  " / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .05" },
	};
	const size_t nhelp = sizeof(help) / sizeof(help[0]);
	const char *mythresh = "unknown entry";
	size_t i;

	for (i = 0; i < nhelp; i++) {
		if (strcmp(name, help[i].metric) == 0)
			break;
	}
	if (i < nhelp) {
		printf("%s", help[i].formula);
		mythresh = help[i].guidance;
	} else {
		printf("Unknown name:%s\n", name);
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
284
/*
 * explain_name_ib --
 *	Print the formula behind the metric called "name", then a line giving
 *	the threshold at which the value is worth a look.  An unrecognized
 *	name prints "Unknown name:<name>" and uses "unknown entry" as the
 *	threshold text.
 *	NOTE(review): "ib" presumably stands for Ivy Bridge -- confirm.
 */
static void
explain_name_ib(const char *name)
{
	/* One row per known metric: lookup key, formula text, threshold text. */
	static const struct {
		const char *metric;
		const char *formula;
		const char *guidance;
	} help[] = {
		{ "br_miss",
		  "Examine ((BR_MISP_RETIRED.ALL_BRANCHES /(BR_MISP_RETIRED.ALL_BRANCHES +\n"
		  " MACHINE_CLEAR.COUNT) * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES)\n"
		  "/ (4 * CPU_CLK_UNHALTED.THREAD))))\n",
		  "thresh >= .2" },
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .9" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "cache2",
		  "Examine (MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "itlbmiss",
		  "Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "icachemiss",
		  "Examine (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "lcpstall",
		  "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "datashare",
		  "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "splitload",
		  "Examine ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) *\n"
		  " LD_BLOCKS.NO_SR)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  " / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "dtlbmissstore",
		  "Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"
		  " / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .05" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  " MACHINE_CLEARS.SMC + \n"
		  " MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
	};
	const size_t nhelp = sizeof(help) / sizeof(help[0]);
	const char *mythresh = "unknown entry";
	size_t i;

	for (i = 0; i < nhelp; i++) {
		if (strcmp(name, help[i].metric) == 0)
			break;
	}
	if (i < nhelp) {
		printf("%s", help[i].formula);
		mythresh = help[i].guidance;
	} else {
		printf("Unknown name:%s\n", name);
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
366
367
/*
 * explain_name_has --
 *	Print the formula behind the metric called "name", then a line giving
 *	the threshold at which the value is worth a look.  An unrecognized
 *	name prints "Unknown name:<name>" and uses "unknown entry" as the
 *	threshold text.
 *	NOTE(review): "has" presumably stands for Haswell -- confirm.
 */
static void
explain_name_has(const char *name)
{
	/* One row per known metric: lookup key, formula text, threshold text. */
	static const struct {
		const char *metric;
		const char *formula;
		const char *guidance;
	} help[] = {
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .75" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "itlbmiss",
		  "Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "icachemiss",
		  "Examine (36 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "lcpstall",
		  "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "cache2",
		  "Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \n"
		  " (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) + \n"
		  " (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))\n"
		  " / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "datashare",
		  "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "splitload",
		  "Examine (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  " / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "br_miss",
		  "Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD\n",
		  "thresh >= .2" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  " MACHINE_CLEARS.SMC + \n"
		  " MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
	};
	const size_t nhelp = sizeof(help) / sizeof(help[0]);
	const char *mythresh = "unknown entry";
	size_t i;

	for (i = 0; i < nhelp; i++) {
		if (strcmp(name, help[i].metric) == 0)
			break;
	}
	if (i < nhelp) {
		printf("%s", help[i].formula);
		mythresh = help[i].guidance;
	} else {
		printf("Unknown name:%s\n", name);
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
445
446
447
448 static struct counters *
find_counter(struct counters * base,const char * name)449 find_counter(struct counters *base, const char *name)
450 {
451 struct counters *at;
452 int len;
453
454 at = base;
455 len = strlen(name);
456 while(at) {
457 if (strncmp(at->counter_name, name, len) == 0) {
458 return(at);
459 }
460 at = at->next_cpu;
461 }
462 printf("Can't find counter %s\n", name);
463 printf("We have:\n");
464 at = base;
465 while(at) {
466 printf("- %s\n", at->counter_name);
467 at = at->next_cpu;
468 }
469 exit(-1);
470 }
471
472 static int
allocstall1(struct counters * cpu,int pos)473 allocstall1(struct counters *cpu, int pos)
474 {
475 /* 1 - PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW/CPU_CLK_UNHALTED.THREAD_P (thresh > .05)*/
476 int ret;
477 struct counters *partial;
478 struct counters *unhalt;
479 double un, par, res;
480 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
481 partial = find_counter(cpu, "PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW");
482 if (pos != -1) {
483 par = partial->vals[pos] * 1.0;
484 un = unhalt->vals[pos] * 1.0;
485 } else {
486 par = partial->sum * 1.0;
487 un = unhalt->sum * 1.0;
488 }
489 res = par/un;
490 ret = printf("%1.3f", res);
491 return(ret);
492 }
493
494 static int
allocstall2(struct counters * cpu,int pos)495 allocstall2(struct counters *cpu, int pos)
496 {
497 /* 2 - PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
498 int ret;
499 struct counters *partial;
500 struct counters *unhalt;
501 double un, par, res;
502 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
503 partial = find_counter(cpu, "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP");
504 if (pos != -1) {
505 par = partial->vals[pos] * 1.0;
506 un = unhalt->vals[pos] * 1.0;
507 } else {
508 par = partial->sum * 1.0;
509 un = unhalt->sum * 1.0;
510 }
511 res = par/un;
512 ret = printf("%1.3f", res);
513 return(ret);
514 }
515
516 static int
br_mispredict(struct counters * cpu,int pos)517 br_mispredict(struct counters *cpu, int pos)
518 {
519 struct counters *brctr;
520 struct counters *unhalt;
521 int ret;
522 /* 3 - (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
523 double br, un, con, res;
524 con = 20.0;
525
526 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
527 brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
528 if (pos != -1) {
529 br = brctr->vals[pos] * 1.0;
530 un = unhalt->vals[pos] * 1.0;
531 } else {
532 br = brctr->sum * 1.0;
533 un = unhalt->sum * 1.0;
534 }
535 res = (con * br)/un;
536 ret = printf("%1.3f", res);
537 return(ret);
538 }
539
540 static int
br_mispredictib(struct counters * cpu,int pos)541 br_mispredictib(struct counters *cpu, int pos)
542 {
543 struct counters *brctr;
544 struct counters *unhalt;
545 struct counters *clear, *clear2, *clear3;
546 struct counters *uops;
547 struct counters *recv;
548 struct counters *iss;
549 /* "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",*/
550 int ret;
551 /*
552 * (BR_MISP_RETIRED.ALL_BRANCHES /
553 * (BR_MISP_RETIRED.ALL_BRANCHES +
554 * MACHINE_CLEAR.COUNT) *
555 * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) / (4 * CPU_CLK_UNHALTED.THREAD)))
556 *
557 */
558 double br, cl, cl2, cl3, uo, re, un, con, res, is;
559 con = 4.0;
560
561 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
562 brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
563 clear = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
564 clear2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
565 clear3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
566 uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
567 iss = find_counter(cpu, "UOPS_ISSUED.ANY");
568 recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES");
569 if (pos != -1) {
570 br = brctr->vals[pos] * 1.0;
571 cl = clear->vals[pos] * 1.0;
572 cl2 = clear2->vals[pos] * 1.0;
573 cl3 = clear3->vals[pos] * 1.0;
574 uo = uops->vals[pos] * 1.0;
575 re = recv->vals[pos] * 1.0;
576 is = iss->vals[pos] * 1.0;
577 un = unhalt->vals[pos] * 1.0;
578 } else {
579 br = brctr->sum * 1.0;
580 cl = clear->sum * 1.0;
581 cl2 = clear2->sum * 1.0;
582 cl3 = clear3->sum * 1.0;
583 uo = uops->sum * 1.0;
584 re = recv->sum * 1.0;
585 is = iss->sum * 1.0;
586 un = unhalt->sum * 1.0;
587 }
588 res = (br/(br + cl + cl2 + cl3) * ((is - uo + con * re) / (con * un)));
589 ret = printf("%1.3f", res);
590 return(ret);
591 }
592
593
594 static int
br_mispredict_broad(struct counters * cpu,int pos)595 br_mispredict_broad(struct counters *cpu, int pos)
596 {
597 struct counters *brctr;
598 struct counters *unhalt;
599 struct counters *clear;
600 struct counters *uops;
601 struct counters *uops_ret;
602 struct counters *recv;
603 int ret;
604 double br, cl, uo, uo_r, re, con, un, res;
605
606 con = 4.0;
607
608 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
609 brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
610 clear = find_counter(cpu, "MACHINE_CLEARS.CYCLES");
611 uops = find_counter(cpu, "UOPS_ISSUED.ANY");
612 uops_ret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
613 recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES");
614
615 if (pos != -1) {
616 un = unhalt->vals[pos] * 1.0;
617 br = brctr->vals[pos] * 1.0;
618 cl = clear->vals[pos] * 1.0;
619 uo = uops->vals[pos] * 1.0;
620 uo_r = uops_ret->vals[pos] * 1.0;
621 re = recv->vals[pos] * 1.0;
622 } else {
623 un = unhalt->sum * 1.0;
624 br = brctr->sum * 1.0;
625 cl = clear->sum * 1.0;
626 uo = uops->sum * 1.0;
627 uo_r = uops_ret->sum * 1.0;
628 re = recv->sum * 1.0;
629 }
630 res = br / (br + cl) * (uo - uo_r + con * re) / (un * con);
631 ret = printf("%1.3f", res);
632 return(ret);
633 }
634
635 static int
splitloadib(struct counters * cpu,int pos)636 splitloadib(struct counters *cpu, int pos)
637 {
638 int ret;
639 struct counters *mem;
640 struct counters *l1d, *ldblock;
641 struct counters *unhalt;
642 double un, memd, res, l1, ldb;
643 /*
644 * ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) * LD_BLOCKS.NO_SR) / CPU_CLK_UNHALTED.THREAD_P
645 * "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
646 */
647
648 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
649 mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L1_MISS");
650 l1d = find_counter(cpu, "L1D_PEND_MISS.PENDING");
651 ldblock = find_counter(cpu, "LD_BLOCKS.NO_SR");
652 if (pos != -1) {
653 memd = mem->vals[pos] * 1.0;
654 l1 = l1d->vals[pos] * 1.0;
655 ldb = ldblock->vals[pos] * 1.0;
656 un = unhalt->vals[pos] * 1.0;
657 } else {
658 memd = mem->sum * 1.0;
659 l1 = l1d->sum * 1.0;
660 ldb = ldblock->sum * 1.0;
661 un = unhalt->sum * 1.0;
662 }
663 res = ((l1 / memd) * ldb)/un;
664 ret = printf("%1.3f", res);
665 return(ret);
666 }
667
668
669 static int
splitload(struct counters * cpu,int pos)670 splitload(struct counters *cpu, int pos)
671 {
672 int ret;
673 struct counters *mem;
674 struct counters *unhalt;
675 double con, un, memd, res;
676 /* 4 - (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/
677
678 con = 5.0;
679 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
680 mem = find_counter(cpu, "MEM_UOPS_RETIRED.SPLIT_LOADS");
681 if (pos != -1) {
682 memd = mem->vals[pos] * 1.0;
683 un = unhalt->vals[pos] * 1.0;
684 } else {
685 memd = mem->sum * 1.0;
686 un = unhalt->sum * 1.0;
687 }
688 res = (memd * con)/un;
689 ret = printf("%1.3f", res);
690 return(ret);
691 }
692
693
694 static int
splitload_sb(struct counters * cpu,int pos)695 splitload_sb(struct counters *cpu, int pos)
696 {
697 int ret;
698 struct counters *mem;
699 struct counters *unhalt;
700 double con, un, memd, res;
701 /* 4 - (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/
702
703 con = 5.0;
704 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
705 mem = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_LOADS");
706 if (pos != -1) {
707 memd = mem->vals[pos] * 1.0;
708 un = unhalt->vals[pos] * 1.0;
709 } else {
710 memd = mem->sum * 1.0;
711 un = unhalt->sum * 1.0;
712 }
713 res = (memd * con)/un;
714 ret = printf("%1.3f", res);
715 return(ret);
716 }
717
718
719 static int
splitstore_sb(struct counters * cpu,int pos)720 splitstore_sb(struct counters *cpu, int pos)
721 {
722 /* 5 - MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES (thresh > 0.01) */
723 int ret;
724 struct counters *mem_split;
725 struct counters *mem_stores;
726 double memsplit, memstore, res;
727 mem_split = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_STORES");
728 mem_stores = find_counter(cpu, "MEM_UOP_RETIRED.ALL_STORES");
729 if (pos != -1) {
730 memsplit = mem_split->vals[pos] * 1.0;
731 memstore = mem_stores->vals[pos] * 1.0;
732 } else {
733 memsplit = mem_split->sum * 1.0;
734 memstore = mem_stores->sum * 1.0;
735 }
736 res = memsplit/memstore;
737 ret = printf("%1.3f", res);
738 return(ret);
739 }
740
741
742
743 static int
splitstore(struct counters * cpu,int pos)744 splitstore(struct counters *cpu, int pos)
745 {
746 /* 5 - MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES (thresh > 0.01) */
747 int ret;
748 struct counters *mem_split;
749 struct counters *mem_stores;
750 double memsplit, memstore, res;
751 mem_split = find_counter(cpu, "MEM_UOPS_RETIRED.SPLIT_STORES");
752 mem_stores = find_counter(cpu, "MEM_UOPS_RETIRED.ALL_STORES");
753 if (pos != -1) {
754 memsplit = mem_split->vals[pos] * 1.0;
755 memstore = mem_stores->vals[pos] * 1.0;
756 } else {
757 memsplit = mem_split->sum * 1.0;
758 memstore = mem_stores->sum * 1.0;
759 }
760 res = memsplit/memstore;
761 ret = printf("%1.3f", res);
762 return(ret);
763 }
764
765
766 static int
contested(struct counters * cpu,int pos)767 contested(struct counters *cpu, int pos)
768 {
769 /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
770 int ret;
771 struct counters *mem;
772 struct counters *unhalt;
773 double con, un, memd, res;
774
775 con = 60.0;
776 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
777 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
778 if (pos != -1) {
779 memd = mem->vals[pos] * 1.0;
780 un = unhalt->vals[pos] * 1.0;
781 } else {
782 memd = mem->sum * 1.0;
783 un = unhalt->sum * 1.0;
784 }
785 res = (memd * con)/un;
786 ret = printf("%1.3f", res);
787 return(ret);
788 }
789
790 static int
contested_has(struct counters * cpu,int pos)791 contested_has(struct counters *cpu, int pos)
792 {
793 /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
794 int ret;
795 struct counters *mem;
796 struct counters *unhalt;
797 double con, un, memd, res;
798
799 con = 84.0;
800 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
801 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
802 if (pos != -1) {
803 memd = mem->vals[pos] * 1.0;
804 un = unhalt->vals[pos] * 1.0;
805 } else {
806 memd = mem->sum * 1.0;
807 un = unhalt->sum * 1.0;
808 }
809 res = (memd * con)/un;
810 ret = printf("%1.3f", res);
811 return(ret);
812 }
813
814 static int
contestedbroad(struct counters * cpu,int pos)815 contestedbroad(struct counters *cpu, int pos)
816 {
817 /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
818 int ret;
819 struct counters *mem;
820 struct counters *mem2;
821 struct counters *unhalt;
822 double con, un, memd, memtoo, res;
823
824 con = 84.0;
825 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
826 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
827 mem2 = find_counter(cpu,"MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS");
828
829 if (pos != -1) {
830 memd = mem->vals[pos] * 1.0;
831 memtoo = mem2->vals[pos] * 1.0;
832 un = unhalt->vals[pos] * 1.0;
833 } else {
834 memd = mem->sum * 1.0;
835 memtoo = mem2->sum * 1.0;
836 un = unhalt->sum * 1.0;
837 }
838 res = ((memd * con) + memtoo)/un;
839 ret = printf("%1.3f", res);
840 return(ret);
841 }
842
843
844 static int
blockstoreforward(struct counters * cpu,int pos)845 blockstoreforward(struct counters *cpu, int pos)
846 {
847 /* 7 - (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .05)*/
848 int ret;
849 struct counters *ldb;
850 struct counters *unhalt;
851 double con, un, ld, res;
852
853 con = 13.0;
854 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
855 ldb = find_counter(cpu, "LD_BLOCKS_STORE_FORWARD");
856 if (pos != -1) {
857 ld = ldb->vals[pos] * 1.0;
858 un = unhalt->vals[pos] * 1.0;
859 } else {
860 ld = ldb->sum * 1.0;
861 un = unhalt->sum * 1.0;
862 }
863 res = (ld * con)/un;
864 ret = printf("%1.3f", res);
865 return(ret);
866 }
867
868 static int
cache2(struct counters * cpu,int pos)869 cache2(struct counters *cpu, int pos)
870 {
871 /* ** Suspect ***
872 * 8 - ((MEM_LOAD_RETIRED.L3_HIT * 26) + (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) +
873 * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
874 */
875 int ret;
876 struct counters *mem1, *mem2, *mem3;
877 struct counters *unhalt;
878 double con1, con2, con3, un, me_1, me_2, me_3, res;
879
880 con1 = 26.0;
881 con2 = 43.0;
882 con3 = 60.0;
883 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
884 /* Call for MEM_LOAD_RETIRED.L3_HIT possibly MEM_LOAD_UOPS_RETIRED.LLC_HIT ?*/
885 mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
886 mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
887 mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
888 if (pos != -1) {
889 me_1 = mem1->vals[pos] * 1.0;
890 me_2 = mem2->vals[pos] * 1.0;
891 me_3 = mem3->vals[pos] * 1.0;
892 un = unhalt->vals[pos] * 1.0;
893 } else {
894 me_1 = mem1->sum * 1.0;
895 me_2 = mem2->sum * 1.0;
896 me_3 = mem3->sum * 1.0;
897 un = unhalt->sum * 1.0;
898 }
899 res = ((me_1 * con1) + (me_2 * con2) + (me_3 * con3))/un;
900 ret = printf("%1.3f", res);
901 return(ret);
902 }
903
904 static int
datasharing(struct counters * cpu,int pos)905 datasharing(struct counters *cpu, int pos)
906 {
907 /*
908 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
909 */
910 int ret;
911 struct counters *mem;
912 struct counters *unhalt;
913 double con, res, me, un;
914
915 con = 43.0;
916 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
917 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
918 if (pos != -1) {
919 me = mem->vals[pos] * 1.0;
920 un = unhalt->vals[pos] * 1.0;
921 } else {
922 me = mem->sum * 1.0;
923 un = unhalt->sum * 1.0;
924 }
925 res = (me * con)/un;
926 ret = printf("%1.3f", res);
927 return(ret);
928
929 }
930
931
932 static int
datasharing_has(struct counters * cpu,int pos)933 datasharing_has(struct counters *cpu, int pos)
934 {
935 /*
936 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
937 */
938 int ret;
939 struct counters *mem;
940 struct counters *unhalt;
941 double con, res, me, un;
942
943 con = 72.0;
944 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
945 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
946 if (pos != -1) {
947 me = mem->vals[pos] * 1.0;
948 un = unhalt->vals[pos] * 1.0;
949 } else {
950 me = mem->sum * 1.0;
951 un = unhalt->sum * 1.0;
952 }
953 res = (me * con)/un;
954 ret = printf("%1.3f", res);
955 return(ret);
956
957 }
958
959
960 static int
cache2ib(struct counters * cpu,int pos)961 cache2ib(struct counters *cpu, int pos)
962 {
963 /*
964 * (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
965 */
966 int ret;
967 struct counters *mem;
968 struct counters *unhalt;
969 double con, un, me, res;
970
971 con = 29.0;
972 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
973 mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
974 if (pos != -1) {
975 me = mem->vals[pos] * 1.0;
976 un = unhalt->vals[pos] * 1.0;
977 } else {
978 me = mem->sum * 1.0;
979 un = unhalt->sum * 1.0;
980 }
981 res = (con * me)/un;
982 ret = printf("%1.3f", res);
983 return(ret);
984 }
985
986 static int
cache2has(struct counters * cpu,int pos)987 cache2has(struct counters *cpu, int pos)
988 {
989 /*
990 * Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \
991 * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) +
992 * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))
993 * / CPU_CLK_UNHALTED.THREAD_P
994 */
995 int ret;
996 struct counters *mem1, *mem2, *mem3;
997 struct counters *unhalt;
998 double con1, con2, con3, un, me1, me2, me3, res;
999
1000 con1 = 36.0;
1001 con2 = 72.0;
1002 con3 = 84.0;
1003 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1004 mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
1005 mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
1006 mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
1007 if (pos != -1) {
1008 me1 = mem1->vals[pos] * 1.0;
1009 me2 = mem2->vals[pos] * 1.0;
1010 me3 = mem3->vals[pos] * 1.0;
1011 un = unhalt->vals[pos] * 1.0;
1012 } else {
1013 me1 = mem1->sum * 1.0;
1014 me2 = mem2->sum * 1.0;
1015 me3 = mem3->sum * 1.0;
1016 un = unhalt->sum * 1.0;
1017 }
1018 res = ((me1 * con1) + (me2 * con2) + (me3 * con3))/un;
1019 ret = printf("%1.3f", res);
1020 return(ret);
1021 }
1022
1023
1024 static int
cache2broad(struct counters * cpu,int pos)1025 cache2broad(struct counters *cpu, int pos)
1026 {
1027 /*
1028 * (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
1029 */
1030 int ret;
1031 struct counters *mem;
1032 struct counters *unhalt;
1033 double con, un, me, res;
1034
1035 con = 36.0;
1036 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1037 mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L3_HIT");
1038 if (pos != -1) {
1039 me = mem->vals[pos] * 1.0;
1040 un = unhalt->vals[pos] * 1.0;
1041 } else {
1042 me = mem->sum * 1.0;
1043 un = unhalt->sum * 1.0;
1044 }
1045 res = (con * me)/un;
1046 ret = printf("%1.3f", res);
1047 return(ret);
1048 }
1049
1050
1051 static int
cache1(struct counters * cpu,int pos)1052 cache1(struct counters *cpu, int pos)
1053 {
1054 /* 9 - (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
1055 int ret;
1056 struct counters *mem;
1057 struct counters *unhalt;
1058 double con, un, me, res;
1059
1060 con = 180.0;
1061 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1062 mem = find_counter(cpu, "MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS");
1063 if (pos != -1) {
1064 me = mem->vals[pos] * 1.0;
1065 un = unhalt->vals[pos] * 1.0;
1066 } else {
1067 me = mem->sum * 1.0;
1068 un = unhalt->sum * 1.0;
1069 }
1070 res = (me * con)/un;
1071 ret = printf("%1.3f", res);
1072 return(ret);
1073 }
1074
1075 static int
cache1ib(struct counters * cpu,int pos)1076 cache1ib(struct counters *cpu, int pos)
1077 {
1078 /* 9 - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
1079 int ret;
1080 struct counters *mem;
1081 struct counters *unhalt;
1082 double con, un, me, res;
1083
1084 con = 180.0;
1085 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1086 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM");
1087 if (pos != -1) {
1088 me = mem->vals[pos] * 1.0;
1089 un = unhalt->vals[pos] * 1.0;
1090 } else {
1091 me = mem->sum * 1.0;
1092 un = unhalt->sum * 1.0;
1093 }
1094 res = (me * con)/un;
1095 ret = printf("%1.3f", res);
1096 return(ret);
1097 }
1098
1099
1100 static int
cache1broad(struct counters * cpu,int pos)1101 cache1broad(struct counters *cpu, int pos)
1102 {
1103 /* 9 - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
1104 int ret;
1105 struct counters *mem;
1106 struct counters *unhalt;
1107 double con, un, me, res;
1108
1109 con = 180.0;
1110 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1111 mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L3_MISS");
1112 if (pos != -1) {
1113 me = mem->vals[pos] * 1.0;
1114 un = unhalt->vals[pos] * 1.0;
1115 } else {
1116 me = mem->sum * 1.0;
1117 un = unhalt->sum * 1.0;
1118 }
1119 res = (me * con)/un;
1120 ret = printf("%1.3f", res);
1121 return(ret);
1122 }
1123
1124
1125 static int
dtlb_missload(struct counters * cpu,int pos)1126 dtlb_missload(struct counters *cpu, int pos)
1127 {
1128 /* 10 - ((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P (t >=.1) */
1129 int ret;
1130 struct counters *dtlb_m, *dtlb_d;
1131 struct counters *unhalt;
1132 double con, un, d1, d2, res;
1133
1134 con = 7.0;
1135 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1136 dtlb_m = find_counter(cpu, "DTLB_LOAD_MISSES.STLB_HIT");
1137 dtlb_d = find_counter(cpu, "DTLB_LOAD_MISSES.WALK_DURATION");
1138 if (pos != -1) {
1139 d1 = dtlb_m->vals[pos] * 1.0;
1140 d2 = dtlb_d->vals[pos] * 1.0;
1141 un = unhalt->vals[pos] * 1.0;
1142 } else {
1143 d1 = dtlb_m->sum * 1.0;
1144 d2 = dtlb_d->sum * 1.0;
1145 un = unhalt->sum * 1.0;
1146 }
1147 res = ((d1 * con) + d2)/un;
1148 ret = printf("%1.3f", res);
1149 return(ret);
1150 }
1151
1152 static int
dtlb_missstore(struct counters * cpu,int pos)1153 dtlb_missstore(struct counters *cpu, int pos)
1154 {
1155 /*
1156 * ((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION) /
1157 * CPU_CLK_UNHALTED.THREAD_P (t >= .1)
1158 */
1159 int ret;
1160 struct counters *dtsb_m, *dtsb_d;
1161 struct counters *unhalt;
1162 double con, un, d1, d2, res;
1163
1164 con = 7.0;
1165 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1166 dtsb_m = find_counter(cpu, "DTLB_STORE_MISSES.STLB_HIT");
1167 dtsb_d = find_counter(cpu, "DTLB_STORE_MISSES.WALK_DURATION");
1168 if (pos != -1) {
1169 d1 = dtsb_m->vals[pos] * 1.0;
1170 d2 = dtsb_d->vals[pos] * 1.0;
1171 un = unhalt->vals[pos] * 1.0;
1172 } else {
1173 d1 = dtsb_m->sum * 1.0;
1174 d2 = dtsb_d->sum * 1.0;
1175 un = unhalt->sum * 1.0;
1176 }
1177 res = ((d1 * con) + d2)/un;
1178 ret = printf("%1.3f", res);
1179 return(ret);
1180 }
1181
1182 static int
itlb_miss(struct counters * cpu,int pos)1183 itlb_miss(struct counters *cpu, int pos)
1184 {
1185 /* ITLB_MISSES.WALK_DURATION / CPU_CLK_UNTHREAD_P IB */
1186 int ret;
1187 struct counters *itlb;
1188 struct counters *unhalt;
1189 double un, d1, res;
1190
1191 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1192 itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1193 if (pos != -1) {
1194 d1 = itlb->vals[pos] * 1.0;
1195 un = unhalt->vals[pos] * 1.0;
1196 } else {
1197 d1 = itlb->sum * 1.0;
1198 un = unhalt->sum * 1.0;
1199 }
1200 res = d1/un;
1201 ret = printf("%1.3f", res);
1202 return(ret);
1203 }
1204
1205
1206 static int
itlb_miss_broad(struct counters * cpu,int pos)1207 itlb_miss_broad(struct counters *cpu, int pos)
1208 {
1209 /* (7 * ITLB_MISSES.STLB_HIT_4K + ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNTHREAD_P */
1210 int ret;
1211 struct counters *itlb;
1212 struct counters *unhalt;
1213 struct counters *four_k;
1214 double un, d1, res, k;
1215
1216 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1217 itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1218 four_k = find_counter(cpu, "ITLB_MISSES.STLB_HIT_4K");
1219 if (pos != -1) {
1220 d1 = itlb->vals[pos] * 1.0;
1221 un = unhalt->vals[pos] * 1.0;
1222 k = four_k->vals[pos] * 1.0;
1223 } else {
1224 d1 = itlb->sum * 1.0;
1225 un = unhalt->sum * 1.0;
1226 k = four_k->sum * 1.0;
1227 }
1228 res = (7.0 * k + d1)/un;
1229 ret = printf("%1.3f", res);
1230 return(ret);
1231 }
1232
1233
1234 static int
icache_miss(struct counters * cpu,int pos)1235 icache_miss(struct counters *cpu, int pos)
1236 {
1237 /* (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P IB */
1238
1239 int ret;
1240 struct counters *itlb, *icache;
1241 struct counters *unhalt;
1242 double un, d1, ic, res;
1243
1244 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1245 itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1246 icache = find_counter(cpu, "ICACHE.IFETCH_STALL");
1247 if (pos != -1) {
1248 d1 = itlb->vals[pos] * 1.0;
1249 ic = icache->vals[pos] * 1.0;
1250 un = unhalt->vals[pos] * 1.0;
1251 } else {
1252 d1 = itlb->sum * 1.0;
1253 ic = icache->sum * 1.0;
1254 un = unhalt->sum * 1.0;
1255 }
1256 res = (ic-d1)/un;
1257 ret = printf("%1.3f", res);
1258 return(ret);
1259
1260 }
1261
1262 static int
icache_miss_has(struct counters * cpu,int pos)1263 icache_miss_has(struct counters *cpu, int pos)
1264 {
1265 /* (36 * ICACHE.MISSES) / CPU_CLK_UNHALTED.THREAD_P */
1266
1267 int ret;
1268 struct counters *icache;
1269 struct counters *unhalt;
1270 double un, con, ic, res;
1271
1272 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1273 icache = find_counter(cpu, "ICACHE.MISSES");
1274 con = 36.0;
1275 if (pos != -1) {
1276 ic = icache->vals[pos] * 1.0;
1277 un = unhalt->vals[pos] * 1.0;
1278 } else {
1279 ic = icache->sum * 1.0;
1280 un = unhalt->sum * 1.0;
1281 }
1282 res = (con * ic)/un;
1283 ret = printf("%1.3f", res);
1284 return(ret);
1285
1286 }
1287
1288 static int
lcp_stall(struct counters * cpu,int pos)1289 lcp_stall(struct counters *cpu, int pos)
1290 {
1291 /* ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P IB */
1292 int ret;
1293 struct counters *ild;
1294 struct counters *unhalt;
1295 double un, d1, res;
1296
1297 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1298 ild = find_counter(cpu, "ILD_STALL.LCP");
1299 if (pos != -1) {
1300 d1 = ild->vals[pos] * 1.0;
1301 un = unhalt->vals[pos] * 1.0;
1302 } else {
1303 d1 = ild->sum * 1.0;
1304 un = unhalt->sum * 1.0;
1305 }
1306 res = d1/un;
1307 ret = printf("%1.3f", res);
1308 return(ret);
1309
1310 }
1311
1312
1313 static int
frontendstall(struct counters * cpu,int pos)1314 frontendstall(struct counters *cpu, int pos)
1315 {
1316 /* 12 - IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4) (thresh >= .15) */
1317 int ret;
1318 struct counters *idq;
1319 struct counters *unhalt;
1320 double con, un, id, res;
1321
1322 con = 4.0;
1323 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1324 idq = find_counter(cpu, "IDQ_UOPS_NOT_DELIVERED.CORE");
1325 if (pos != -1) {
1326 id = idq->vals[pos] * 1.0;
1327 un = unhalt->vals[pos] * 1.0;
1328 } else {
1329 id = idq->sum * 1.0;
1330 un = unhalt->sum * 1.0;
1331 }
1332 res = id/(un * con);
1333 ret = printf("%1.3f", res);
1334 return(ret);
1335 }
1336
1337 static int
clears(struct counters * cpu,int pos)1338 clears(struct counters *cpu, int pos)
1339 {
1340 /* 13 - ((MACHINE_CLEARS.MEMORY_ORDERING + MACHINE_CLEARS.SMC + MACHINE_CLEARS.MASKMOV ) * 100 )
1341 * / CPU_CLK_UNHALTED.THREAD_P (thresh >= .02)*/
1342
1343 int ret;
1344 struct counters *clr1, *clr2, *clr3;
1345 struct counters *unhalt;
1346 double con, un, cl1, cl2, cl3, res;
1347
1348 con = 100.0;
1349 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1350 clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
1351 clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
1352 clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
1353
1354 if (pos != -1) {
1355 cl1 = clr1->vals[pos] * 1.0;
1356 cl2 = clr2->vals[pos] * 1.0;
1357 cl3 = clr3->vals[pos] * 1.0;
1358 un = unhalt->vals[pos] * 1.0;
1359 } else {
1360 cl1 = clr1->sum * 1.0;
1361 cl2 = clr2->sum * 1.0;
1362 cl3 = clr3->sum * 1.0;
1363 un = unhalt->sum * 1.0;
1364 }
1365 res = ((cl1 + cl2 + cl3) * con)/un;
1366 ret = printf("%1.3f", res);
1367 return(ret);
1368 }
1369
1370
1371
1372 static int
clears_broad(struct counters * cpu,int pos)1373 clears_broad(struct counters *cpu, int pos)
1374 {
1375 int ret;
1376 struct counters *clr1, *clr2, *clr3, *cyc;
1377 struct counters *unhalt;
1378 double con, un, cl1, cl2, cl3, cy, res;
1379
1380 con = 100.0;
1381 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1382 clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
1383 clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
1384 clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
1385 cyc = find_counter(cpu, "MACHINE_CLEARS.CYCLES");
1386 if (pos != -1) {
1387 cl1 = clr1->vals[pos] * 1.0;
1388 cl2 = clr2->vals[pos] * 1.0;
1389 cl3 = clr3->vals[pos] * 1.0;
1390 cy = cyc->vals[pos] * 1.0;
1391 un = unhalt->vals[pos] * 1.0;
1392 } else {
1393 cl1 = clr1->sum * 1.0;
1394 cl2 = clr2->sum * 1.0;
1395 cl3 = clr3->sum * 1.0;
1396 cy = cyc->sum * 1.0;
1397 un = unhalt->sum * 1.0;
1398 }
1399 /* Formula not listed but extrapulated to add the cy ?? */
1400 res = ((cl1 + cl2 + cl3 + cy) * con)/un;
1401 ret = printf("%1.3f", res);
1402 return(ret);
1403 }
1404
1405
1406
1407
1408
1409 static int
microassist(struct counters * cpu,int pos)1410 microassist(struct counters *cpu, int pos)
1411 {
1412 /* 14 - IDQ.MS_CYCLES / CPU_CLK_UNHALTED.THREAD_P (thresh > .05) */
1413 int ret;
1414 struct counters *idq;
1415 struct counters *unhalt;
1416 double un, id, res, con;
1417
1418 con = 4.0;
1419 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1420 idq = find_counter(cpu, "IDQ.MS_UOPS");
1421 if (pos != -1) {
1422 id = idq->vals[pos] * 1.0;
1423 un = unhalt->vals[pos] * 1.0;
1424 } else {
1425 id = idq->sum * 1.0;
1426 un = unhalt->sum * 1.0;
1427 }
1428 res = id/(un * con);
1429 ret = printf("%1.3f", res);
1430 return(ret);
1431 }
1432
1433
1434 static int
microassist_broad(struct counters * cpu,int pos)1435 microassist_broad(struct counters *cpu, int pos)
1436 {
1437 int ret;
1438 struct counters *idq;
1439 struct counters *unhalt;
1440 struct counters *uopiss;
1441 struct counters *uopret;
1442 double un, id, res, con, uoi, uor;
1443
1444 con = 4.0;
1445 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1446 idq = find_counter(cpu, "IDQ.MS_UOPS");
1447 uopiss = find_counter(cpu, "UOPS_ISSUED.ANY");
1448 uopret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
1449 if (pos != -1) {
1450 id = idq->vals[pos] * 1.0;
1451 un = unhalt->vals[pos] * 1.0;
1452 uoi = uopiss->vals[pos] * 1.0;
1453 uor = uopret->vals[pos] * 1.0;
1454 } else {
1455 id = idq->sum * 1.0;
1456 un = unhalt->sum * 1.0;
1457 uoi = uopiss->sum * 1.0;
1458 uor = uopret->sum * 1.0;
1459 }
1460 res = (uor/uoi) * (id/(un * con));
1461 ret = printf("%1.3f", res);
1462 return(ret);
1463 }
1464
1465
1466 static int
aliasing(struct counters * cpu,int pos)1467 aliasing(struct counters *cpu, int pos)
1468 {
1469 /* 15 - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */
1470 int ret;
1471 struct counters *ld;
1472 struct counters *unhalt;
1473 double un, lds, con, res;
1474
1475 con = 5.0;
1476 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1477 ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS");
1478 if (pos != -1) {
1479 lds = ld->vals[pos] * 1.0;
1480 un = unhalt->vals[pos] * 1.0;
1481 } else {
1482 lds = ld->sum * 1.0;
1483 un = unhalt->sum * 1.0;
1484 }
1485 res = (lds * con)/un;
1486 ret = printf("%1.3f", res);
1487 return(ret);
1488 }
1489
1490 static int
aliasing_broad(struct counters * cpu,int pos)1491 aliasing_broad(struct counters *cpu, int pos)
1492 {
1493 /* 15 - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */
1494 int ret;
1495 struct counters *ld;
1496 struct counters *unhalt;
1497 double un, lds, con, res;
1498
1499 con = 7.0;
1500 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1501 ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS");
1502 if (pos != -1) {
1503 lds = ld->vals[pos] * 1.0;
1504 un = unhalt->vals[pos] * 1.0;
1505 } else {
1506 lds = ld->sum * 1.0;
1507 un = unhalt->sum * 1.0;
1508 }
1509 res = (lds * con)/un;
1510 ret = printf("%1.3f", res);
1511 return(ret);
1512 }
1513
1514
1515 static int
fpassists(struct counters * cpu,int pos)1516 fpassists(struct counters *cpu, int pos)
1517 {
1518 /* 16 - FP_ASSIST.ANY/INST_RETIRED.ANY_P */
1519 int ret;
1520 struct counters *fp;
1521 struct counters *inst;
1522 double un, fpd, res;
1523
1524 inst = find_counter(cpu, "INST_RETIRED.ANY_P");
1525 fp = find_counter(cpu, "FP_ASSIST.ANY");
1526 if (pos != -1) {
1527 fpd = fp->vals[pos] * 1.0;
1528 un = inst->vals[pos] * 1.0;
1529 } else {
1530 fpd = fp->sum * 1.0;
1531 un = inst->sum * 1.0;
1532 }
1533 res = fpd/un;
1534 ret = printf("%1.3f", res);
1535 return(ret);
1536 }
1537
1538 static int
otherassistavx(struct counters * cpu,int pos)1539 otherassistavx(struct counters *cpu, int pos)
1540 {
1541 /* 17 - (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P thresh .1*/
1542 int ret;
1543 struct counters *oth;
1544 struct counters *unhalt;
1545 double un, ot, con, res;
1546
1547 con = 75.0;
1548 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1549 oth = find_counter(cpu, "OTHER_ASSISTS.AVX_TO_SSE");
1550 if (pos != -1) {
1551 ot = oth->vals[pos] * 1.0;
1552 un = unhalt->vals[pos] * 1.0;
1553 } else {
1554 ot = oth->sum * 1.0;
1555 un = unhalt->sum * 1.0;
1556 }
1557 res = (ot * con)/un;
1558 ret = printf("%1.3f", res);
1559 return(ret);
1560 }
1561
1562 static int
otherassistsse(struct counters * cpu,int pos)1563 otherassistsse(struct counters *cpu, int pos)
1564 {
1565
1566 int ret;
1567 struct counters *oth;
1568 struct counters *unhalt;
1569 double un, ot, con, res;
1570
1571 /* 18 (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P thresh .1*/
1572 con = 75.0;
1573 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1574 oth = find_counter(cpu, "OTHER_ASSISTS.SSE_TO_AVX");
1575 if (pos != -1) {
1576 ot = oth->vals[pos] * 1.0;
1577 un = unhalt->vals[pos] * 1.0;
1578 } else {
1579 ot = oth->sum * 1.0;
1580 un = unhalt->sum * 1.0;
1581 }
1582 res = (ot * con)/un;
1583 ret = printf("%1.3f", res);
1584 return(ret);
1585 }
1586
1587 static int
efficiency1(struct counters * cpu,int pos)1588 efficiency1(struct counters *cpu, int pos)
1589 {
1590
1591 int ret;
1592 struct counters *uops;
1593 struct counters *unhalt;
1594 double un, ot, con, res;
1595
1596 /* 19 (UOPS_RETIRED.RETIRE_SLOTS/(4*CPU_CLK_UNHALTED.THREAD_P) look if thresh < .9*/
1597 con = 4.0;
1598 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1599 uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
1600 if (pos != -1) {
1601 ot = uops->vals[pos] * 1.0;
1602 un = unhalt->vals[pos] * 1.0;
1603 } else {
1604 ot = uops->sum * 1.0;
1605 un = unhalt->sum * 1.0;
1606 }
1607 res = ot/(con * un);
1608 ret = printf("%1.3f", res);
1609 return(ret);
1610 }
1611
1612 static int
efficiency2(struct counters * cpu,int pos)1613 efficiency2(struct counters *cpu, int pos)
1614 {
1615
1616 int ret;
1617 struct counters *uops;
1618 struct counters *unhalt;
1619 double un, ot, res;
1620
1621 /* 20 - CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P good if > 1. (comp factor)*/
1622 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1623 uops = find_counter(cpu, "INST_RETIRED.ANY_P");
1624 if (pos != -1) {
1625 ot = uops->vals[pos] * 1.0;
1626 un = unhalt->vals[pos] * 1.0;
1627 } else {
1628 ot = uops->sum * 1.0;
1629 un = unhalt->sum * 1.0;
1630 }
1631 res = un/ot;
1632 ret = printf("%1.3f", res);
1633 return(ret);
1634 }
1635
/*
 * Sandy Bridge analysis table.
 *
 * Each initializer supplies, in order: a short test name, a threshold
 * hint string, a pmcstat command line that collects the needed events,
 * the function that computes and prints the metric, and an integer.
 * In every entry below that integer equals the number of "-s" events in
 * the command; presumably it is the count of PMC counters the test
 * needs -- confirm against the definition of struct cpu_entry.
 *
 * SANDY_BRIDGE_COUNT must match the number of entries (20).
 *
 * NOTE(review): the microassist command requests "IDQ.MS_UOPS,cmask=1"
 * while microassist() looks up "IDQ.MS_UOPS"; confirm how find_counter()
 * matches names that carry qualifiers.
 */
#define SANDY_BRIDGE_COUNT 20
static struct cpu_entry sandy_bridge[SANDY_BRIDGE_COUNT] = {
/*01*/ { "allocstall1", "thresh > .05",
	"pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW -w 1",
	allocstall1, 2 },
/* 02 -- original remark: not defined for SB right (partial-rat_stalls) */
	{ "allocstall2", "thresh > .05",
	"pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP -w 1",
	allocstall2, 2 },
/*03*/ { "br_miss", "thresh >= .2",
	"pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
	br_mispredict, 2 },
/*04*/ { "splitload", "thresh >= .1",
	"pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1",
	splitload_sb, 2 },
/* 05*/ { "splitstore", "thresh >= .01",
	"pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1",
	splitstore_sb, 2 },
/*06*/ { "contested", "thresh >= .05",
	"pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	contested, 2 },
/*07*/ { "blockstorefwd", "thresh >= .05",
	"pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	blockstoreforward, 2 },
/*08*/ { "cache2", "thresh >= .2",
	"pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	cache2, 4 },
/*09*/ { "cache1", "thresh >= .2",
	"pmcstat -s MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	cache1, 2 },
/*10*/ { "dtlbmissload", "thresh >= .1",
	"pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	dtlb_missload, 3 },
/*11*/ { "dtlbmissstore", "thresh >= .05",
	"pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	dtlb_missstore, 3 },
/*12*/ { "frontendstall", "thresh >= .15",
	"pmcstat -s IDQ_UOPS_NOT_DELIVERED.CORE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	frontendstall, 2 },
/*13*/ { "clears", "thresh >= .02",
	"pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	clears, 4 },
/*14*/ { "microassist", "thresh >= .05",
	"pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	microassist, 2 },
/*15*/ { "aliasing_4k", "thresh >= .1",
	"pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	aliasing, 2 },
/*16*/ { "fpassist", "look for a excessive value",
	"pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
	fpassists, 2 },
/*17*/ { "otherassistavx", "look for a excessive value",
	"pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	otherassistavx, 2},
/*18*/ { "otherassistsse", "look for a excessive value",
	"pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	otherassistsse, 2 },
/*19*/ { "eff1", "thresh < .9",
	"pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	efficiency1, 2 },
/*20*/ { "eff2", "thresh > 1.0",
	"pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	efficiency2, 2 },
};
1700
1701
/*
 * Ivy Bridge analysis table.
 *
 * Each initializer supplies, in order: a short test name, a threshold
 * hint string, a pmcstat command line that collects the needed events,
 * the function that computes and prints the metric, and an integer.
 * In every entry below that integer equals the number of "-s" events in
 * the command; presumably it is the count of PMC counters the test
 * needs -- confirm against the definition of struct cpu_entry.
 *
 * IVY_BRIDGE_COUNT must match the number of entries (21).
 */
#define IVY_BRIDGE_COUNT 21
static struct cpu_entry ivy_bridge[IVY_BRIDGE_COUNT] = {
/*1*/ { "eff1", "thresh < .75",
	"pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	efficiency1, 2 },
/*2*/ { "eff2", "thresh > 1.0",
	"pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	efficiency2, 2 },
/*3*/ { "itlbmiss", "thresh > .05",
	"pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	itlb_miss, 2 },
/*4*/ { "icachemiss", "thresh > .05",
	"pmcstat -s ICACHE.IFETCH_STALL -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	icache_miss, 3 },
/*5*/ { "lcpstall", "thresh > .05",
	"pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	lcp_stall, 2 },
/*6*/ { "cache1", "thresh >= .2",
	"pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	cache1ib, 2 },
/*7*/ { "cache2", "thresh >= .2",
	"pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	cache2ib, 2 },
/*8*/ { "contested", "thresh >= .05",
	"pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	contested, 2 },
/*9*/ { "datashare", "thresh >= .05",
	"pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	datasharing, 2 },
/*10*/ { "blockstorefwd", "thresh >= .05",
	"pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	blockstoreforward, 2 },
/*11*/ { "splitload", "thresh >= .1",
	"pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
	splitloadib, 4 },
/*12*/ { "splitstore", "thresh >= .01",
	"pmcstat -s MEM_UOPS_RETIRED.SPLIT_STORES -s MEM_UOPS_RETIRED.ALL_STORES -w 1",
	splitstore, 2 },
/*13*/ { "aliasing_4k", "thresh >= .1",
	"pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	aliasing, 2 },
/*14*/ { "dtlbmissload", "thresh >= .1",
	"pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	dtlb_missload , 3},
/*15*/ { "dtlbmissstore", "thresh >= .05",
	"pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	dtlb_missstore, 3 },
/*16 -- the widest test in this table: eight events in one run */
	{ "br_miss", "thresh >= .2",
	"pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",
	br_mispredictib, 8 },
/*17*/ { "clears", "thresh >= .02",
	"pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	clears, 4 },
/*18*/ { "microassist", "thresh >= .05",
	"pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	microassist, 2 },
/*19*/ { "fpassist", "look for a excessive value",
	"pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
	fpassists, 2 },
/*20*/ { "otherassistavx", "look for a excessive value",
	"pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	otherassistavx , 2},
/*21*/ { "otherassistsse", "look for a excessive value",
	"pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	otherassistsse, 2 },
};
1768
/*
 * Haswell analysis table.
 *
 * Each initializer supplies, in order: a short test name, a threshold
 * hint string, a pmcstat command line that collects the needed events,
 * the function that computes and prints the metric, and an integer.
 * In every entry below that integer equals the number of "-s" events in
 * the command; presumably it is the count of PMC counters the test
 * needs -- confirm against the definition of struct cpu_entry.
 *
 * HASWELL_COUNT must match the number of entries (20).  Several entries
 * reuse functions also referenced by other tables (e.g. cache1ib,
 * br_mispredict), while "icachemiss", "cache2", "contested" and
 * "datashare" point at the *_has / *has variants.
 */
#define HASWELL_COUNT 20
static struct cpu_entry haswell[HASWELL_COUNT] = {
/*1*/ { "eff1", "thresh < .75",
	"pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	efficiency1, 2 },
/*2*/ { "eff2", "thresh > 1.0",
	"pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	efficiency2, 2 },
/*3*/ { "itlbmiss", "thresh > .05",
	"pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	itlb_miss, 2 },
/*4*/ { "icachemiss", "thresh > .05",
	"pmcstat -s ICACHE.MISSES -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	icache_miss_has, 2 },
/*5*/ { "lcpstall", "thresh > .05",
	"pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	lcp_stall, 2 },
/*6*/ { "cache1", "thresh >= .2",
	"pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	cache1ib, 2 },
/*7*/ { "cache2", "thresh >= .2",
	"pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	cache2has, 4 },
/*8*/ { "contested", "thresh >= .05",
	"pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	contested_has, 2 },
/*9*/ { "datashare", "thresh >= .05",
	"pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	datasharing_has, 2 },
/*10*/ { "blockstorefwd", "thresh >= .05",
	"pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	blockstoreforward, 2 },
/*11*/ { "splitload", "thresh >= .1",
	"pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOPS_RETIRED.SPLIT_LOADS -w 1",
	splitload , 2},
/*12*/ { "splitstore", "thresh >= .01",
	"pmcstat -s MEM_UOPS_RETIRED.SPLIT_STORES -s MEM_UOPS_RETIRED.ALL_STORES -w 1",
	splitstore, 2 },
/*13*/ { "aliasing_4k", "thresh >= .1",
	"pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	aliasing, 2 },
/*14*/ { "dtlbmissload", "thresh >= .1",
	"pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	dtlb_missload, 3 },
/*15*/ { "br_miss", "thresh >= .2",
	"pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
	br_mispredict, 2 },
/*16*/ { "clears", "thresh >= .02",
	"pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	clears, 4 },
/*17*/ { "microassist", "thresh >= .05",
	"pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	microassist, 2 },
/*18*/ { "fpassist", "look for a excessive value",
	"pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
	fpassists, 2 },
/*19*/ { "otherassistavx", "look for a excessive value",
	"pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	otherassistavx, 2 },
/*20*/ { "otherassistsse", "look for a excessive value",
	"pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	otherassistsse, 2 },
};
1832
1833
/*
 * Print the formula behind one canned Broadwell expression, followed by the
 * threshold at which its value suggests a possible performance improvement.
 *
 * name -- short expression name (the value given to -e).  A name that is
 *         not in the table below is reported as unknown.
 *
 * The explanatory text is kept in a lookup table so that each name has its
 * formula and threshold side by side.  Event names in the text are spelled
 * the way the events are actually named so they can be copied verbatim.
 */
static void
explain_name_broad(const char *name)
{
	static const struct {
		const char *name;	/* expression name */
		const char *text;	/* formula description, newline terminated */
		const char *thresh;	/* threshold wording */
	} notes[] = {
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .75" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "itlbmiss",
		  "Examine (7 * ITLB_MISSES.STLB_HIT_4K + ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "icachemiss",
		  "Examine ( 36.0 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P ??? may not be right \n",
		  "thresh > .05" },
		{ "lcpstall",
		  "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "cache2",
		  "Examine (36.0 * MEM_LOAD_UOPS_RETIRED.L3_HIT / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .2" },
		{ "contested",
		  "Examine ((MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "datashare",
		  "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 7) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  " / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "br_miss",
		  "Examine BR_MISP_RETIRED.ALL_BRANCHES_PS / (BR_MISP_RETIRED.ALL_BRANCHES_PS + MACHINE_CLEARS.COUNT) *\n"
		  " (UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) /\n"
		  "CPU_CLK_UNHALTED.THREAD * 4)\n",
		  "thresh >= .2" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  " MACHINE_CLEARS.SMC + \n"
		  " MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "microassist",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS/UOPS_ISSUED.ANY) * (IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
	};
	const char *mythresh = "unknown entry";
	size_t i;
	int found = 0;

	for (i = 0; i < sizeof(notes) / sizeof(notes[0]); i++) {
		if (strcmp(name, notes[i].name) == 0) {
			/* The text holds no conversions, so emit it verbatim. */
			fputs(notes[i].text, stdout);
			mythresh = notes[i].thresh;
			found = 1;
			break;
		}
	}
	if (!found) {
		printf("Unknown name:%s\n", name);
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
1902
1903
1904 #define BROADWELL_COUNT 17
1905 static struct cpu_entry broadwell[BROADWELL_COUNT] = {
1906 /*1*/ { "eff1", "thresh < .75",
1907 "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1908 efficiency1, 2 },
1909 /*2*/ { "eff2", "thresh > 1.0",
1910 "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1911 efficiency2, 2 },
1912 /*3*/ { "itlbmiss", "thresh > .05",
1913 "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -s ITLB_MISSES.STLB_HIT_4K -w 1",
1914 itlb_miss_broad, 3 },
1915 /*4*/ { "icachemiss", "thresh > .05",
1916 "pmcstat -s ICACHE.MISSES -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1917 icache_miss_has, 2 },
1918 /*5*/ { "lcpstall", "thresh > .05",
1919 "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1920 lcp_stall, 2 },
1921 /*6*/ { "cache1", "thresh >= .1",
1922 "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1923 cache1broad, 2 },
1924 /*7*/ { "cache2", "thresh >= .2",
1925 "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1926 cache2broad, 2 },
1927 /*8*/ { "contested", "thresh >= .05",
1928 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS -w 1",
1929 contestedbroad, 2 },
1930 /*9*/ { "datashare", "thresh >= .05",
1931 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1932 datasharing_has, 2 },
1933 /*10*/ { "blockstorefwd", "thresh >= .05",
1934 "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1935 blockstoreforward, 2 },
1936 /*11*/ { "aliasing_4k", "thresh >= .1",
1937 "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1938 aliasing_broad, 2 },
1939 /*12*/ { "dtlbmissload", "thresh >= .1",
1940 "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT_4K -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1941 dtlb_missload, 3 },
1942 /*13*/ { "br_miss", "thresh >= .2",
1943 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.CYCLES -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",
1944 br_mispredict_broad, 7 },
1945 /*14*/ { "clears", "thresh >= .02",
1946 "pmcstat -s MACHINE_CLEARS.CYCLES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1947 clears_broad, 5 },
1948 /*15*/ { "fpassist", "look for a excessive value",
1949 "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1950 fpassists, 2 },
1951 /*16*/ { "otherassistavx", "look for a excessive value",
1952 "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1953 otherassistavx, 2 },
1954 /*17*/ { "microassist", "thresh >= .2",
1955 "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -w 1",
1956 microassist_broad, 4 },
1957 };
1958
1959
1960 static void
set_sandybridge(void)1961 set_sandybridge(void)
1962 {
1963 strcpy(the_cpu.cputype, "SandyBridge PMC");
1964 the_cpu.number = SANDY_BRIDGE_COUNT;
1965 the_cpu.ents = sandy_bridge;
1966 the_cpu.explain = explain_name_sb;
1967 }
1968
1969 static void
set_ivybridge(void)1970 set_ivybridge(void)
1971 {
1972 strcpy(the_cpu.cputype, "IvyBridge PMC");
1973 the_cpu.number = IVY_BRIDGE_COUNT;
1974 the_cpu.ents = ivy_bridge;
1975 the_cpu.explain = explain_name_ib;
1976 }
1977
1978
1979 static void
set_haswell(void)1980 set_haswell(void)
1981 {
1982 strcpy(the_cpu.cputype, "HASWELL PMC");
1983 the_cpu.number = HASWELL_COUNT;
1984 the_cpu.ents = haswell;
1985 the_cpu.explain = explain_name_has;
1986 }
1987
1988
1989 static void
set_broadwell(void)1990 set_broadwell(void)
1991 {
1992 strcpy(the_cpu.cputype, "HASWELL PMC");
1993 the_cpu.number = BROADWELL_COUNT;
1994 the_cpu.ents = broadwell;
1995 the_cpu.explain = explain_name_broad;
1996 }
1997
1998
1999 static int
set_expression(const char * name)2000 set_expression(const char *name)
2001 {
2002 int found = 0, i;
2003 for(i=0 ; i< the_cpu.number; i++) {
2004 if (strcmp(name, the_cpu.ents[i].name) == 0) {
2005 found = 1;
2006 expression = the_cpu.ents[i].func;
2007 command = the_cpu.ents[i].command;
2008 threshold = the_cpu.ents[i].thresh;
2009 if (the_cpu.ents[i].counters_required > max_pmc_counters) {
2010 printf("Test %s requires that the CPU have %d counters and this CPU has only %d\n",
2011 the_cpu.ents[i].name,
2012 the_cpu.ents[i].counters_required, max_pmc_counters);
2013 printf("Sorry this test can not be run\n");
2014 if (run_all == 0) {
2015 exit(-1);
2016 } else {
2017 return(-1);
2018 }
2019 }
2020 break;
2021 }
2022 }
2023 if (!found) {
2024 printf("For CPU type %s we have no expression:%s\n",
2025 the_cpu.cputype, name);
2026 exit(-1);
2027 }
2028 return(0);
2029 }
2030
2031
2032
2033
2034
2035 static int
validate_expression(char * name)2036 validate_expression(char *name)
2037 {
2038 int i, found;
2039
2040 found = 0;
2041 for(i=0 ; i< the_cpu.number; i++) {
2042 if (strcmp(name, the_cpu.ents[i].name) == 0) {
2043 found = 1;
2044 break;
2045 }
2046 }
2047 if (!found) {
2048 return(-1);
2049 }
2050 return (0);
2051 }
2052
2053 static void
do_expression(struct counters * cpu,int pos)2054 do_expression(struct counters *cpu, int pos)
2055 {
2056 if (expression == NULL)
2057 return;
2058 (*expression)(cpu, pos);
2059 }
2060
2061 static void
process_header(int idx,char * p)2062 process_header(int idx, char *p)
2063 {
2064 struct counters *up;
2065 int i, len, nlen;
2066 /*
2067 * Given header element idx, at p in
2068 * form 's/NN/nameof'
2069 * process the entry to pull out the name and
2070 * the CPU number.
2071 */
2072 if (strncmp(p, "s/", 2)) {
2073 printf("Check -- invalid header no s/ in %s\n",
2074 p);
2075 return;
2076 }
2077 up = &cnts[idx];
2078 up->cpu = strtol(&p[2], NULL, 10);
2079 len = strlen(p);
2080 for (i=2; i<len; i++) {
2081 if (p[i] == '/') {
2082 nlen = strlen(&p[(i+1)]);
2083 if (nlen < (MAX_NLEN-1)) {
2084 strcpy(up->counter_name, &p[(i+1)]);
2085 } else {
2086 strncpy(up->counter_name, &p[(i+1)], (MAX_NLEN-1));
2087 }
2088 }
2089 }
2090 }
2091
2092 static void
build_counters_from_header(FILE * io)2093 build_counters_from_header(FILE *io)
2094 {
2095 char buffer[8192], *p;
2096 int i, len, cnt;
2097 size_t mlen;
2098
2099 /* We have a new start, lets
2100 * setup our headers and cpus.
2101 */
2102 if (fgets(buffer, sizeof(buffer), io) == NULL) {
2103 printf("First line can't be read from file err:%d\n", errno);
2104 return;
2105 }
2106 /*
2107 * Ok output is an array of counters. Once
2108 * we start to read the values in we must
2109 * put them in there slot to match there CPU and
2110 * counter being updated. We create a mass array
2111 * of the counters, filling in the CPU and
2112 * counter name.
2113 */
2114 /* How many do we get? */
2115 len = strlen(buffer);
2116 for (i=0, cnt=0; i<len; i++) {
2117 if (strncmp(&buffer[i], "s/", 2) == 0) {
2118 cnt++;
2119 for(;i<len;i++) {
2120 if (buffer[i] == ' ')
2121 break;
2122 }
2123 }
2124 }
2125 mlen = sizeof(struct counters) * cnt;
2126 cnts = malloc(mlen);
2127 ncnts = cnt;
2128 if (cnts == NULL) {
2129 printf("No memory err:%d\n", errno);
2130 return;
2131 }
2132 memset(cnts, 0, mlen);
2133 for (i=0, cnt=0; i<len; i++) {
2134 if (strncmp(&buffer[i], "s/", 2) == 0) {
2135 p = &buffer[i];
2136 for(;i<len;i++) {
2137 if (buffer[i] == ' ') {
2138 buffer[i] = 0;
2139 break;
2140 }
2141 }
2142 process_header(cnt, p);
2143 cnt++;
2144 }
2145 }
2146 if (verbose)
2147 printf("We have %d entries\n", cnt);
2148 }
/*
 * Upper bound on the number of sample lines process_file() reads before it
 * stops; starts out at MAX_COUNTER_SLOTS.
 */
extern int max_to_collect;
int max_to_collect = MAX_COUNTER_SLOTS;
2151
2152 static int
read_a_line(FILE * io)2153 read_a_line(FILE *io)
2154 {
2155 char buffer[8192], *p, *stop;
2156 int pos, i;
2157
2158 if (fgets(buffer, sizeof(buffer), io) == NULL) {
2159 return(0);
2160 }
2161 p = buffer;
2162 for (i=0; i<ncnts; i++) {
2163 pos = cnts[i].pos;
2164 cnts[i].vals[pos] = strtol(p, &stop, 0);
2165 cnts[i].pos++;
2166 cnts[i].sum += cnts[i].vals[pos];
2167 p = stop;
2168 }
2169 return (1);
2170 }
2171
/*
 * Number of CPUs that have an entry in glob_cpu[]; computed by
 * print_header() and used to decide where an output row ends.
 */
extern int cpu_count_out;
int cpu_count_out=0;
2174
2175 static void
print_header(void)2176 print_header(void)
2177 {
2178 int i, cnt, printed_cnt;
2179
2180 printf("*********************************\n");
2181 for(i=0, cnt=0; i<MAX_CPU; i++) {
2182 if (glob_cpu[i]) {
2183 cnt++;
2184 }
2185 }
2186 cpu_count_out = cnt;
2187 for(i=0, printed_cnt=0; i<MAX_CPU; i++) {
2188 if (glob_cpu[i]) {
2189 printf("CPU%d", i);
2190 printed_cnt++;
2191 }
2192 if (printed_cnt == cnt) {
2193 printf("\n");
2194 break;
2195 } else {
2196 printf("\t");
2197 }
2198 }
2199 }
2200
2201 static void
lace_cpus_together(void)2202 lace_cpus_together(void)
2203 {
2204 int i, j, lace_cpu;
2205 struct counters *cpat, *at;
2206
2207 for(i=0; i<ncnts; i++) {
2208 cpat = &cnts[i];
2209 if (cpat->next_cpu) {
2210 /* Already laced in */
2211 continue;
2212 }
2213 lace_cpu = cpat->cpu;
2214 if (lace_cpu >= MAX_CPU) {
2215 printf("CPU %d to big\n", lace_cpu);
2216 continue;
2217 }
2218 if (glob_cpu[lace_cpu] == NULL) {
2219 glob_cpu[lace_cpu] = cpat;
2220 } else {
2221 /* Already processed this cpu */
2222 continue;
2223 }
2224 /* Ok look forward for cpu->cpu and link in */
2225 for(j=(i+1); j<ncnts; j++) {
2226 at = &cnts[j];
2227 if (at->next_cpu) {
2228 continue;
2229 }
2230 if (at->cpu == lace_cpu) {
2231 /* Found one */
2232 cpat->next_cpu = at;
2233 cpat = at;
2234 }
2235 }
2236 }
2237 }
2238
2239
2240 static void
process_file(char * filename)2241 process_file(char *filename)
2242 {
2243 FILE *io;
2244 int i;
2245 int line_at, not_done;
2246 pid_t pid_of_command=0;
2247
2248 if (filename == NULL) {
2249 io = my_popen(command, "r", &pid_of_command);
2250 if (io == NULL) {
2251 printf("Can't popen the command %s\n", command);
2252 return;
2253 }
2254 } else {
2255 io = fopen(filename, "r");
2256 if (io == NULL) {
2257 printf("Can't process file %s err:%d\n",
2258 filename, errno);
2259 return;
2260 }
2261 }
2262 build_counters_from_header(io);
2263 if (cnts == NULL) {
2264 /* Nothing we can do */
2265 printf("Nothing to do -- no counters built\n");
2266 if (filename) {
2267 fclose(io);
2268 } else {
2269 my_pclose(io, pid_of_command);
2270 }
2271 return;
2272 }
2273 lace_cpus_together();
2274 print_header();
2275 if (verbose) {
2276 for (i=0; i<ncnts; i++) {
2277 printf("Counter:%s cpu:%d index:%d\n",
2278 cnts[i].counter_name,
2279 cnts[i].cpu, i);
2280 }
2281 }
2282 line_at = 0;
2283 not_done = 1;
2284 while(not_done) {
2285 if (read_a_line(io)) {
2286 line_at++;
2287 } else {
2288 break;
2289 }
2290 if (line_at >= max_to_collect) {
2291 not_done = 0;
2292 }
2293 if (filename == NULL) {
2294 int cnt;
2295 /* For the ones we dynamically open we print now */
2296 for(i=0, cnt=0; i<MAX_CPU; i++) {
2297 do_expression(glob_cpu[i], (line_at-1));
2298 cnt++;
2299 if (cnt == cpu_count_out) {
2300 printf("\n");
2301 break;
2302 } else {
2303 printf("\t");
2304 }
2305 }
2306 }
2307 }
2308 if (filename) {
2309 fclose(io);
2310 } else {
2311 my_pclose(io, pid_of_command);
2312 }
2313 }
/*
 * CPUID access helpers.
 *
 * On amd64, cpuid(in, a, b, c, d) executes CPUID with `in` in EAX and
 * stores the resulting EAX/EBX/ECX/EDX into a/b/c/d.  Note that the macro
 * expansion already ends in a semicolon.
 *
 * do_cpuid(ax, cx, p) executes CPUID with `ax` in EAX and `cx` in ECX and
 * stores EAX/EBX/ECX/EDX into p[0]..p[3].
 *
 * On every other architecture both names expand to nothing, so their
 * output arguments are left untouched.
 */
#if defined(__amd64__)
#define cpuid(in,a,b,c,d)\
  asm("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (in));

static __inline void
do_cpuid(u_int ax, u_int cx, u_int *p)
{
	__asm __volatile("cpuid"
			 : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3])
			 :  "0" (ax), "c" (cx) );
}

#else
#define cpuid(in, a, b, c, d)
#define do_cpuid(ax, cx, p)
#endif
2330
2331 static void
get_cpuid_set(void)2332 get_cpuid_set(void)
2333 {
2334 unsigned long eax, ebx, ecx, edx;
2335 int model;
2336 pid_t pid_of_command=0;
2337 size_t sz, len;
2338 FILE *io;
2339 char linebuf[1024], *str;
2340 u_int reg[4];
2341
2342 eax = ebx = ecx = edx = 0;
2343
2344 cpuid(0, eax, ebx, ecx, edx);
2345 if (ebx == 0x68747541) {
2346 printf("AMD processors are not supported by this program\n");
2347 printf("Sorry\n");
2348 exit(0);
2349 } else if (ebx == 0x6972794) {
2350 printf("Cyrix processors are not supported by this program\n");
2351 printf("Sorry\n");
2352 exit(0);
2353 } else if (ebx == 0x756e6547) {
2354 printf("Genuine Intel\n");
2355 } else {
2356 printf("Unknown processor type 0x%lx Only Intel AMD64 types are supported by this routine!\n", ebx);
2357 exit(0);
2358 }
2359 cpuid(1, eax, ebx, ecx, edx);
2360 model = (((eax & 0xF0000) >> 12) | ((eax & 0xF0) >> 4));
2361 printf("CPU model is 0x%x id:0x%lx\n", model, eax);
2362 switch (eax & 0xF00) {
2363 case 0x500: /* Pentium family processors */
2364 printf("Intel Pentium P5\n");
2365 goto not_supported;
2366 break;
2367 case 0x600: /* Pentium Pro, Celeron, Pentium II & III */
2368 switch (model) {
2369 case 0x1:
2370 printf("Intel Pentium P6\n");
2371 goto not_supported;
2372 break;
2373 case 0x3:
2374 case 0x5:
2375 printf("Intel PII\n");
2376 goto not_supported;
2377 break;
2378 case 0x6: case 0x16:
2379 printf("Intel CL\n");
2380 goto not_supported;
2381 break;
2382 case 0x7: case 0x8: case 0xA: case 0xB:
2383 printf("Intel PIII\n");
2384 goto not_supported;
2385 break;
2386 case 0x9: case 0xD:
2387 printf("Intel PM\n");
2388 goto not_supported;
2389 break;
2390 case 0xE:
2391 printf("Intel CORE\n");
2392 goto not_supported;
2393 break;
2394 case 0xF:
2395 printf("Intel CORE2\n");
2396 goto not_supported;
2397 break;
2398 case 0x17:
2399 printf("Intel CORE2EXTREME\n");
2400 goto not_supported;
2401 break;
2402 case 0x1C: /* Per Intel document 320047-002. */
2403 printf("Intel ATOM\n");
2404 goto not_supported;
2405 break;
2406 case 0x1A:
2407 case 0x1E: /*
2408 * Per Intel document 253669-032 9/2009,
2409 * pages A-2 and A-57
2410 */
2411 case 0x1F: /*
2412 * Per Intel document 253669-032 9/2009,
2413 * pages A-2 and A-57
2414 */
2415 printf("Intel COREI7\n");
2416 goto not_supported;
2417 break;
2418 case 0x2E:
2419 printf("Intel NEHALEM\n");
2420 goto not_supported;
2421 break;
2422 case 0x25: /* Per Intel document 253669-033US 12/2009. */
2423 case 0x2C: /* Per Intel document 253669-033US 12/2009. */
2424 printf("Intel WESTMERE\n");
2425 goto not_supported;
2426 break;
2427 case 0x2F: /* Westmere-EX, seen in wild */
2428 printf("Intel WESTMERE\n");
2429 goto not_supported;
2430 break;
2431 case 0x2A: /* Per Intel document 253669-039US 05/2011. */
2432 printf("Intel SANDYBRIDGE\n");
2433 set_sandybridge();
2434 break;
2435 case 0x2D: /* Per Intel document 253669-044US 08/2012. */
2436 printf("Intel SANDYBRIDGE_XEON\n");
2437 set_sandybridge();
2438 break;
2439 case 0x3A: /* Per Intel document 253669-043US 05/2012. */
2440 printf("Intel IVYBRIDGE\n");
2441 set_ivybridge();
2442 break;
2443 case 0x3E: /* Per Intel document 325462-045US 01/2013. */
2444 printf("Intel IVYBRIDGE_XEON\n");
2445 set_ivybridge();
2446 break;
2447 case 0x3F: /* Per Intel document 325462-045US 09/2014. */
2448 printf("Intel HASWELL (Xeon)\n");
2449 set_haswell();
2450 break;
2451 case 0x3C: /* Per Intel document 325462-045US 01/2013. */
2452 case 0x45:
2453 case 0x46:
2454 printf("Intel HASWELL\n");
2455 set_haswell();
2456 break;
2457
2458 case 0x4e:
2459 case 0x5e:
2460 printf("Intel SKY-LAKE\n");
2461 goto not_supported;
2462 break;
2463 case 0x3D:
2464 case 0x47:
2465 printf("Intel BROADWELL\n");
2466 set_broadwell();
2467 break;
2468 case 0x4f:
2469 case 0x56:
2470 printf("Intel BROADWEL (Xeon)\n");
2471 set_broadwell();
2472 break;
2473
2474 case 0x4D:
2475 /* Per Intel document 330061-001 01/2014. */
2476 printf("Intel ATOM_SILVERMONT\n");
2477 goto not_supported;
2478 break;
2479 default:
2480 printf("Intel model 0x%x is not known -- sorry\n",
2481 model);
2482 goto not_supported;
2483 break;
2484 }
2485 break;
2486 case 0xF00: /* P4 */
2487 printf("Intel unknown model %d\n", model);
2488 goto not_supported;
2489 break;
2490 }
2491 do_cpuid(0xa, 0, reg);
2492 max_pmc_counters = (reg[3] & 0x0000000f) + 1;
2493 printf("We have %d PMC counters to work with\n", max_pmc_counters);
2494 /* Ok lets load the list of all known PMC's */
2495 io = my_popen("/usr/sbin/pmccontrol -L", "r", &pid_of_command);
2496 if (valid_pmcs == NULL) {
2497 /* Likely */
2498 pmc_allocated_cnt = PMC_INITIAL_ALLOC;
2499 sz = sizeof(char *) * pmc_allocated_cnt;
2500 valid_pmcs = malloc(sz);
2501 if (valid_pmcs == NULL) {
2502 printf("No memory allocation fails at startup?\n");
2503 exit(-1);
2504 }
2505 memset(valid_pmcs, 0, sz);
2506 }
2507
2508 while (fgets(linebuf, sizeof(linebuf), io) != NULL) {
2509 if (linebuf[0] != '\t') {
2510 /* sometimes headers ;-) */
2511 continue;
2512 }
2513 len = strlen(linebuf);
2514 if (linebuf[(len-1)] == '\n') {
2515 /* Likely */
2516 linebuf[(len-1)] = 0;
2517 }
2518 str = &linebuf[1];
2519 len = strlen(str) + 1;
2520 valid_pmcs[valid_pmc_cnt] = malloc(len);
2521 if (valid_pmcs[valid_pmc_cnt] == NULL) {
2522 printf("No memory2 allocation fails at startup?\n");
2523 exit(-1);
2524 }
2525 memset(valid_pmcs[valid_pmc_cnt], 0, len);
2526 strcpy(valid_pmcs[valid_pmc_cnt], str);
2527 valid_pmc_cnt++;
2528 if (valid_pmc_cnt >= pmc_allocated_cnt) {
2529 /* Got to expand -- unlikely */
2530 char **more;
2531
2532 sz = sizeof(char *) * (pmc_allocated_cnt * 2);
2533 more = malloc(sz);
2534 if (more == NULL) {
2535 printf("No memory3 allocation fails at startup?\n");
2536 exit(-1);
2537 }
2538 memset(more, 0, sz);
2539 memcpy(more, valid_pmcs, sz);
2540 pmc_allocated_cnt *= 2;
2541 free(valid_pmcs);
2542 valid_pmcs = more;
2543 }
2544 }
2545 my_pclose(io, pid_of_command);
2546 return;
2547 not_supported:
2548 printf("Not supported\n");
2549 exit(-1);
2550 }
2551
2552 static void
explain_all(void)2553 explain_all(void)
2554 {
2555 int i;
2556 printf("For CPU's of type %s the following expressions are available:\n",the_cpu.cputype);
2557 printf("-------------------------------------------------------------\n");
2558 for(i=0; i<the_cpu.number; i++){
2559 printf("For -e %s ", the_cpu.ents[i].name);
2560 (*the_cpu.explain)(the_cpu.ents[i].name);
2561 printf("----------------------------\n");
2562 }
2563 }
2564
/*
 * Try one PMC by running pmcstat on it briefly and report Pass or Failed.
 *
 * pmc        -- name of the PMC to try.
 * out_so_far -- number of characters already printed on this output line;
 *               used to pad the result out to column 50.
 *
 * The first line of pmcstat output is searched for the PMC name (or the
 * word ERROR).  When the name is found, the next line is read and printed
 * after "Pass", with its leading blanks removed.
 */
static void
test_for_a_pmc(const char *pmc, int out_so_far)
{
	FILE *io;
	pid_t pid_of_command=0;
	char my_command[1024];
	char line[1024];
	char resp[1024];
	int len, llen, i;

	if (out_so_far < 50) {
		len = 50 - out_so_far;
		for(i=0; i<len; i++) {
			printf(" ");
		}
	}
	/* Bounded formatting: refuse a name that does not fit the buffer */
	len = snprintf(my_command, sizeof(my_command),
	    "/usr/sbin/pmcstat -w .25 -c 0 -s %s", pmc);
	if (len < 0 || (size_t)len >= sizeof(my_command)) {
		printf("Failed -- PMC name is too long\n");
		return;
	}
	io = my_popen(my_command, "r", &pid_of_command);
	if (io == NULL) {
		printf("Failed -- popen fails\n");
		return;
	}
	/* Setup what we expect */
	len = snprintf(resp, sizeof(resp), "%s", pmc);
	if (fgets(line, sizeof(line), io) == NULL) {
		printf("Failed -- no output from pmstat\n");
		goto out;
	}
	llen = strlen(line);
	if (llen > 0 && line[(llen-1)] == '\n') {
		line[(llen-1)] = 0;
		llen--;
	}
	for(i=2; i<(llen-len); i++) {
		if (strncmp(&line[i], "ERROR", 5) == 0) {
			printf("Failed %s\n", line);
			goto out;
		} else if (strncmp(&line[i], resp, len) == 0) {
			int j, k;

			if (fgets(line, sizeof(line), io) == NULL) {
				printf("Failed -- no second output from pmstat\n");
				goto out;
			}
			/*
			 * Skip the leading blanks one at a time so that the
			 * first character of the value is never stepped
			 * over.
			 */
			j = 0;
			while (line[j] == ' ') {
				j++;
			}
			printf("Pass");
			len = strlen(&line[j]);
			if (len < 20) {
				for(k=0; k<(20-len); k++) {
					printf(" ");
				}
			}
			if (len) {
				/* The line still carries its own newline */
				printf("%s", &line[j]);
			} else {
				printf("\n");
			}
			goto out;
		}
	}
	printf("Failed -- '%s' not '%s'\n", line, resp);
out:
	my_pclose(io, pid_of_command);

}
2637
/*
 * Add `name` to the list of unique names in `vars`.
 *
 * vars    -- array of name pointers; the first cur_cnt entries are in use.
 * cur_cnt -- number of entries already stored.
 * name    -- name to add.
 *
 * Returns 0 when the name is already present (nothing is changed) and 1
 * when a private copy was stored in vars[cur_cnt].  Exits if that slot is
 * unexpectedly occupied or if memory cannot be allocated.
 */
static int
add_it_to(char **vars, int cur_cnt, char *name)
{
	size_t need;
	char *copy;
	int slot = 0;

	while (slot < cur_cnt) {
		if (strcmp(vars[slot], name) == 0) {
			/* Already have */
			return (0);
		}
		slot++;
	}
	if (vars[cur_cnt] != NULL) {
		printf("Cur_cnt:%d filled with %s??\n",
		    cur_cnt, vars[cur_cnt]);
		exit(-1);
	}
	/* Ok its new */
	need = strlen(name) + 1;
	copy = malloc(need);
	if (copy == NULL) {
		printf("No memory %s\n", __func__);
		exit(-1);
	}
	memcpy(copy, name, need);
	vars[cur_cnt] = copy;
	return (1);
}
2665
2666 static char *
build_command_for_exp(struct expression * exp)2667 build_command_for_exp(struct expression *exp)
2668 {
2669 /*
2670 * Build the pmcstat command to handle
2671 * the passed in expression.
2672 * /usr/sbin/pmcstat -w 1 -s NNN -s QQQ
2673 * where NNN and QQQ represent the PMC's in the expression
2674 * uniquely..
2675 */
2676 char forming[1024];
2677 int cnt_pmc, alloced_pmcs, i;
2678 struct expression *at;
2679 char **vars, *cmd;
2680 size_t mal;
2681
2682 alloced_pmcs = cnt_pmc = 0;
2683 /* first how many do we have */
2684 at = exp;
2685 while (at) {
2686 if (at->type == TYPE_VALUE_PMC) {
2687 cnt_pmc++;
2688 }
2689 at = at->next;
2690 }
2691 if (cnt_pmc == 0) {
2692 printf("No PMC's in your expression -- nothing to do!!\n");
2693 exit(0);
2694 }
2695 mal = cnt_pmc * sizeof(char *);
2696 vars = malloc(mal);
2697 if (vars == NULL) {
2698 printf("No memory\n");
2699 exit(-1);
2700 }
2701 memset(vars, 0, mal);
2702 at = exp;
2703 while (at) {
2704 if (at->type == TYPE_VALUE_PMC) {
2705 if(add_it_to(vars, alloced_pmcs, at->name)) {
2706 alloced_pmcs++;
2707 }
2708 }
2709 at = at->next;
2710 }
2711 /* Now we have a unique list in vars so create our command */
2712 mal = 23; /* "/usr/sbin/pmcstat -w 1" + \0 */
2713 for(i=0; i<alloced_pmcs; i++) {
2714 mal += strlen(vars[i]) + 4; /* var + " -s " */
2715 }
2716 cmd = malloc((mal+2));
2717 if (cmd == NULL) {
2718 printf("%s out of mem\n", __FUNCTION__);
2719 exit(-1);
2720 }
2721 memset(cmd, 0, (mal+2));
2722 strcpy(cmd, "/usr/sbin/pmcstat -w 1");
2723 at = exp;
2724 for(i=0; i<alloced_pmcs; i++) {
2725 sprintf(forming, " -s %s", vars[i]);
2726 strcat(cmd, forming);
2727 free(vars[i]);
2728 vars[i] = NULL;
2729 }
2730 free(vars);
2731 return(cmd);
2732 }
2733
2734 static int
user_expr(struct counters * cpu,int pos)2735 user_expr(struct counters *cpu, int pos)
2736 {
2737 int ret;
2738 double res;
2739 struct counters *var;
2740 struct expression *at;
2741
2742 at = master_exp;
2743 while (at) {
2744 if (at->type == TYPE_VALUE_PMC) {
2745 var = find_counter(cpu, at->name);
2746 if (var == NULL) {
2747 printf("%s:Can't find counter %s?\n", __FUNCTION__, at->name);
2748 exit(-1);
2749 }
2750 if (pos != -1) {
2751 at->value = var->vals[pos] * 1.0;
2752 } else {
2753 at->value = var->sum * 1.0;
2754 }
2755 }
2756 at = at->next;
2757 }
2758 res = run_expr(master_exp, 1, NULL);
2759 ret = printf("%1.3f", res);
2760 return(ret);
2761 }
2762
2763
2764 static void
set_manual_exp(struct expression * exp)2765 set_manual_exp(struct expression *exp)
2766 {
2767 expression = user_expr;
2768 command = build_command_for_exp(exp);
2769 threshold = "User defined threshold";
2770 }
2771
2772 static void
run_tests(void)2773 run_tests(void)
2774 {
2775 int i, lenout;
2776 printf("Running tests on %d PMC's this may take some time\n", valid_pmc_cnt);
2777 printf("------------------------------------------------------------------------\n");
2778 for(i=0; i<valid_pmc_cnt; i++) {
2779 lenout = printf("%s", valid_pmcs[i]);
2780 fflush(stdout);
2781 test_for_a_pmc(valid_pmcs[i], lenout);
2782 }
2783 }
2784 static void
list_all(void)2785 list_all(void)
2786 {
2787 int i, cnt, j;
2788 printf("PMC Abbreviation\n");
2789 printf("--------------------------------------------------------------\n");
2790 for(i=0; i<valid_pmc_cnt; i++) {
2791 cnt = printf("%s", valid_pmcs[i]);
2792 for(j=cnt; j<52; j++) {
2793 printf(" ");
2794 }
2795 printf("%%%d\n", i);
2796 }
2797 }
2798
2799
/*
 * Program entry point.
 *
 * Detects the CPU, parses the command line and then does one of:
 *   -L  list the known PMCs and exit
 *   -H  explain every canned expression and exit
 *   -T  test every known PMC and exit
 *   -h  explain the selected expression and exit
 *   otherwise collect samples (from -i file, or live by running pmcstat)
 *   and print the selected expression per CPU.  With -A every canned
 *   expression is run in turn.
 *
 * Always returns 0 from here; fatal conditions in the helpers call exit().
 */
int
main(int argc, char **argv)
{
	int i, j, cnt;
	char *filename=NULL;
	const char *name=NULL;
	int help_only = 0;
	int test_mode = 0;
	int test_at = 0;	/* next canned test to run in -A mode */

	get_cpuid_set();
	memset(glob_cpu, 0, sizeof(glob_cpu));
	while ((i = getopt(argc, argv, "ALHhvm:i:?e:TE:")) != -1) {
		switch (i) {
		case 'A':
			run_all = 1;
			break;
		case 'L':
			list_all();
			return(0);
		case 'H':
			printf("**********************************\n");
			explain_all();
			printf("**********************************\n");
			return(0);
			break;
		case 'T':
			test_mode = 1;
			break;
		case 'E':
			/* User expression; ignored here if it fails to parse */
			master_exp = parse_expression(optarg);
			if (master_exp) {
				set_manual_exp(master_exp);
			}
			break;
		case 'e':
			if (validate_expression(optarg)) {
				printf("Unknown expression %s\n", optarg);
				return(0);
			}
			name = optarg;
			set_expression(optarg);
			break;
		case 'm':
			max_to_collect = strtol(optarg, NULL, 0);
			if (max_to_collect > MAX_COUNTER_SLOTS) {
				/* You can't collect more than max in array */
				max_to_collect = MAX_COUNTER_SLOTS;
			}
			break;
		case 'v':
			verbose++;
			break;
		case 'h':
			help_only = 1;
			break;
		case 'i':
			filename = optarg;
			break;
		case '?':
		default:
			/*
			 * Also reached by "goto use" from below the option
			 * loop when nothing to do was specified.
			 */
		use:
			printf("Use %s [ -i inputfile -v -m max_to_collect -e expr -E -h -? -H]\n",
			    argv[0]);
			printf("-i inputfile -- use source as inputfile not stdin (if stdin collect)\n");
			printf("-v -- verbose dump debug type things -- you don't want this\n");
			printf("-m N -- maximum to collect is N measurements\n");
			printf("-e expr-name -- Do expression expr-name\n");
			printf("-E 'your expression' -- Do your expression\n");
			printf("-h -- Don't do the expression I put in -e xxx just explain what it does and exit\n");
			printf("-H -- Don't run anything, just explain all canned expressions\n");
			printf("-T -- Test all PMC's defined by this processor\n");
			printf("-A -- Run all canned tests\n");
			return(0);
			break;
		}
	}
	/* With no expression, file, or mode selected there is nothing to run */
	if ((run_all == 0) && (name == NULL) && (filename == NULL) &&
	    (test_mode == 0) && (master_exp == NULL)) {
		printf("Without setting an expression we cannot dynamically gather information\n");
		printf("you must supply a filename (and you probably want verbosity)\n");
		goto use;
	}
	/* In run-all mode a sample count above 10 is cut down to 3 */
	if (run_all && max_to_collect > 10) {
		max_to_collect = 3;
	}
	if (test_mode) {
		run_tests();
		return(0);
	}
	printf("*********************************\n");
	if ((master_exp == NULL) && name) {
		(*the_cpu.explain)(name);
	} else if (master_exp) {
		printf("Examine your expression ");
		print_exp(master_exp);
		printf("User defined threshold\n");
	}
	if (help_only) {
		return(0);
	}
	if (run_all) {
		/*
		 * Top of the run-all loop: select the next canned test.  A
		 * test that cannot run on this CPU is skipped; after the
		 * last one control goes to "done".
		 */
	more:
		name = the_cpu.ents[test_at].name;
		printf("***Test %s (threshold %s)****\n", name, the_cpu.ents[test_at].thresh);
		test_at++;
		if (set_expression(name) == -1) {
			if (test_at >= the_cpu.number) {
				goto done;
			} else
				goto more;
		}

	}
	process_file(filename);
	if (verbose >= 2) {
		/* Dump every collected value and the sum for each counter */
		for (i=0; i<ncnts; i++) {
			printf("Counter:%s cpu:%d index:%d\n",
			    cnts[i].counter_name,
			    cnts[i].cpu, i);
			for(j=0; j<cnts[i].pos; j++) {
				printf(" val - %ld\n", (long int)cnts[i].vals[j]);
			}
			printf(" sum - %ld\n", (long int)cnts[i].sum);
		}
	}
	if (expression == NULL) {
		return(0);
	}
	if (max_to_collect > 1) {
		/* Summary row: the expression over each CPU's summed counters */
		for(i=0, cnt=0; i<MAX_CPU; i++) {
			if (glob_cpu[i]) {
				do_expression(glob_cpu[i], -1);
				cnt++;
				if (cnt == cpu_count_out) {
					printf("\n");
					break;
				} else {
					printf("\t");
				}
			}
		}
	}
	if (run_all && (test_at < the_cpu.number)) {
		/* Reset the per-CPU state before running the next test */
		memset(glob_cpu, 0, sizeof(glob_cpu));
		ncnts = 0;
		printf("*********************************\n");
		goto more;
	} else if (run_all) {
	done:
		printf("*********************************\n");
	}
	return(0);
}
2954