1 /*- 2 * Copyright (c) 2014, 2015 Netflix Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer, 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 #include <sys/types.h> 29 #include <stdio.h> 30 #include <stdlib.h> 31 #include <unistd.h> 32 #include <string.h> 33 #include <strings.h> 34 #include <sys/errno.h> 35 #include <signal.h> 36 #include <sys/wait.h> 37 #include <getopt.h> 38 #include "eval_expr.h" 39 __FBSDID("$FreeBSD$"); 40 41 #define MAX_COUNTER_SLOTS 1024 42 #define MAX_NLEN 64 43 #define MAX_CPU 64 44 static int verbose = 0; 45 46 extern char **environ; 47 extern struct expression *master_exp; 48 struct expression *master_exp=NULL; 49 50 #define PMC_INITIAL_ALLOC 512 51 extern char **valid_pmcs; 52 char **valid_pmcs = NULL; 53 extern int valid_pmc_cnt; 54 int valid_pmc_cnt=0; 55 extern int pmc_allocated_cnt; 56 int pmc_allocated_cnt=0; 57 58 /* 59 * The following two varients on popen and pclose with 60 * the cavet that they get you the PID so that you 61 * can supply it to pclose so it can send a SIGTERM 62 * to the process. 63 */ 64 static FILE * 65 my_popen(const char *command, const char *dir, pid_t *p_pid) 66 { 67 FILE *io_out, *io_in; 68 int pdesin[2], pdesout[2]; 69 char *argv[4]; 70 pid_t pid; 71 char cmd[4]; 72 char cmd2[1024]; 73 char arg1[4]; 74 75 if ((strcmp(dir, "r") != 0) && 76 (strcmp(dir, "w") != 0)) { 77 errno = EINVAL; 78 return(NULL); 79 } 80 if (pipe(pdesin) < 0) 81 return (NULL); 82 83 if (pipe(pdesout) < 0) { 84 (void)close(pdesin[0]); 85 (void)close(pdesin[1]); 86 return (NULL); 87 } 88 strcpy(cmd, "sh"); 89 strcpy(arg1, "-c"); 90 strcpy(cmd2, command); 91 argv[0] = cmd; 92 argv[1] = arg1; 93 argv[2] = cmd2; 94 argv[3] = NULL; 95 96 switch (pid = fork()) { 97 case -1: /* Error. */ 98 (void)close(pdesin[0]); 99 (void)close(pdesin[1]); 100 (void)close(pdesout[0]); 101 (void)close(pdesout[1]); 102 return (NULL); 103 /* NOTREACHED */ 104 case 0: /* Child. */ 105 /* Close out un-used sides */ 106 (void)close(pdesin[1]); 107 (void)close(pdesout[0]); 108 /* Now prepare the stdin of the process */ 109 close(0); 110 (void)dup(pdesin[0]); 111 (void)close(pdesin[0]); 112 /* Now prepare the stdout of the process */ 113 close(1); 114 (void)dup(pdesout[1]); 115 /* And lets do stderr just in case */ 116 close(2); 117 (void)dup(pdesout[1]); 118 (void)close(pdesout[1]); 119 /* Now run it */ 120 execve("/bin/sh", argv, environ); 121 exit(127); 122 /* NOTREACHED */ 123 } 124 /* Parent; assume fdopen can't fail. */ 125 /* Store the pid */ 126 *p_pid = pid; 127 if (strcmp(dir, "r") != 0) { 128 io_out = fdopen(pdesin[1], "w"); 129 (void)close(pdesin[0]); 130 (void)close(pdesout[0]); 131 (void)close(pdesout[1]); 132 return(io_out); 133 } else { 134 /* Prepare the input stream */ 135 io_in = fdopen(pdesout[0], "r"); 136 (void)close(pdesout[1]); 137 (void)close(pdesin[0]); 138 (void)close(pdesin[1]); 139 return (io_in); 140 } 141 } 142 143 /* 144 * pclose -- 145 * Pclose returns -1 if stream is not associated with a `popened' command, 146 * if already `pclosed', or waitpid returns an error. 147 */ 148 static void 149 my_pclose(FILE *io, pid_t the_pid) 150 { 151 int pstat; 152 pid_t pid; 153 154 /* 155 * Find the appropriate file pointer and remove it from the list. 156 */ 157 (void)fclose(io); 158 /* Die if you are not dead! */ 159 kill(the_pid, SIGTERM); 160 do { 161 pid = wait4(the_pid, &pstat, 0, (struct rusage *)0); 162 } while (pid == -1 && errno == EINTR); 163 } 164 165 struct counters { 166 struct counters *next_cpu; 167 char counter_name[MAX_NLEN]; /* Name of counter */ 168 int cpu; /* CPU we are on */ 169 int pos; /* Index we are filling to. */ 170 uint64_t vals[MAX_COUNTER_SLOTS]; /* Last 64 entries */ 171 uint64_t sum; /* Summary of entries */ 172 }; 173 174 extern struct counters *glob_cpu[MAX_CPU]; 175 struct counters *glob_cpu[MAX_CPU]; 176 177 extern struct counters *cnts; 178 struct counters *cnts=NULL; 179 180 extern int ncnts; 181 int ncnts=0; 182 183 extern int (*expression)(struct counters *, int); 184 int (*expression)(struct counters *, int); 185 186 static const char *threshold=NULL; 187 static const char *command; 188 189 struct cpu_entry { 190 const char *name; 191 const char *thresh; 192 const char *command; 193 int (*func)(struct counters *, int); 194 }; 195 196 197 struct cpu_type { 198 char cputype[32]; 199 int number; 200 struct cpu_entry *ents; 201 void (*explain)(const char *name); 202 }; 203 extern struct cpu_type the_cpu; 204 struct cpu_type the_cpu; 205 206 static void 207 explain_name_sb(const char *name) 208 { 209 const char *mythresh; 210 if (strcmp(name, "allocstall1") == 0) { 211 printf("Examine PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW / CPU_CLK_UNHALTED.THREAD_P\n"); 212 mythresh = "thresh > .05"; 213 } else if (strcmp(name, "allocstall2") == 0) { 214 printf("Examine PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P\n"); 215 mythresh = "thresh > .05"; 216 } else if (strcmp(name, "br_miss") == 0) { 217 printf("Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P\n"); 218 mythresh = "thresh >= .2"; 219 } else if (strcmp(name, "splitload") == 0) { 220 printf("Examine MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); 221 mythresh = "thresh >= .1"; 222 } else if (strcmp(name, "splitstore") == 0) { 223 printf("Examine MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES\n"); 224 mythresh = "thresh >= .01"; 225 } else if (strcmp(name, "contested") == 0) { 226 printf("Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n"); 227 mythresh = "thresh >= .05"; 228 } else if (strcmp(name, "blockstorefwd") == 0) { 229 printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n"); 230 mythresh = "thresh >= .05"; 231 } else if (strcmp(name, "cache2") == 0) { 232 printf("Examine ((MEM_LOAD_RETIRED.L3_HIT * 26) + \n"); 233 printf(" (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) + \n"); 234 printf(" (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P\n"); 235 printf("**Note we have it labeled MEM_LOAD_UOPS_RETIRED.LLC_HIT not MEM_LOAD_RETIRED.L3_HIT\n"); 236 mythresh = "thresh >= .2"; 237 } else if (strcmp(name, "cache1") == 0) { 238 printf("Examine (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P\n"); 239 mythresh = "thresh >= .2"; 240 } else if (strcmp(name, "dtlbmissload") == 0) { 241 printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"); 242 printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); 243 mythresh = "thresh >= .1"; 244 } else if (strcmp(name, "frontendstall") == 0) { 245 printf("Examine IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4)\n"); 246 mythresh = "thresh >= .15"; 247 } else if (strcmp(name, "clears") == 0) { 248 printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"); 249 printf(" MACHINE_CLEARS.SMC + \n"); 250 printf(" MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n"); 251 mythresh = "thresh >= .02"; 252 } else if (strcmp(name, "microassist") == 0) { 253 printf("Examine IDQ.MS_CYCLES / (CPU_CLK_UNHALTED.THREAD_P * 4)\n"); 254 printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n"); 255 mythresh = "thresh >= .05"; 256 } else if (strcmp(name, "aliasing_4k") == 0) { 257 printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); 258 mythresh = "thresh >= .1"; 259 } else if (strcmp(name, "fpassist") == 0) { 260 printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n"); 261 mythresh = "look for a excessive value"; 262 } else if (strcmp(name, "otherassistavx") == 0) { 263 printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 264 mythresh = "look for a excessive value"; 265 } else if (strcmp(name, "otherassistsse") == 0) { 266 printf("Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 267 mythresh = "look for a excessive value"; 268 } else if (strcmp(name, "eff1") == 0) { 269 printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n"); 270 mythresh = "thresh < .9"; 271 } else if (strcmp(name, "eff2") == 0) { 272 printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n"); 273 mythresh = "thresh > 1.0"; 274 } else if (strcmp(name, "dtlbmissstore") == 0) { 275 printf("Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"); 276 printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); 277 mythresh = "thresh >= .05"; 278 } else { 279 printf("Unknown name:%s\n", name); 280 mythresh = "unknown entry"; 281 } 282 printf("If the value printed is %s we may have the ability to improve performance\n", mythresh); 283 } 284 285 static void 286 explain_name_ib(const char *name) 287 { 288 const char *mythresh; 289 if (strcmp(name, "br_miss") == 0) { 290 printf("Examine ((BR_MISP_RETIRED.ALL_BRANCHES /(BR_MISP_RETIRED.ALL_BRANCHES +\n"); 291 printf(" MACHINE_CLEAR.COUNT) * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES)\n"); 292 printf("/ (4 * CPU_CLK_UNHALTED.THREAD))))\n"); 293 mythresh = "thresh >= .2"; 294 } else if (strcmp(name, "eff1") == 0) { 295 printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n"); 296 mythresh = "thresh < .9"; 297 } else if (strcmp(name, "eff2") == 0) { 298 printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n"); 299 mythresh = "thresh > 1.0"; 300 } else if (strcmp(name, "cache1") == 0) { 301 printf("Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n"); 302 mythresh = "thresh >= .2"; 303 } else if (strcmp(name, "cache2") == 0) { 304 printf("Examine (MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P\n"); 305 mythresh = "thresh >= .2"; 306 } else if (strcmp(name, "itlbmiss") == 0) { 307 printf("Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n"); 308 mythresh = "thresh > .05"; 309 } else if (strcmp(name, "icachemiss") == 0) { 310 printf("Examine (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n"); 311 mythresh = "thresh > .05"; 312 } else if (strcmp(name, "lcpstall") == 0) { 313 printf("Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n"); 314 mythresh = "thresh > .05"; 315 } else if (strcmp(name, "datashare") == 0) { 316 printf("Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/CPU_CLK_UNHALTED.THREAD_P\n"); 317 mythresh = "thresh > .05"; 318 } else if (strcmp(name, "blockstorefwd") == 0) { 319 printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n"); 320 mythresh = "thresh >= .05"; 321 } else if (strcmp(name, "splitload") == 0) { 322 printf("Examine ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) *\n"); 323 printf(" LD_BLOCKS.NO_SR)/CPU_CLK_UNHALTED.THREAD_P\n"); 324 mythresh = "thresh >= .1"; 325 } else if (strcmp(name, "splitstore") == 0) { 326 printf("Examine MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES\n"); 327 mythresh = "thresh >= .01"; 328 } else if (strcmp(name, "aliasing_4k") == 0) { 329 printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); 330 mythresh = "thresh >= .1"; 331 } else if (strcmp(name, "dtlbmissload") == 0) { 332 printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"); 333 printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); 334 mythresh = "thresh >= .1"; 335 } else if (strcmp(name, "dtlbmissstore") == 0) { 336 printf("Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"); 337 printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); 338 mythresh = "thresh >= .05"; 339 } else if (strcmp(name, "contested") == 0) { 340 printf("Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n"); 341 mythresh = "thresh >= .05"; 342 } else if (strcmp(name, "clears") == 0) { 343 printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"); 344 printf(" MACHINE_CLEARS.SMC + \n"); 345 printf(" MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n"); 346 mythresh = "thresh >= .02"; 347 } else if (strcmp(name, "microassist") == 0) { 348 printf("Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"); 349 printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n"); 350 mythresh = "thresh >= .05"; 351 } else if (strcmp(name, "fpassist") == 0) { 352 printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n"); 353 mythresh = "look for a excessive value"; 354 } else if (strcmp(name, "otherassistavx") == 0) { 355 printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 356 mythresh = "look for a excessive value"; 357 } else if (strcmp(name, "otherassistsse") == 0) { 358 printf("Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 359 mythresh = "look for a excessive value"; 360 } else { 361 printf("Unknown name:%s\n", name); 362 mythresh = "unknown entry"; 363 } 364 printf("If the value printed is %s we may have the ability to improve performance\n", mythresh); 365 } 366 367 368 static void 369 explain_name_has(const char *name) 370 { 371 const char *mythresh; 372 if (strcmp(name, "eff1") == 0) { 373 printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n"); 374 mythresh = "thresh < .75"; 375 } else if (strcmp(name, "eff2") == 0) { 376 printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n"); 377 mythresh = "thresh > 1.0"; 378 } else if (strcmp(name, "itlbmiss") == 0) { 379 printf("Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n"); 380 mythresh = "thresh > .05"; 381 } else if (strcmp(name, "icachemiss") == 0) { 382 printf("Examine (36 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P\n"); 383 mythresh = "thresh > .05"; 384 } else if (strcmp(name, "lcpstall") == 0) { 385 printf("Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n"); 386 mythresh = "thresh > .05"; 387 } else if (strcmp(name, "cache1") == 0) { 388 printf("Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n"); 389 mythresh = "thresh >= .2"; 390 } else if (strcmp(name, "cache2") == 0) { 391 printf("Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \n"); 392 printf(" (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) + \n"); 393 printf(" (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))\n"); 394 printf(" / CPU_CLK_UNHALTED.THREAD_P\n"); 395 mythresh = "thresh >= .2"; 396 } else if (strcmp(name, "contested") == 0) { 397 printf("Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P\n"); 398 mythresh = "thresh >= .05"; 399 } else if (strcmp(name, "datashare") == 0) { 400 printf("Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n"); 401 mythresh = "thresh > .05"; 402 } else if (strcmp(name, "blockstorefwd") == 0) { 403 printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n"); 404 mythresh = "thresh >= .05"; 405 } else if (strcmp(name, "splitload") == 0) { 406 printf("Examine (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); 407 mythresh = "thresh >= .1"; 408 } else if (strcmp(name, "splitstore") == 0) { 409 printf("Examine MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES\n"); 410 mythresh = "thresh >= .01"; 411 } else if (strcmp(name, "aliasing_4k") == 0) { 412 printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); 413 mythresh = "thresh >= .1"; 414 } else if (strcmp(name, "dtlbmissload") == 0) { 415 printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"); 416 printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); 417 mythresh = "thresh >= .1"; 418 } else if (strcmp(name, "br_miss") == 0) { 419 printf("Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD\n"); 420 mythresh = "thresh >= .2"; 421 } else if (strcmp(name, "clears") == 0) { 422 printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"); 423 printf(" MACHINE_CLEARS.SMC + \n"); 424 printf(" MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n"); 425 mythresh = "thresh >= .02"; 426 } else if (strcmp(name, "microassist") == 0) { 427 printf("Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"); 428 printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n"); 429 mythresh = "thresh >= .05"; 430 } else if (strcmp(name, "fpassist") == 0) { 431 printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n"); 432 mythresh = "look for a excessive value"; 433 } else if (strcmp(name, "otherassistavx") == 0) { 434 printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 435 mythresh = "look for a excessive value"; 436 } else if (strcmp(name, "otherassistsse") == 0) { 437 printf("Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 438 mythresh = "look for a excessive value"; 439 } else { 440 printf("Unknown name:%s\n", name); 441 mythresh = "unknown entry"; 442 } 443 printf("If the value printed is %s we may have the ability to improve performance\n", mythresh); 444 } 445 446 static struct counters * 447 find_counter(struct counters *base, const char *name) 448 { 449 struct counters *at; 450 int len; 451 452 at = base; 453 len = strlen(name); 454 while(at) { 455 if (strncmp(at->counter_name, name, len) == 0) { 456 return(at); 457 } 458 at = at->next_cpu; 459 } 460 printf("Can't find counter %s\n", name); 461 printf("We have:\n"); 462 at = base; 463 while(at) { 464 printf("- %s\n", at->counter_name); 465 at = at->next_cpu; 466 } 467 exit(-1); 468 } 469 470 static int 471 allocstall1(struct counters *cpu, int pos) 472 { 473 /* 1 - PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW/CPU_CLK_UNHALTED.THREAD_P (thresh > .05)*/ 474 int ret; 475 struct counters *partial; 476 struct counters *unhalt; 477 double un, par, res; 478 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 479 partial = find_counter(cpu, "PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW"); 480 if (pos != -1) { 481 par = partial->vals[pos] * 1.0; 482 un = unhalt->vals[pos] * 1.0; 483 } else { 484 par = partial->sum * 1.0; 485 un = unhalt->sum * 1.0; 486 } 487 res = par/un; 488 ret = printf("%1.3f", res); 489 return(ret); 490 } 491 492 static int 493 allocstall2(struct counters *cpu, int pos) 494 { 495 /* 2 - PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */ 496 int ret; 497 struct counters *partial; 498 struct counters *unhalt; 499 double un, par, res; 500 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 501 partial = find_counter(cpu, "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP"); 502 if (pos != -1) { 503 par = partial->vals[pos] * 1.0; 504 un = unhalt->vals[pos] * 1.0; 505 } else { 506 par = partial->sum * 1.0; 507 un = unhalt->sum * 1.0; 508 } 509 res = par/un; 510 ret = printf("%1.3f", res); 511 return(ret); 512 } 513 514 static int 515 br_mispredict(struct counters *cpu, int pos) 516 { 517 struct counters *brctr; 518 struct counters *unhalt; 519 int ret; 520 /* 3 - (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */ 521 double br, un, con, res; 522 con = 20.0; 523 524 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 525 brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES"); 526 if (pos != -1) { 527 br = brctr->vals[pos] * 1.0; 528 un = unhalt->vals[pos] * 1.0; 529 } else { 530 br = brctr->sum * 1.0; 531 un = unhalt->sum * 1.0; 532 } 533 res = (con * br)/un; 534 ret = printf("%1.3f", res); 535 return(ret); 536 } 537 538 static int 539 br_mispredictib(struct counters *cpu, int pos) 540 { 541 struct counters *brctr; 542 struct counters *unhalt; 543 struct counters *clear, *clear2, *clear3; 544 struct counters *uops; 545 struct counters *recv; 546 struct counters *iss; 547 /* "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",*/ 548 int ret; 549 /* 550 * (BR_MISP_RETIRED.ALL_BRANCHES / 551 * (BR_MISP_RETIRED.ALL_BRANCHES + 552 * MACHINE_CLEAR.COUNT) * 553 * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) / (4 * CPU_CLK_UNHALTED.THREAD))) 554 * 555 */ 556 double br, cl, cl2, cl3, uo, re, un, con, res, is; 557 con = 4.0; 558 559 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 560 brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES"); 561 clear = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING"); 562 clear2 = find_counter(cpu, "MACHINE_CLEARS.SMC"); 563 clear3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV"); 564 uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS"); 565 iss = find_counter(cpu, "UOPS_ISSUED.ANY"); 566 recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES"); 567 if (pos != -1) { 568 br = brctr->vals[pos] * 1.0; 569 cl = clear->vals[pos] * 1.0; 570 cl2 = clear2->vals[pos] * 1.0; 571 cl3 = clear3->vals[pos] * 1.0; 572 uo = uops->vals[pos] * 1.0; 573 re = recv->vals[pos] * 1.0; 574 is = iss->vals[pos] * 1.0; 575 un = unhalt->vals[pos] * 1.0; 576 } else { 577 br = brctr->sum * 1.0; 578 cl = clear->sum * 1.0; 579 cl2 = clear2->sum * 1.0; 580 cl3 = clear3->sum * 1.0; 581 uo = uops->sum * 1.0; 582 re = recv->sum * 1.0; 583 is = iss->sum * 1.0; 584 un = unhalt->sum * 1.0; 585 } 586 res = (br/(br + cl + cl2 + cl3) * ((is - uo + con * re) / (con * un))); 587 ret = printf("%1.3f", res); 588 return(ret); 589 } 590 591 static int 592 br_mispredict_broad(struct counters *cpu, int pos) 593 { 594 struct counters *brctr; 595 struct counters *unhalt; 596 struct counters *clear; 597 struct counters *uops; 598 struct counters *uops_ret; 599 struct counters *recv; 600 int ret; 601 double br, cl, uo, uo_r, re, con, un, res; 602 603 con = 4.0; 604 605 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 606 brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES"); 607 clear = find_counter(cpu, "MACHINE_CLEARS.CYCLES"); 608 uops = find_counter(cpu, "UOPS_ISSUED.ANY"); 609 uops_ret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS"); 610 recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES"); 611 612 if (pos != -1) { 613 un = unhalt->vals[pos] * 1.0; 614 br = brctr->vals[pos] * 1.0; 615 cl = clear->vals[pos] * 1.0; 616 uo = uops->vals[pos] * 1.0; 617 uo_r = uops_ret->vals[pos] * 1.0; 618 re = recv->vals[pos] * 1.0; 619 } else { 620 un = unhalt->sum * 1.0; 621 br = brctr->sum * 1.0; 622 cl = clear->sum * 1.0; 623 uo = uops->sum * 1.0; 624 uo_r = uops_ret->sum * 1.0; 625 re = recv->sum * 1.0; 626 } 627 res = br / (br + cl) * (uo - uo_r + con * re) / (un * con); 628 ret = printf("%1.3f", res); 629 return(ret); 630 } 631 632 static int 633 splitloadib(struct counters *cpu, int pos) 634 { 635 int ret; 636 struct counters *mem; 637 struct counters *l1d, *ldblock; 638 struct counters *unhalt; 639 double un, memd, res, l1, ldb; 640 /* 641 * ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) * LD_BLOCKS.NO_SR) / CPU_CLK_UNHALTED.THREAD_P 642 * "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1", 643 */ 644 645 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 646 mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L1_MISS"); 647 l1d = find_counter(cpu, "L1D_PEND_MISS.PENDING"); 648 ldblock = find_counter(cpu, "LD_BLOCKS.NO_SR"); 649 if (pos != -1) { 650 memd = mem->vals[pos] * 1.0; 651 l1 = l1d->vals[pos] * 1.0; 652 ldb = ldblock->vals[pos] * 1.0; 653 un = unhalt->vals[pos] * 1.0; 654 } else { 655 memd = mem->sum * 1.0; 656 l1 = l1d->sum * 1.0; 657 ldb = ldblock->sum * 1.0; 658 un = unhalt->sum * 1.0; 659 } 660 res = ((l1 / memd) * ldb)/un; 661 ret = printf("%1.3f", res); 662 return(ret); 663 } 664 665 static int 666 splitload(struct counters *cpu, int pos) 667 { 668 int ret; 669 struct counters *mem; 670 struct counters *unhalt; 671 double con, un, memd, res; 672 /* 4 - (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/ 673 674 con = 5.0; 675 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 676 mem = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_LOADS"); 677 if (pos != -1) { 678 memd = mem->vals[pos] * 1.0; 679 un = unhalt->vals[pos] * 1.0; 680 } else { 681 memd = mem->sum * 1.0; 682 un = unhalt->sum * 1.0; 683 } 684 res = (memd * con)/un; 685 ret = printf("%1.3f", res); 686 return(ret); 687 } 688 689 static int 690 splitstore(struct counters *cpu, int pos) 691 { 692 /* 5 - MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES (thresh > 0.01) */ 693 int ret; 694 struct counters *mem_split; 695 struct counters *mem_stores; 696 double memsplit, memstore, res; 697 mem_split = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_STORES"); 698 mem_stores = find_counter(cpu, "MEM_UOP_RETIRED.ALL_STORES"); 699 if (pos != -1) { 700 memsplit = mem_split->vals[pos] * 1.0; 701 memstore = mem_stores->vals[pos] * 1.0; 702 } else { 703 memsplit = mem_split->sum * 1.0; 704 memstore = mem_stores->sum * 1.0; 705 } 706 res = memsplit/memstore; 707 ret = printf("%1.3f", res); 708 return(ret); 709 } 710 711 712 static int 713 contested(struct counters *cpu, int pos) 714 { 715 /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */ 716 int ret; 717 struct counters *mem; 718 struct counters *unhalt; 719 double con, un, memd, res; 720 721 con = 60.0; 722 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 723 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); 724 if (pos != -1) { 725 memd = mem->vals[pos] * 1.0; 726 un = unhalt->vals[pos] * 1.0; 727 } else { 728 memd = mem->sum * 1.0; 729 un = unhalt->sum * 1.0; 730 } 731 res = (memd * con)/un; 732 ret = printf("%1.3f", res); 733 return(ret); 734 } 735 736 static int 737 contested_has(struct counters *cpu, int pos) 738 { 739 /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */ 740 int ret; 741 struct counters *mem; 742 struct counters *unhalt; 743 double con, un, memd, res; 744 745 con = 84.0; 746 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 747 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); 748 if (pos != -1) { 749 memd = mem->vals[pos] * 1.0; 750 un = unhalt->vals[pos] * 1.0; 751 } else { 752 memd = mem->sum * 1.0; 753 un = unhalt->sum * 1.0; 754 } 755 res = (memd * con)/un; 756 ret = printf("%1.3f", res); 757 return(ret); 758 } 759 760 static int 761 contestedbroad(struct counters *cpu, int pos) 762 { 763 /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */ 764 int ret; 765 struct counters *mem; 766 struct counters *mem2; 767 struct counters *unhalt; 768 double con, un, memd, memtoo, res; 769 770 con = 84.0; 771 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 772 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); 773 mem2 = find_counter(cpu,"MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS"); 774 775 if (pos != -1) { 776 memd = mem->vals[pos] * 1.0; 777 memtoo = mem2->vals[pos] * 1.0; 778 un = unhalt->vals[pos] * 1.0; 779 } else { 780 memd = mem->sum * 1.0; 781 memtoo = mem2->sum * 1.0; 782 un = unhalt->sum * 1.0; 783 } 784 res = ((memd * con) + memtoo)/un; 785 ret = printf("%1.3f", res); 786 return(ret); 787 } 788 789 790 static int 791 blockstoreforward(struct counters *cpu, int pos) 792 { 793 /* 7 - (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .05)*/ 794 int ret; 795 struct counters *ldb; 796 struct counters *unhalt; 797 double con, un, ld, res; 798 799 con = 13.0; 800 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 801 ldb = find_counter(cpu, "LD_BLOCKS_STORE_FORWARD"); 802 if (pos != -1) { 803 ld = ldb->vals[pos] * 1.0; 804 un = unhalt->vals[pos] * 1.0; 805 } else { 806 ld = ldb->sum * 1.0; 807 un = unhalt->sum * 1.0; 808 } 809 res = (ld * con)/un; 810 ret = printf("%1.3f", res); 811 return(ret); 812 } 813 814 static int 815 cache2(struct counters *cpu, int pos) 816 { 817 /* ** Suspect *** 818 * 8 - ((MEM_LOAD_RETIRED.L3_HIT * 26) + (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) + 819 * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P (thresh >.2) 820 */ 821 int ret; 822 struct counters *mem1, *mem2, *mem3; 823 struct counters *unhalt; 824 double con1, con2, con3, un, me_1, me_2, me_3, res; 825 826 con1 = 26.0; 827 con2 = 43.0; 828 con3 = 60.0; 829 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 830 /* Call for MEM_LOAD_RETIRED.L3_HIT possibly MEM_LOAD_UOPS_RETIRED.LLC_HIT ?*/ 831 mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT"); 832 mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT"); 833 mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); 834 if (pos != -1) { 835 me_1 = mem1->vals[pos] * 1.0; 836 me_2 = mem2->vals[pos] * 1.0; 837 me_3 = mem3->vals[pos] * 1.0; 838 un = unhalt->vals[pos] * 1.0; 839 } else { 840 me_1 = mem1->sum * 1.0; 841 me_2 = mem2->sum * 1.0; 842 me_3 = mem3->sum * 1.0; 843 un = unhalt->sum * 1.0; 844 } 845 res = ((me_1 * con1) + (me_2 * con2) + (me_3 * con3))/un; 846 ret = printf("%1.3f", res); 847 return(ret); 848 } 849 850 static int 851 datasharing(struct counters *cpu, int pos) 852 { 853 /* 854 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2) 855 */ 856 int ret; 857 struct counters *mem; 858 struct counters *unhalt; 859 double con, res, me, un; 860 861 con = 43.0; 862 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 863 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT"); 864 if (pos != -1) { 865 me = mem->vals[pos] * 1.0; 866 un = unhalt->vals[pos] * 1.0; 867 } else { 868 me = mem->sum * 1.0; 869 un = unhalt->sum * 1.0; 870 } 871 res = (me * con)/un; 872 ret = printf("%1.3f", res); 873 return(ret); 874 875 } 876 877 878 static int 879 datasharing_has(struct counters *cpu, int pos) 880 { 881 /* 882 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2) 883 */ 884 int ret; 885 struct counters *mem; 886 struct counters *unhalt; 887 double con, res, me, un; 888 889 con = 72.0; 890 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 891 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT"); 892 if (pos != -1) { 893 me = mem->vals[pos] * 1.0; 894 un = unhalt->vals[pos] * 1.0; 895 } else { 896 me = mem->sum * 1.0; 897 un = unhalt->sum * 1.0; 898 } 899 res = (me * con)/un; 900 ret = printf("%1.3f", res); 901 return(ret); 902 903 } 904 905 906 static int 907 cache2ib(struct counters *cpu, int pos) 908 { 909 /* 910 * (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2) 911 */ 912 int ret; 913 struct counters *mem; 914 struct counters *unhalt; 915 double con, un, me, res; 916 917 con = 29.0; 918 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 919 mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT"); 920 if (pos != -1) { 921 me = mem->vals[pos] * 1.0; 922 un = unhalt->vals[pos] * 1.0; 923 } else { 924 me = mem->sum * 1.0; 925 un = unhalt->sum * 1.0; 926 } 927 res = (con * me)/un; 928 ret = printf("%1.3f", res); 929 return(ret); 930 } 931 932 static int 933 cache2has(struct counters *cpu, int pos) 934 { 935 /* 936 * Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \ 937 * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) + 938 * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84)) 939 * / CPU_CLK_UNHALTED.THREAD_P 940 */ 941 int ret; 942 struct counters *mem1, *mem2, *mem3; 943 struct counters *unhalt; 944 double con1, con2, con3, un, me1, me2, me3, res; 945 946 con1 = 36.0; 947 con2 = 72.0; 948 con3 = 84.0; 949 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 950 mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT"); 951 mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT"); 952 mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); 953 if (pos != -1) { 954 me1 = mem1->vals[pos] * 1.0; 955 me2 = mem2->vals[pos] * 1.0; 956 me3 = mem3->vals[pos] * 1.0; 957 un = unhalt->vals[pos] * 1.0; 958 } else { 959 me1 = mem1->sum * 1.0; 960 me2 = mem2->sum * 1.0; 961 me3 = mem3->sum * 1.0; 962 un = unhalt->sum * 1.0; 963 } 964 res = ((me1 * con1) + (me2 * con2) + (me3 * con3))/un; 965 ret = printf("%1.3f", res); 966 return(ret); 967 } 968 969 static int 970 cache2broad(struct counters *cpu, int pos) 971 { 972 /* 973 * (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2) 974 */ 975 int ret; 976 struct counters *mem; 977 struct counters *unhalt; 978 double con, un, me, res; 979 980 con = 36.0; 981 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 982 mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L3_HIT"); 983 if (pos != -1) { 984 me = mem->vals[pos] * 1.0; 985 un = unhalt->vals[pos] * 1.0; 986 } else { 987 me = mem->sum * 1.0; 988 un = unhalt->sum * 1.0; 989 } 990 res = (con * me)/un; 991 ret = printf("%1.3f", res); 992 return(ret); 993 } 994 995 996 static int 997 cache1(struct counters *cpu, int pos) 998 { 999 /* 9 - (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */ 1000 int ret; 1001 struct counters *mem; 1002 struct counters *unhalt; 1003 double con, un, me, res; 1004 1005 con = 180.0; 1006 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1007 mem = find_counter(cpu, "MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS"); 1008 if (pos != -1) { 1009 me = mem->vals[pos] * 1.0; 1010 un = unhalt->vals[pos] * 1.0; 1011 } else { 1012 me = mem->sum * 1.0; 1013 un = unhalt->sum * 1.0; 1014 } 1015 res = (me * con)/un; 1016 ret = printf("%1.3f", res); 1017 return(ret); 1018 } 1019 1020 static int 1021 cache1ib(struct counters *cpu, int pos) 1022 { 1023 /* 9 - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */ 1024 int ret; 1025 struct counters *mem; 1026 struct counters *unhalt; 1027 double con, un, me, res; 1028 1029 con = 180.0; 1030 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1031 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM"); 1032 if (pos != -1) { 1033 me = mem->vals[pos] * 1.0; 1034 un = unhalt->vals[pos] * 1.0; 1035 } else { 1036 me = mem->sum * 1.0; 1037 un = unhalt->sum * 1.0; 1038 } 1039 res = (me * con)/un; 1040 ret = printf("%1.3f", res); 1041 return(ret); 1042 } 1043 1044 1045 static int 1046 cache1broad(struct counters *cpu, int pos) 1047 { 1048 /* 9 - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */ 1049 int ret; 1050 struct counters *mem; 1051 struct counters *unhalt; 1052 double con, un, me, res; 1053 1054 con = 180.0; 1055 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1056 mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L3_MISS"); 1057 if (pos != -1) { 1058 me = mem->vals[pos] * 1.0; 1059 un = unhalt->vals[pos] * 1.0; 1060 } else { 1061 me = mem->sum * 1.0; 1062 un = unhalt->sum * 1.0; 1063 } 1064 res = (me * con)/un; 1065 ret = printf("%1.3f", res); 1066 return(ret); 1067 } 1068 1069 1070 static int 1071 dtlb_missload(struct counters *cpu, int pos) 1072 { 1073 /* 10 - ((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P (t >=.1) */ 1074 int ret; 1075 struct counters *dtlb_m, *dtlb_d; 1076 struct counters *unhalt; 1077 double con, un, d1, d2, res; 1078 1079 con = 7.0; 1080 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1081 dtlb_m = find_counter(cpu, "DTLB_LOAD_MISSES.STLB_HIT"); 1082 dtlb_d = find_counter(cpu, "DTLB_LOAD_MISSES.WALK_DURATION"); 1083 if (pos != -1) { 1084 d1 = dtlb_m->vals[pos] * 1.0; 1085 d2 = dtlb_d->vals[pos] * 1.0; 1086 un = unhalt->vals[pos] * 1.0; 1087 } else { 1088 d1 = dtlb_m->sum * 1.0; 1089 d2 = dtlb_d->sum * 1.0; 1090 un = unhalt->sum * 1.0; 1091 } 1092 res = ((d1 * con) + d2)/un; 1093 ret = printf("%1.3f", res); 1094 return(ret); 1095 } 1096 1097 static int 1098 dtlb_missstore(struct counters *cpu, int pos) 1099 { 1100 /* 1101 * ((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION) / 1102 * CPU_CLK_UNHALTED.THREAD_P (t >= .1) 1103 */ 1104 int ret; 1105 struct counters *dtsb_m, *dtsb_d; 1106 struct counters *unhalt; 1107 double con, un, d1, d2, res; 1108 1109 con = 7.0; 1110 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1111 dtsb_m = find_counter(cpu, "DTLB_STORE_MISSES.STLB_HIT"); 1112 dtsb_d = find_counter(cpu, "DTLB_STORE_MISSES.WALK_DURATION"); 1113 if (pos != -1) { 1114 d1 = dtsb_m->vals[pos] * 1.0; 1115 d2 = dtsb_d->vals[pos] * 1.0; 1116 un = unhalt->vals[pos] * 1.0; 1117 } else { 1118 d1 = dtsb_m->sum * 1.0; 1119 d2 = dtsb_d->sum * 1.0; 1120 un = unhalt->sum * 1.0; 1121 } 1122 res = ((d1 * con) + d2)/un; 1123 ret = printf("%1.3f", res); 1124 return(ret); 1125 } 1126 1127 static int 1128 itlb_miss(struct counters *cpu, int pos) 1129 { 1130 /* ITLB_MISSES.WALK_DURATION / CPU_CLK_UNTHREAD_P IB */ 1131 int ret; 1132 struct counters *itlb; 1133 struct counters *unhalt; 1134 double un, d1, res; 1135 1136 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1137 itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION"); 1138 if (pos != -1) { 1139 d1 = itlb->vals[pos] * 1.0; 1140 un = unhalt->vals[pos] * 1.0; 1141 } else { 1142 d1 = itlb->sum * 1.0; 1143 un = unhalt->sum * 1.0; 1144 } 1145 res = d1/un; 1146 ret = printf("%1.3f", res); 1147 return(ret); 1148 } 1149 1150 1151 static int 1152 itlb_miss_broad(struct counters *cpu, int pos) 1153 { 1154 /* (7 * ITLB_MISSES.STLB_HIT_4K + ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNTHREAD_P */ 1155 int ret; 1156 struct counters *itlb; 1157 struct counters *unhalt; 1158 struct counters *four_k; 1159 double un, d1, res, k; 1160 1161 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1162 itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION"); 1163 four_k = find_counter(cpu, "ITLB_MISSES.STLB_HIT_4K"); 1164 if (pos != -1) { 1165 d1 = itlb->vals[pos] * 1.0; 1166 un = unhalt->vals[pos] * 1.0; 1167 k = four_k->vals[pos] * 1.0; 1168 } else { 1169 d1 = itlb->sum * 1.0; 1170 un = unhalt->sum * 1.0; 1171 k = four_k->sum * 1.0; 1172 } 1173 res = (7.0 * k + d1)/un; 1174 ret = printf("%1.3f", res); 1175 return(ret); 1176 } 1177 1178 1179 static int 1180 icache_miss(struct counters *cpu, int pos) 1181 { 1182 /* (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P IB */ 1183 1184 int ret; 1185 struct counters *itlb, *icache; 1186 struct counters *unhalt; 1187 double un, d1, ic, res; 1188 1189 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1190 itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION"); 1191 icache = find_counter(cpu, "ICACHE.IFETCH_STALL"); 1192 if (pos != -1) { 1193 d1 = itlb->vals[pos] * 1.0; 1194 ic = icache->vals[pos] * 1.0; 1195 un = unhalt->vals[pos] * 1.0; 1196 } else { 1197 d1 = itlb->sum * 1.0; 1198 ic = icache->sum * 1.0; 1199 un = unhalt->sum * 1.0; 1200 } 1201 res = (ic-d1)/un; 1202 ret = printf("%1.3f", res); 1203 return(ret); 1204 1205 } 1206 1207 static int 1208 icache_miss_has(struct counters *cpu, int pos) 1209 { 1210 /* (36 * ICACHE.MISSES) / CPU_CLK_UNHALTED.THREAD_P */ 1211 1212 int ret; 1213 struct counters *icache; 1214 struct counters *unhalt; 1215 double un, con, ic, res; 1216 1217 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1218 icache = find_counter(cpu, "ICACHE.MISSES"); 1219 con = 36.0; 1220 if (pos != -1) { 1221 ic = icache->vals[pos] * 1.0; 1222 un = unhalt->vals[pos] * 1.0; 1223 } else { 1224 ic = icache->sum * 1.0; 1225 un = unhalt->sum * 1.0; 1226 } 1227 res = (con * ic)/un; 1228 ret = printf("%1.3f", res); 1229 return(ret); 1230 1231 } 1232 1233 static int 1234 lcp_stall(struct counters *cpu, int pos) 1235 { 1236 /* ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P IB */ 1237 int ret; 1238 struct counters *ild; 1239 struct counters *unhalt; 1240 double un, d1, res; 1241 1242 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1243 ild = find_counter(cpu, "ILD_STALL.LCP"); 1244 if (pos != -1) { 1245 d1 = ild->vals[pos] * 1.0; 1246 un = unhalt->vals[pos] * 1.0; 1247 } else { 1248 d1 = ild->sum * 1.0; 1249 un = unhalt->sum * 1.0; 1250 } 1251 res = d1/un; 1252 ret = printf("%1.3f", res); 1253 return(ret); 1254 1255 } 1256 1257 1258 static int 1259 frontendstall(struct counters *cpu, int pos) 1260 { 1261 /* 12 - IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4) (thresh >= .15) */ 1262 int ret; 1263 struct counters *idq; 1264 struct counters *unhalt; 1265 double con, un, id, res; 1266 1267 con = 4.0; 1268 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1269 idq = find_counter(cpu, "IDQ_UOPS_NOT_DELIVERED.CORE"); 1270 if (pos != -1) { 1271 id = idq->vals[pos] * 1.0; 1272 un = unhalt->vals[pos] * 1.0; 1273 } else { 1274 id = idq->sum * 1.0; 1275 un = unhalt->sum * 1.0; 1276 } 1277 res = id/(un * con); 1278 ret = printf("%1.3f", res); 1279 return(ret); 1280 } 1281 1282 static int 1283 clears(struct counters *cpu, int pos) 1284 { 1285 /* 13 - ((MACHINE_CLEARS.MEMORY_ORDERING + MACHINE_CLEARS.SMC + MACHINE_CLEARS.MASKMOV ) * 100 ) 1286 * / CPU_CLK_UNHALTED.THREAD_P (thresh >= .02)*/ 1287 1288 int ret; 1289 struct counters *clr1, *clr2, *clr3; 1290 struct counters *unhalt; 1291 double con, un, cl1, cl2, cl3, res; 1292 1293 con = 100.0; 1294 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1295 clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING"); 1296 clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC"); 1297 clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV"); 1298 1299 if (pos != -1) { 1300 cl1 = clr1->vals[pos] * 1.0; 1301 cl2 = clr2->vals[pos] * 1.0; 1302 cl3 = clr3->vals[pos] * 1.0; 1303 un = unhalt->vals[pos] * 1.0; 1304 } else { 1305 cl1 = clr1->sum * 1.0; 1306 cl2 = clr2->sum * 1.0; 1307 cl3 = clr3->sum * 1.0; 1308 un = unhalt->sum * 1.0; 1309 } 1310 res = ((cl1 + cl2 + cl3) * con)/un; 1311 ret = printf("%1.3f", res); 1312 return(ret); 1313 } 1314 1315 static int 1316 clears_broad(struct counters *cpu, int pos) 1317 { 1318 int ret; 1319 struct counters *clr1, *clr2, *clr3, *cyc; 1320 struct counters *unhalt; 1321 double con, un, cl1, cl2, cl3, cy, res; 1322 1323 con = 100.0; 1324 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1325 clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING"); 1326 clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC"); 1327 clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV"); 1328 cyc = find_counter(cpu, "MACHINE_CLEARS.CYCLES"); 1329 if (pos != -1) { 1330 cl1 = clr1->vals[pos] * 1.0; 1331 cl2 = clr2->vals[pos] * 1.0; 1332 cl3 = clr3->vals[pos] * 1.0; 1333 cy = cyc->vals[pos] * 1.0; 1334 un = unhalt->vals[pos] * 1.0; 1335 } else { 1336 cl1 = clr1->sum * 1.0; 1337 cl2 = clr2->sum * 1.0; 1338 cl3 = clr3->sum * 1.0; 1339 cy = cyc->sum * 1.0; 1340 un = unhalt->sum * 1.0; 1341 } 1342 /* Formula not listed but extrapulated to add the cy ?? */ 1343 res = ((cl1 + cl2 + cl3 + cy) * con)/un; 1344 ret = printf("%1.3f", res); 1345 return(ret); 1346 } 1347 1348 static int 1349 microassist(struct counters *cpu, int pos) 1350 { 1351 /* 14 - IDQ.MS_CYCLES / CPU_CLK_UNHALTED.THREAD_P (thresh > .05) */ 1352 int ret; 1353 struct counters *idq; 1354 struct counters *unhalt; 1355 double un, id, res, con; 1356 1357 con = 4.0; 1358 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1359 idq = find_counter(cpu, "IDQ.MS_UOPS"); 1360 if (pos != -1) { 1361 id = idq->vals[pos] * 1.0; 1362 un = unhalt->vals[pos] * 1.0; 1363 } else { 1364 id = idq->sum * 1.0; 1365 un = unhalt->sum * 1.0; 1366 } 1367 res = id/(un * con); 1368 ret = printf("%1.3f", res); 1369 return(ret); 1370 } 1371 1372 static int 1373 microassist_broad(struct counters *cpu, int pos) 1374 { 1375 int ret; 1376 struct counters *idq; 1377 struct counters *unhalt; 1378 struct counters *uopiss; 1379 struct counters *uopret; 1380 double un, id, res, con, uoi, uor; 1381 1382 con = 4.0; 1383 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1384 idq = find_counter(cpu, "IDQ.MS_UOPS"); 1385 uopiss = find_counter(cpu, "UOPS_ISSUED.ANY"); 1386 uopret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS"); 1387 if (pos != -1) { 1388 id = idq->vals[pos] * 1.0; 1389 un = unhalt->vals[pos] * 1.0; 1390 uoi = uopiss->vals[pos] * 1.0; 1391 uor = uopret->vals[pos] * 1.0; 1392 } else { 1393 id = idq->sum * 1.0; 1394 un = unhalt->sum * 1.0; 1395 uoi = uopiss->sum * 1.0; 1396 uor = uopret->sum * 1.0; 1397 } 1398 res = (uor/uoi) * (id/(un * con)); 1399 ret = printf("%1.3f", res); 1400 return(ret); 1401 } 1402 1403 static int 1404 aliasing(struct counters *cpu, int pos) 1405 { 1406 /* 15 - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */ 1407 int ret; 1408 struct counters *ld; 1409 struct counters *unhalt; 1410 double un, lds, con, res; 1411 1412 con = 5.0; 1413 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1414 ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS"); 1415 if (pos != -1) { 1416 lds = ld->vals[pos] * 1.0; 1417 un = unhalt->vals[pos] * 1.0; 1418 } else { 1419 lds = ld->sum * 1.0; 1420 un = unhalt->sum * 1.0; 1421 } 1422 res = (lds * con)/un; 1423 ret = printf("%1.3f", res); 1424 return(ret); 1425 } 1426 1427 static int 1428 aliasing_broad(struct counters *cpu, int pos) 1429 { 1430 /* 15 - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */ 1431 int ret; 1432 struct counters *ld; 1433 struct counters *unhalt; 1434 double un, lds, con, res; 1435 1436 con = 7.0; 1437 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1438 ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS"); 1439 if (pos != -1) { 1440 lds = ld->vals[pos] * 1.0; 1441 un = unhalt->vals[pos] * 1.0; 1442 } else { 1443 lds = ld->sum * 1.0; 1444 un = unhalt->sum * 1.0; 1445 } 1446 res = (lds * con)/un; 1447 ret = printf("%1.3f", res); 1448 return(ret); 1449 } 1450 1451 1452 static int 1453 fpassists(struct counters *cpu, int pos) 1454 { 1455 /* 16 - FP_ASSIST.ANY/INST_RETIRED.ANY_P */ 1456 int ret; 1457 struct counters *fp; 1458 struct counters *inst; 1459 double un, fpd, res; 1460 1461 inst = find_counter(cpu, "INST_RETIRED.ANY_P"); 1462 fp = find_counter(cpu, "FP_ASSIST.ANY"); 1463 if (pos != -1) { 1464 fpd = fp->vals[pos] * 1.0; 1465 un = inst->vals[pos] * 1.0; 1466 } else { 1467 fpd = fp->sum * 1.0; 1468 un = inst->sum * 1.0; 1469 } 1470 res = fpd/un; 1471 ret = printf("%1.3f", res); 1472 return(ret); 1473 } 1474 1475 static int 1476 otherassistavx(struct counters *cpu, int pos) 1477 { 1478 /* 17 - (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P thresh .1*/ 1479 int ret; 1480 struct counters *oth; 1481 struct counters *unhalt; 1482 double un, ot, con, res; 1483 1484 con = 75.0; 1485 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1486 oth = find_counter(cpu, "OTHER_ASSISTS.AVX_TO_SSE"); 1487 if (pos != -1) { 1488 ot = oth->vals[pos] * 1.0; 1489 un = unhalt->vals[pos] * 1.0; 1490 } else { 1491 ot = oth->sum * 1.0; 1492 un = unhalt->sum * 1.0; 1493 } 1494 res = (ot * con)/un; 1495 ret = printf("%1.3f", res); 1496 return(ret); 1497 } 1498 1499 static int 1500 otherassistsse(struct counters *cpu, int pos) 1501 { 1502 1503 int ret; 1504 struct counters *oth; 1505 struct counters *unhalt; 1506 double un, ot, con, res; 1507 1508 /* 18 (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P thresh .1*/ 1509 con = 75.0; 1510 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1511 oth = find_counter(cpu, "OTHER_ASSISTS.SSE_TO_AVX"); 1512 if (pos != -1) { 1513 ot = oth->vals[pos] * 1.0; 1514 un = unhalt->vals[pos] * 1.0; 1515 } else { 1516 ot = oth->sum * 1.0; 1517 un = unhalt->sum * 1.0; 1518 } 1519 res = (ot * con)/un; 1520 ret = printf("%1.3f", res); 1521 return(ret); 1522 } 1523 1524 static int 1525 efficiency1(struct counters *cpu, int pos) 1526 { 1527 1528 int ret; 1529 struct counters *uops; 1530 struct counters *unhalt; 1531 double un, ot, con, res; 1532 1533 /* 19 (UOPS_RETIRED.RETIRE_SLOTS/(4*CPU_CLK_UNHALTED.THREAD_P) look if thresh < .9*/ 1534 con = 4.0; 1535 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1536 uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS"); 1537 if (pos != -1) { 1538 ot = uops->vals[pos] * 1.0; 1539 un = unhalt->vals[pos] * 1.0; 1540 } else { 1541 ot = uops->sum * 1.0; 1542 un = unhalt->sum * 1.0; 1543 } 1544 res = ot/(con * un); 1545 ret = printf("%1.3f", res); 1546 return(ret); 1547 } 1548 1549 static int 1550 efficiency2(struct counters *cpu, int pos) 1551 { 1552 1553 int ret; 1554 struct counters *uops; 1555 struct counters *unhalt; 1556 double un, ot, res; 1557 1558 /* 20 - CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P good if > 1. (comp factor)*/ 1559 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1560 uops = find_counter(cpu, "INST_RETIRED.ANY_P"); 1561 if (pos != -1) { 1562 ot = uops->vals[pos] * 1.0; 1563 un = unhalt->vals[pos] * 1.0; 1564 } else { 1565 ot = uops->sum * 1.0; 1566 un = unhalt->sum * 1.0; 1567 } 1568 res = un/ot; 1569 ret = printf("%1.3f", res); 1570 return(ret); 1571 } 1572 1573 #define SANDY_BRIDGE_COUNT 20 1574 static struct cpu_entry sandy_bridge[SANDY_BRIDGE_COUNT] = { 1575 /*01*/ { "allocstall1", "thresh > .05", 1576 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW -w 1", 1577 allocstall1 }, 1578 /*02*/ { "allocstall2", "thresh > .05", 1579 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES -w 1", 1580 allocstall2 }, 1581 /*03*/ { "br_miss", "thresh >= .2", 1582 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1", 1583 br_mispredict }, 1584 /*04*/ { "splitload", "thresh >= .1", 1585 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1", 1586 splitload }, 1587 /*05*/ { "splitstore", "thresh >= .01", 1588 "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1", 1589 splitstore }, 1590 /*06*/ { "contested", "thresh >= .05", 1591 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1592 contested }, 1593 /*07*/ { "blockstorefwd", "thresh >= .05", 1594 "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1595 blockstoreforward }, 1596 /*08*/ { "cache2", "thresh >= .2", 1597 "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1598 cache2 }, 1599 /*09*/ { "cache1", "thresh >= .2", 1600 "pmcstat -s MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1601 cache1 }, 1602 /*10*/ { "dtlbmissload", "thresh >= .1", 1603 "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1604 dtlb_missload }, 1605 /*11*/ { "dtlbmissstore", "thresh >= .05", 1606 "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1607 dtlb_missstore }, 1608 /*12*/ { "frontendstall", "thresh >= .15", 1609 "pmcstat -s IDQ_UOPS_NOT_DELIVERED.CORE -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1610 frontendstall }, 1611 /*13*/ { "clears", "thresh >= .02", 1612 "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1613 clears }, 1614 /*14*/ { "microassist", "thresh >= .05", 1615 "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1616 microassist }, 1617 /*15*/ { "aliasing_4k", "thresh >= .1", 1618 "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1619 aliasing }, 1620 /*16*/ { "fpassist", "look for a excessive value", 1621 "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", 1622 fpassists }, 1623 /*17*/ { "otherassistavx", "look for a excessive value", 1624 "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1625 otherassistavx }, 1626 /*18*/ { "otherassistsse", "look for a excessive value", 1627 "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1628 otherassistsse }, 1629 /*19*/ { "eff1", "thresh < .9", 1630 "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1631 efficiency1 }, 1632 /*20*/ { "eff2", "thresh > 1.0", 1633 "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1634 efficiency2 }, 1635 }; 1636 1637 1638 #define IVY_BRIDGE_COUNT 21 1639 static struct cpu_entry ivy_bridge[IVY_BRIDGE_COUNT] = { 1640 /*1*/ { "eff1", "thresh < .75", 1641 "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1642 efficiency1 }, 1643 /*2*/ { "eff2", "thresh > 1.0", 1644 "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1645 efficiency2 }, 1646 /*3*/ { "itlbmiss", "thresh > .05", 1647 "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1648 itlb_miss }, 1649 /*4*/ { "icachemiss", "thresh > .05", 1650 "pmcstat -s ICACHE.IFETCH_STALL -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1651 icache_miss }, 1652 /*5*/ { "lcpstall", "thresh > .05", 1653 "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1654 lcp_stall }, 1655 /*6*/ { "cache1", "thresh >= .2", 1656 "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1657 cache1ib }, 1658 /*7*/ { "cache2", "thresh >= .2", 1659 "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1660 cache2ib }, 1661 /*8*/ { "contested", "thresh >= .05", 1662 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1663 contested }, 1664 /*9*/ { "datashare", "thresh >= .05", 1665 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1666 datasharing }, 1667 /*10*/ { "blockstorefwd", "thresh >= .05", 1668 "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1669 blockstoreforward }, 1670 /*11*/ { "splitload", "thresh >= .1", 1671 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1", 1672 splitloadib }, 1673 /*12*/ { "splitstore", "thresh >= .01", 1674 "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1", 1675 splitstore }, 1676 /*13*/ { "aliasing_4k", "thresh >= .1", 1677 "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1678 aliasing }, 1679 /*14*/ { "dtlbmissload", "thresh >= .1", 1680 "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1681 dtlb_missload }, 1682 /*15*/ { "dtlbmissstore", "thresh >= .05", 1683 "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1684 dtlb_missstore }, 1685 /*16*/ { "br_miss", "thresh >= .2", 1686 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1", 1687 br_mispredictib }, 1688 /*17*/ { "clears", "thresh >= .02", 1689 "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1690 clears }, 1691 /*18*/ { "microassist", "thresh >= .05", 1692 "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1693 microassist }, 1694 /*19*/ { "fpassist", "look for a excessive value", 1695 "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", 1696 fpassists }, 1697 /*20*/ { "otherassistavx", "look for a excessive value", 1698 "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1699 otherassistavx }, 1700 /*21*/ { "otherassistsse", "look for a excessive value", 1701 "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1702 otherassistsse }, 1703 }; 1704 1705 #define HASWELL_COUNT 20 1706 static struct cpu_entry haswell[HASWELL_COUNT] = { 1707 /*1*/ { "eff1", "thresh < .75", 1708 "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1709 efficiency1 }, 1710 /*2*/ { "eff2", "thresh > 1.0", 1711 "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1712 efficiency2 }, 1713 /*3*/ { "itlbmiss", "thresh > .05", 1714 "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1715 itlb_miss }, 1716 /*4*/ { "icachemiss", "thresh > .05", 1717 "pmcstat -s ICACHE.MISSES --s CPU_CLK_UNHALTED.THREAD_P -w 1", 1718 icache_miss_has }, 1719 /*5*/ { "lcpstall", "thresh > .05", 1720 "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1721 lcp_stall }, 1722 /*6*/ { "cache1", "thresh >= .2", 1723 "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1724 cache1ib }, 1725 /*7*/ { "cache2", "thresh >= .2", 1726 "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1727 cache2has }, 1728 /*8*/ { "contested", "thresh >= .05", 1729 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1730 contested_has }, 1731 /*9*/ { "datashare", "thresh >= .05", 1732 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1733 datasharing_has }, 1734 /*10*/ { "blockstorefwd", "thresh >= .05", 1735 "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1736 blockstoreforward }, 1737 /*11*/ { "splitload", "thresh >= .1", 1738 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1", 1739 splitload }, 1740 /*12*/ { "splitstore", "thresh >= .01", 1741 "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1", 1742 splitstore }, 1743 /*13*/ { "aliasing_4k", "thresh >= .1", 1744 "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1745 aliasing }, 1746 /*14*/ { "dtlbmissload", "thresh >= .1", 1747 "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1748 dtlb_missload }, 1749 /*15*/ { "br_miss", "thresh >= .2", 1750 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1", 1751 br_mispredict }, 1752 /*16*/ { "clears", "thresh >= .02", 1753 "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1754 clears }, 1755 /*17*/ { "microassist", "thresh >= .05", 1756 "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1757 microassist }, 1758 /*18*/ { "fpassist", "look for a excessive value", 1759 "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", 1760 fpassists }, 1761 /*19*/ { "otherassistavx", "look for a excessive value", 1762 "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1763 otherassistavx }, 1764 /*20*/ { "otherassistsse", "look for a excessive value", 1765 "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1766 otherassistsse }, 1767 }; 1768 1769 1770 static void 1771 explain_name_broad(const char *name) 1772 { 1773 const char *mythresh; 1774 if (strcmp(name, "eff1") == 0) { 1775 printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n"); 1776 mythresh = "thresh < .75"; 1777 } else if (strcmp(name, "eff2") == 0) { 1778 printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n"); 1779 mythresh = "thresh > 1.0"; 1780 } else if (strcmp(name, "itlbmiss") == 0) { 1781 printf("Examine (7 * ITLB_MISSES_STLB_HIT_4K + ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n"); 1782 mythresh = "thresh > .05"; 1783 } else if (strcmp(name, "icachemiss") == 0) { 1784 printf("Examine ( 36.0 * CACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P ??? may not be right \n"); 1785 mythresh = "thresh > .05"; 1786 } else if (strcmp(name, "lcpstall") == 0) { 1787 printf("Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n"); 1788 mythresh = "thresh > .05"; 1789 } else if (strcmp(name, "cache1") == 0) { 1790 printf("Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n"); 1791 mythresh = "thresh >= .1"; 1792 } else if (strcmp(name, "cache2") == 0) { 1793 printf("Examine (36.0 * MEM_LOAD_UOPS_RETIRED.L3_HIT / CPU_CLK_UNHALTED.THREAD_P)\n"); 1794 mythresh = "thresh >= .2"; 1795 } else if (strcmp(name, "contested") == 0) { 1796 printf("Examine ((MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS)/ CPU_CLK_UNHALTED.THREAD_P\n"); 1797 mythresh = "thresh >= .05"; 1798 } else if (strcmp(name, "datashare") == 0) { 1799 printf("Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n"); 1800 mythresh = "thresh > .05"; 1801 } else if (strcmp(name, "blockstorefwd") == 0) { 1802 printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n"); 1803 mythresh = "thresh >= .05"; 1804 } else if (strcmp(name, "aliasing_4k") == 0) { 1805 printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 7) / CPU_CLK_UNHALTED.THREAD_P\n"); 1806 mythresh = "thresh >= .1"; 1807 } else if (strcmp(name, "dtlbmissload") == 0) { 1808 printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"); 1809 printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); 1810 mythresh = "thresh >= .1"; 1811 1812 } else if (strcmp(name, "br_miss") == 0) { 1813 printf("Examine BR_MISP_RETIRED.ALL_BRANCHS_PS / (BR_MISP_RETIED.ALL_BRANCHES_PS + MACHINE_CLEARS.COUNT) *\n"); 1814 printf(" (UOPS_ISSUEDF.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) /\n"); 1815 printf("CPU_CLK_UNHALTED.THREAD * 4)\n"); 1816 mythresh = "thresh >= .2"; 1817 } else if (strcmp(name, "clears") == 0) { 1818 printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"); 1819 printf(" MACHINE_CLEARS.SMC + \n"); 1820 printf(" MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n"); 1821 mythresh = "thresh >= .02"; 1822 } else if (strcmp(name, "fpassist") == 0) { 1823 printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n"); 1824 mythresh = "look for a excessive value"; 1825 } else if (strcmp(name, "otherassistavx") == 0) { 1826 printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 1827 mythresh = "look for a excessive value"; 1828 } else if (strcmp(name, "microassist") == 0) { 1829 printf("Examine (UOPS_RETIRED.RETIRE_SLOTS/UOPS_ISSUED.ANY) * (IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"); 1830 printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n"); 1831 mythresh = "thresh >= .05"; 1832 } else { 1833 printf("Unknown name:%s\n", name); 1834 mythresh = "unknown entry"; 1835 } 1836 printf("If the value printed is %s we may have the ability to improve performance\n", mythresh); 1837 } 1838 1839 1840 #define BROADWELL_COUNT 17 1841 static struct cpu_entry broadwell[BROADWELL_COUNT] = { 1842 /*1*/ { "eff1", "thresh < .75", 1843 "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1844 efficiency1 }, 1845 /*2*/ { "eff2", "thresh > 1.0", 1846 "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1847 efficiency2 }, 1848 /*3*/ { "itlbmiss", "thresh > .05", 1849 "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -s ITLB_MISSES.STLB_HIT_4K -w 1", 1850 itlb_miss_broad }, 1851 /*4*/ { "icachemiss", "thresh > .05", 1852 "pmcstat -s ICACHE.MISSES --s CPU_CLK_UNHALTED.THREAD_P -w 1", 1853 icache_miss_has }, 1854 /*5*/ { "lcpstall", "thresh > .05", 1855 "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1856 lcp_stall }, 1857 /*6*/ { "cache1", "thresh >= .1", 1858 "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1859 cache1broad }, 1860 /*7*/ { "cache2", "thresh >= .2", 1861 "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1862 cache2broad }, 1863 /*8*/ { "contested", "thresh >= .05", 1864 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS -w 1", 1865 contestedbroad }, 1866 /*9*/ { "datashare", "thresh >= .05", 1867 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1868 datasharing_has }, 1869 /*10*/ { "blockstorefwd", "thresh >= .05", 1870 "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1871 blockstoreforward }, 1872 /*11*/ { "aliasing_4k", "thresh >= .1", 1873 "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1874 aliasing_broad }, 1875 /*12*/ { "dtlbmissload", "thresh >= .1", 1876 "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1877 dtlb_missload }, 1878 /*13*/ { "br_miss", "thresh >= .2", 1879 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.CYCLES -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1", 1880 br_mispredict_broad }, 1881 /*14*/ { "clears", "thresh >= .02", 1882 "pmcstat -s MACHINE_CLEARS.CYCLES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1883 clears_broad }, 1884 /*15*/ { "fpassist", "look for a excessive value", 1885 "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", 1886 fpassists }, 1887 /*16*/ { "otherassistavx", "look for a excessive value", 1888 "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1889 otherassistavx }, 1890 /*17*/ { "microassist", "thresh >= .2", 1891 "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -w 1", 1892 microassist_broad }, 1893 }; 1894 1895 static void 1896 set_sandybridge(void) 1897 { 1898 strcpy(the_cpu.cputype, "SandyBridge PMC"); 1899 the_cpu.number = SANDY_BRIDGE_COUNT; 1900 the_cpu.ents = sandy_bridge; 1901 the_cpu.explain = explain_name_sb; 1902 } 1903 1904 static void 1905 set_ivybridge(void) 1906 { 1907 strcpy(the_cpu.cputype, "IvyBridge PMC"); 1908 the_cpu.number = IVY_BRIDGE_COUNT; 1909 the_cpu.ents = ivy_bridge; 1910 the_cpu.explain = explain_name_ib; 1911 } 1912 1913 1914 static void 1915 set_haswell(void) 1916 { 1917 strcpy(the_cpu.cputype, "HASWELL PMC"); 1918 the_cpu.number = HASWELL_COUNT; 1919 the_cpu.ents = haswell; 1920 the_cpu.explain = explain_name_has; 1921 } 1922 1923 static void 1924 set_broadwell(void) 1925 { 1926 strcpy(the_cpu.cputype, "HASWELL PMC"); 1927 the_cpu.number = BROADWELL_COUNT; 1928 the_cpu.ents = broadwell; 1929 the_cpu.explain = explain_name_broad; 1930 } 1931 1932 static void 1933 set_expression(char *name) 1934 { 1935 int found = 0, i; 1936 for(i=0 ; i< the_cpu.number; i++) { 1937 if (strcmp(name, the_cpu.ents[i].name) == 0) { 1938 found = 1; 1939 expression = the_cpu.ents[i].func; 1940 command = the_cpu.ents[i].command; 1941 threshold = the_cpu.ents[i].thresh; 1942 break; 1943 } 1944 } 1945 if (!found) { 1946 printf("For CPU type %s we have no expression:%s\n", 1947 the_cpu.cputype, name); 1948 exit(-1); 1949 } 1950 } 1951 1952 static int 1953 validate_expression(char *name) 1954 { 1955 int i, found; 1956 1957 found = 0; 1958 for(i=0 ; i< the_cpu.number; i++) { 1959 if (strcmp(name, the_cpu.ents[i].name) == 0) { 1960 found = 1; 1961 break; 1962 } 1963 } 1964 if (!found) { 1965 return(-1); 1966 } 1967 return (0); 1968 } 1969 1970 static void 1971 do_expression(struct counters *cpu, int pos) 1972 { 1973 if (expression == NULL) 1974 return; 1975 (*expression)(cpu, pos); 1976 } 1977 1978 static void 1979 process_header(int idx, char *p) 1980 { 1981 struct counters *up; 1982 int i, len, nlen; 1983 /* 1984 * Given header element idx, at p in 1985 * form 's/NN/nameof' 1986 * process the entry to pull out the name and 1987 * the CPU number. 1988 */ 1989 if (strncmp(p, "s/", 2)) { 1990 printf("Check -- invalid header no s/ in %s\n", 1991 p); 1992 return; 1993 } 1994 up = &cnts[idx]; 1995 up->cpu = strtol(&p[2], NULL, 10); 1996 len = strlen(p); 1997 for (i=2; i<len; i++) { 1998 if (p[i] == '/') { 1999 nlen = strlen(&p[(i+1)]); 2000 if (nlen < (MAX_NLEN-1)) { 2001 strcpy(up->counter_name, &p[(i+1)]); 2002 } else { 2003 strncpy(up->counter_name, &p[(i+1)], (MAX_NLEN-1)); 2004 } 2005 } 2006 } 2007 } 2008 2009 static void 2010 build_counters_from_header(FILE *io) 2011 { 2012 char buffer[8192], *p; 2013 int i, len, cnt; 2014 size_t mlen; 2015 2016 /* We have a new start, lets 2017 * setup our headers and cpus. 2018 */ 2019 if (fgets(buffer, sizeof(buffer), io) == NULL) { 2020 printf("First line can't be read from file err:%d\n", errno); 2021 return; 2022 } 2023 /* 2024 * Ok output is an array of counters. Once 2025 * we start to read the values in we must 2026 * put them in there slot to match there CPU and 2027 * counter being updated. We create a mass array 2028 * of the counters, filling in the CPU and 2029 * counter name. 2030 */ 2031 /* How many do we get? */ 2032 len = strlen(buffer); 2033 for (i=0, cnt=0; i<len; i++) { 2034 if (strncmp(&buffer[i], "s/", 2) == 0) { 2035 cnt++; 2036 for(;i<len;i++) { 2037 if (buffer[i] == ' ') 2038 break; 2039 } 2040 } 2041 } 2042 mlen = sizeof(struct counters) * cnt; 2043 cnts = malloc(mlen); 2044 ncnts = cnt; 2045 if (cnts == NULL) { 2046 printf("No memory err:%d\n", errno); 2047 return; 2048 } 2049 memset(cnts, 0, mlen); 2050 for (i=0, cnt=0; i<len; i++) { 2051 if (strncmp(&buffer[i], "s/", 2) == 0) { 2052 p = &buffer[i]; 2053 for(;i<len;i++) { 2054 if (buffer[i] == ' ') { 2055 buffer[i] = 0; 2056 break; 2057 } 2058 } 2059 process_header(cnt, p); 2060 cnt++; 2061 } 2062 } 2063 if (verbose) 2064 printf("We have %d entries\n", cnt); 2065 } 2066 extern int max_to_collect; 2067 int max_to_collect = MAX_COUNTER_SLOTS; 2068 2069 static int 2070 read_a_line(FILE *io) 2071 { 2072 char buffer[8192], *p, *stop; 2073 int pos, i; 2074 2075 if (fgets(buffer, sizeof(buffer), io) == NULL) { 2076 return(0); 2077 } 2078 p = buffer; 2079 for (i=0; i<ncnts; i++) { 2080 pos = cnts[i].pos; 2081 cnts[i].vals[pos] = strtol(p, &stop, 0); 2082 cnts[i].pos++; 2083 cnts[i].sum += cnts[i].vals[pos]; 2084 p = stop; 2085 } 2086 return (1); 2087 } 2088 2089 extern int cpu_count_out; 2090 int cpu_count_out=0; 2091 2092 static void 2093 print_header(void) 2094 { 2095 int i, cnt, printed_cnt; 2096 2097 printf("*********************************\n"); 2098 for(i=0, cnt=0; i<MAX_CPU; i++) { 2099 if (glob_cpu[i]) { 2100 cnt++; 2101 } 2102 } 2103 cpu_count_out = cnt; 2104 for(i=0, printed_cnt=0; i<MAX_CPU; i++) { 2105 if (glob_cpu[i]) { 2106 printf("CPU%d", i); 2107 printed_cnt++; 2108 } 2109 if (printed_cnt == cnt) { 2110 printf("\n"); 2111 break; 2112 } else { 2113 printf("\t"); 2114 } 2115 } 2116 } 2117 2118 static void 2119 lace_cpus_together(void) 2120 { 2121 int i, j, lace_cpu; 2122 struct counters *cpat, *at; 2123 2124 for(i=0; i<ncnts; i++) { 2125 cpat = &cnts[i]; 2126 if (cpat->next_cpu) { 2127 /* Already laced in */ 2128 continue; 2129 } 2130 lace_cpu = cpat->cpu; 2131 if (lace_cpu >= MAX_CPU) { 2132 printf("CPU %d to big\n", lace_cpu); 2133 continue; 2134 } 2135 if (glob_cpu[lace_cpu] == NULL) { 2136 glob_cpu[lace_cpu] = cpat; 2137 } else { 2138 /* Already processed this cpu */ 2139 continue; 2140 } 2141 /* Ok look forward for cpu->cpu and link in */ 2142 for(j=(i+1); j<ncnts; j++) { 2143 at = &cnts[j]; 2144 if (at->next_cpu) { 2145 continue; 2146 } 2147 if (at->cpu == lace_cpu) { 2148 /* Found one */ 2149 cpat->next_cpu = at; 2150 cpat = at; 2151 } 2152 } 2153 } 2154 } 2155 2156 2157 static void 2158 process_file(char *filename) 2159 { 2160 FILE *io; 2161 int i; 2162 int line_at, not_done; 2163 pid_t pid_of_command=0; 2164 2165 if (filename == NULL) { 2166 io = my_popen(command, "r", &pid_of_command); 2167 if (io == NULL) { 2168 printf("Can't popen the command %s\n", command); 2169 return; 2170 } 2171 } else { 2172 io = fopen(filename, "r"); 2173 if (io == NULL) { 2174 printf("Can't process file %s err:%d\n", 2175 filename, errno); 2176 return; 2177 } 2178 } 2179 build_counters_from_header(io); 2180 if (cnts == NULL) { 2181 /* Nothing we can do */ 2182 printf("Nothing to do -- no counters built\n"); 2183 if (filename) { 2184 fclose(io); 2185 } else { 2186 my_pclose(io, pid_of_command); 2187 } 2188 return; 2189 } 2190 lace_cpus_together(); 2191 print_header(); 2192 if (verbose) { 2193 for (i=0; i<ncnts; i++) { 2194 printf("Counter:%s cpu:%d index:%d\n", 2195 cnts[i].counter_name, 2196 cnts[i].cpu, i); 2197 } 2198 } 2199 line_at = 0; 2200 not_done = 1; 2201 while(not_done) { 2202 if (read_a_line(io)) { 2203 line_at++; 2204 } else { 2205 break; 2206 } 2207 if (line_at >= max_to_collect) { 2208 not_done = 0; 2209 } 2210 if (filename == NULL) { 2211 int cnt; 2212 /* For the ones we dynamically open we print now */ 2213 for(i=0, cnt=0; i<MAX_CPU; i++) { 2214 do_expression(glob_cpu[i], (line_at-1)); 2215 cnt++; 2216 if (cnt == cpu_count_out) { 2217 printf("\n"); 2218 break; 2219 } else { 2220 printf("\t"); 2221 } 2222 } 2223 } 2224 } 2225 if (filename) { 2226 fclose(io); 2227 } else { 2228 my_pclose(io, pid_of_command); 2229 } 2230 } 2231 #if defined(__amd64__) 2232 #define cpuid(in,a,b,c,d)\ 2233 asm("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (in)); 2234 #else 2235 #define cpuid(in, a, b, c, d) 2236 #endif 2237 2238 static void 2239 get_cpuid_set(void) 2240 { 2241 unsigned long eax, ebx, ecx, edx; 2242 int model; 2243 pid_t pid_of_command=0; 2244 size_t sz, len; 2245 FILE *io; 2246 char linebuf[1024], *str; 2247 2248 eax = ebx = ecx = edx = 0; 2249 2250 cpuid(0, eax, ebx, ecx, edx); 2251 if (ebx == 0x68747541) { 2252 printf("AMD processors are not supported by this program\n"); 2253 printf("Sorry\n"); 2254 exit(0); 2255 } else if (ebx == 0x6972794) { 2256 printf("Cyrix processors are not supported by this program\n"); 2257 printf("Sorry\n"); 2258 exit(0); 2259 } else if (ebx == 0x756e6547) { 2260 printf("Genuine Intel\n"); 2261 } else { 2262 printf("Unknown processor type 0x%lx Only Intel AMD64 types are supported by this routine!\n", ebx); 2263 exit(0); 2264 } 2265 cpuid(1, eax, ebx, ecx, edx); 2266 model = (((eax & 0xF0000) >> 12) | ((eax & 0xF0) >> 4)); 2267 printf("CPU model is 0x%x id:0x%lx\n", model, eax); 2268 switch (eax & 0xF00) { 2269 case 0x500: /* Pentium family processors */ 2270 printf("Intel Pentium P5\n"); 2271 goto not_supported; 2272 break; 2273 case 0x600: /* Pentium Pro, Celeron, Pentium II & III */ 2274 switch (model) { 2275 case 0x1: 2276 printf("Intel Pentium P6\n"); 2277 goto not_supported; 2278 break; 2279 case 0x3: 2280 case 0x5: 2281 printf("Intel PII\n"); 2282 goto not_supported; 2283 break; 2284 case 0x6: case 0x16: 2285 printf("Intel CL\n"); 2286 goto not_supported; 2287 break; 2288 case 0x7: case 0x8: case 0xA: case 0xB: 2289 printf("Intel PIII\n"); 2290 goto not_supported; 2291 break; 2292 case 0x9: case 0xD: 2293 printf("Intel PM\n"); 2294 goto not_supported; 2295 break; 2296 case 0xE: 2297 printf("Intel CORE\n"); 2298 goto not_supported; 2299 break; 2300 case 0xF: 2301 printf("Intel CORE2\n"); 2302 goto not_supported; 2303 break; 2304 case 0x17: 2305 printf("Intel CORE2EXTREME\n"); 2306 goto not_supported; 2307 break; 2308 case 0x1C: /* Per Intel document 320047-002. */ 2309 printf("Intel ATOM\n"); 2310 goto not_supported; 2311 break; 2312 case 0x1A: 2313 case 0x1E: /* 2314 * Per Intel document 253669-032 9/2009, 2315 * pages A-2 and A-57 2316 */ 2317 case 0x1F: /* 2318 * Per Intel document 253669-032 9/2009, 2319 * pages A-2 and A-57 2320 */ 2321 printf("Intel COREI7\n"); 2322 goto not_supported; 2323 break; 2324 case 0x2E: 2325 printf("Intel NEHALEM\n"); 2326 goto not_supported; 2327 break; 2328 case 0x25: /* Per Intel document 253669-033US 12/2009. */ 2329 case 0x2C: /* Per Intel document 253669-033US 12/2009. */ 2330 printf("Intel WESTMERE\n"); 2331 goto not_supported; 2332 break; 2333 case 0x2F: /* Westmere-EX, seen in wild */ 2334 printf("Intel WESTMERE\n"); 2335 goto not_supported; 2336 break; 2337 case 0x2A: /* Per Intel document 253669-039US 05/2011. */ 2338 printf("Intel SANDYBRIDGE\n"); 2339 set_sandybridge(); 2340 break; 2341 case 0x2D: /* Per Intel document 253669-044US 08/2012. */ 2342 printf("Intel SANDYBRIDGE_XEON\n"); 2343 set_sandybridge(); 2344 break; 2345 case 0x3A: /* Per Intel document 253669-043US 05/2012. */ 2346 printf("Intel IVYBRIDGE\n"); 2347 set_ivybridge(); 2348 break; 2349 case 0x3E: /* Per Intel document 325462-045US 01/2013. */ 2350 printf("Intel IVYBRIDGE_XEON\n"); 2351 set_ivybridge(); 2352 break; 2353 case 0x3F: /* Per Intel document 325462-045US 09/2014. */ 2354 printf("Intel HASWELL (Xeon)\n"); 2355 set_haswell(); 2356 break; 2357 case 0x3C: /* Per Intel document 325462-045US 01/2013. */ 2358 case 0x45: 2359 case 0x46: 2360 printf("Intel HASWELL\n"); 2361 set_haswell(); 2362 break; 2363 2364 case 0x4e: 2365 case 0x5e: 2366 printf("Intel SKY-LAKE\n"); 2367 goto not_supported; 2368 break; 2369 case 0x3D: 2370 case 0x47: 2371 printf("Intel BROADWELL\n"); 2372 set_broadwell(); 2373 break; 2374 case 0x4f: 2375 case 0x56: 2376 printf("Intel BROADWEL (Xeon)L\n"); 2377 set_broadwell(); 2378 break; 2379 2380 case 0x4D: 2381 /* Per Intel document 330061-001 01/2014. */ 2382 printf("Intel ATOM_SILVERMONT\n"); 2383 goto not_supported; 2384 break; 2385 default: 2386 printf("Intel model 0x%x is not known -- sorry\n", 2387 model); 2388 goto not_supported; 2389 break; 2390 } 2391 break; 2392 case 0xF00: /* P4 */ 2393 printf("Intel unknown model %d\n", model); 2394 goto not_supported; 2395 break; 2396 } 2397 /* Ok lets load the list of all known PMC's */ 2398 io = my_popen("/usr/sbin/pmccontrol -L", "r", &pid_of_command); 2399 if (valid_pmcs == NULL) { 2400 /* Likely */ 2401 pmc_allocated_cnt = PMC_INITIAL_ALLOC; 2402 sz = sizeof(char *) * pmc_allocated_cnt; 2403 valid_pmcs = malloc(sz); 2404 if (valid_pmcs == NULL) { 2405 printf("No memory allocation fails at startup?\n"); 2406 exit(-1); 2407 } 2408 memset(valid_pmcs, 0, sz); 2409 } 2410 2411 while (fgets(linebuf, sizeof(linebuf), io) != NULL) { 2412 if (linebuf[0] != '\t') { 2413 /* sometimes headers ;-) */ 2414 continue; 2415 } 2416 len = strlen(linebuf); 2417 if (linebuf[(len-1)] == '\n') { 2418 /* Likely */ 2419 linebuf[(len-1)] = 0; 2420 } 2421 str = &linebuf[1]; 2422 len = strlen(str) + 1; 2423 valid_pmcs[valid_pmc_cnt] = malloc(len); 2424 if (valid_pmcs[valid_pmc_cnt] == NULL) { 2425 printf("No memory2 allocation fails at startup?\n"); 2426 exit(-1); 2427 } 2428 memset(valid_pmcs[valid_pmc_cnt], 0, len); 2429 strcpy(valid_pmcs[valid_pmc_cnt], str); 2430 valid_pmc_cnt++; 2431 if (valid_pmc_cnt >= pmc_allocated_cnt) { 2432 /* Got to expand -- unlikely */ 2433 char **more; 2434 2435 sz = sizeof(char *) * (pmc_allocated_cnt * 2); 2436 more = malloc(sz); 2437 if (more == NULL) { 2438 printf("No memory3 allocation fails at startup?\n"); 2439 exit(-1); 2440 } 2441 memset(more, 0, sz); 2442 memcpy(more, valid_pmcs, sz); 2443 pmc_allocated_cnt *= 2; 2444 free(valid_pmcs); 2445 valid_pmcs = more; 2446 } 2447 } 2448 my_pclose(io, pid_of_command); 2449 return; 2450 not_supported: 2451 printf("Not supported\n"); 2452 exit(-1); 2453 } 2454 2455 static void 2456 explain_all(void) 2457 { 2458 int i; 2459 printf("For CPU's of type %s the following expressions are available:\n",the_cpu.cputype); 2460 printf("-------------------------------------------------------------\n"); 2461 for(i=0; i<the_cpu.number; i++){ 2462 printf("For -e %s ", the_cpu.ents[i].name); 2463 (*the_cpu.explain)(the_cpu.ents[i].name); 2464 printf("----------------------------\n"); 2465 } 2466 } 2467 2468 static void 2469 test_for_a_pmc(const char *pmc, int out_so_far) 2470 { 2471 FILE *io; 2472 pid_t pid_of_command=0; 2473 char my_command[1024]; 2474 char line[1024]; 2475 char resp[1024]; 2476 int len, llen, i; 2477 2478 if (out_so_far < 50) { 2479 len = 50 - out_so_far; 2480 for(i=0; i<len; i++) { 2481 printf(" "); 2482 } 2483 } 2484 sprintf(my_command, "/usr/sbin/pmcstat -w .25 -c 0 -s %s", pmc); 2485 io = my_popen(my_command, "r", &pid_of_command); 2486 if (io == NULL) { 2487 printf("Failed -- popen fails\n"); 2488 return; 2489 } 2490 /* Setup what we expect */ 2491 len = sprintf(resp, "%s", pmc); 2492 if (fgets(line, sizeof(line), io) == NULL) { 2493 printf("Failed -- no output from pmstat\n"); 2494 goto out; 2495 } 2496 llen = strlen(line); 2497 if (line[(llen-1)] == '\n') { 2498 line[(llen-1)] = 0; 2499 llen--; 2500 } 2501 for(i=2; i<(llen-len); i++) { 2502 if (strncmp(&line[i], "ERROR", 5) == 0) { 2503 printf("Failed %s\n", line); 2504 goto out; 2505 } else if (strncmp(&line[i], resp, len) == 0) { 2506 int j, k; 2507 2508 if (fgets(line, sizeof(line), io) == NULL) { 2509 printf("Failed -- no second output from pmstat\n"); 2510 goto out; 2511 } 2512 len = strlen(line); 2513 for (j=0; j<len; j++) { 2514 if (line[j] == ' ') { 2515 j++; 2516 } else { 2517 break; 2518 } 2519 } 2520 printf("Pass"); 2521 len = strlen(&line[j]); 2522 if (len < 20) { 2523 for(k=0; k<(20-len); k++) { 2524 printf(" "); 2525 } 2526 } 2527 if (len) { 2528 printf("%s", &line[j]); 2529 } else { 2530 printf("\n"); 2531 } 2532 goto out; 2533 } 2534 } 2535 printf("Failed -- '%s' not '%s'\n", line, resp); 2536 out: 2537 my_pclose(io, pid_of_command); 2538 2539 } 2540 2541 static int 2542 add_it_to(char **vars, int cur_cnt, char *name) 2543 { 2544 int i; 2545 size_t len; 2546 for(i=0; i<cur_cnt; i++) { 2547 if (strcmp(vars[i], name) == 0) { 2548 /* Already have */ 2549 return(0); 2550 } 2551 } 2552 if (vars[cur_cnt] != NULL) { 2553 printf("Cur_cnt:%d filled with %s??\n", 2554 cur_cnt, vars[cur_cnt]); 2555 exit(-1); 2556 } 2557 /* Ok its new */ 2558 len = strlen(name) + 1; 2559 vars[cur_cnt] = malloc(len); 2560 if (vars[cur_cnt] == NULL) { 2561 printf("No memory %s\n", __FUNCTION__); 2562 exit(-1); 2563 } 2564 memset(vars[cur_cnt], 0, len); 2565 strcpy(vars[cur_cnt], name); 2566 return(1); 2567 } 2568 2569 static char * 2570 build_command_for_exp(struct expression *exp) 2571 { 2572 /* 2573 * Build the pmcstat command to handle 2574 * the passed in expression. 2575 * /usr/sbin/pmcstat -w 1 -s NNN -s QQQ 2576 * where NNN and QQQ represent the PMC's in the expression 2577 * uniquely.. 2578 */ 2579 char forming[1024]; 2580 int cnt_pmc, alloced_pmcs, i; 2581 struct expression *at; 2582 char **vars, *cmd; 2583 size_t mal; 2584 2585 alloced_pmcs = cnt_pmc = 0; 2586 /* first how many do we have */ 2587 at = exp; 2588 while (at) { 2589 if (at->type == TYPE_VALUE_PMC) { 2590 cnt_pmc++; 2591 } 2592 at = at->next; 2593 } 2594 if (cnt_pmc == 0) { 2595 printf("No PMC's in your expression -- nothing to do!!\n"); 2596 exit(0); 2597 } 2598 mal = cnt_pmc * sizeof(char *); 2599 vars = malloc(mal); 2600 if (vars == NULL) { 2601 printf("No memory\n"); 2602 exit(-1); 2603 } 2604 memset(vars, 0, mal); 2605 at = exp; 2606 while (at) { 2607 if (at->type == TYPE_VALUE_PMC) { 2608 if(add_it_to(vars, alloced_pmcs, at->name)) { 2609 alloced_pmcs++; 2610 } 2611 } 2612 at = at->next; 2613 } 2614 /* Now we have a unique list in vars so create our command */ 2615 mal = 23; /* "/usr/sbin/pmcstat -w 1" + \0 */ 2616 for(i=0; i<alloced_pmcs; i++) { 2617 mal += strlen(vars[i]) + 4; /* var + " -s " */ 2618 } 2619 cmd = malloc((mal+2)); 2620 if (cmd == NULL) { 2621 printf("%s out of mem\n", __FUNCTION__); 2622 exit(-1); 2623 } 2624 memset(cmd, 0, (mal+2)); 2625 strcpy(cmd, "/usr/sbin/pmcstat -w 1"); 2626 at = exp; 2627 for(i=0; i<alloced_pmcs; i++) { 2628 sprintf(forming, " -s %s", vars[i]); 2629 strcat(cmd, forming); 2630 free(vars[i]); 2631 vars[i] = NULL; 2632 } 2633 free(vars); 2634 return(cmd); 2635 } 2636 2637 static int 2638 user_expr(struct counters *cpu, int pos) 2639 { 2640 int ret; 2641 double res; 2642 struct counters *var; 2643 struct expression *at; 2644 2645 at = master_exp; 2646 while (at) { 2647 if (at->type == TYPE_VALUE_PMC) { 2648 var = find_counter(cpu, at->name); 2649 if (var == NULL) { 2650 printf("%s:Can't find counter %s?\n", __FUNCTION__, at->name); 2651 exit(-1); 2652 } 2653 if (pos != -1) { 2654 at->value = var->vals[pos] * 1.0; 2655 } else { 2656 at->value = var->sum * 1.0; 2657 } 2658 } 2659 at = at->next; 2660 } 2661 res = run_expr(master_exp, 1, NULL); 2662 ret = printf("%1.3f", res); 2663 return(ret); 2664 } 2665 2666 2667 static void 2668 set_manual_exp(struct expression *exp) 2669 { 2670 expression = user_expr; 2671 command = build_command_for_exp(exp); 2672 threshold = "User defined threshold"; 2673 } 2674 2675 static void 2676 run_tests(void) 2677 { 2678 int i, lenout; 2679 printf("Running tests on %d PMC's this may take some time\n", valid_pmc_cnt); 2680 printf("------------------------------------------------------------------------\n"); 2681 for(i=0; i<valid_pmc_cnt; i++) { 2682 lenout = printf("%s", valid_pmcs[i]); 2683 fflush(stdout); 2684 test_for_a_pmc(valid_pmcs[i], lenout); 2685 } 2686 } 2687 static void 2688 list_all(void) 2689 { 2690 int i, cnt, j; 2691 printf("PMC Abbreviation\n"); 2692 printf("--------------------------------------------------------------\n"); 2693 for(i=0; i<valid_pmc_cnt; i++) { 2694 cnt = printf("%s", valid_pmcs[i]); 2695 for(j=cnt; j<52; j++) { 2696 printf(" "); 2697 } 2698 printf("%%%d\n", i); 2699 } 2700 } 2701 2702 2703 int 2704 main(int argc, char **argv) 2705 { 2706 int i, j, cnt; 2707 char *filename=NULL; 2708 char *name=NULL; 2709 int help_only = 0; 2710 int test_mode = 0; 2711 2712 get_cpuid_set(); 2713 memset(glob_cpu, 0, sizeof(glob_cpu)); 2714 while ((i = getopt(argc, argv, "LHhvm:i:?e:TE:")) != -1) { 2715 switch (i) { 2716 case 'L': 2717 list_all(); 2718 return(0); 2719 case 'H': 2720 printf("**********************************\n"); 2721 explain_all(); 2722 printf("**********************************\n"); 2723 return(0); 2724 break; 2725 case 'T': 2726 test_mode = 1; 2727 break; 2728 case 'E': 2729 master_exp = parse_expression(optarg); 2730 if (master_exp) { 2731 set_manual_exp(master_exp); 2732 } 2733 break; 2734 case 'e': 2735 if (validate_expression(optarg)) { 2736 printf("Unknown expression %s\n", optarg); 2737 return(0); 2738 } 2739 name = optarg; 2740 set_expression(optarg); 2741 break; 2742 case 'm': 2743 max_to_collect = strtol(optarg, NULL, 0); 2744 if (max_to_collect > MAX_COUNTER_SLOTS) { 2745 /* You can't collect more than max in array */ 2746 max_to_collect = MAX_COUNTER_SLOTS; 2747 } 2748 break; 2749 case 'v': 2750 verbose++; 2751 break; 2752 case 'h': 2753 help_only = 1; 2754 break; 2755 case 'i': 2756 filename = optarg; 2757 break; 2758 case '?': 2759 default: 2760 use: 2761 printf("Use %s [ -i inputfile -v -m max_to_collect -e expr -E -h -? -H]\n", 2762 argv[0]); 2763 printf("-i inputfile -- use source as inputfile not stdin (if stdin collect)\n"); 2764 printf("-v -- verbose dump debug type things -- you don't want this\n"); 2765 printf("-m N -- maximum to collect is N measurments\n"); 2766 printf("-e expr-name -- Do expression expr-name\n"); 2767 printf("-E 'your expression' -- Do your expression\n"); 2768 printf("-h -- Don't do the expression I put in -e xxx just explain what it does and exit\n"); 2769 printf("-H -- Don't run anything, just explain all canned expressions\n"); 2770 printf("-T -- Test all PMC's defined by this processor\n"); 2771 return(0); 2772 break; 2773 }; 2774 } 2775 if ((name == NULL) && (filename == NULL) && (test_mode == 0) && (master_exp == NULL)) { 2776 printf("Without setting an expression we cannot dynamically gather information\n"); 2777 printf("you must supply a filename (and you probably want verbosity)\n"); 2778 goto use; 2779 } 2780 if (test_mode) { 2781 run_tests(); 2782 return(0); 2783 } 2784 printf("*********************************\n"); 2785 if (master_exp == NULL) { 2786 (*the_cpu.explain)(name); 2787 } else { 2788 printf("Examine your expression "); 2789 print_exp(master_exp); 2790 printf("User defined threshold\n"); 2791 } 2792 if (help_only) { 2793 return(0); 2794 } 2795 process_file(filename); 2796 if (verbose >= 2) { 2797 for (i=0; i<ncnts; i++) { 2798 printf("Counter:%s cpu:%d index:%d\n", 2799 cnts[i].counter_name, 2800 cnts[i].cpu, i); 2801 for(j=0; j<cnts[i].pos; j++) { 2802 printf(" val - %ld\n", (long int)cnts[i].vals[j]); 2803 } 2804 printf(" sum - %ld\n", (long int)cnts[i].sum); 2805 } 2806 } 2807 if (expression == NULL) { 2808 return(0); 2809 } 2810 for(i=0, cnt=0; i<MAX_CPU; i++) { 2811 if (glob_cpu[i]) { 2812 do_expression(glob_cpu[i], -1); 2813 cnt++; 2814 if (cnt == cpu_count_out) { 2815 printf("\n"); 2816 break; 2817 } else { 2818 printf("\t"); 2819 } 2820 } 2821 } 2822 return(0); 2823 } 2824