1 /*- 2 * Copyright (c) 2014-2015 Netflix, Inc. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer, 9 * in this position and unchanged. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. The name of the author may not be used to endorse or promote products 14 * derived from this software without specific prior written permission 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 #include <sys/types.h> 28 #include <stdio.h> 29 #include <stdlib.h> 30 #include <unistd.h> 31 #include <string.h> 32 #include <strings.h> 33 #include <sys/errno.h> 34 #include <signal.h> 35 #include <sys/wait.h> 36 #include <getopt.h> 37 #include "eval_expr.h" 38 static int max_pmc_counters = 1; 39 static int run_all = 0; 40 41 #define MAX_COUNTER_SLOTS 1024 42 #define MAX_NLEN 64 43 #define MAX_CPU 64 44 static int verbose = 0; 45 46 extern char **environ; 47 extern struct expression *master_exp; 48 struct expression *master_exp=NULL; 49 50 #define PMC_INITIAL_ALLOC 512 51 extern char **valid_pmcs; 52 char **valid_pmcs = NULL; 53 extern int valid_pmc_cnt; 54 int valid_pmc_cnt=0; 55 extern int pmc_allocated_cnt; 56 int pmc_allocated_cnt=0; 57 58 /* 59 * The following two varients on popen and pclose with 60 * the cavet that they get you the PID so that you 61 * can supply it to pclose so it can send a SIGTERM 62 * to the process. 63 */ 64 static FILE * 65 my_popen(const char *command, const char *dir, pid_t *p_pid) 66 { 67 FILE *io_out, *io_in; 68 int pdesin[2], pdesout[2]; 69 char *argv[4]; 70 pid_t pid; 71 char cmd[4]; 72 char cmd2[1024]; 73 char arg1[4]; 74 75 if ((strcmp(dir, "r") != 0) && 76 (strcmp(dir, "w") != 0)) { 77 errno = EINVAL; 78 return(NULL); 79 } 80 if (pipe(pdesin) < 0) 81 return (NULL); 82 83 if (pipe(pdesout) < 0) { 84 (void)close(pdesin[0]); 85 (void)close(pdesin[1]); 86 return (NULL); 87 } 88 strcpy(cmd, "sh"); 89 strcpy(arg1, "-c"); 90 strcpy(cmd2, command); 91 argv[0] = cmd; 92 argv[1] = arg1; 93 argv[2] = cmd2; 94 argv[3] = NULL; 95 96 switch (pid = fork()) { 97 case -1: /* Error. */ 98 (void)close(pdesin[0]); 99 (void)close(pdesin[1]); 100 (void)close(pdesout[0]); 101 (void)close(pdesout[1]); 102 return (NULL); 103 /* NOTREACHED */ 104 case 0: /* Child. */ 105 /* Close out un-used sides */ 106 (void)close(pdesin[1]); 107 (void)close(pdesout[0]); 108 /* Now prepare the stdin of the process */ 109 close(0); 110 (void)dup(pdesin[0]); 111 (void)close(pdesin[0]); 112 /* Now prepare the stdout of the process */ 113 close(1); 114 (void)dup(pdesout[1]); 115 /* And lets do stderr just in case */ 116 close(2); 117 (void)dup(pdesout[1]); 118 (void)close(pdesout[1]); 119 /* Now run it */ 120 execve("/bin/sh", argv, environ); 121 exit(127); 122 /* NOTREACHED */ 123 } 124 /* Parent; assume fdopen can't fail. */ 125 /* Store the pid */ 126 *p_pid = pid; 127 if (strcmp(dir, "r") != 0) { 128 io_out = fdopen(pdesin[1], "w"); 129 (void)close(pdesin[0]); 130 (void)close(pdesout[0]); 131 (void)close(pdesout[1]); 132 return(io_out); 133 } else { 134 /* Prepare the input stream */ 135 io_in = fdopen(pdesout[0], "r"); 136 (void)close(pdesout[1]); 137 (void)close(pdesin[0]); 138 (void)close(pdesin[1]); 139 return (io_in); 140 } 141 } 142 143 /* 144 * pclose -- 145 * Pclose returns -1 if stream is not associated with a `popened' command, 146 * if already `pclosed', or waitpid returns an error. 147 */ 148 static void 149 my_pclose(FILE *io, pid_t the_pid) 150 { 151 int pstat; 152 pid_t pid; 153 154 /* 155 * Find the appropriate file pointer and remove it from the list. 156 */ 157 (void)fclose(io); 158 /* Die if you are not dead! */ 159 kill(the_pid, SIGTERM); 160 do { 161 pid = wait4(the_pid, &pstat, 0, (struct rusage *)0); 162 } while (pid == -1 && errno == EINTR); 163 } 164 165 struct counters { 166 struct counters *next_cpu; 167 char counter_name[MAX_NLEN]; /* Name of counter */ 168 int cpu; /* CPU we are on */ 169 int pos; /* Index we are filling to. */ 170 uint64_t vals[MAX_COUNTER_SLOTS]; /* Last 64 entries */ 171 uint64_t sum; /* Summary of entries */ 172 }; 173 174 extern struct counters *glob_cpu[MAX_CPU]; 175 struct counters *glob_cpu[MAX_CPU]; 176 177 extern struct counters *cnts; 178 struct counters *cnts=NULL; 179 180 extern int ncnts; 181 int ncnts=0; 182 183 extern int (*expression)(struct counters *, int); 184 int (*expression)(struct counters *, int); 185 186 static const char *threshold=NULL; 187 static const char *command; 188 189 struct cpu_entry { 190 const char *name; 191 const char *thresh; 192 const char *command; 193 int (*func)(struct counters *, int); 194 int counters_required; 195 }; 196 197 struct cpu_type { 198 char cputype[32]; 199 int number; 200 struct cpu_entry *ents; 201 void (*explain)(const char *name); 202 }; 203 extern struct cpu_type the_cpu; 204 struct cpu_type the_cpu; 205 206 static void 207 explain_name_sb(const char *name) 208 { 209 const char *mythresh; 210 if (strcmp(name, "allocstall1") == 0) { 211 printf("Examine PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW / CPU_CLK_UNHALTED.THREAD_P\n"); 212 mythresh = "thresh > .05"; 213 } else if (strcmp(name, "allocstall2") == 0) { 214 printf("Examine PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P\n"); 215 mythresh = "thresh > .05"; 216 } else if (strcmp(name, "br_miss") == 0) { 217 printf("Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P\n"); 218 mythresh = "thresh >= .2"; 219 } else if (strcmp(name, "splitload") == 0) { 220 printf("Examine MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); 221 mythresh = "thresh >= .1"; 222 } else if (strcmp(name, "splitstore") == 0) { 223 printf("Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n"); 224 mythresh = "thresh >= .01"; 225 } else if (strcmp(name, "contested") == 0) { 226 printf("Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n"); 227 mythresh = "thresh >= .05"; 228 } else if (strcmp(name, "blockstorefwd") == 0) { 229 printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n"); 230 mythresh = "thresh >= .05"; 231 } else if (strcmp(name, "cache2") == 0) { 232 printf("Examine ((MEM_LOAD_RETIRED.L3_HIT * 26) + \n"); 233 printf(" (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) + \n"); 234 printf(" (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P\n"); 235 printf("**Note we have it labeled MEM_LOAD_UOPS_RETIRED.LLC_HIT not MEM_LOAD_RETIRED.L3_HIT\n"); 236 mythresh = "thresh >= .2"; 237 } else if (strcmp(name, "cache1") == 0) { 238 printf("Examine (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P\n"); 239 mythresh = "thresh >= .2"; 240 } else if (strcmp(name, "dtlbmissload") == 0) { 241 printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"); 242 printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); 243 mythresh = "thresh >= .1"; 244 } else if (strcmp(name, "frontendstall") == 0) { 245 printf("Examine IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4)\n"); 246 mythresh = "thresh >= .15"; 247 } else if (strcmp(name, "clears") == 0) { 248 printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"); 249 printf(" MACHINE_CLEARS.SMC + \n"); 250 printf(" MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n"); 251 mythresh = "thresh >= .02"; 252 } else if (strcmp(name, "microassist") == 0) { 253 printf("Examine IDQ.MS_CYCLES / (CPU_CLK_UNHALTED.THREAD_P * 4)\n"); 254 printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n"); 255 mythresh = "thresh >= .05"; 256 } else if (strcmp(name, "aliasing_4k") == 0) { 257 printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); 258 mythresh = "thresh >= .1"; 259 } else if (strcmp(name, "fpassist") == 0) { 260 printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n"); 261 mythresh = "look for a excessive value"; 262 } else if (strcmp(name, "otherassistavx") == 0) { 263 printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 264 mythresh = "look for a excessive value"; 265 } else if (strcmp(name, "otherassistsse") == 0) { 266 printf("Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 267 mythresh = "look for a excessive value"; 268 } else if (strcmp(name, "eff1") == 0) { 269 printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n"); 270 mythresh = "thresh < .9"; 271 } else if (strcmp(name, "eff2") == 0) { 272 printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n"); 273 mythresh = "thresh > 1.0"; 274 } else if (strcmp(name, "dtlbmissstore") == 0) { 275 printf("Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"); 276 printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); 277 mythresh = "thresh >= .05"; 278 } else { 279 printf("Unknown name:%s\n", name); 280 mythresh = "unknown entry"; 281 } 282 printf("If the value printed is %s we may have the ability to improve performance\n", mythresh); 283 } 284 285 static void 286 explain_name_ib(const char *name) 287 { 288 const char *mythresh; 289 if (strcmp(name, "br_miss") == 0) { 290 printf("Examine ((BR_MISP_RETIRED.ALL_BRANCHES /(BR_MISP_RETIRED.ALL_BRANCHES +\n"); 291 printf(" MACHINE_CLEAR.COUNT) * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES)\n"); 292 printf("/ (4 * CPU_CLK_UNHALTED.THREAD))))\n"); 293 mythresh = "thresh >= .2"; 294 } else if (strcmp(name, "eff1") == 0) { 295 printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n"); 296 mythresh = "thresh < .9"; 297 } else if (strcmp(name, "eff2") == 0) { 298 printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n"); 299 mythresh = "thresh > 1.0"; 300 } else if (strcmp(name, "cache1") == 0) { 301 printf("Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n"); 302 mythresh = "thresh >= .2"; 303 } else if (strcmp(name, "cache2") == 0) { 304 printf("Examine (MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P\n"); 305 mythresh = "thresh >= .2"; 306 } else if (strcmp(name, "itlbmiss") == 0) { 307 printf("Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n"); 308 mythresh = "thresh > .05"; 309 } else if (strcmp(name, "icachemiss") == 0) { 310 printf("Examine (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n"); 311 mythresh = "thresh > .05"; 312 } else if (strcmp(name, "lcpstall") == 0) { 313 printf("Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n"); 314 mythresh = "thresh > .05"; 315 } else if (strcmp(name, "datashare") == 0) { 316 printf("Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/CPU_CLK_UNHALTED.THREAD_P\n"); 317 mythresh = "thresh > .05"; 318 } else if (strcmp(name, "blockstorefwd") == 0) { 319 printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n"); 320 mythresh = "thresh >= .05"; 321 } else if (strcmp(name, "splitload") == 0) { 322 printf("Examine ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) *\n"); 323 printf(" LD_BLOCKS.NO_SR)/CPU_CLK_UNHALTED.THREAD_P\n"); 324 mythresh = "thresh >= .1"; 325 } else if (strcmp(name, "splitstore") == 0) { 326 printf("Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n"); 327 mythresh = "thresh >= .01"; 328 } else if (strcmp(name, "aliasing_4k") == 0) { 329 printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); 330 mythresh = "thresh >= .1"; 331 } else if (strcmp(name, "dtlbmissload") == 0) { 332 printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"); 333 printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); 334 mythresh = "thresh >= .1"; 335 } else if (strcmp(name, "dtlbmissstore") == 0) { 336 printf("Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"); 337 printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); 338 mythresh = "thresh >= .05"; 339 } else if (strcmp(name, "contested") == 0) { 340 printf("Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n"); 341 mythresh = "thresh >= .05"; 342 } else if (strcmp(name, "clears") == 0) { 343 printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"); 344 printf(" MACHINE_CLEARS.SMC + \n"); 345 printf(" MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n"); 346 mythresh = "thresh >= .02"; 347 } else if (strcmp(name, "microassist") == 0) { 348 printf("Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"); 349 printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n"); 350 mythresh = "thresh >= .05"; 351 } else if (strcmp(name, "fpassist") == 0) { 352 printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n"); 353 mythresh = "look for a excessive value"; 354 } else if (strcmp(name, "otherassistavx") == 0) { 355 printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 356 mythresh = "look for a excessive value"; 357 } else if (strcmp(name, "otherassistsse") == 0) { 358 printf("Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 359 mythresh = "look for a excessive value"; 360 } else { 361 printf("Unknown name:%s\n", name); 362 mythresh = "unknown entry"; 363 } 364 printf("If the value printed is %s we may have the ability to improve performance\n", mythresh); 365 } 366 367 368 static void 369 explain_name_has(const char *name) 370 { 371 const char *mythresh; 372 if (strcmp(name, "eff1") == 0) { 373 printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n"); 374 mythresh = "thresh < .75"; 375 } else if (strcmp(name, "eff2") == 0) { 376 printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n"); 377 mythresh = "thresh > 1.0"; 378 } else if (strcmp(name, "itlbmiss") == 0) { 379 printf("Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n"); 380 mythresh = "thresh > .05"; 381 } else if (strcmp(name, "icachemiss") == 0) { 382 printf("Examine (36 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P\n"); 383 mythresh = "thresh > .05"; 384 } else if (strcmp(name, "lcpstall") == 0) { 385 printf("Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n"); 386 mythresh = "thresh > .05"; 387 } else if (strcmp(name, "cache1") == 0) { 388 printf("Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n"); 389 mythresh = "thresh >= .2"; 390 } else if (strcmp(name, "cache2") == 0) { 391 printf("Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \n"); 392 printf(" (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) + \n"); 393 printf(" (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))\n"); 394 printf(" / CPU_CLK_UNHALTED.THREAD_P\n"); 395 mythresh = "thresh >= .2"; 396 } else if (strcmp(name, "contested") == 0) { 397 printf("Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P\n"); 398 mythresh = "thresh >= .05"; 399 } else if (strcmp(name, "datashare") == 0) { 400 printf("Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n"); 401 mythresh = "thresh > .05"; 402 } else if (strcmp(name, "blockstorefwd") == 0) { 403 printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n"); 404 mythresh = "thresh >= .05"; 405 } else if (strcmp(name, "splitload") == 0) { 406 printf("Examine (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); 407 mythresh = "thresh >= .1"; 408 } else if (strcmp(name, "splitstore") == 0) { 409 printf("Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n"); 410 mythresh = "thresh >= .01"; 411 } else if (strcmp(name, "aliasing_4k") == 0) { 412 printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); 413 mythresh = "thresh >= .1"; 414 } else if (strcmp(name, "dtlbmissload") == 0) { 415 printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"); 416 printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); 417 mythresh = "thresh >= .1"; 418 } else if (strcmp(name, "br_miss") == 0) { 419 printf("Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD\n"); 420 mythresh = "thresh >= .2"; 421 } else if (strcmp(name, "clears") == 0) { 422 printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"); 423 printf(" MACHINE_CLEARS.SMC + \n"); 424 printf(" MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n"); 425 mythresh = "thresh >= .02"; 426 } else if (strcmp(name, "microassist") == 0) { 427 printf("Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"); 428 printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n"); 429 mythresh = "thresh >= .05"; 430 } else if (strcmp(name, "fpassist") == 0) { 431 printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n"); 432 mythresh = "look for a excessive value"; 433 } else if (strcmp(name, "otherassistavx") == 0) { 434 printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 435 mythresh = "look for a excessive value"; 436 } else if (strcmp(name, "otherassistsse") == 0) { 437 printf("Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 438 mythresh = "look for a excessive value"; 439 } else { 440 printf("Unknown name:%s\n", name); 441 mythresh = "unknown entry"; 442 } 443 printf("If the value printed is %s we may have the ability to improve performance\n", mythresh); 444 } 445 446 447 448 static struct counters * 449 find_counter(struct counters *base, const char *name) 450 { 451 struct counters *at; 452 int len; 453 454 at = base; 455 len = strlen(name); 456 while(at) { 457 if (strncmp(at->counter_name, name, len) == 0) { 458 return(at); 459 } 460 at = at->next_cpu; 461 } 462 printf("Can't find counter %s\n", name); 463 printf("We have:\n"); 464 at = base; 465 while(at) { 466 printf("- %s\n", at->counter_name); 467 at = at->next_cpu; 468 } 469 exit(-1); 470 } 471 472 static int 473 allocstall1(struct counters *cpu, int pos) 474 { 475 /* 1 - PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW/CPU_CLK_UNHALTED.THREAD_P (thresh > .05)*/ 476 int ret; 477 struct counters *partial; 478 struct counters *unhalt; 479 double un, par, res; 480 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 481 partial = find_counter(cpu, "PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW"); 482 if (pos != -1) { 483 par = partial->vals[pos] * 1.0; 484 un = unhalt->vals[pos] * 1.0; 485 } else { 486 par = partial->sum * 1.0; 487 un = unhalt->sum * 1.0; 488 } 489 res = par/un; 490 ret = printf("%1.3f", res); 491 return(ret); 492 } 493 494 static int 495 allocstall2(struct counters *cpu, int pos) 496 { 497 /* 2 - PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */ 498 int ret; 499 struct counters *partial; 500 struct counters *unhalt; 501 double un, par, res; 502 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 503 partial = find_counter(cpu, "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP"); 504 if (pos != -1) { 505 par = partial->vals[pos] * 1.0; 506 un = unhalt->vals[pos] * 1.0; 507 } else { 508 par = partial->sum * 1.0; 509 un = unhalt->sum * 1.0; 510 } 511 res = par/un; 512 ret = printf("%1.3f", res); 513 return(ret); 514 } 515 516 static int 517 br_mispredict(struct counters *cpu, int pos) 518 { 519 struct counters *brctr; 520 struct counters *unhalt; 521 int ret; 522 /* 3 - (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */ 523 double br, un, con, res; 524 con = 20.0; 525 526 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 527 brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES"); 528 if (pos != -1) { 529 br = brctr->vals[pos] * 1.0; 530 un = unhalt->vals[pos] * 1.0; 531 } else { 532 br = brctr->sum * 1.0; 533 un = unhalt->sum * 1.0; 534 } 535 res = (con * br)/un; 536 ret = printf("%1.3f", res); 537 return(ret); 538 } 539 540 static int 541 br_mispredictib(struct counters *cpu, int pos) 542 { 543 struct counters *brctr; 544 struct counters *unhalt; 545 struct counters *clear, *clear2, *clear3; 546 struct counters *uops; 547 struct counters *recv; 548 struct counters *iss; 549 /* "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",*/ 550 int ret; 551 /* 552 * (BR_MISP_RETIRED.ALL_BRANCHES / 553 * (BR_MISP_RETIRED.ALL_BRANCHES + 554 * MACHINE_CLEAR.COUNT) * 555 * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) / (4 * CPU_CLK_UNHALTED.THREAD))) 556 * 557 */ 558 double br, cl, cl2, cl3, uo, re, un, con, res, is; 559 con = 4.0; 560 561 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 562 brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES"); 563 clear = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING"); 564 clear2 = find_counter(cpu, "MACHINE_CLEARS.SMC"); 565 clear3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV"); 566 uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS"); 567 iss = find_counter(cpu, "UOPS_ISSUED.ANY"); 568 recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES"); 569 if (pos != -1) { 570 br = brctr->vals[pos] * 1.0; 571 cl = clear->vals[pos] * 1.0; 572 cl2 = clear2->vals[pos] * 1.0; 573 cl3 = clear3->vals[pos] * 1.0; 574 uo = uops->vals[pos] * 1.0; 575 re = recv->vals[pos] * 1.0; 576 is = iss->vals[pos] * 1.0; 577 un = unhalt->vals[pos] * 1.0; 578 } else { 579 br = brctr->sum * 1.0; 580 cl = clear->sum * 1.0; 581 cl2 = clear2->sum * 1.0; 582 cl3 = clear3->sum * 1.0; 583 uo = uops->sum * 1.0; 584 re = recv->sum * 1.0; 585 is = iss->sum * 1.0; 586 un = unhalt->sum * 1.0; 587 } 588 res = (br/(br + cl + cl2 + cl3) * ((is - uo + con * re) / (con * un))); 589 ret = printf("%1.3f", res); 590 return(ret); 591 } 592 593 594 static int 595 br_mispredict_broad(struct counters *cpu, int pos) 596 { 597 struct counters *brctr; 598 struct counters *unhalt; 599 struct counters *clear; 600 struct counters *uops; 601 struct counters *uops_ret; 602 struct counters *recv; 603 int ret; 604 double br, cl, uo, uo_r, re, con, un, res; 605 606 con = 4.0; 607 608 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 609 brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES"); 610 clear = find_counter(cpu, "MACHINE_CLEARS.CYCLES"); 611 uops = find_counter(cpu, "UOPS_ISSUED.ANY"); 612 uops_ret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS"); 613 recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES"); 614 615 if (pos != -1) { 616 un = unhalt->vals[pos] * 1.0; 617 br = brctr->vals[pos] * 1.0; 618 cl = clear->vals[pos] * 1.0; 619 uo = uops->vals[pos] * 1.0; 620 uo_r = uops_ret->vals[pos] * 1.0; 621 re = recv->vals[pos] * 1.0; 622 } else { 623 un = unhalt->sum * 1.0; 624 br = brctr->sum * 1.0; 625 cl = clear->sum * 1.0; 626 uo = uops->sum * 1.0; 627 uo_r = uops_ret->sum * 1.0; 628 re = recv->sum * 1.0; 629 } 630 res = br / (br + cl) * (uo - uo_r + con * re) / (un * con); 631 ret = printf("%1.3f", res); 632 return(ret); 633 } 634 635 static int 636 splitloadib(struct counters *cpu, int pos) 637 { 638 int ret; 639 struct counters *mem; 640 struct counters *l1d, *ldblock; 641 struct counters *unhalt; 642 double un, memd, res, l1, ldb; 643 /* 644 * ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) * LD_BLOCKS.NO_SR) / CPU_CLK_UNHALTED.THREAD_P 645 * "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1", 646 */ 647 648 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 649 mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L1_MISS"); 650 l1d = find_counter(cpu, "L1D_PEND_MISS.PENDING"); 651 ldblock = find_counter(cpu, "LD_BLOCKS.NO_SR"); 652 if (pos != -1) { 653 memd = mem->vals[pos] * 1.0; 654 l1 = l1d->vals[pos] * 1.0; 655 ldb = ldblock->vals[pos] * 1.0; 656 un = unhalt->vals[pos] * 1.0; 657 } else { 658 memd = mem->sum * 1.0; 659 l1 = l1d->sum * 1.0; 660 ldb = ldblock->sum * 1.0; 661 un = unhalt->sum * 1.0; 662 } 663 res = ((l1 / memd) * ldb)/un; 664 ret = printf("%1.3f", res); 665 return(ret); 666 } 667 668 669 static int 670 splitload(struct counters *cpu, int pos) 671 { 672 int ret; 673 struct counters *mem; 674 struct counters *unhalt; 675 double con, un, memd, res; 676 /* 4 - (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/ 677 678 con = 5.0; 679 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 680 mem = find_counter(cpu, "MEM_UOPS_RETIRED.SPLIT_LOADS"); 681 if (pos != -1) { 682 memd = mem->vals[pos] * 1.0; 683 un = unhalt->vals[pos] * 1.0; 684 } else { 685 memd = mem->sum * 1.0; 686 un = unhalt->sum * 1.0; 687 } 688 res = (memd * con)/un; 689 ret = printf("%1.3f", res); 690 return(ret); 691 } 692 693 694 static int 695 splitload_sb(struct counters *cpu, int pos) 696 { 697 int ret; 698 struct counters *mem; 699 struct counters *unhalt; 700 double con, un, memd, res; 701 /* 4 - (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/ 702 703 con = 5.0; 704 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 705 mem = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_LOADS"); 706 if (pos != -1) { 707 memd = mem->vals[pos] * 1.0; 708 un = unhalt->vals[pos] * 1.0; 709 } else { 710 memd = mem->sum * 1.0; 711 un = unhalt->sum * 1.0; 712 } 713 res = (memd * con)/un; 714 ret = printf("%1.3f", res); 715 return(ret); 716 } 717 718 719 static int 720 splitstore_sb(struct counters *cpu, int pos) 721 { 722 /* 5 - MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES (thresh > 0.01) */ 723 int ret; 724 struct counters *mem_split; 725 struct counters *mem_stores; 726 double memsplit, memstore, res; 727 mem_split = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_STORES"); 728 mem_stores = find_counter(cpu, "MEM_UOP_RETIRED.ALL_STORES"); 729 if (pos != -1) { 730 memsplit = mem_split->vals[pos] * 1.0; 731 memstore = mem_stores->vals[pos] * 1.0; 732 } else { 733 memsplit = mem_split->sum * 1.0; 734 memstore = mem_stores->sum * 1.0; 735 } 736 res = memsplit/memstore; 737 ret = printf("%1.3f", res); 738 return(ret); 739 } 740 741 742 743 static int 744 splitstore(struct counters *cpu, int pos) 745 { 746 /* 5 - MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES (thresh > 0.01) */ 747 int ret; 748 struct counters *mem_split; 749 struct counters *mem_stores; 750 double memsplit, memstore, res; 751 mem_split = find_counter(cpu, "MEM_UOPS_RETIRED.SPLIT_STORES"); 752 mem_stores = find_counter(cpu, "MEM_UOPS_RETIRED.ALL_STORES"); 753 if (pos != -1) { 754 memsplit = mem_split->vals[pos] * 1.0; 755 memstore = mem_stores->vals[pos] * 1.0; 756 } else { 757 memsplit = mem_split->sum * 1.0; 758 memstore = mem_stores->sum * 1.0; 759 } 760 res = memsplit/memstore; 761 ret = printf("%1.3f", res); 762 return(ret); 763 } 764 765 766 static int 767 contested(struct counters *cpu, int pos) 768 { 769 /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */ 770 int ret; 771 struct counters *mem; 772 struct counters *unhalt; 773 double con, un, memd, res; 774 775 con = 60.0; 776 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 777 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); 778 if (pos != -1) { 779 memd = mem->vals[pos] * 1.0; 780 un = unhalt->vals[pos] * 1.0; 781 } else { 782 memd = mem->sum * 1.0; 783 un = unhalt->sum * 1.0; 784 } 785 res = (memd * con)/un; 786 ret = printf("%1.3f", res); 787 return(ret); 788 } 789 790 static int 791 contested_has(struct counters *cpu, int pos) 792 { 793 /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */ 794 int ret; 795 struct counters *mem; 796 struct counters *unhalt; 797 double con, un, memd, res; 798 799 con = 84.0; 800 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 801 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); 802 if (pos != -1) { 803 memd = mem->vals[pos] * 1.0; 804 un = unhalt->vals[pos] * 1.0; 805 } else { 806 memd = mem->sum * 1.0; 807 un = unhalt->sum * 1.0; 808 } 809 res = (memd * con)/un; 810 ret = printf("%1.3f", res); 811 return(ret); 812 } 813 814 static int 815 contestedbroad(struct counters *cpu, int pos) 816 { 817 /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */ 818 int ret; 819 struct counters *mem; 820 struct counters *mem2; 821 struct counters *unhalt; 822 double con, un, memd, memtoo, res; 823 824 con = 84.0; 825 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 826 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); 827 mem2 = find_counter(cpu,"MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS"); 828 829 if (pos != -1) { 830 memd = mem->vals[pos] * 1.0; 831 memtoo = mem2->vals[pos] * 1.0; 832 un = unhalt->vals[pos] * 1.0; 833 } else { 834 memd = mem->sum * 1.0; 835 memtoo = mem2->sum * 1.0; 836 un = unhalt->sum * 1.0; 837 } 838 res = ((memd * con) + memtoo)/un; 839 ret = printf("%1.3f", res); 840 return(ret); 841 } 842 843 844 static int 845 blockstoreforward(struct counters *cpu, int pos) 846 { 847 /* 7 - (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .05)*/ 848 int ret; 849 struct counters *ldb; 850 struct counters *unhalt; 851 double con, un, ld, res; 852 853 con = 13.0; 854 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 855 ldb = find_counter(cpu, "LD_BLOCKS_STORE_FORWARD"); 856 if (pos != -1) { 857 ld = ldb->vals[pos] * 1.0; 858 un = unhalt->vals[pos] * 1.0; 859 } else { 860 ld = ldb->sum * 1.0; 861 un = unhalt->sum * 1.0; 862 } 863 res = (ld * con)/un; 864 ret = printf("%1.3f", res); 865 return(ret); 866 } 867 868 static int 869 cache2(struct counters *cpu, int pos) 870 { 871 /* ** Suspect *** 872 * 8 - ((MEM_LOAD_RETIRED.L3_HIT * 26) + (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) + 873 * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P (thresh >.2) 874 */ 875 int ret; 876 struct counters *mem1, *mem2, *mem3; 877 struct counters *unhalt; 878 double con1, con2, con3, un, me_1, me_2, me_3, res; 879 880 con1 = 26.0; 881 con2 = 43.0; 882 con3 = 60.0; 883 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 884 /* Call for MEM_LOAD_RETIRED.L3_HIT possibly MEM_LOAD_UOPS_RETIRED.LLC_HIT ?*/ 885 mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT"); 886 mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT"); 887 mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); 888 if (pos != -1) { 889 me_1 = mem1->vals[pos] * 1.0; 890 me_2 = mem2->vals[pos] * 1.0; 891 me_3 = mem3->vals[pos] * 1.0; 892 un = unhalt->vals[pos] * 1.0; 893 } else { 894 me_1 = mem1->sum * 1.0; 895 me_2 = mem2->sum * 1.0; 896 me_3 = mem3->sum * 1.0; 897 un = unhalt->sum * 1.0; 898 } 899 res = ((me_1 * con1) + (me_2 * con2) + (me_3 * con3))/un; 900 ret = printf("%1.3f", res); 901 return(ret); 902 } 903 904 static int 905 datasharing(struct counters *cpu, int pos) 906 { 907 /* 908 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2) 909 */ 910 int ret; 911 struct counters *mem; 912 struct counters *unhalt; 913 double con, res, me, un; 914 915 con = 43.0; 916 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 917 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT"); 918 if (pos != -1) { 919 me = mem->vals[pos] * 1.0; 920 un = unhalt->vals[pos] * 1.0; 921 } else { 922 me = mem->sum * 1.0; 923 un = unhalt->sum * 1.0; 924 } 925 res = (me * con)/un; 926 ret = printf("%1.3f", res); 927 return(ret); 928 929 } 930 931 932 static int 933 datasharing_has(struct counters *cpu, int pos) 934 { 935 /* 936 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2) 937 */ 938 int ret; 939 struct counters *mem; 940 struct counters *unhalt; 941 double con, res, me, un; 942 943 con = 72.0; 944 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 945 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT"); 946 if (pos != -1) { 947 me = mem->vals[pos] * 1.0; 948 un = unhalt->vals[pos] * 1.0; 949 } else { 950 me = mem->sum * 1.0; 951 un = unhalt->sum * 1.0; 952 } 953 res = (me * con)/un; 954 ret = printf("%1.3f", res); 955 return(ret); 956 957 } 958 959 960 static int 961 cache2ib(struct counters *cpu, int pos) 962 { 963 /* 964 * (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2) 965 */ 966 int ret; 967 struct counters *mem; 968 struct counters *unhalt; 969 double con, un, me, res; 970 971 con = 29.0; 972 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 973 mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT"); 974 if (pos != -1) { 975 me = mem->vals[pos] * 1.0; 976 un = unhalt->vals[pos] * 1.0; 977 } else { 978 me = mem->sum * 1.0; 979 un = unhalt->sum * 1.0; 980 } 981 res = (con * me)/un; 982 ret = printf("%1.3f", res); 983 return(ret); 984 } 985 986 static int 987 cache2has(struct counters *cpu, int pos) 988 { 989 /* 990 * Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \ 991 * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) + 992 * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84)) 993 * / CPU_CLK_UNHALTED.THREAD_P 994 */ 995 int ret; 996 struct counters *mem1, *mem2, *mem3; 997 struct counters *unhalt; 998 double con1, con2, con3, un, me1, me2, me3, res; 999 1000 con1 = 36.0; 1001 con2 = 72.0; 1002 con3 = 84.0; 1003 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1004 mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT"); 1005 mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT"); 1006 mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); 1007 if (pos != -1) { 1008 me1 = mem1->vals[pos] * 1.0; 1009 me2 = mem2->vals[pos] * 1.0; 1010 me3 = mem3->vals[pos] * 1.0; 1011 un = unhalt->vals[pos] * 1.0; 1012 } else { 1013 me1 = mem1->sum * 1.0; 1014 me2 = mem2->sum * 1.0; 1015 me3 = mem3->sum * 1.0; 1016 un = unhalt->sum * 1.0; 1017 } 1018 res = ((me1 * con1) + (me2 * con2) + (me3 * con3))/un; 1019 ret = printf("%1.3f", res); 1020 return(ret); 1021 } 1022 1023 1024 static int 1025 cache2broad(struct counters *cpu, int pos) 1026 { 1027 /* 1028 * (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2) 1029 */ 1030 int ret; 1031 struct counters *mem; 1032 struct counters *unhalt; 1033 double con, un, me, res; 1034 1035 con = 36.0; 1036 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1037 mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L3_HIT"); 1038 if (pos != -1) { 1039 me = mem->vals[pos] * 1.0; 1040 un = unhalt->vals[pos] * 1.0; 1041 } else { 1042 me = mem->sum * 1.0; 1043 un = unhalt->sum * 1.0; 1044 } 1045 res = (con * me)/un; 1046 ret = printf("%1.3f", res); 1047 return(ret); 1048 } 1049 1050 1051 static int 1052 cache1(struct counters *cpu, int pos) 1053 { 1054 /* 9 - (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */ 1055 int ret; 1056 struct counters *mem; 1057 struct counters *unhalt; 1058 double con, un, me, res; 1059 1060 con = 180.0; 1061 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1062 mem = find_counter(cpu, "MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS"); 1063 if (pos != -1) { 1064 me = mem->vals[pos] * 1.0; 1065 un = unhalt->vals[pos] * 1.0; 1066 } else { 1067 me = mem->sum * 1.0; 1068 un = unhalt->sum * 1.0; 1069 } 1070 res = (me * con)/un; 1071 ret = printf("%1.3f", res); 1072 return(ret); 1073 } 1074 1075 static int 1076 cache1ib(struct counters *cpu, int pos) 1077 { 1078 /* 9 - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */ 1079 int ret; 1080 struct counters *mem; 1081 struct counters *unhalt; 1082 double con, un, me, res; 1083 1084 con = 180.0; 1085 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1086 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM"); 1087 if (pos != -1) { 1088 me = mem->vals[pos] * 1.0; 1089 un = unhalt->vals[pos] * 1.0; 1090 } else { 1091 me = mem->sum * 1.0; 1092 un = unhalt->sum * 1.0; 1093 } 1094 res = (me * con)/un; 1095 ret = printf("%1.3f", res); 1096 return(ret); 1097 } 1098 1099 1100 static int 1101 cache1broad(struct counters *cpu, int pos) 1102 { 1103 /* 9 - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */ 1104 int ret; 1105 struct counters *mem; 1106 struct counters *unhalt; 1107 double con, un, me, res; 1108 1109 con = 180.0; 1110 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1111 mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L3_MISS"); 1112 if (pos != -1) { 1113 me = mem->vals[pos] * 1.0; 1114 un = unhalt->vals[pos] * 1.0; 1115 } else { 1116 me = mem->sum * 1.0; 1117 un = unhalt->sum * 1.0; 1118 } 1119 res = (me * con)/un; 1120 ret = printf("%1.3f", res); 1121 return(ret); 1122 } 1123 1124 1125 static int 1126 dtlb_missload(struct counters *cpu, int pos) 1127 { 1128 /* 10 - ((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P (t >=.1) */ 1129 int ret; 1130 struct counters *dtlb_m, *dtlb_d; 1131 struct counters *unhalt; 1132 double con, un, d1, d2, res; 1133 1134 con = 7.0; 1135 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1136 dtlb_m = find_counter(cpu, "DTLB_LOAD_MISSES.STLB_HIT"); 1137 dtlb_d = find_counter(cpu, "DTLB_LOAD_MISSES.WALK_DURATION"); 1138 if (pos != -1) { 1139 d1 = dtlb_m->vals[pos] * 1.0; 1140 d2 = dtlb_d->vals[pos] * 1.0; 1141 un = unhalt->vals[pos] * 1.0; 1142 } else { 1143 d1 = dtlb_m->sum * 1.0; 1144 d2 = dtlb_d->sum * 1.0; 1145 un = unhalt->sum * 1.0; 1146 } 1147 res = ((d1 * con) + d2)/un; 1148 ret = printf("%1.3f", res); 1149 return(ret); 1150 } 1151 1152 static int 1153 dtlb_missstore(struct counters *cpu, int pos) 1154 { 1155 /* 1156 * ((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION) / 1157 * CPU_CLK_UNHALTED.THREAD_P (t >= .1) 1158 */ 1159 int ret; 1160 struct counters *dtsb_m, *dtsb_d; 1161 struct counters *unhalt; 1162 double con, un, d1, d2, res; 1163 1164 con = 7.0; 1165 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1166 dtsb_m = find_counter(cpu, "DTLB_STORE_MISSES.STLB_HIT"); 1167 dtsb_d = find_counter(cpu, "DTLB_STORE_MISSES.WALK_DURATION"); 1168 if (pos != -1) { 1169 d1 = dtsb_m->vals[pos] * 1.0; 1170 d2 = dtsb_d->vals[pos] * 1.0; 1171 un = unhalt->vals[pos] * 1.0; 1172 } else { 1173 d1 = dtsb_m->sum * 1.0; 1174 d2 = dtsb_d->sum * 1.0; 1175 un = unhalt->sum * 1.0; 1176 } 1177 res = ((d1 * con) + d2)/un; 1178 ret = printf("%1.3f", res); 1179 return(ret); 1180 } 1181 1182 static int 1183 itlb_miss(struct counters *cpu, int pos) 1184 { 1185 /* ITLB_MISSES.WALK_DURATION / CPU_CLK_UNTHREAD_P IB */ 1186 int ret; 1187 struct counters *itlb; 1188 struct counters *unhalt; 1189 double un, d1, res; 1190 1191 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1192 itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION"); 1193 if (pos != -1) { 1194 d1 = itlb->vals[pos] * 1.0; 1195 un = unhalt->vals[pos] * 1.0; 1196 } else { 1197 d1 = itlb->sum * 1.0; 1198 un = unhalt->sum * 1.0; 1199 } 1200 res = d1/un; 1201 ret = printf("%1.3f", res); 1202 return(ret); 1203 } 1204 1205 1206 static int 1207 itlb_miss_broad(struct counters *cpu, int pos) 1208 { 1209 /* (7 * ITLB_MISSES.STLB_HIT_4K + ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNTHREAD_P */ 1210 int ret; 1211 struct counters *itlb; 1212 struct counters *unhalt; 1213 struct counters *four_k; 1214 double un, d1, res, k; 1215 1216 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1217 itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION"); 1218 four_k = find_counter(cpu, "ITLB_MISSES.STLB_HIT_4K"); 1219 if (pos != -1) { 1220 d1 = itlb->vals[pos] * 1.0; 1221 un = unhalt->vals[pos] * 1.0; 1222 k = four_k->vals[pos] * 1.0; 1223 } else { 1224 d1 = itlb->sum * 1.0; 1225 un = unhalt->sum * 1.0; 1226 k = four_k->sum * 1.0; 1227 } 1228 res = (7.0 * k + d1)/un; 1229 ret = printf("%1.3f", res); 1230 return(ret); 1231 } 1232 1233 1234 static int 1235 icache_miss(struct counters *cpu, int pos) 1236 { 1237 /* (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P IB */ 1238 1239 int ret; 1240 struct counters *itlb, *icache; 1241 struct counters *unhalt; 1242 double un, d1, ic, res; 1243 1244 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1245 itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION"); 1246 icache = find_counter(cpu, "ICACHE.IFETCH_STALL"); 1247 if (pos != -1) { 1248 d1 = itlb->vals[pos] * 1.0; 1249 ic = icache->vals[pos] * 1.0; 1250 un = unhalt->vals[pos] * 1.0; 1251 } else { 1252 d1 = itlb->sum * 1.0; 1253 ic = icache->sum * 1.0; 1254 un = unhalt->sum * 1.0; 1255 } 1256 res = (ic-d1)/un; 1257 ret = printf("%1.3f", res); 1258 return(ret); 1259 1260 } 1261 1262 static int 1263 icache_miss_has(struct counters *cpu, int pos) 1264 { 1265 /* (36 * ICACHE.MISSES) / CPU_CLK_UNHALTED.THREAD_P */ 1266 1267 int ret; 1268 struct counters *icache; 1269 struct counters *unhalt; 1270 double un, con, ic, res; 1271 1272 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1273 icache = find_counter(cpu, "ICACHE.MISSES"); 1274 con = 36.0; 1275 if (pos != -1) { 1276 ic = icache->vals[pos] * 1.0; 1277 un = unhalt->vals[pos] * 1.0; 1278 } else { 1279 ic = icache->sum * 1.0; 1280 un = unhalt->sum * 1.0; 1281 } 1282 res = (con * ic)/un; 1283 ret = printf("%1.3f", res); 1284 return(ret); 1285 1286 } 1287 1288 static int 1289 lcp_stall(struct counters *cpu, int pos) 1290 { 1291 /* ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P IB */ 1292 int ret; 1293 struct counters *ild; 1294 struct counters *unhalt; 1295 double un, d1, res; 1296 1297 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1298 ild = find_counter(cpu, "ILD_STALL.LCP"); 1299 if (pos != -1) { 1300 d1 = ild->vals[pos] * 1.0; 1301 un = unhalt->vals[pos] * 1.0; 1302 } else { 1303 d1 = ild->sum * 1.0; 1304 un = unhalt->sum * 1.0; 1305 } 1306 res = d1/un; 1307 ret = printf("%1.3f", res); 1308 return(ret); 1309 1310 } 1311 1312 1313 static int 1314 frontendstall(struct counters *cpu, int pos) 1315 { 1316 /* 12 - IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4) (thresh >= .15) */ 1317 int ret; 1318 struct counters *idq; 1319 struct counters *unhalt; 1320 double con, un, id, res; 1321 1322 con = 4.0; 1323 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1324 idq = find_counter(cpu, "IDQ_UOPS_NOT_DELIVERED.CORE"); 1325 if (pos != -1) { 1326 id = idq->vals[pos] * 1.0; 1327 un = unhalt->vals[pos] * 1.0; 1328 } else { 1329 id = idq->sum * 1.0; 1330 un = unhalt->sum * 1.0; 1331 } 1332 res = id/(un * con); 1333 ret = printf("%1.3f", res); 1334 return(ret); 1335 } 1336 1337 static int 1338 clears(struct counters *cpu, int pos) 1339 { 1340 /* 13 - ((MACHINE_CLEARS.MEMORY_ORDERING + MACHINE_CLEARS.SMC + MACHINE_CLEARS.MASKMOV ) * 100 ) 1341 * / CPU_CLK_UNHALTED.THREAD_P (thresh >= .02)*/ 1342 1343 int ret; 1344 struct counters *clr1, *clr2, *clr3; 1345 struct counters *unhalt; 1346 double con, un, cl1, cl2, cl3, res; 1347 1348 con = 100.0; 1349 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1350 clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING"); 1351 clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC"); 1352 clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV"); 1353 1354 if (pos != -1) { 1355 cl1 = clr1->vals[pos] * 1.0; 1356 cl2 = clr2->vals[pos] * 1.0; 1357 cl3 = clr3->vals[pos] * 1.0; 1358 un = unhalt->vals[pos] * 1.0; 1359 } else { 1360 cl1 = clr1->sum * 1.0; 1361 cl2 = clr2->sum * 1.0; 1362 cl3 = clr3->sum * 1.0; 1363 un = unhalt->sum * 1.0; 1364 } 1365 res = ((cl1 + cl2 + cl3) * con)/un; 1366 ret = printf("%1.3f", res); 1367 return(ret); 1368 } 1369 1370 1371 1372 static int 1373 clears_broad(struct counters *cpu, int pos) 1374 { 1375 int ret; 1376 struct counters *clr1, *clr2, *clr3, *cyc; 1377 struct counters *unhalt; 1378 double con, un, cl1, cl2, cl3, cy, res; 1379 1380 con = 100.0; 1381 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1382 clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING"); 1383 clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC"); 1384 clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV"); 1385 cyc = find_counter(cpu, "MACHINE_CLEARS.CYCLES"); 1386 if (pos != -1) { 1387 cl1 = clr1->vals[pos] * 1.0; 1388 cl2 = clr2->vals[pos] * 1.0; 1389 cl3 = clr3->vals[pos] * 1.0; 1390 cy = cyc->vals[pos] * 1.0; 1391 un = unhalt->vals[pos] * 1.0; 1392 } else { 1393 cl1 = clr1->sum * 1.0; 1394 cl2 = clr2->sum * 1.0; 1395 cl3 = clr3->sum * 1.0; 1396 cy = cyc->sum * 1.0; 1397 un = unhalt->sum * 1.0; 1398 } 1399 /* Formula not listed but extrapulated to add the cy ?? */ 1400 res = ((cl1 + cl2 + cl3 + cy) * con)/un; 1401 ret = printf("%1.3f", res); 1402 return(ret); 1403 } 1404 1405 1406 1407 1408 1409 static int 1410 microassist(struct counters *cpu, int pos) 1411 { 1412 /* 14 - IDQ.MS_CYCLES / CPU_CLK_UNHALTED.THREAD_P (thresh > .05) */ 1413 int ret; 1414 struct counters *idq; 1415 struct counters *unhalt; 1416 double un, id, res, con; 1417 1418 con = 4.0; 1419 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1420 idq = find_counter(cpu, "IDQ.MS_UOPS"); 1421 if (pos != -1) { 1422 id = idq->vals[pos] * 1.0; 1423 un = unhalt->vals[pos] * 1.0; 1424 } else { 1425 id = idq->sum * 1.0; 1426 un = unhalt->sum * 1.0; 1427 } 1428 res = id/(un * con); 1429 ret = printf("%1.3f", res); 1430 return(ret); 1431 } 1432 1433 1434 static int 1435 microassist_broad(struct counters *cpu, int pos) 1436 { 1437 int ret; 1438 struct counters *idq; 1439 struct counters *unhalt; 1440 struct counters *uopiss; 1441 struct counters *uopret; 1442 double un, id, res, con, uoi, uor; 1443 1444 con = 4.0; 1445 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1446 idq = find_counter(cpu, "IDQ.MS_UOPS"); 1447 uopiss = find_counter(cpu, "UOPS_ISSUED.ANY"); 1448 uopret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS"); 1449 if (pos != -1) { 1450 id = idq->vals[pos] * 1.0; 1451 un = unhalt->vals[pos] * 1.0; 1452 uoi = uopiss->vals[pos] * 1.0; 1453 uor = uopret->vals[pos] * 1.0; 1454 } else { 1455 id = idq->sum * 1.0; 1456 un = unhalt->sum * 1.0; 1457 uoi = uopiss->sum * 1.0; 1458 uor = uopret->sum * 1.0; 1459 } 1460 res = (uor/uoi) * (id/(un * con)); 1461 ret = printf("%1.3f", res); 1462 return(ret); 1463 } 1464 1465 1466 static int 1467 aliasing(struct counters *cpu, int pos) 1468 { 1469 /* 15 - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */ 1470 int ret; 1471 struct counters *ld; 1472 struct counters *unhalt; 1473 double un, lds, con, res; 1474 1475 con = 5.0; 1476 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1477 ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS"); 1478 if (pos != -1) { 1479 lds = ld->vals[pos] * 1.0; 1480 un = unhalt->vals[pos] * 1.0; 1481 } else { 1482 lds = ld->sum * 1.0; 1483 un = unhalt->sum * 1.0; 1484 } 1485 res = (lds * con)/un; 1486 ret = printf("%1.3f", res); 1487 return(ret); 1488 } 1489 1490 static int 1491 aliasing_broad(struct counters *cpu, int pos) 1492 { 1493 /* 15 - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */ 1494 int ret; 1495 struct counters *ld; 1496 struct counters *unhalt; 1497 double un, lds, con, res; 1498 1499 con = 7.0; 1500 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1501 ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS"); 1502 if (pos != -1) { 1503 lds = ld->vals[pos] * 1.0; 1504 un = unhalt->vals[pos] * 1.0; 1505 } else { 1506 lds = ld->sum * 1.0; 1507 un = unhalt->sum * 1.0; 1508 } 1509 res = (lds * con)/un; 1510 ret = printf("%1.3f", res); 1511 return(ret); 1512 } 1513 1514 1515 static int 1516 fpassists(struct counters *cpu, int pos) 1517 { 1518 /* 16 - FP_ASSIST.ANY/INST_RETIRED.ANY_P */ 1519 int ret; 1520 struct counters *fp; 1521 struct counters *inst; 1522 double un, fpd, res; 1523 1524 inst = find_counter(cpu, "INST_RETIRED.ANY_P"); 1525 fp = find_counter(cpu, "FP_ASSIST.ANY"); 1526 if (pos != -1) { 1527 fpd = fp->vals[pos] * 1.0; 1528 un = inst->vals[pos] * 1.0; 1529 } else { 1530 fpd = fp->sum * 1.0; 1531 un = inst->sum * 1.0; 1532 } 1533 res = fpd/un; 1534 ret = printf("%1.3f", res); 1535 return(ret); 1536 } 1537 1538 static int 1539 otherassistavx(struct counters *cpu, int pos) 1540 { 1541 /* 17 - (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P thresh .1*/ 1542 int ret; 1543 struct counters *oth; 1544 struct counters *unhalt; 1545 double un, ot, con, res; 1546 1547 con = 75.0; 1548 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1549 oth = find_counter(cpu, "OTHER_ASSISTS.AVX_TO_SSE"); 1550 if (pos != -1) { 1551 ot = oth->vals[pos] * 1.0; 1552 un = unhalt->vals[pos] * 1.0; 1553 } else { 1554 ot = oth->sum * 1.0; 1555 un = unhalt->sum * 1.0; 1556 } 1557 res = (ot * con)/un; 1558 ret = printf("%1.3f", res); 1559 return(ret); 1560 } 1561 1562 static int 1563 otherassistsse(struct counters *cpu, int pos) 1564 { 1565 1566 int ret; 1567 struct counters *oth; 1568 struct counters *unhalt; 1569 double un, ot, con, res; 1570 1571 /* 18 (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P thresh .1*/ 1572 con = 75.0; 1573 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1574 oth = find_counter(cpu, "OTHER_ASSISTS.SSE_TO_AVX"); 1575 if (pos != -1) { 1576 ot = oth->vals[pos] * 1.0; 1577 un = unhalt->vals[pos] * 1.0; 1578 } else { 1579 ot = oth->sum * 1.0; 1580 un = unhalt->sum * 1.0; 1581 } 1582 res = (ot * con)/un; 1583 ret = printf("%1.3f", res); 1584 return(ret); 1585 } 1586 1587 static int 1588 efficiency1(struct counters *cpu, int pos) 1589 { 1590 1591 int ret; 1592 struct counters *uops; 1593 struct counters *unhalt; 1594 double un, ot, con, res; 1595 1596 /* 19 (UOPS_RETIRED.RETIRE_SLOTS/(4*CPU_CLK_UNHALTED.THREAD_P) look if thresh < .9*/ 1597 con = 4.0; 1598 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1599 uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS"); 1600 if (pos != -1) { 1601 ot = uops->vals[pos] * 1.0; 1602 un = unhalt->vals[pos] * 1.0; 1603 } else { 1604 ot = uops->sum * 1.0; 1605 un = unhalt->sum * 1.0; 1606 } 1607 res = ot/(con * un); 1608 ret = printf("%1.3f", res); 1609 return(ret); 1610 } 1611 1612 static int 1613 efficiency2(struct counters *cpu, int pos) 1614 { 1615 1616 int ret; 1617 struct counters *uops; 1618 struct counters *unhalt; 1619 double un, ot, res; 1620 1621 /* 20 - CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P good if > 1. (comp factor)*/ 1622 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1623 uops = find_counter(cpu, "INST_RETIRED.ANY_P"); 1624 if (pos != -1) { 1625 ot = uops->vals[pos] * 1.0; 1626 un = unhalt->vals[pos] * 1.0; 1627 } else { 1628 ot = uops->sum * 1.0; 1629 un = unhalt->sum * 1.0; 1630 } 1631 res = un/ot; 1632 ret = printf("%1.3f", res); 1633 return(ret); 1634 } 1635 1636 #define SANDY_BRIDGE_COUNT 20 1637 static struct cpu_entry sandy_bridge[SANDY_BRIDGE_COUNT] = { 1638 /*01*/ { "allocstall1", "thresh > .05", 1639 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW -w 1", 1640 allocstall1, 2 }, 1641 /* -- not defined for SB right (partial-rat_stalls) 02*/ 1642 { "allocstall2", "thresh > .05", 1643 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP -w 1", 1644 allocstall2, 2 }, 1645 /*03*/ { "br_miss", "thresh >= .2", 1646 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1", 1647 br_mispredict, 2 }, 1648 /*04*/ { "splitload", "thresh >= .1", 1649 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1", 1650 splitload_sb, 2 }, 1651 /* 05*/ { "splitstore", "thresh >= .01", 1652 "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1", 1653 splitstore_sb, 2 }, 1654 /*06*/ { "contested", "thresh >= .05", 1655 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1656 contested, 2 }, 1657 /*07*/ { "blockstorefwd", "thresh >= .05", 1658 "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1659 blockstoreforward, 2 }, 1660 /*08*/ { "cache2", "thresh >= .2", 1661 "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1662 cache2, 4 }, 1663 /*09*/ { "cache1", "thresh >= .2", 1664 "pmcstat -s MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1665 cache1, 2 }, 1666 /*10*/ { "dtlbmissload", "thresh >= .1", 1667 "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1668 dtlb_missload, 3 }, 1669 /*11*/ { "dtlbmissstore", "thresh >= .05", 1670 "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1671 dtlb_missstore, 3 }, 1672 /*12*/ { "frontendstall", "thresh >= .15", 1673 "pmcstat -s IDQ_UOPS_NOT_DELIVERED.CORE -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1674 frontendstall, 2 }, 1675 /*13*/ { "clears", "thresh >= .02", 1676 "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1677 clears, 4 }, 1678 /*14*/ { "microassist", "thresh >= .05", 1679 "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1680 microassist, 2 }, 1681 /*15*/ { "aliasing_4k", "thresh >= .1", 1682 "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1683 aliasing, 2 }, 1684 /*16*/ { "fpassist", "look for a excessive value", 1685 "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", 1686 fpassists, 2 }, 1687 /*17*/ { "otherassistavx", "look for a excessive value", 1688 "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1689 otherassistavx, 2}, 1690 /*18*/ { "otherassistsse", "look for a excessive value", 1691 "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1692 otherassistsse, 2 }, 1693 /*19*/ { "eff1", "thresh < .9", 1694 "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1695 efficiency1, 2 }, 1696 /*20*/ { "eff2", "thresh > 1.0", 1697 "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1698 efficiency2, 2 }, 1699 }; 1700 1701 1702 #define IVY_BRIDGE_COUNT 21 1703 static struct cpu_entry ivy_bridge[IVY_BRIDGE_COUNT] = { 1704 /*1*/ { "eff1", "thresh < .75", 1705 "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1706 efficiency1, 2 }, 1707 /*2*/ { "eff2", "thresh > 1.0", 1708 "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1709 efficiency2, 2 }, 1710 /*3*/ { "itlbmiss", "thresh > .05", 1711 "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1712 itlb_miss, 2 }, 1713 /*4*/ { "icachemiss", "thresh > .05", 1714 "pmcstat -s ICACHE.IFETCH_STALL -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1715 icache_miss, 3 }, 1716 /*5*/ { "lcpstall", "thresh > .05", 1717 "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1718 lcp_stall, 2 }, 1719 /*6*/ { "cache1", "thresh >= .2", 1720 "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1721 cache1ib, 2 }, 1722 /*7*/ { "cache2", "thresh >= .2", 1723 "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1724 cache2ib, 2 }, 1725 /*8*/ { "contested", "thresh >= .05", 1726 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1727 contested, 2 }, 1728 /*9*/ { "datashare", "thresh >= .05", 1729 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1730 datasharing, 2 }, 1731 /*10*/ { "blockstorefwd", "thresh >= .05", 1732 "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1733 blockstoreforward, 2 }, 1734 /*11*/ { "splitload", "thresh >= .1", 1735 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1", 1736 splitloadib, 4 }, 1737 /*12*/ { "splitstore", "thresh >= .01", 1738 "pmcstat -s MEM_UOPS_RETIRED.SPLIT_STORES -s MEM_UOPS_RETIRED.ALL_STORES -w 1", 1739 splitstore, 2 }, 1740 /*13*/ { "aliasing_4k", "thresh >= .1", 1741 "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1742 aliasing, 2 }, 1743 /*14*/ { "dtlbmissload", "thresh >= .1", 1744 "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1745 dtlb_missload , 3}, 1746 /*15*/ { "dtlbmissstore", "thresh >= .05", 1747 "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1748 dtlb_missstore, 3 }, 1749 /*16*/ { "br_miss", "thresh >= .2", 1750 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1", 1751 br_mispredictib, 8 }, 1752 /*17*/ { "clears", "thresh >= .02", 1753 "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1754 clears, 4 }, 1755 /*18*/ { "microassist", "thresh >= .05", 1756 "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1757 microassist, 2 }, 1758 /*19*/ { "fpassist", "look for a excessive value", 1759 "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", 1760 fpassists, 2 }, 1761 /*20*/ { "otherassistavx", "look for a excessive value", 1762 "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1763 otherassistavx , 2}, 1764 /*21*/ { "otherassistsse", "look for a excessive value", 1765 "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1766 otherassistsse, 2 }, 1767 }; 1768 1769 #define HASWELL_COUNT 20 1770 static struct cpu_entry haswell[HASWELL_COUNT] = { 1771 /*1*/ { "eff1", "thresh < .75", 1772 "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1773 efficiency1, 2 }, 1774 /*2*/ { "eff2", "thresh > 1.0", 1775 "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1776 efficiency2, 2 }, 1777 /*3*/ { "itlbmiss", "thresh > .05", 1778 "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1779 itlb_miss, 2 }, 1780 /*4*/ { "icachemiss", "thresh > .05", 1781 "pmcstat -s ICACHE.MISSES -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1782 icache_miss_has, 2 }, 1783 /*5*/ { "lcpstall", "thresh > .05", 1784 "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1785 lcp_stall, 2 }, 1786 /*6*/ { "cache1", "thresh >= .2", 1787 "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1788 cache1ib, 2 }, 1789 /*7*/ { "cache2", "thresh >= .2", 1790 "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1791 cache2has, 4 }, 1792 /*8*/ { "contested", "thresh >= .05", 1793 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1794 contested_has, 2 }, 1795 /*9*/ { "datashare", "thresh >= .05", 1796 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1797 datasharing_has, 2 }, 1798 /*10*/ { "blockstorefwd", "thresh >= .05", 1799 "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1800 blockstoreforward, 2 }, 1801 /*11*/ { "splitload", "thresh >= .1", 1802 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOPS_RETIRED.SPLIT_LOADS -w 1", 1803 splitload , 2}, 1804 /*12*/ { "splitstore", "thresh >= .01", 1805 "pmcstat -s MEM_UOPS_RETIRED.SPLIT_STORES -s MEM_UOPS_RETIRED.ALL_STORES -w 1", 1806 splitstore, 2 }, 1807 /*13*/ { "aliasing_4k", "thresh >= .1", 1808 "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1809 aliasing, 2 }, 1810 /*14*/ { "dtlbmissload", "thresh >= .1", 1811 "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1812 dtlb_missload, 3 }, 1813 /*15*/ { "br_miss", "thresh >= .2", 1814 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1", 1815 br_mispredict, 2 }, 1816 /*16*/ { "clears", "thresh >= .02", 1817 "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1818 clears, 4 }, 1819 /*17*/ { "microassist", "thresh >= .05", 1820 "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1821 microassist, 2 }, 1822 /*18*/ { "fpassist", "look for a excessive value", 1823 "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", 1824 fpassists, 2 }, 1825 /*19*/ { "otherassistavx", "look for a excessive value", 1826 "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1827 otherassistavx, 2 }, 1828 /*20*/ { "otherassistsse", "look for a excessive value", 1829 "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1830 otherassistsse, 2 }, 1831 }; 1832 1833 1834 static void 1835 explain_name_broad(const char *name) 1836 { 1837 const char *mythresh; 1838 if (strcmp(name, "eff1") == 0) { 1839 printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n"); 1840 mythresh = "thresh < .75"; 1841 } else if (strcmp(name, "eff2") == 0) { 1842 printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n"); 1843 mythresh = "thresh > 1.0"; 1844 } else if (strcmp(name, "itlbmiss") == 0) { 1845 printf("Examine (7 * ITLB_MISSES_STLB_HIT_4K + ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n"); 1846 mythresh = "thresh > .05"; 1847 } else if (strcmp(name, "icachemiss") == 0) { 1848 printf("Examine ( 36.0 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P ??? may not be right \n"); 1849 mythresh = "thresh > .05"; 1850 } else if (strcmp(name, "lcpstall") == 0) { 1851 printf("Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n"); 1852 mythresh = "thresh > .05"; 1853 } else if (strcmp(name, "cache1") == 0) { 1854 printf("Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n"); 1855 mythresh = "thresh >= .1"; 1856 } else if (strcmp(name, "cache2") == 0) { 1857 printf("Examine (36.0 * MEM_LOAD_UOPS_RETIRED.L3_HIT / CPU_CLK_UNHALTED.THREAD_P)\n"); 1858 mythresh = "thresh >= .2"; 1859 } else if (strcmp(name, "contested") == 0) { 1860 printf("Examine ((MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS)/ CPU_CLK_UNHALTED.THREAD_P\n"); 1861 mythresh = "thresh >= .05"; 1862 } else if (strcmp(name, "datashare") == 0) { 1863 printf("Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n"); 1864 mythresh = "thresh > .05"; 1865 } else if (strcmp(name, "blockstorefwd") == 0) { 1866 printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n"); 1867 mythresh = "thresh >= .05"; 1868 } else if (strcmp(name, "aliasing_4k") == 0) { 1869 printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 7) / CPU_CLK_UNHALTED.THREAD_P\n"); 1870 mythresh = "thresh >= .1"; 1871 } else if (strcmp(name, "dtlbmissload") == 0) { 1872 printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"); 1873 printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); 1874 mythresh = "thresh >= .1"; 1875 1876 } else if (strcmp(name, "br_miss") == 0) { 1877 printf("Examine BR_MISP_RETIRED.ALL_BRANCHS_PS / (BR_MISP_RETIED.ALL_BRANCHES_PS + MACHINE_CLEARS.COUNT) *\n"); 1878 printf(" (UOPS_ISSUEDF.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) /\n"); 1879 printf("CPU_CLK_UNHALTED.THREAD * 4)\n"); 1880 mythresh = "thresh >= .2"; 1881 } else if (strcmp(name, "clears") == 0) { 1882 printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"); 1883 printf(" MACHINE_CLEARS.SMC + \n"); 1884 printf(" MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n"); 1885 mythresh = "thresh >= .02"; 1886 } else if (strcmp(name, "fpassist") == 0) { 1887 printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n"); 1888 mythresh = "look for a excessive value"; 1889 } else if (strcmp(name, "otherassistavx") == 0) { 1890 printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 1891 mythresh = "look for a excessive value"; 1892 } else if (strcmp(name, "microassist") == 0) { 1893 printf("Examine (UOPS_RETIRED.RETIRE_SLOTS/UOPS_ISSUED.ANY) * (IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"); 1894 printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n"); 1895 mythresh = "thresh >= .05"; 1896 } else { 1897 printf("Unknown name:%s\n", name); 1898 mythresh = "unknown entry"; 1899 } 1900 printf("If the value printed is %s we may have the ability to improve performance\n", mythresh); 1901 } 1902 1903 1904 #define BROADWELL_COUNT 17 1905 static struct cpu_entry broadwell[BROADWELL_COUNT] = { 1906 /*1*/ { "eff1", "thresh < .75", 1907 "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1908 efficiency1, 2 }, 1909 /*2*/ { "eff2", "thresh > 1.0", 1910 "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1911 efficiency2, 2 }, 1912 /*3*/ { "itlbmiss", "thresh > .05", 1913 "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -s ITLB_MISSES.STLB_HIT_4K -w 1", 1914 itlb_miss_broad, 3 }, 1915 /*4*/ { "icachemiss", "thresh > .05", 1916 "pmcstat -s ICACHE.MISSES -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1917 icache_miss_has, 2 }, 1918 /*5*/ { "lcpstall", "thresh > .05", 1919 "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1920 lcp_stall, 2 }, 1921 /*6*/ { "cache1", "thresh >= .1", 1922 "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1923 cache1broad, 2 }, 1924 /*7*/ { "cache2", "thresh >= .2", 1925 "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1926 cache2broad, 2 }, 1927 /*8*/ { "contested", "thresh >= .05", 1928 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS -w 1", 1929 contestedbroad, 2 }, 1930 /*9*/ { "datashare", "thresh >= .05", 1931 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1932 datasharing_has, 2 }, 1933 /*10*/ { "blockstorefwd", "thresh >= .05", 1934 "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1935 blockstoreforward, 2 }, 1936 /*11*/ { "aliasing_4k", "thresh >= .1", 1937 "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1938 aliasing_broad, 2 }, 1939 /*12*/ { "dtlbmissload", "thresh >= .1", 1940 "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT_4K -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1941 dtlb_missload, 3 }, 1942 /*13*/ { "br_miss", "thresh >= .2", 1943 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.CYCLES -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1", 1944 br_mispredict_broad, 7 }, 1945 /*14*/ { "clears", "thresh >= .02", 1946 "pmcstat -s MACHINE_CLEARS.CYCLES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1947 clears_broad, 5 }, 1948 /*15*/ { "fpassist", "look for a excessive value", 1949 "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", 1950 fpassists, 2 }, 1951 /*16*/ { "otherassistavx", "look for a excessive value", 1952 "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1953 otherassistavx, 2 }, 1954 /*17*/ { "microassist", "thresh >= .2", 1955 "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -w 1", 1956 microassist_broad, 4 }, 1957 }; 1958 1959 1960 static void 1961 set_sandybridge(void) 1962 { 1963 strcpy(the_cpu.cputype, "SandyBridge PMC"); 1964 the_cpu.number = SANDY_BRIDGE_COUNT; 1965 the_cpu.ents = sandy_bridge; 1966 the_cpu.explain = explain_name_sb; 1967 } 1968 1969 static void 1970 set_ivybridge(void) 1971 { 1972 strcpy(the_cpu.cputype, "IvyBridge PMC"); 1973 the_cpu.number = IVY_BRIDGE_COUNT; 1974 the_cpu.ents = ivy_bridge; 1975 the_cpu.explain = explain_name_ib; 1976 } 1977 1978 1979 static void 1980 set_haswell(void) 1981 { 1982 strcpy(the_cpu.cputype, "HASWELL PMC"); 1983 the_cpu.number = HASWELL_COUNT; 1984 the_cpu.ents = haswell; 1985 the_cpu.explain = explain_name_has; 1986 } 1987 1988 1989 static void 1990 set_broadwell(void) 1991 { 1992 strcpy(the_cpu.cputype, "HASWELL PMC"); 1993 the_cpu.number = BROADWELL_COUNT; 1994 the_cpu.ents = broadwell; 1995 the_cpu.explain = explain_name_broad; 1996 } 1997 1998 1999 static int 2000 set_expression(const char *name) 2001 { 2002 int found = 0, i; 2003 for(i=0 ; i< the_cpu.number; i++) { 2004 if (strcmp(name, the_cpu.ents[i].name) == 0) { 2005 found = 1; 2006 expression = the_cpu.ents[i].func; 2007 command = the_cpu.ents[i].command; 2008 threshold = the_cpu.ents[i].thresh; 2009 if (the_cpu.ents[i].counters_required > max_pmc_counters) { 2010 printf("Test %s requires that the CPU have %d counters and this CPU has only %d\n", 2011 the_cpu.ents[i].name, 2012 the_cpu.ents[i].counters_required, max_pmc_counters); 2013 printf("Sorry this test can not be run\n"); 2014 if (run_all == 0) { 2015 exit(-1); 2016 } else { 2017 return(-1); 2018 } 2019 } 2020 break; 2021 } 2022 } 2023 if (!found) { 2024 printf("For CPU type %s we have no expression:%s\n", 2025 the_cpu.cputype, name); 2026 exit(-1); 2027 } 2028 return(0); 2029 } 2030 2031 2032 2033 2034 2035 static int 2036 validate_expression(char *name) 2037 { 2038 int i, found; 2039 2040 found = 0; 2041 for(i=0 ; i< the_cpu.number; i++) { 2042 if (strcmp(name, the_cpu.ents[i].name) == 0) { 2043 found = 1; 2044 break; 2045 } 2046 } 2047 if (!found) { 2048 return(-1); 2049 } 2050 return (0); 2051 } 2052 2053 static void 2054 do_expression(struct counters *cpu, int pos) 2055 { 2056 if (expression == NULL) 2057 return; 2058 (*expression)(cpu, pos); 2059 } 2060 2061 static void 2062 process_header(int idx, char *p) 2063 { 2064 struct counters *up; 2065 int i, len, nlen; 2066 /* 2067 * Given header element idx, at p in 2068 * form 's/NN/nameof' 2069 * process the entry to pull out the name and 2070 * the CPU number. 2071 */ 2072 if (strncmp(p, "s/", 2)) { 2073 printf("Check -- invalid header no s/ in %s\n", 2074 p); 2075 return; 2076 } 2077 up = &cnts[idx]; 2078 up->cpu = strtol(&p[2], NULL, 10); 2079 len = strlen(p); 2080 for (i=2; i<len; i++) { 2081 if (p[i] == '/') { 2082 nlen = strlen(&p[(i+1)]); 2083 if (nlen < (MAX_NLEN-1)) { 2084 strcpy(up->counter_name, &p[(i+1)]); 2085 } else { 2086 strncpy(up->counter_name, &p[(i+1)], (MAX_NLEN-1)); 2087 } 2088 } 2089 } 2090 } 2091 2092 static void 2093 build_counters_from_header(FILE *io) 2094 { 2095 char buffer[8192], *p; 2096 int i, len, cnt; 2097 size_t mlen; 2098 2099 /* We have a new start, lets 2100 * setup our headers and cpus. 2101 */ 2102 if (fgets(buffer, sizeof(buffer), io) == NULL) { 2103 printf("First line can't be read from file err:%d\n", errno); 2104 return; 2105 } 2106 /* 2107 * Ok output is an array of counters. Once 2108 * we start to read the values in we must 2109 * put them in there slot to match there CPU and 2110 * counter being updated. We create a mass array 2111 * of the counters, filling in the CPU and 2112 * counter name. 2113 */ 2114 /* How many do we get? */ 2115 len = strlen(buffer); 2116 for (i=0, cnt=0; i<len; i++) { 2117 if (strncmp(&buffer[i], "s/", 2) == 0) { 2118 cnt++; 2119 for(;i<len;i++) { 2120 if (buffer[i] == ' ') 2121 break; 2122 } 2123 } 2124 } 2125 mlen = sizeof(struct counters) * cnt; 2126 cnts = malloc(mlen); 2127 ncnts = cnt; 2128 if (cnts == NULL) { 2129 printf("No memory err:%d\n", errno); 2130 return; 2131 } 2132 memset(cnts, 0, mlen); 2133 for (i=0, cnt=0; i<len; i++) { 2134 if (strncmp(&buffer[i], "s/", 2) == 0) { 2135 p = &buffer[i]; 2136 for(;i<len;i++) { 2137 if (buffer[i] == ' ') { 2138 buffer[i] = 0; 2139 break; 2140 } 2141 } 2142 process_header(cnt, p); 2143 cnt++; 2144 } 2145 } 2146 if (verbose) 2147 printf("We have %d entries\n", cnt); 2148 } 2149 extern int max_to_collect; 2150 int max_to_collect = MAX_COUNTER_SLOTS; 2151 2152 static int 2153 read_a_line(FILE *io) 2154 { 2155 char buffer[8192], *p, *stop; 2156 int pos, i; 2157 2158 if (fgets(buffer, sizeof(buffer), io) == NULL) { 2159 return(0); 2160 } 2161 p = buffer; 2162 for (i=0; i<ncnts; i++) { 2163 pos = cnts[i].pos; 2164 cnts[i].vals[pos] = strtol(p, &stop, 0); 2165 cnts[i].pos++; 2166 cnts[i].sum += cnts[i].vals[pos]; 2167 p = stop; 2168 } 2169 return (1); 2170 } 2171 2172 extern int cpu_count_out; 2173 int cpu_count_out=0; 2174 2175 static void 2176 print_header(void) 2177 { 2178 int i, cnt, printed_cnt; 2179 2180 printf("*********************************\n"); 2181 for(i=0, cnt=0; i<MAX_CPU; i++) { 2182 if (glob_cpu[i]) { 2183 cnt++; 2184 } 2185 } 2186 cpu_count_out = cnt; 2187 for(i=0, printed_cnt=0; i<MAX_CPU; i++) { 2188 if (glob_cpu[i]) { 2189 printf("CPU%d", i); 2190 printed_cnt++; 2191 } 2192 if (printed_cnt == cnt) { 2193 printf("\n"); 2194 break; 2195 } else { 2196 printf("\t"); 2197 } 2198 } 2199 } 2200 2201 static void 2202 lace_cpus_together(void) 2203 { 2204 int i, j, lace_cpu; 2205 struct counters *cpat, *at; 2206 2207 for(i=0; i<ncnts; i++) { 2208 cpat = &cnts[i]; 2209 if (cpat->next_cpu) { 2210 /* Already laced in */ 2211 continue; 2212 } 2213 lace_cpu = cpat->cpu; 2214 if (lace_cpu >= MAX_CPU) { 2215 printf("CPU %d to big\n", lace_cpu); 2216 continue; 2217 } 2218 if (glob_cpu[lace_cpu] == NULL) { 2219 glob_cpu[lace_cpu] = cpat; 2220 } else { 2221 /* Already processed this cpu */ 2222 continue; 2223 } 2224 /* Ok look forward for cpu->cpu and link in */ 2225 for(j=(i+1); j<ncnts; j++) { 2226 at = &cnts[j]; 2227 if (at->next_cpu) { 2228 continue; 2229 } 2230 if (at->cpu == lace_cpu) { 2231 /* Found one */ 2232 cpat->next_cpu = at; 2233 cpat = at; 2234 } 2235 } 2236 } 2237 } 2238 2239 2240 static void 2241 process_file(char *filename) 2242 { 2243 FILE *io; 2244 int i; 2245 int line_at, not_done; 2246 pid_t pid_of_command=0; 2247 2248 if (filename == NULL) { 2249 io = my_popen(command, "r", &pid_of_command); 2250 if (io == NULL) { 2251 printf("Can't popen the command %s\n", command); 2252 return; 2253 } 2254 } else { 2255 io = fopen(filename, "r"); 2256 if (io == NULL) { 2257 printf("Can't process file %s err:%d\n", 2258 filename, errno); 2259 return; 2260 } 2261 } 2262 build_counters_from_header(io); 2263 if (cnts == NULL) { 2264 /* Nothing we can do */ 2265 printf("Nothing to do -- no counters built\n"); 2266 if (filename) { 2267 fclose(io); 2268 } else { 2269 my_pclose(io, pid_of_command); 2270 } 2271 return; 2272 } 2273 lace_cpus_together(); 2274 print_header(); 2275 if (verbose) { 2276 for (i=0; i<ncnts; i++) { 2277 printf("Counter:%s cpu:%d index:%d\n", 2278 cnts[i].counter_name, 2279 cnts[i].cpu, i); 2280 } 2281 } 2282 line_at = 0; 2283 not_done = 1; 2284 while(not_done) { 2285 if (read_a_line(io)) { 2286 line_at++; 2287 } else { 2288 break; 2289 } 2290 if (line_at >= max_to_collect) { 2291 not_done = 0; 2292 } 2293 if (filename == NULL) { 2294 int cnt; 2295 /* For the ones we dynamically open we print now */ 2296 for(i=0, cnt=0; i<MAX_CPU; i++) { 2297 do_expression(glob_cpu[i], (line_at-1)); 2298 cnt++; 2299 if (cnt == cpu_count_out) { 2300 printf("\n"); 2301 break; 2302 } else { 2303 printf("\t"); 2304 } 2305 } 2306 } 2307 } 2308 if (filename) { 2309 fclose(io); 2310 } else { 2311 my_pclose(io, pid_of_command); 2312 } 2313 } 2314 #if defined(__amd64__) 2315 #define cpuid(in,a,b,c,d)\ 2316 asm("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (in)); 2317 2318 static __inline void 2319 do_cpuid(u_int ax, u_int cx, u_int *p) 2320 { 2321 __asm __volatile("cpuid" 2322 : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3]) 2323 : "0" (ax), "c" (cx) ); 2324 } 2325 2326 #else 2327 #define cpuid(in, a, b, c, d) 2328 #define do_cpuid(ax, cx, p) 2329 #endif 2330 2331 static void 2332 get_cpuid_set(void) 2333 { 2334 unsigned long eax, ebx, ecx, edx; 2335 int model; 2336 pid_t pid_of_command=0; 2337 size_t sz, len; 2338 FILE *io; 2339 char linebuf[1024], *str; 2340 u_int reg[4]; 2341 2342 eax = ebx = ecx = edx = 0; 2343 2344 cpuid(0, eax, ebx, ecx, edx); 2345 if (ebx == 0x68747541) { 2346 printf("AMD processors are not supported by this program\n"); 2347 printf("Sorry\n"); 2348 exit(0); 2349 } else if (ebx == 0x6972794) { 2350 printf("Cyrix processors are not supported by this program\n"); 2351 printf("Sorry\n"); 2352 exit(0); 2353 } else if (ebx == 0x756e6547) { 2354 printf("Genuine Intel\n"); 2355 } else { 2356 printf("Unknown processor type 0x%lx Only Intel AMD64 types are supported by this routine!\n", ebx); 2357 exit(0); 2358 } 2359 cpuid(1, eax, ebx, ecx, edx); 2360 model = (((eax & 0xF0000) >> 12) | ((eax & 0xF0) >> 4)); 2361 printf("CPU model is 0x%x id:0x%lx\n", model, eax); 2362 switch (eax & 0xF00) { 2363 case 0x500: /* Pentium family processors */ 2364 printf("Intel Pentium P5\n"); 2365 goto not_supported; 2366 break; 2367 case 0x600: /* Pentium Pro, Celeron, Pentium II & III */ 2368 switch (model) { 2369 case 0x1: 2370 printf("Intel Pentium P6\n"); 2371 goto not_supported; 2372 break; 2373 case 0x3: 2374 case 0x5: 2375 printf("Intel PII\n"); 2376 goto not_supported; 2377 break; 2378 case 0x6: case 0x16: 2379 printf("Intel CL\n"); 2380 goto not_supported; 2381 break; 2382 case 0x7: case 0x8: case 0xA: case 0xB: 2383 printf("Intel PIII\n"); 2384 goto not_supported; 2385 break; 2386 case 0x9: case 0xD: 2387 printf("Intel PM\n"); 2388 goto not_supported; 2389 break; 2390 case 0xE: 2391 printf("Intel CORE\n"); 2392 goto not_supported; 2393 break; 2394 case 0xF: 2395 printf("Intel CORE2\n"); 2396 goto not_supported; 2397 break; 2398 case 0x17: 2399 printf("Intel CORE2EXTREME\n"); 2400 goto not_supported; 2401 break; 2402 case 0x1C: /* Per Intel document 320047-002. */ 2403 printf("Intel ATOM\n"); 2404 goto not_supported; 2405 break; 2406 case 0x1A: 2407 case 0x1E: /* 2408 * Per Intel document 253669-032 9/2009, 2409 * pages A-2 and A-57 2410 */ 2411 case 0x1F: /* 2412 * Per Intel document 253669-032 9/2009, 2413 * pages A-2 and A-57 2414 */ 2415 printf("Intel COREI7\n"); 2416 goto not_supported; 2417 break; 2418 case 0x2E: 2419 printf("Intel NEHALEM\n"); 2420 goto not_supported; 2421 break; 2422 case 0x25: /* Per Intel document 253669-033US 12/2009. */ 2423 case 0x2C: /* Per Intel document 253669-033US 12/2009. */ 2424 printf("Intel WESTMERE\n"); 2425 goto not_supported; 2426 break; 2427 case 0x2F: /* Westmere-EX, seen in wild */ 2428 printf("Intel WESTMERE\n"); 2429 goto not_supported; 2430 break; 2431 case 0x2A: /* Per Intel document 253669-039US 05/2011. */ 2432 printf("Intel SANDYBRIDGE\n"); 2433 set_sandybridge(); 2434 break; 2435 case 0x2D: /* Per Intel document 253669-044US 08/2012. */ 2436 printf("Intel SANDYBRIDGE_XEON\n"); 2437 set_sandybridge(); 2438 break; 2439 case 0x3A: /* Per Intel document 253669-043US 05/2012. */ 2440 printf("Intel IVYBRIDGE\n"); 2441 set_ivybridge(); 2442 break; 2443 case 0x3E: /* Per Intel document 325462-045US 01/2013. */ 2444 printf("Intel IVYBRIDGE_XEON\n"); 2445 set_ivybridge(); 2446 break; 2447 case 0x3F: /* Per Intel document 325462-045US 09/2014. */ 2448 printf("Intel HASWELL (Xeon)\n"); 2449 set_haswell(); 2450 break; 2451 case 0x3C: /* Per Intel document 325462-045US 01/2013. */ 2452 case 0x45: 2453 case 0x46: 2454 printf("Intel HASWELL\n"); 2455 set_haswell(); 2456 break; 2457 2458 case 0x4e: 2459 case 0x5e: 2460 printf("Intel SKY-LAKE\n"); 2461 goto not_supported; 2462 break; 2463 case 0x3D: 2464 case 0x47: 2465 printf("Intel BROADWELL\n"); 2466 set_broadwell(); 2467 break; 2468 case 0x4f: 2469 case 0x56: 2470 printf("Intel BROADWEL (Xeon)\n"); 2471 set_broadwell(); 2472 break; 2473 2474 case 0x4D: 2475 /* Per Intel document 330061-001 01/2014. */ 2476 printf("Intel ATOM_SILVERMONT\n"); 2477 goto not_supported; 2478 break; 2479 default: 2480 printf("Intel model 0x%x is not known -- sorry\n", 2481 model); 2482 goto not_supported; 2483 break; 2484 } 2485 break; 2486 case 0xF00: /* P4 */ 2487 printf("Intel unknown model %d\n", model); 2488 goto not_supported; 2489 break; 2490 } 2491 do_cpuid(0xa, 0, reg); 2492 max_pmc_counters = (reg[3] & 0x0000000f) + 1; 2493 printf("We have %d PMC counters to work with\n", max_pmc_counters); 2494 /* Ok lets load the list of all known PMC's */ 2495 io = my_popen("/usr/sbin/pmccontrol -L", "r", &pid_of_command); 2496 if (valid_pmcs == NULL) { 2497 /* Likely */ 2498 pmc_allocated_cnt = PMC_INITIAL_ALLOC; 2499 sz = sizeof(char *) * pmc_allocated_cnt; 2500 valid_pmcs = malloc(sz); 2501 if (valid_pmcs == NULL) { 2502 printf("No memory allocation fails at startup?\n"); 2503 exit(-1); 2504 } 2505 memset(valid_pmcs, 0, sz); 2506 } 2507 2508 while (fgets(linebuf, sizeof(linebuf), io) != NULL) { 2509 if (linebuf[0] != '\t') { 2510 /* sometimes headers ;-) */ 2511 continue; 2512 } 2513 len = strlen(linebuf); 2514 if (linebuf[(len-1)] == '\n') { 2515 /* Likely */ 2516 linebuf[(len-1)] = 0; 2517 } 2518 str = &linebuf[1]; 2519 len = strlen(str) + 1; 2520 valid_pmcs[valid_pmc_cnt] = malloc(len); 2521 if (valid_pmcs[valid_pmc_cnt] == NULL) { 2522 printf("No memory2 allocation fails at startup?\n"); 2523 exit(-1); 2524 } 2525 memset(valid_pmcs[valid_pmc_cnt], 0, len); 2526 strcpy(valid_pmcs[valid_pmc_cnt], str); 2527 valid_pmc_cnt++; 2528 if (valid_pmc_cnt >= pmc_allocated_cnt) { 2529 /* Got to expand -- unlikely */ 2530 char **more; 2531 2532 sz = sizeof(char *) * (pmc_allocated_cnt * 2); 2533 more = malloc(sz); 2534 if (more == NULL) { 2535 printf("No memory3 allocation fails at startup?\n"); 2536 exit(-1); 2537 } 2538 memset(more, 0, sz); 2539 memcpy(more, valid_pmcs, sz); 2540 pmc_allocated_cnt *= 2; 2541 free(valid_pmcs); 2542 valid_pmcs = more; 2543 } 2544 } 2545 my_pclose(io, pid_of_command); 2546 return; 2547 not_supported: 2548 printf("Not supported\n"); 2549 exit(-1); 2550 } 2551 2552 static void 2553 explain_all(void) 2554 { 2555 int i; 2556 printf("For CPU's of type %s the following expressions are available:\n",the_cpu.cputype); 2557 printf("-------------------------------------------------------------\n"); 2558 for(i=0; i<the_cpu.number; i++){ 2559 printf("For -e %s ", the_cpu.ents[i].name); 2560 (*the_cpu.explain)(the_cpu.ents[i].name); 2561 printf("----------------------------\n"); 2562 } 2563 } 2564 2565 static void 2566 test_for_a_pmc(const char *pmc, int out_so_far) 2567 { 2568 FILE *io; 2569 pid_t pid_of_command=0; 2570 char my_command[1024]; 2571 char line[1024]; 2572 char resp[1024]; 2573 int len, llen, i; 2574 2575 if (out_so_far < 50) { 2576 len = 50 - out_so_far; 2577 for(i=0; i<len; i++) { 2578 printf(" "); 2579 } 2580 } 2581 sprintf(my_command, "/usr/sbin/pmcstat -w .25 -c 0 -s %s", pmc); 2582 io = my_popen(my_command, "r", &pid_of_command); 2583 if (io == NULL) { 2584 printf("Failed -- popen fails\n"); 2585 return; 2586 } 2587 /* Setup what we expect */ 2588 len = sprintf(resp, "%s", pmc); 2589 if (fgets(line, sizeof(line), io) == NULL) { 2590 printf("Failed -- no output from pmstat\n"); 2591 goto out; 2592 } 2593 llen = strlen(line); 2594 if (line[(llen-1)] == '\n') { 2595 line[(llen-1)] = 0; 2596 llen--; 2597 } 2598 for(i=2; i<(llen-len); i++) { 2599 if (strncmp(&line[i], "ERROR", 5) == 0) { 2600 printf("Failed %s\n", line); 2601 goto out; 2602 } else if (strncmp(&line[i], resp, len) == 0) { 2603 int j, k; 2604 2605 if (fgets(line, sizeof(line), io) == NULL) { 2606 printf("Failed -- no second output from pmstat\n"); 2607 goto out; 2608 } 2609 len = strlen(line); 2610 for (j=0; j<len; j++) { 2611 if (line[j] == ' ') { 2612 j++; 2613 } else { 2614 break; 2615 } 2616 } 2617 printf("Pass"); 2618 len = strlen(&line[j]); 2619 if (len < 20) { 2620 for(k=0; k<(20-len); k++) { 2621 printf(" "); 2622 } 2623 } 2624 if (len) { 2625 printf("%s", &line[j]); 2626 } else { 2627 printf("\n"); 2628 } 2629 goto out; 2630 } 2631 } 2632 printf("Failed -- '%s' not '%s'\n", line, resp); 2633 out: 2634 my_pclose(io, pid_of_command); 2635 2636 } 2637 2638 static int 2639 add_it_to(char **vars, int cur_cnt, char *name) 2640 { 2641 int i; 2642 size_t len; 2643 for(i=0; i<cur_cnt; i++) { 2644 if (strcmp(vars[i], name) == 0) { 2645 /* Already have */ 2646 return(0); 2647 } 2648 } 2649 if (vars[cur_cnt] != NULL) { 2650 printf("Cur_cnt:%d filled with %s??\n", 2651 cur_cnt, vars[cur_cnt]); 2652 exit(-1); 2653 } 2654 /* Ok its new */ 2655 len = strlen(name) + 1; 2656 vars[cur_cnt] = malloc(len); 2657 if (vars[cur_cnt] == NULL) { 2658 printf("No memory %s\n", __FUNCTION__); 2659 exit(-1); 2660 } 2661 memset(vars[cur_cnt], 0, len); 2662 strcpy(vars[cur_cnt], name); 2663 return(1); 2664 } 2665 2666 static char * 2667 build_command_for_exp(struct expression *exp) 2668 { 2669 /* 2670 * Build the pmcstat command to handle 2671 * the passed in expression. 2672 * /usr/sbin/pmcstat -w 1 -s NNN -s QQQ 2673 * where NNN and QQQ represent the PMC's in the expression 2674 * uniquely.. 2675 */ 2676 char forming[1024]; 2677 int cnt_pmc, alloced_pmcs, i; 2678 struct expression *at; 2679 char **vars, *cmd; 2680 size_t mal; 2681 2682 alloced_pmcs = cnt_pmc = 0; 2683 /* first how many do we have */ 2684 at = exp; 2685 while (at) { 2686 if (at->type == TYPE_VALUE_PMC) { 2687 cnt_pmc++; 2688 } 2689 at = at->next; 2690 } 2691 if (cnt_pmc == 0) { 2692 printf("No PMC's in your expression -- nothing to do!!\n"); 2693 exit(0); 2694 } 2695 mal = cnt_pmc * sizeof(char *); 2696 vars = malloc(mal); 2697 if (vars == NULL) { 2698 printf("No memory\n"); 2699 exit(-1); 2700 } 2701 memset(vars, 0, mal); 2702 at = exp; 2703 while (at) { 2704 if (at->type == TYPE_VALUE_PMC) { 2705 if(add_it_to(vars, alloced_pmcs, at->name)) { 2706 alloced_pmcs++; 2707 } 2708 } 2709 at = at->next; 2710 } 2711 /* Now we have a unique list in vars so create our command */ 2712 mal = 23; /* "/usr/sbin/pmcstat -w 1" + \0 */ 2713 for(i=0; i<alloced_pmcs; i++) { 2714 mal += strlen(vars[i]) + 4; /* var + " -s " */ 2715 } 2716 cmd = malloc((mal+2)); 2717 if (cmd == NULL) { 2718 printf("%s out of mem\n", __FUNCTION__); 2719 exit(-1); 2720 } 2721 memset(cmd, 0, (mal+2)); 2722 strcpy(cmd, "/usr/sbin/pmcstat -w 1"); 2723 at = exp; 2724 for(i=0; i<alloced_pmcs; i++) { 2725 sprintf(forming, " -s %s", vars[i]); 2726 strcat(cmd, forming); 2727 free(vars[i]); 2728 vars[i] = NULL; 2729 } 2730 free(vars); 2731 return(cmd); 2732 } 2733 2734 static int 2735 user_expr(struct counters *cpu, int pos) 2736 { 2737 int ret; 2738 double res; 2739 struct counters *var; 2740 struct expression *at; 2741 2742 at = master_exp; 2743 while (at) { 2744 if (at->type == TYPE_VALUE_PMC) { 2745 var = find_counter(cpu, at->name); 2746 if (var == NULL) { 2747 printf("%s:Can't find counter %s?\n", __FUNCTION__, at->name); 2748 exit(-1); 2749 } 2750 if (pos != -1) { 2751 at->value = var->vals[pos] * 1.0; 2752 } else { 2753 at->value = var->sum * 1.0; 2754 } 2755 } 2756 at = at->next; 2757 } 2758 res = run_expr(master_exp, 1, NULL); 2759 ret = printf("%1.3f", res); 2760 return(ret); 2761 } 2762 2763 2764 static void 2765 set_manual_exp(struct expression *exp) 2766 { 2767 expression = user_expr; 2768 command = build_command_for_exp(exp); 2769 threshold = "User defined threshold"; 2770 } 2771 2772 static void 2773 run_tests(void) 2774 { 2775 int i, lenout; 2776 printf("Running tests on %d PMC's this may take some time\n", valid_pmc_cnt); 2777 printf("------------------------------------------------------------------------\n"); 2778 for(i=0; i<valid_pmc_cnt; i++) { 2779 lenout = printf("%s", valid_pmcs[i]); 2780 fflush(stdout); 2781 test_for_a_pmc(valid_pmcs[i], lenout); 2782 } 2783 } 2784 static void 2785 list_all(void) 2786 { 2787 int i, cnt, j; 2788 printf("PMC Abbreviation\n"); 2789 printf("--------------------------------------------------------------\n"); 2790 for(i=0; i<valid_pmc_cnt; i++) { 2791 cnt = printf("%s", valid_pmcs[i]); 2792 for(j=cnt; j<52; j++) { 2793 printf(" "); 2794 } 2795 printf("%%%d\n", i); 2796 } 2797 } 2798 2799 2800 int 2801 main(int argc, char **argv) 2802 { 2803 int i, j, cnt; 2804 char *filename=NULL; 2805 const char *name=NULL; 2806 int help_only = 0; 2807 int test_mode = 0; 2808 int test_at = 0; 2809 2810 get_cpuid_set(); 2811 memset(glob_cpu, 0, sizeof(glob_cpu)); 2812 while ((i = getopt(argc, argv, "ALHhvm:i:?e:TE:")) != -1) { 2813 switch (i) { 2814 case 'A': 2815 run_all = 1; 2816 break; 2817 case 'L': 2818 list_all(); 2819 return(0); 2820 case 'H': 2821 printf("**********************************\n"); 2822 explain_all(); 2823 printf("**********************************\n"); 2824 return(0); 2825 break; 2826 case 'T': 2827 test_mode = 1; 2828 break; 2829 case 'E': 2830 master_exp = parse_expression(optarg); 2831 if (master_exp) { 2832 set_manual_exp(master_exp); 2833 } 2834 break; 2835 case 'e': 2836 if (validate_expression(optarg)) { 2837 printf("Unknown expression %s\n", optarg); 2838 return(0); 2839 } 2840 name = optarg; 2841 set_expression(optarg); 2842 break; 2843 case 'm': 2844 max_to_collect = strtol(optarg, NULL, 0); 2845 if (max_to_collect > MAX_COUNTER_SLOTS) { 2846 /* You can't collect more than max in array */ 2847 max_to_collect = MAX_COUNTER_SLOTS; 2848 } 2849 break; 2850 case 'v': 2851 verbose++; 2852 break; 2853 case 'h': 2854 help_only = 1; 2855 break; 2856 case 'i': 2857 filename = optarg; 2858 break; 2859 case '?': 2860 default: 2861 use: 2862 printf("Use %s [ -i inputfile -v -m max_to_collect -e expr -E -h -? -H]\n", 2863 argv[0]); 2864 printf("-i inputfile -- use source as inputfile not stdin (if stdin collect)\n"); 2865 printf("-v -- verbose dump debug type things -- you don't want this\n"); 2866 printf("-m N -- maximum to collect is N measurements\n"); 2867 printf("-e expr-name -- Do expression expr-name\n"); 2868 printf("-E 'your expression' -- Do your expression\n"); 2869 printf("-h -- Don't do the expression I put in -e xxx just explain what it does and exit\n"); 2870 printf("-H -- Don't run anything, just explain all canned expressions\n"); 2871 printf("-T -- Test all PMC's defined by this processor\n"); 2872 printf("-A -- Run all canned tests\n"); 2873 return(0); 2874 break; 2875 } 2876 } 2877 if ((run_all == 0) && (name == NULL) && (filename == NULL) && 2878 (test_mode == 0) && (master_exp == NULL)) { 2879 printf("Without setting an expression we cannot dynamically gather information\n"); 2880 printf("you must supply a filename (and you probably want verbosity)\n"); 2881 goto use; 2882 } 2883 if (run_all && max_to_collect > 10) { 2884 max_to_collect = 3; 2885 } 2886 if (test_mode) { 2887 run_tests(); 2888 return(0); 2889 } 2890 printf("*********************************\n"); 2891 if ((master_exp == NULL) && name) { 2892 (*the_cpu.explain)(name); 2893 } else if (master_exp) { 2894 printf("Examine your expression "); 2895 print_exp(master_exp); 2896 printf("User defined threshold\n"); 2897 } 2898 if (help_only) { 2899 return(0); 2900 } 2901 if (run_all) { 2902 more: 2903 name = the_cpu.ents[test_at].name; 2904 printf("***Test %s (threshold %s)****\n", name, the_cpu.ents[test_at].thresh); 2905 test_at++; 2906 if (set_expression(name) == -1) { 2907 if (test_at >= the_cpu.number) { 2908 goto done; 2909 } else 2910 goto more; 2911 } 2912 2913 } 2914 process_file(filename); 2915 if (verbose >= 2) { 2916 for (i=0; i<ncnts; i++) { 2917 printf("Counter:%s cpu:%d index:%d\n", 2918 cnts[i].counter_name, 2919 cnts[i].cpu, i); 2920 for(j=0; j<cnts[i].pos; j++) { 2921 printf(" val - %ld\n", (long int)cnts[i].vals[j]); 2922 } 2923 printf(" sum - %ld\n", (long int)cnts[i].sum); 2924 } 2925 } 2926 if (expression == NULL) { 2927 return(0); 2928 } 2929 if (max_to_collect > 1) { 2930 for(i=0, cnt=0; i<MAX_CPU; i++) { 2931 if (glob_cpu[i]) { 2932 do_expression(glob_cpu[i], -1); 2933 cnt++; 2934 if (cnt == cpu_count_out) { 2935 printf("\n"); 2936 break; 2937 } else { 2938 printf("\t"); 2939 } 2940 } 2941 } 2942 } 2943 if (run_all && (test_at < the_cpu.number)) { 2944 memset(glob_cpu, 0, sizeof(glob_cpu)); 2945 ncnts = 0; 2946 printf("*********************************\n"); 2947 goto more; 2948 } else if (run_all) { 2949 done: 2950 printf("*********************************\n"); 2951 } 2952 return(0); 2953 } 2954