1 /*- 2 * Copyright (c) 2014-2015 Netflix, Inc. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer, 9 * in this position and unchanged. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. The name of the author may not be used to endorse or promote products 14 * derived from this software without specific prior written permission 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 #include <sys/types.h> 28 #include <stdio.h> 29 #include <stdlib.h> 30 #include <unistd.h> 31 #include <string.h> 32 #include <strings.h> 33 #include <sys/errno.h> 34 #include <signal.h> 35 #include <sys/wait.h> 36 #include <getopt.h> 37 #include "eval_expr.h" 38 __FBSDID("$FreeBSD$"); 39 40 static int max_pmc_counters = 1; 41 static int run_all = 0; 42 43 #define MAX_COUNTER_SLOTS 1024 44 #define MAX_NLEN 64 45 #define MAX_CPU 64 46 static int verbose = 0; 47 48 extern char **environ; 49 extern struct expression *master_exp; 50 struct expression *master_exp=NULL; 51 52 #define PMC_INITIAL_ALLOC 512 53 extern char **valid_pmcs; 54 char **valid_pmcs = NULL; 55 extern int valid_pmc_cnt; 56 int valid_pmc_cnt=0; 57 extern int pmc_allocated_cnt; 58 int pmc_allocated_cnt=0; 59 60 /* 61 * The following two varients on popen and pclose with 62 * the cavet that they get you the PID so that you 63 * can supply it to pclose so it can send a SIGTERM 64 * to the process. 65 */ 66 static FILE * 67 my_popen(const char *command, const char *dir, pid_t *p_pid) 68 { 69 FILE *io_out, *io_in; 70 int pdesin[2], pdesout[2]; 71 char *argv[4]; 72 pid_t pid; 73 char cmd[4]; 74 char cmd2[1024]; 75 char arg1[4]; 76 77 if ((strcmp(dir, "r") != 0) && 78 (strcmp(dir, "w") != 0)) { 79 errno = EINVAL; 80 return(NULL); 81 } 82 if (pipe(pdesin) < 0) 83 return (NULL); 84 85 if (pipe(pdesout) < 0) { 86 (void)close(pdesin[0]); 87 (void)close(pdesin[1]); 88 return (NULL); 89 } 90 strcpy(cmd, "sh"); 91 strcpy(arg1, "-c"); 92 strcpy(cmd2, command); 93 argv[0] = cmd; 94 argv[1] = arg1; 95 argv[2] = cmd2; 96 argv[3] = NULL; 97 98 switch (pid = fork()) { 99 case -1: /* Error. */ 100 (void)close(pdesin[0]); 101 (void)close(pdesin[1]); 102 (void)close(pdesout[0]); 103 (void)close(pdesout[1]); 104 return (NULL); 105 /* NOTREACHED */ 106 case 0: /* Child. */ 107 /* Close out un-used sides */ 108 (void)close(pdesin[1]); 109 (void)close(pdesout[0]); 110 /* Now prepare the stdin of the process */ 111 close(0); 112 (void)dup(pdesin[0]); 113 (void)close(pdesin[0]); 114 /* Now prepare the stdout of the process */ 115 close(1); 116 (void)dup(pdesout[1]); 117 /* And lets do stderr just in case */ 118 close(2); 119 (void)dup(pdesout[1]); 120 (void)close(pdesout[1]); 121 /* Now run it */ 122 execve("/bin/sh", argv, environ); 123 exit(127); 124 /* NOTREACHED */ 125 } 126 /* Parent; assume fdopen can't fail. */ 127 /* Store the pid */ 128 *p_pid = pid; 129 if (strcmp(dir, "r") != 0) { 130 io_out = fdopen(pdesin[1], "w"); 131 (void)close(pdesin[0]); 132 (void)close(pdesout[0]); 133 (void)close(pdesout[1]); 134 return(io_out); 135 } else { 136 /* Prepare the input stream */ 137 io_in = fdopen(pdesout[0], "r"); 138 (void)close(pdesout[1]); 139 (void)close(pdesin[0]); 140 (void)close(pdesin[1]); 141 return (io_in); 142 } 143 } 144 145 /* 146 * pclose -- 147 * Pclose returns -1 if stream is not associated with a `popened' command, 148 * if already `pclosed', or waitpid returns an error. 149 */ 150 static void 151 my_pclose(FILE *io, pid_t the_pid) 152 { 153 int pstat; 154 pid_t pid; 155 156 /* 157 * Find the appropriate file pointer and remove it from the list. 158 */ 159 (void)fclose(io); 160 /* Die if you are not dead! */ 161 kill(the_pid, SIGTERM); 162 do { 163 pid = wait4(the_pid, &pstat, 0, (struct rusage *)0); 164 } while (pid == -1 && errno == EINTR); 165 } 166 167 struct counters { 168 struct counters *next_cpu; 169 char counter_name[MAX_NLEN]; /* Name of counter */ 170 int cpu; /* CPU we are on */ 171 int pos; /* Index we are filling to. */ 172 uint64_t vals[MAX_COUNTER_SLOTS]; /* Last 64 entries */ 173 uint64_t sum; /* Summary of entries */ 174 }; 175 176 extern struct counters *glob_cpu[MAX_CPU]; 177 struct counters *glob_cpu[MAX_CPU]; 178 179 extern struct counters *cnts; 180 struct counters *cnts=NULL; 181 182 extern int ncnts; 183 int ncnts=0; 184 185 extern int (*expression)(struct counters *, int); 186 int (*expression)(struct counters *, int); 187 188 static const char *threshold=NULL; 189 static const char *command; 190 191 struct cpu_entry { 192 const char *name; 193 const char *thresh; 194 const char *command; 195 int (*func)(struct counters *, int); 196 int counters_required; 197 }; 198 199 struct cpu_type { 200 char cputype[32]; 201 int number; 202 struct cpu_entry *ents; 203 void (*explain)(const char *name); 204 }; 205 extern struct cpu_type the_cpu; 206 struct cpu_type the_cpu; 207 208 static void 209 explain_name_sb(const char *name) 210 { 211 const char *mythresh; 212 if (strcmp(name, "allocstall1") == 0) { 213 printf("Examine PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW / CPU_CLK_UNHALTED.THREAD_P\n"); 214 mythresh = "thresh > .05"; 215 } else if (strcmp(name, "allocstall2") == 0) { 216 printf("Examine PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P\n"); 217 mythresh = "thresh > .05"; 218 } else if (strcmp(name, "br_miss") == 0) { 219 printf("Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P\n"); 220 mythresh = "thresh >= .2"; 221 } else if (strcmp(name, "splitload") == 0) { 222 printf("Examine MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); 223 mythresh = "thresh >= .1"; 224 } else if (strcmp(name, "splitstore") == 0) { 225 printf("Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n"); 226 mythresh = "thresh >= .01"; 227 } else if (strcmp(name, "contested") == 0) { 228 printf("Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n"); 229 mythresh = "thresh >= .05"; 230 } else if (strcmp(name, "blockstorefwd") == 0) { 231 printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n"); 232 mythresh = "thresh >= .05"; 233 } else if (strcmp(name, "cache2") == 0) { 234 printf("Examine ((MEM_LOAD_RETIRED.L3_HIT * 26) + \n"); 235 printf(" (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) + \n"); 236 printf(" (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P\n"); 237 printf("**Note we have it labeled MEM_LOAD_UOPS_RETIRED.LLC_HIT not MEM_LOAD_RETIRED.L3_HIT\n"); 238 mythresh = "thresh >= .2"; 239 } else if (strcmp(name, "cache1") == 0) { 240 printf("Examine (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P\n"); 241 mythresh = "thresh >= .2"; 242 } else if (strcmp(name, "dtlbmissload") == 0) { 243 printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"); 244 printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); 245 mythresh = "thresh >= .1"; 246 } else if (strcmp(name, "frontendstall") == 0) { 247 printf("Examine IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4)\n"); 248 mythresh = "thresh >= .15"; 249 } else if (strcmp(name, "clears") == 0) { 250 printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"); 251 printf(" MACHINE_CLEARS.SMC + \n"); 252 printf(" MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n"); 253 mythresh = "thresh >= .02"; 254 } else if (strcmp(name, "microassist") == 0) { 255 printf("Examine IDQ.MS_CYCLES / (CPU_CLK_UNHALTED.THREAD_P * 4)\n"); 256 printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n"); 257 mythresh = "thresh >= .05"; 258 } else if (strcmp(name, "aliasing_4k") == 0) { 259 printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); 260 mythresh = "thresh >= .1"; 261 } else if (strcmp(name, "fpassist") == 0) { 262 printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n"); 263 mythresh = "look for a excessive value"; 264 } else if (strcmp(name, "otherassistavx") == 0) { 265 printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 266 mythresh = "look for a excessive value"; 267 } else if (strcmp(name, "otherassistsse") == 0) { 268 printf("Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 269 mythresh = "look for a excessive value"; 270 } else if (strcmp(name, "eff1") == 0) { 271 printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n"); 272 mythresh = "thresh < .9"; 273 } else if (strcmp(name, "eff2") == 0) { 274 printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n"); 275 mythresh = "thresh > 1.0"; 276 } else if (strcmp(name, "dtlbmissstore") == 0) { 277 printf("Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"); 278 printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); 279 mythresh = "thresh >= .05"; 280 } else { 281 printf("Unknown name:%s\n", name); 282 mythresh = "unknown entry"; 283 } 284 printf("If the value printed is %s we may have the ability to improve performance\n", mythresh); 285 } 286 287 static void 288 explain_name_ib(const char *name) 289 { 290 const char *mythresh; 291 if (strcmp(name, "br_miss") == 0) { 292 printf("Examine ((BR_MISP_RETIRED.ALL_BRANCHES /(BR_MISP_RETIRED.ALL_BRANCHES +\n"); 293 printf(" MACHINE_CLEAR.COUNT) * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES)\n"); 294 printf("/ (4 * CPU_CLK_UNHALTED.THREAD))))\n"); 295 mythresh = "thresh >= .2"; 296 } else if (strcmp(name, "eff1") == 0) { 297 printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n"); 298 mythresh = "thresh < .9"; 299 } else if (strcmp(name, "eff2") == 0) { 300 printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n"); 301 mythresh = "thresh > 1.0"; 302 } else if (strcmp(name, "cache1") == 0) { 303 printf("Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n"); 304 mythresh = "thresh >= .2"; 305 } else if (strcmp(name, "cache2") == 0) { 306 printf("Examine (MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P\n"); 307 mythresh = "thresh >= .2"; 308 } else if (strcmp(name, "itlbmiss") == 0) { 309 printf("Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n"); 310 mythresh = "thresh > .05"; 311 } else if (strcmp(name, "icachemiss") == 0) { 312 printf("Examine (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n"); 313 mythresh = "thresh > .05"; 314 } else if (strcmp(name, "lcpstall") == 0) { 315 printf("Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n"); 316 mythresh = "thresh > .05"; 317 } else if (strcmp(name, "datashare") == 0) { 318 printf("Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/CPU_CLK_UNHALTED.THREAD_P\n"); 319 mythresh = "thresh > .05"; 320 } else if (strcmp(name, "blockstorefwd") == 0) { 321 printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n"); 322 mythresh = "thresh >= .05"; 323 } else if (strcmp(name, "splitload") == 0) { 324 printf("Examine ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) *\n"); 325 printf(" LD_BLOCKS.NO_SR)/CPU_CLK_UNHALTED.THREAD_P\n"); 326 mythresh = "thresh >= .1"; 327 } else if (strcmp(name, "splitstore") == 0) { 328 printf("Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n"); 329 mythresh = "thresh >= .01"; 330 } else if (strcmp(name, "aliasing_4k") == 0) { 331 printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); 332 mythresh = "thresh >= .1"; 333 } else if (strcmp(name, "dtlbmissload") == 0) { 334 printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"); 335 printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); 336 mythresh = "thresh >= .1"; 337 } else if (strcmp(name, "dtlbmissstore") == 0) { 338 printf("Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"); 339 printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); 340 mythresh = "thresh >= .05"; 341 } else if (strcmp(name, "contested") == 0) { 342 printf("Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n"); 343 mythresh = "thresh >= .05"; 344 } else if (strcmp(name, "clears") == 0) { 345 printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"); 346 printf(" MACHINE_CLEARS.SMC + \n"); 347 printf(" MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n"); 348 mythresh = "thresh >= .02"; 349 } else if (strcmp(name, "microassist") == 0) { 350 printf("Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"); 351 printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n"); 352 mythresh = "thresh >= .05"; 353 } else if (strcmp(name, "fpassist") == 0) { 354 printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n"); 355 mythresh = "look for a excessive value"; 356 } else if (strcmp(name, "otherassistavx") == 0) { 357 printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 358 mythresh = "look for a excessive value"; 359 } else if (strcmp(name, "otherassistsse") == 0) { 360 printf("Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 361 mythresh = "look for a excessive value"; 362 } else { 363 printf("Unknown name:%s\n", name); 364 mythresh = "unknown entry"; 365 } 366 printf("If the value printed is %s we may have the ability to improve performance\n", mythresh); 367 } 368 369 370 static void 371 explain_name_has(const char *name) 372 { 373 const char *mythresh; 374 if (strcmp(name, "eff1") == 0) { 375 printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n"); 376 mythresh = "thresh < .75"; 377 } else if (strcmp(name, "eff2") == 0) { 378 printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n"); 379 mythresh = "thresh > 1.0"; 380 } else if (strcmp(name, "itlbmiss") == 0) { 381 printf("Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n"); 382 mythresh = "thresh > .05"; 383 } else if (strcmp(name, "icachemiss") == 0) { 384 printf("Examine (36 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P\n"); 385 mythresh = "thresh > .05"; 386 } else if (strcmp(name, "lcpstall") == 0) { 387 printf("Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n"); 388 mythresh = "thresh > .05"; 389 } else if (strcmp(name, "cache1") == 0) { 390 printf("Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n"); 391 mythresh = "thresh >= .2"; 392 } else if (strcmp(name, "cache2") == 0) { 393 printf("Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \n"); 394 printf(" (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) + \n"); 395 printf(" (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))\n"); 396 printf(" / CPU_CLK_UNHALTED.THREAD_P\n"); 397 mythresh = "thresh >= .2"; 398 } else if (strcmp(name, "contested") == 0) { 399 printf("Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P\n"); 400 mythresh = "thresh >= .05"; 401 } else if (strcmp(name, "datashare") == 0) { 402 printf("Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n"); 403 mythresh = "thresh > .05"; 404 } else if (strcmp(name, "blockstorefwd") == 0) { 405 printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n"); 406 mythresh = "thresh >= .05"; 407 } else if (strcmp(name, "splitload") == 0) { 408 printf("Examine (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); 409 mythresh = "thresh >= .1"; 410 } else if (strcmp(name, "splitstore") == 0) { 411 printf("Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n"); 412 mythresh = "thresh >= .01"; 413 } else if (strcmp(name, "aliasing_4k") == 0) { 414 printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); 415 mythresh = "thresh >= .1"; 416 } else if (strcmp(name, "dtlbmissload") == 0) { 417 printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"); 418 printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); 419 mythresh = "thresh >= .1"; 420 } else if (strcmp(name, "br_miss") == 0) { 421 printf("Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD\n"); 422 mythresh = "thresh >= .2"; 423 } else if (strcmp(name, "clears") == 0) { 424 printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"); 425 printf(" MACHINE_CLEARS.SMC + \n"); 426 printf(" MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n"); 427 mythresh = "thresh >= .02"; 428 } else if (strcmp(name, "microassist") == 0) { 429 printf("Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"); 430 printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n"); 431 mythresh = "thresh >= .05"; 432 } else if (strcmp(name, "fpassist") == 0) { 433 printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n"); 434 mythresh = "look for a excessive value"; 435 } else if (strcmp(name, "otherassistavx") == 0) { 436 printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 437 mythresh = "look for a excessive value"; 438 } else if (strcmp(name, "otherassistsse") == 0) { 439 printf("Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 440 mythresh = "look for a excessive value"; 441 } else { 442 printf("Unknown name:%s\n", name); 443 mythresh = "unknown entry"; 444 } 445 printf("If the value printed is %s we may have the ability to improve performance\n", mythresh); 446 } 447 448 449 450 static struct counters * 451 find_counter(struct counters *base, const char *name) 452 { 453 struct counters *at; 454 int len; 455 456 at = base; 457 len = strlen(name); 458 while(at) { 459 if (strncmp(at->counter_name, name, len) == 0) { 460 return(at); 461 } 462 at = at->next_cpu; 463 } 464 printf("Can't find counter %s\n", name); 465 printf("We have:\n"); 466 at = base; 467 while(at) { 468 printf("- %s\n", at->counter_name); 469 at = at->next_cpu; 470 } 471 exit(-1); 472 } 473 474 static int 475 allocstall1(struct counters *cpu, int pos) 476 { 477 /* 1 - PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW/CPU_CLK_UNHALTED.THREAD_P (thresh > .05)*/ 478 int ret; 479 struct counters *partial; 480 struct counters *unhalt; 481 double un, par, res; 482 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 483 partial = find_counter(cpu, "PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW"); 484 if (pos != -1) { 485 par = partial->vals[pos] * 1.0; 486 un = unhalt->vals[pos] * 1.0; 487 } else { 488 par = partial->sum * 1.0; 489 un = unhalt->sum * 1.0; 490 } 491 res = par/un; 492 ret = printf("%1.3f", res); 493 return(ret); 494 } 495 496 static int 497 allocstall2(struct counters *cpu, int pos) 498 { 499 /* 2 - PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */ 500 int ret; 501 struct counters *partial; 502 struct counters *unhalt; 503 double un, par, res; 504 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 505 partial = find_counter(cpu, "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP"); 506 if (pos != -1) { 507 par = partial->vals[pos] * 1.0; 508 un = unhalt->vals[pos] * 1.0; 509 } else { 510 par = partial->sum * 1.0; 511 un = unhalt->sum * 1.0; 512 } 513 res = par/un; 514 ret = printf("%1.3f", res); 515 return(ret); 516 } 517 518 static int 519 br_mispredict(struct counters *cpu, int pos) 520 { 521 struct counters *brctr; 522 struct counters *unhalt; 523 int ret; 524 /* 3 - (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */ 525 double br, un, con, res; 526 con = 20.0; 527 528 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 529 brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES"); 530 if (pos != -1) { 531 br = brctr->vals[pos] * 1.0; 532 un = unhalt->vals[pos] * 1.0; 533 } else { 534 br = brctr->sum * 1.0; 535 un = unhalt->sum * 1.0; 536 } 537 res = (con * br)/un; 538 ret = printf("%1.3f", res); 539 return(ret); 540 } 541 542 static int 543 br_mispredictib(struct counters *cpu, int pos) 544 { 545 struct counters *brctr; 546 struct counters *unhalt; 547 struct counters *clear, *clear2, *clear3; 548 struct counters *uops; 549 struct counters *recv; 550 struct counters *iss; 551 /* "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",*/ 552 int ret; 553 /* 554 * (BR_MISP_RETIRED.ALL_BRANCHES / 555 * (BR_MISP_RETIRED.ALL_BRANCHES + 556 * MACHINE_CLEAR.COUNT) * 557 * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) / (4 * CPU_CLK_UNHALTED.THREAD))) 558 * 559 */ 560 double br, cl, cl2, cl3, uo, re, un, con, res, is; 561 con = 4.0; 562 563 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 564 brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES"); 565 clear = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING"); 566 clear2 = find_counter(cpu, "MACHINE_CLEARS.SMC"); 567 clear3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV"); 568 uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS"); 569 iss = find_counter(cpu, "UOPS_ISSUED.ANY"); 570 recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES"); 571 if (pos != -1) { 572 br = brctr->vals[pos] * 1.0; 573 cl = clear->vals[pos] * 1.0; 574 cl2 = clear2->vals[pos] * 1.0; 575 cl3 = clear3->vals[pos] * 1.0; 576 uo = uops->vals[pos] * 1.0; 577 re = recv->vals[pos] * 1.0; 578 is = iss->vals[pos] * 1.0; 579 un = unhalt->vals[pos] * 1.0; 580 } else { 581 br = brctr->sum * 1.0; 582 cl = clear->sum * 1.0; 583 cl2 = clear2->sum * 1.0; 584 cl3 = clear3->sum * 1.0; 585 uo = uops->sum * 1.0; 586 re = recv->sum * 1.0; 587 is = iss->sum * 1.0; 588 un = unhalt->sum * 1.0; 589 } 590 res = (br/(br + cl + cl2 + cl3) * ((is - uo + con * re) / (con * un))); 591 ret = printf("%1.3f", res); 592 return(ret); 593 } 594 595 596 static int 597 br_mispredict_broad(struct counters *cpu, int pos) 598 { 599 struct counters *brctr; 600 struct counters *unhalt; 601 struct counters *clear; 602 struct counters *uops; 603 struct counters *uops_ret; 604 struct counters *recv; 605 int ret; 606 double br, cl, uo, uo_r, re, con, un, res; 607 608 con = 4.0; 609 610 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 611 brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES"); 612 clear = find_counter(cpu, "MACHINE_CLEARS.CYCLES"); 613 uops = find_counter(cpu, "UOPS_ISSUED.ANY"); 614 uops_ret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS"); 615 recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES"); 616 617 if (pos != -1) { 618 un = unhalt->vals[pos] * 1.0; 619 br = brctr->vals[pos] * 1.0; 620 cl = clear->vals[pos] * 1.0; 621 uo = uops->vals[pos] * 1.0; 622 uo_r = uops_ret->vals[pos] * 1.0; 623 re = recv->vals[pos] * 1.0; 624 } else { 625 un = unhalt->sum * 1.0; 626 br = brctr->sum * 1.0; 627 cl = clear->sum * 1.0; 628 uo = uops->sum * 1.0; 629 uo_r = uops_ret->sum * 1.0; 630 re = recv->sum * 1.0; 631 } 632 res = br / (br + cl) * (uo - uo_r + con * re) / (un * con); 633 ret = printf("%1.3f", res); 634 return(ret); 635 } 636 637 static int 638 splitloadib(struct counters *cpu, int pos) 639 { 640 int ret; 641 struct counters *mem; 642 struct counters *l1d, *ldblock; 643 struct counters *unhalt; 644 double un, memd, res, l1, ldb; 645 /* 646 * ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) * LD_BLOCKS.NO_SR) / CPU_CLK_UNHALTED.THREAD_P 647 * "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1", 648 */ 649 650 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 651 mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L1_MISS"); 652 l1d = find_counter(cpu, "L1D_PEND_MISS.PENDING"); 653 ldblock = find_counter(cpu, "LD_BLOCKS.NO_SR"); 654 if (pos != -1) { 655 memd = mem->vals[pos] * 1.0; 656 l1 = l1d->vals[pos] * 1.0; 657 ldb = ldblock->vals[pos] * 1.0; 658 un = unhalt->vals[pos] * 1.0; 659 } else { 660 memd = mem->sum * 1.0; 661 l1 = l1d->sum * 1.0; 662 ldb = ldblock->sum * 1.0; 663 un = unhalt->sum * 1.0; 664 } 665 res = ((l1 / memd) * ldb)/un; 666 ret = printf("%1.3f", res); 667 return(ret); 668 } 669 670 671 static int 672 splitload(struct counters *cpu, int pos) 673 { 674 int ret; 675 struct counters *mem; 676 struct counters *unhalt; 677 double con, un, memd, res; 678 /* 4 - (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/ 679 680 con = 5.0; 681 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 682 mem = find_counter(cpu, "MEM_UOPS_RETIRED.SPLIT_LOADS"); 683 if (pos != -1) { 684 memd = mem->vals[pos] * 1.0; 685 un = unhalt->vals[pos] * 1.0; 686 } else { 687 memd = mem->sum * 1.0; 688 un = unhalt->sum * 1.0; 689 } 690 res = (memd * con)/un; 691 ret = printf("%1.3f", res); 692 return(ret); 693 } 694 695 696 static int 697 splitload_sb(struct counters *cpu, int pos) 698 { 699 int ret; 700 struct counters *mem; 701 struct counters *unhalt; 702 double con, un, memd, res; 703 /* 4 - (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/ 704 705 con = 5.0; 706 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 707 mem = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_LOADS"); 708 if (pos != -1) { 709 memd = mem->vals[pos] * 1.0; 710 un = unhalt->vals[pos] * 1.0; 711 } else { 712 memd = mem->sum * 1.0; 713 un = unhalt->sum * 1.0; 714 } 715 res = (memd * con)/un; 716 ret = printf("%1.3f", res); 717 return(ret); 718 } 719 720 721 static int 722 splitstore_sb(struct counters *cpu, int pos) 723 { 724 /* 5 - MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES (thresh > 0.01) */ 725 int ret; 726 struct counters *mem_split; 727 struct counters *mem_stores; 728 double memsplit, memstore, res; 729 mem_split = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_STORES"); 730 mem_stores = find_counter(cpu, "MEM_UOP_RETIRED.ALL_STORES"); 731 if (pos != -1) { 732 memsplit = mem_split->vals[pos] * 1.0; 733 memstore = mem_stores->vals[pos] * 1.0; 734 } else { 735 memsplit = mem_split->sum * 1.0; 736 memstore = mem_stores->sum * 1.0; 737 } 738 res = memsplit/memstore; 739 ret = printf("%1.3f", res); 740 return(ret); 741 } 742 743 744 745 static int 746 splitstore(struct counters *cpu, int pos) 747 { 748 /* 5 - MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES (thresh > 0.01) */ 749 int ret; 750 struct counters *mem_split; 751 struct counters *mem_stores; 752 double memsplit, memstore, res; 753 mem_split = find_counter(cpu, "MEM_UOPS_RETIRED.SPLIT_STORES"); 754 mem_stores = find_counter(cpu, "MEM_UOPS_RETIRED.ALL_STORES"); 755 if (pos != -1) { 756 memsplit = mem_split->vals[pos] * 1.0; 757 memstore = mem_stores->vals[pos] * 1.0; 758 } else { 759 memsplit = mem_split->sum * 1.0; 760 memstore = mem_stores->sum * 1.0; 761 } 762 res = memsplit/memstore; 763 ret = printf("%1.3f", res); 764 return(ret); 765 } 766 767 768 static int 769 contested(struct counters *cpu, int pos) 770 { 771 /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */ 772 int ret; 773 struct counters *mem; 774 struct counters *unhalt; 775 double con, un, memd, res; 776 777 con = 60.0; 778 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 779 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); 780 if (pos != -1) { 781 memd = mem->vals[pos] * 1.0; 782 un = unhalt->vals[pos] * 1.0; 783 } else { 784 memd = mem->sum * 1.0; 785 un = unhalt->sum * 1.0; 786 } 787 res = (memd * con)/un; 788 ret = printf("%1.3f", res); 789 return(ret); 790 } 791 792 static int 793 contested_has(struct counters *cpu, int pos) 794 { 795 /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */ 796 int ret; 797 struct counters *mem; 798 struct counters *unhalt; 799 double con, un, memd, res; 800 801 con = 84.0; 802 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 803 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); 804 if (pos != -1) { 805 memd = mem->vals[pos] * 1.0; 806 un = unhalt->vals[pos] * 1.0; 807 } else { 808 memd = mem->sum * 1.0; 809 un = unhalt->sum * 1.0; 810 } 811 res = (memd * con)/un; 812 ret = printf("%1.3f", res); 813 return(ret); 814 } 815 816 static int 817 contestedbroad(struct counters *cpu, int pos) 818 { 819 /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */ 820 int ret; 821 struct counters *mem; 822 struct counters *mem2; 823 struct counters *unhalt; 824 double con, un, memd, memtoo, res; 825 826 con = 84.0; 827 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 828 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); 829 mem2 = find_counter(cpu,"MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS"); 830 831 if (pos != -1) { 832 memd = mem->vals[pos] * 1.0; 833 memtoo = mem2->vals[pos] * 1.0; 834 un = unhalt->vals[pos] * 1.0; 835 } else { 836 memd = mem->sum * 1.0; 837 memtoo = mem2->sum * 1.0; 838 un = unhalt->sum * 1.0; 839 } 840 res = ((memd * con) + memtoo)/un; 841 ret = printf("%1.3f", res); 842 return(ret); 843 } 844 845 846 static int 847 blockstoreforward(struct counters *cpu, int pos) 848 { 849 /* 7 - (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .05)*/ 850 int ret; 851 struct counters *ldb; 852 struct counters *unhalt; 853 double con, un, ld, res; 854 855 con = 13.0; 856 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 857 ldb = find_counter(cpu, "LD_BLOCKS_STORE_FORWARD"); 858 if (pos != -1) { 859 ld = ldb->vals[pos] * 1.0; 860 un = unhalt->vals[pos] * 1.0; 861 } else { 862 ld = ldb->sum * 1.0; 863 un = unhalt->sum * 1.0; 864 } 865 res = (ld * con)/un; 866 ret = printf("%1.3f", res); 867 return(ret); 868 } 869 870 static int 871 cache2(struct counters *cpu, int pos) 872 { 873 /* ** Suspect *** 874 * 8 - ((MEM_LOAD_RETIRED.L3_HIT * 26) + (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) + 875 * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P (thresh >.2) 876 */ 877 int ret; 878 struct counters *mem1, *mem2, *mem3; 879 struct counters *unhalt; 880 double con1, con2, con3, un, me_1, me_2, me_3, res; 881 882 con1 = 26.0; 883 con2 = 43.0; 884 con3 = 60.0; 885 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 886 /* Call for MEM_LOAD_RETIRED.L3_HIT possibly MEM_LOAD_UOPS_RETIRED.LLC_HIT ?*/ 887 mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT"); 888 mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT"); 889 mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); 890 if (pos != -1) { 891 me_1 = mem1->vals[pos] * 1.0; 892 me_2 = mem2->vals[pos] * 1.0; 893 me_3 = mem3->vals[pos] * 1.0; 894 un = unhalt->vals[pos] * 1.0; 895 } else { 896 me_1 = mem1->sum * 1.0; 897 me_2 = mem2->sum * 1.0; 898 me_3 = mem3->sum * 1.0; 899 un = unhalt->sum * 1.0; 900 } 901 res = ((me_1 * con1) + (me_2 * con2) + (me_3 * con3))/un; 902 ret = printf("%1.3f", res); 903 return(ret); 904 } 905 906 static int 907 datasharing(struct counters *cpu, int pos) 908 { 909 /* 910 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2) 911 */ 912 int ret; 913 struct counters *mem; 914 struct counters *unhalt; 915 double con, res, me, un; 916 917 con = 43.0; 918 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 919 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT"); 920 if (pos != -1) { 921 me = mem->vals[pos] * 1.0; 922 un = unhalt->vals[pos] * 1.0; 923 } else { 924 me = mem->sum * 1.0; 925 un = unhalt->sum * 1.0; 926 } 927 res = (me * con)/un; 928 ret = printf("%1.3f", res); 929 return(ret); 930 931 } 932 933 934 static int 935 datasharing_has(struct counters *cpu, int pos) 936 { 937 /* 938 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2) 939 */ 940 int ret; 941 struct counters *mem; 942 struct counters *unhalt; 943 double con, res, me, un; 944 945 con = 72.0; 946 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 947 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT"); 948 if (pos != -1) { 949 me = mem->vals[pos] * 1.0; 950 un = unhalt->vals[pos] * 1.0; 951 } else { 952 me = mem->sum * 1.0; 953 un = unhalt->sum * 1.0; 954 } 955 res = (me * con)/un; 956 ret = printf("%1.3f", res); 957 return(ret); 958 959 } 960 961 962 static int 963 cache2ib(struct counters *cpu, int pos) 964 { 965 /* 966 * (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2) 967 */ 968 int ret; 969 struct counters *mem; 970 struct counters *unhalt; 971 double con, un, me, res; 972 973 con = 29.0; 974 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 975 mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT"); 976 if (pos != -1) { 977 me = mem->vals[pos] * 1.0; 978 un = unhalt->vals[pos] * 1.0; 979 } else { 980 me = mem->sum * 1.0; 981 un = unhalt->sum * 1.0; 982 } 983 res = (con * me)/un; 984 ret = printf("%1.3f", res); 985 return(ret); 986 } 987 988 static int 989 cache2has(struct counters *cpu, int pos) 990 { 991 /* 992 * Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \ 993 * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) + 994 * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84)) 995 * / CPU_CLK_UNHALTED.THREAD_P 996 */ 997 int ret; 998 struct counters *mem1, *mem2, *mem3; 999 struct counters *unhalt; 1000 double con1, con2, con3, un, me1, me2, me3, res; 1001 1002 con1 = 36.0; 1003 con2 = 72.0; 1004 con3 = 84.0; 1005 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1006 mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT"); 1007 mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT"); 1008 mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); 1009 if (pos != -1) { 1010 me1 = mem1->vals[pos] * 1.0; 1011 me2 = mem2->vals[pos] * 1.0; 1012 me3 = mem3->vals[pos] * 1.0; 1013 un = unhalt->vals[pos] * 1.0; 1014 } else { 1015 me1 = mem1->sum * 1.0; 1016 me2 = mem2->sum * 1.0; 1017 me3 = mem3->sum * 1.0; 1018 un = unhalt->sum * 1.0; 1019 } 1020 res = ((me1 * con1) + (me2 * con2) + (me3 * con3))/un; 1021 ret = printf("%1.3f", res); 1022 return(ret); 1023 } 1024 1025 1026 static int 1027 cache2broad(struct counters *cpu, int pos) 1028 { 1029 /* 1030 * (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2) 1031 */ 1032 int ret; 1033 struct counters *mem; 1034 struct counters *unhalt; 1035 double con, un, me, res; 1036 1037 con = 36.0; 1038 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1039 mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L3_HIT"); 1040 if (pos != -1) { 1041 me = mem->vals[pos] * 1.0; 1042 un = unhalt->vals[pos] * 1.0; 1043 } else { 1044 me = mem->sum * 1.0; 1045 un = unhalt->sum * 1.0; 1046 } 1047 res = (con * me)/un; 1048 ret = printf("%1.3f", res); 1049 return(ret); 1050 } 1051 1052 1053 static int 1054 cache1(struct counters *cpu, int pos) 1055 { 1056 /* 9 - (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */ 1057 int ret; 1058 struct counters *mem; 1059 struct counters *unhalt; 1060 double con, un, me, res; 1061 1062 con = 180.0; 1063 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1064 mem = find_counter(cpu, "MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS"); 1065 if (pos != -1) { 1066 me = mem->vals[pos] * 1.0; 1067 un = unhalt->vals[pos] * 1.0; 1068 } else { 1069 me = mem->sum * 1.0; 1070 un = unhalt->sum * 1.0; 1071 } 1072 res = (me * con)/un; 1073 ret = printf("%1.3f", res); 1074 return(ret); 1075 } 1076 1077 static int 1078 cache1ib(struct counters *cpu, int pos) 1079 { 1080 /* 9 - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */ 1081 int ret; 1082 struct counters *mem; 1083 struct counters *unhalt; 1084 double con, un, me, res; 1085 1086 con = 180.0; 1087 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1088 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM"); 1089 if (pos != -1) { 1090 me = mem->vals[pos] * 1.0; 1091 un = unhalt->vals[pos] * 1.0; 1092 } else { 1093 me = mem->sum * 1.0; 1094 un = unhalt->sum * 1.0; 1095 } 1096 res = (me * con)/un; 1097 ret = printf("%1.3f", res); 1098 return(ret); 1099 } 1100 1101 1102 static int 1103 cache1broad(struct counters *cpu, int pos) 1104 { 1105 /* 9 - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */ 1106 int ret; 1107 struct counters *mem; 1108 struct counters *unhalt; 1109 double con, un, me, res; 1110 1111 con = 180.0; 1112 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1113 mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L3_MISS"); 1114 if (pos != -1) { 1115 me = mem->vals[pos] * 1.0; 1116 un = unhalt->vals[pos] * 1.0; 1117 } else { 1118 me = mem->sum * 1.0; 1119 un = unhalt->sum * 1.0; 1120 } 1121 res = (me * con)/un; 1122 ret = printf("%1.3f", res); 1123 return(ret); 1124 } 1125 1126 1127 static int 1128 dtlb_missload(struct counters *cpu, int pos) 1129 { 1130 /* 10 - ((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P (t >=.1) */ 1131 int ret; 1132 struct counters *dtlb_m, *dtlb_d; 1133 struct counters *unhalt; 1134 double con, un, d1, d2, res; 1135 1136 con = 7.0; 1137 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1138 dtlb_m = find_counter(cpu, "DTLB_LOAD_MISSES.STLB_HIT"); 1139 dtlb_d = find_counter(cpu, "DTLB_LOAD_MISSES.WALK_DURATION"); 1140 if (pos != -1) { 1141 d1 = dtlb_m->vals[pos] * 1.0; 1142 d2 = dtlb_d->vals[pos] * 1.0; 1143 un = unhalt->vals[pos] * 1.0; 1144 } else { 1145 d1 = dtlb_m->sum * 1.0; 1146 d2 = dtlb_d->sum * 1.0; 1147 un = unhalt->sum * 1.0; 1148 } 1149 res = ((d1 * con) + d2)/un; 1150 ret = printf("%1.3f", res); 1151 return(ret); 1152 } 1153 1154 static int 1155 dtlb_missstore(struct counters *cpu, int pos) 1156 { 1157 /* 1158 * ((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION) / 1159 * CPU_CLK_UNHALTED.THREAD_P (t >= .1) 1160 */ 1161 int ret; 1162 struct counters *dtsb_m, *dtsb_d; 1163 struct counters *unhalt; 1164 double con, un, d1, d2, res; 1165 1166 con = 7.0; 1167 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1168 dtsb_m = find_counter(cpu, "DTLB_STORE_MISSES.STLB_HIT"); 1169 dtsb_d = find_counter(cpu, "DTLB_STORE_MISSES.WALK_DURATION"); 1170 if (pos != -1) { 1171 d1 = dtsb_m->vals[pos] * 1.0; 1172 d2 = dtsb_d->vals[pos] * 1.0; 1173 un = unhalt->vals[pos] * 1.0; 1174 } else { 1175 d1 = dtsb_m->sum * 1.0; 1176 d2 = dtsb_d->sum * 1.0; 1177 un = unhalt->sum * 1.0; 1178 } 1179 res = ((d1 * con) + d2)/un; 1180 ret = printf("%1.3f", res); 1181 return(ret); 1182 } 1183 1184 static int 1185 itlb_miss(struct counters *cpu, int pos) 1186 { 1187 /* ITLB_MISSES.WALK_DURATION / CPU_CLK_UNTHREAD_P IB */ 1188 int ret; 1189 struct counters *itlb; 1190 struct counters *unhalt; 1191 double un, d1, res; 1192 1193 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1194 itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION"); 1195 if (pos != -1) { 1196 d1 = itlb->vals[pos] * 1.0; 1197 un = unhalt->vals[pos] * 1.0; 1198 } else { 1199 d1 = itlb->sum * 1.0; 1200 un = unhalt->sum * 1.0; 1201 } 1202 res = d1/un; 1203 ret = printf("%1.3f", res); 1204 return(ret); 1205 } 1206 1207 1208 static int 1209 itlb_miss_broad(struct counters *cpu, int pos) 1210 { 1211 /* (7 * ITLB_MISSES.STLB_HIT_4K + ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNTHREAD_P */ 1212 int ret; 1213 struct counters *itlb; 1214 struct counters *unhalt; 1215 struct counters *four_k; 1216 double un, d1, res, k; 1217 1218 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1219 itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION"); 1220 four_k = find_counter(cpu, "ITLB_MISSES.STLB_HIT_4K"); 1221 if (pos != -1) { 1222 d1 = itlb->vals[pos] * 1.0; 1223 un = unhalt->vals[pos] * 1.0; 1224 k = four_k->vals[pos] * 1.0; 1225 } else { 1226 d1 = itlb->sum * 1.0; 1227 un = unhalt->sum * 1.0; 1228 k = four_k->sum * 1.0; 1229 } 1230 res = (7.0 * k + d1)/un; 1231 ret = printf("%1.3f", res); 1232 return(ret); 1233 } 1234 1235 1236 static int 1237 icache_miss(struct counters *cpu, int pos) 1238 { 1239 /* (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P IB */ 1240 1241 int ret; 1242 struct counters *itlb, *icache; 1243 struct counters *unhalt; 1244 double un, d1, ic, res; 1245 1246 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1247 itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION"); 1248 icache = find_counter(cpu, "ICACHE.IFETCH_STALL"); 1249 if (pos != -1) { 1250 d1 = itlb->vals[pos] * 1.0; 1251 ic = icache->vals[pos] * 1.0; 1252 un = unhalt->vals[pos] * 1.0; 1253 } else { 1254 d1 = itlb->sum * 1.0; 1255 ic = icache->sum * 1.0; 1256 un = unhalt->sum * 1.0; 1257 } 1258 res = (ic-d1)/un; 1259 ret = printf("%1.3f", res); 1260 return(ret); 1261 1262 } 1263 1264 static int 1265 icache_miss_has(struct counters *cpu, int pos) 1266 { 1267 /* (36 * ICACHE.MISSES) / CPU_CLK_UNHALTED.THREAD_P */ 1268 1269 int ret; 1270 struct counters *icache; 1271 struct counters *unhalt; 1272 double un, con, ic, res; 1273 1274 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1275 icache = find_counter(cpu, "ICACHE.MISSES"); 1276 con = 36.0; 1277 if (pos != -1) { 1278 ic = icache->vals[pos] * 1.0; 1279 un = unhalt->vals[pos] * 1.0; 1280 } else { 1281 ic = icache->sum * 1.0; 1282 un = unhalt->sum * 1.0; 1283 } 1284 res = (con * ic)/un; 1285 ret = printf("%1.3f", res); 1286 return(ret); 1287 1288 } 1289 1290 static int 1291 lcp_stall(struct counters *cpu, int pos) 1292 { 1293 /* ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P IB */ 1294 int ret; 1295 struct counters *ild; 1296 struct counters *unhalt; 1297 double un, d1, res; 1298 1299 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1300 ild = find_counter(cpu, "ILD_STALL.LCP"); 1301 if (pos != -1) { 1302 d1 = ild->vals[pos] * 1.0; 1303 un = unhalt->vals[pos] * 1.0; 1304 } else { 1305 d1 = ild->sum * 1.0; 1306 un = unhalt->sum * 1.0; 1307 } 1308 res = d1/un; 1309 ret = printf("%1.3f", res); 1310 return(ret); 1311 1312 } 1313 1314 1315 static int 1316 frontendstall(struct counters *cpu, int pos) 1317 { 1318 /* 12 - IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4) (thresh >= .15) */ 1319 int ret; 1320 struct counters *idq; 1321 struct counters *unhalt; 1322 double con, un, id, res; 1323 1324 con = 4.0; 1325 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1326 idq = find_counter(cpu, "IDQ_UOPS_NOT_DELIVERED.CORE"); 1327 if (pos != -1) { 1328 id = idq->vals[pos] * 1.0; 1329 un = unhalt->vals[pos] * 1.0; 1330 } else { 1331 id = idq->sum * 1.0; 1332 un = unhalt->sum * 1.0; 1333 } 1334 res = id/(un * con); 1335 ret = printf("%1.3f", res); 1336 return(ret); 1337 } 1338 1339 static int 1340 clears(struct counters *cpu, int pos) 1341 { 1342 /* 13 - ((MACHINE_CLEARS.MEMORY_ORDERING + MACHINE_CLEARS.SMC + MACHINE_CLEARS.MASKMOV ) * 100 ) 1343 * / CPU_CLK_UNHALTED.THREAD_P (thresh >= .02)*/ 1344 1345 int ret; 1346 struct counters *clr1, *clr2, *clr3; 1347 struct counters *unhalt; 1348 double con, un, cl1, cl2, cl3, res; 1349 1350 con = 100.0; 1351 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1352 clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING"); 1353 clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC"); 1354 clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV"); 1355 1356 if (pos != -1) { 1357 cl1 = clr1->vals[pos] * 1.0; 1358 cl2 = clr2->vals[pos] * 1.0; 1359 cl3 = clr3->vals[pos] * 1.0; 1360 un = unhalt->vals[pos] * 1.0; 1361 } else { 1362 cl1 = clr1->sum * 1.0; 1363 cl2 = clr2->sum * 1.0; 1364 cl3 = clr3->sum * 1.0; 1365 un = unhalt->sum * 1.0; 1366 } 1367 res = ((cl1 + cl2 + cl3) * con)/un; 1368 ret = printf("%1.3f", res); 1369 return(ret); 1370 } 1371 1372 1373 1374 static int 1375 clears_broad(struct counters *cpu, int pos) 1376 { 1377 int ret; 1378 struct counters *clr1, *clr2, *clr3, *cyc; 1379 struct counters *unhalt; 1380 double con, un, cl1, cl2, cl3, cy, res; 1381 1382 con = 100.0; 1383 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1384 clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING"); 1385 clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC"); 1386 clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV"); 1387 cyc = find_counter(cpu, "MACHINE_CLEARS.CYCLES"); 1388 if (pos != -1) { 1389 cl1 = clr1->vals[pos] * 1.0; 1390 cl2 = clr2->vals[pos] * 1.0; 1391 cl3 = clr3->vals[pos] * 1.0; 1392 cy = cyc->vals[pos] * 1.0; 1393 un = unhalt->vals[pos] * 1.0; 1394 } else { 1395 cl1 = clr1->sum * 1.0; 1396 cl2 = clr2->sum * 1.0; 1397 cl3 = clr3->sum * 1.0; 1398 cy = cyc->sum * 1.0; 1399 un = unhalt->sum * 1.0; 1400 } 1401 /* Formula not listed but extrapulated to add the cy ?? */ 1402 res = ((cl1 + cl2 + cl3 + cy) * con)/un; 1403 ret = printf("%1.3f", res); 1404 return(ret); 1405 } 1406 1407 1408 1409 1410 1411 static int 1412 microassist(struct counters *cpu, int pos) 1413 { 1414 /* 14 - IDQ.MS_CYCLES / CPU_CLK_UNHALTED.THREAD_P (thresh > .05) */ 1415 int ret; 1416 struct counters *idq; 1417 struct counters *unhalt; 1418 double un, id, res, con; 1419 1420 con = 4.0; 1421 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1422 idq = find_counter(cpu, "IDQ.MS_UOPS"); 1423 if (pos != -1) { 1424 id = idq->vals[pos] * 1.0; 1425 un = unhalt->vals[pos] * 1.0; 1426 } else { 1427 id = idq->sum * 1.0; 1428 un = unhalt->sum * 1.0; 1429 } 1430 res = id/(un * con); 1431 ret = printf("%1.3f", res); 1432 return(ret); 1433 } 1434 1435 1436 static int 1437 microassist_broad(struct counters *cpu, int pos) 1438 { 1439 int ret; 1440 struct counters *idq; 1441 struct counters *unhalt; 1442 struct counters *uopiss; 1443 struct counters *uopret; 1444 double un, id, res, con, uoi, uor; 1445 1446 con = 4.0; 1447 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1448 idq = find_counter(cpu, "IDQ.MS_UOPS"); 1449 uopiss = find_counter(cpu, "UOPS_ISSUED.ANY"); 1450 uopret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS"); 1451 if (pos != -1) { 1452 id = idq->vals[pos] * 1.0; 1453 un = unhalt->vals[pos] * 1.0; 1454 uoi = uopiss->vals[pos] * 1.0; 1455 uor = uopret->vals[pos] * 1.0; 1456 } else { 1457 id = idq->sum * 1.0; 1458 un = unhalt->sum * 1.0; 1459 uoi = uopiss->sum * 1.0; 1460 uor = uopret->sum * 1.0; 1461 } 1462 res = (uor/uoi) * (id/(un * con)); 1463 ret = printf("%1.3f", res); 1464 return(ret); 1465 } 1466 1467 1468 static int 1469 aliasing(struct counters *cpu, int pos) 1470 { 1471 /* 15 - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */ 1472 int ret; 1473 struct counters *ld; 1474 struct counters *unhalt; 1475 double un, lds, con, res; 1476 1477 con = 5.0; 1478 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1479 ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS"); 1480 if (pos != -1) { 1481 lds = ld->vals[pos] * 1.0; 1482 un = unhalt->vals[pos] * 1.0; 1483 } else { 1484 lds = ld->sum * 1.0; 1485 un = unhalt->sum * 1.0; 1486 } 1487 res = (lds * con)/un; 1488 ret = printf("%1.3f", res); 1489 return(ret); 1490 } 1491 1492 static int 1493 aliasing_broad(struct counters *cpu, int pos) 1494 { 1495 /* 15 - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */ 1496 int ret; 1497 struct counters *ld; 1498 struct counters *unhalt; 1499 double un, lds, con, res; 1500 1501 con = 7.0; 1502 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1503 ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS"); 1504 if (pos != -1) { 1505 lds = ld->vals[pos] * 1.0; 1506 un = unhalt->vals[pos] * 1.0; 1507 } else { 1508 lds = ld->sum * 1.0; 1509 un = unhalt->sum * 1.0; 1510 } 1511 res = (lds * con)/un; 1512 ret = printf("%1.3f", res); 1513 return(ret); 1514 } 1515 1516 1517 static int 1518 fpassists(struct counters *cpu, int pos) 1519 { 1520 /* 16 - FP_ASSIST.ANY/INST_RETIRED.ANY_P */ 1521 int ret; 1522 struct counters *fp; 1523 struct counters *inst; 1524 double un, fpd, res; 1525 1526 inst = find_counter(cpu, "INST_RETIRED.ANY_P"); 1527 fp = find_counter(cpu, "FP_ASSIST.ANY"); 1528 if (pos != -1) { 1529 fpd = fp->vals[pos] * 1.0; 1530 un = inst->vals[pos] * 1.0; 1531 } else { 1532 fpd = fp->sum * 1.0; 1533 un = inst->sum * 1.0; 1534 } 1535 res = fpd/un; 1536 ret = printf("%1.3f", res); 1537 return(ret); 1538 } 1539 1540 static int 1541 otherassistavx(struct counters *cpu, int pos) 1542 { 1543 /* 17 - (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P thresh .1*/ 1544 int ret; 1545 struct counters *oth; 1546 struct counters *unhalt; 1547 double un, ot, con, res; 1548 1549 con = 75.0; 1550 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1551 oth = find_counter(cpu, "OTHER_ASSISTS.AVX_TO_SSE"); 1552 if (pos != -1) { 1553 ot = oth->vals[pos] * 1.0; 1554 un = unhalt->vals[pos] * 1.0; 1555 } else { 1556 ot = oth->sum * 1.0; 1557 un = unhalt->sum * 1.0; 1558 } 1559 res = (ot * con)/un; 1560 ret = printf("%1.3f", res); 1561 return(ret); 1562 } 1563 1564 static int 1565 otherassistsse(struct counters *cpu, int pos) 1566 { 1567 1568 int ret; 1569 struct counters *oth; 1570 struct counters *unhalt; 1571 double un, ot, con, res; 1572 1573 /* 18 (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P thresh .1*/ 1574 con = 75.0; 1575 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1576 oth = find_counter(cpu, "OTHER_ASSISTS.SSE_TO_AVX"); 1577 if (pos != -1) { 1578 ot = oth->vals[pos] * 1.0; 1579 un = unhalt->vals[pos] * 1.0; 1580 } else { 1581 ot = oth->sum * 1.0; 1582 un = unhalt->sum * 1.0; 1583 } 1584 res = (ot * con)/un; 1585 ret = printf("%1.3f", res); 1586 return(ret); 1587 } 1588 1589 static int 1590 efficiency1(struct counters *cpu, int pos) 1591 { 1592 1593 int ret; 1594 struct counters *uops; 1595 struct counters *unhalt; 1596 double un, ot, con, res; 1597 1598 /* 19 (UOPS_RETIRED.RETIRE_SLOTS/(4*CPU_CLK_UNHALTED.THREAD_P) look if thresh < .9*/ 1599 con = 4.0; 1600 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1601 uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS"); 1602 if (pos != -1) { 1603 ot = uops->vals[pos] * 1.0; 1604 un = unhalt->vals[pos] * 1.0; 1605 } else { 1606 ot = uops->sum * 1.0; 1607 un = unhalt->sum * 1.0; 1608 } 1609 res = ot/(con * un); 1610 ret = printf("%1.3f", res); 1611 return(ret); 1612 } 1613 1614 static int 1615 efficiency2(struct counters *cpu, int pos) 1616 { 1617 1618 int ret; 1619 struct counters *uops; 1620 struct counters *unhalt; 1621 double un, ot, res; 1622 1623 /* 20 - CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P good if > 1. (comp factor)*/ 1624 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1625 uops = find_counter(cpu, "INST_RETIRED.ANY_P"); 1626 if (pos != -1) { 1627 ot = uops->vals[pos] * 1.0; 1628 un = unhalt->vals[pos] * 1.0; 1629 } else { 1630 ot = uops->sum * 1.0; 1631 un = unhalt->sum * 1.0; 1632 } 1633 res = un/ot; 1634 ret = printf("%1.3f", res); 1635 return(ret); 1636 } 1637 1638 #define SANDY_BRIDGE_COUNT 20 1639 static struct cpu_entry sandy_bridge[SANDY_BRIDGE_COUNT] = { 1640 /*01*/ { "allocstall1", "thresh > .05", 1641 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW -w 1", 1642 allocstall1, 2 }, 1643 /* -- not defined for SB right (partial-rat_stalls) 02*/ 1644 { "allocstall2", "thresh > .05", 1645 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP -w 1", 1646 allocstall2, 2 }, 1647 /*03*/ { "br_miss", "thresh >= .2", 1648 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1", 1649 br_mispredict, 2 }, 1650 /*04*/ { "splitload", "thresh >= .1", 1651 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1", 1652 splitload_sb, 2 }, 1653 /* 05*/ { "splitstore", "thresh >= .01", 1654 "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1", 1655 splitstore_sb, 2 }, 1656 /*06*/ { "contested", "thresh >= .05", 1657 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1658 contested, 2 }, 1659 /*07*/ { "blockstorefwd", "thresh >= .05", 1660 "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1661 blockstoreforward, 2 }, 1662 /*08*/ { "cache2", "thresh >= .2", 1663 "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1664 cache2, 4 }, 1665 /*09*/ { "cache1", "thresh >= .2", 1666 "pmcstat -s MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1667 cache1, 2 }, 1668 /*10*/ { "dtlbmissload", "thresh >= .1", 1669 "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1670 dtlb_missload, 3 }, 1671 /*11*/ { "dtlbmissstore", "thresh >= .05", 1672 "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1673 dtlb_missstore, 3 }, 1674 /*12*/ { "frontendstall", "thresh >= .15", 1675 "pmcstat -s IDQ_UOPS_NOT_DELIVERED.CORE -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1676 frontendstall, 2 }, 1677 /*13*/ { "clears", "thresh >= .02", 1678 "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1679 clears, 4 }, 1680 /*14*/ { "microassist", "thresh >= .05", 1681 "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1682 microassist, 2 }, 1683 /*15*/ { "aliasing_4k", "thresh >= .1", 1684 "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1685 aliasing, 2 }, 1686 /*16*/ { "fpassist", "look for a excessive value", 1687 "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", 1688 fpassists, 2 }, 1689 /*17*/ { "otherassistavx", "look for a excessive value", 1690 "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1691 otherassistavx, 2}, 1692 /*18*/ { "otherassistsse", "look for a excessive value", 1693 "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1694 otherassistsse, 2 }, 1695 /*19*/ { "eff1", "thresh < .9", 1696 "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1697 efficiency1, 2 }, 1698 /*20*/ { "eff2", "thresh > 1.0", 1699 "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1700 efficiency2, 2 }, 1701 }; 1702 1703 1704 #define IVY_BRIDGE_COUNT 21 1705 static struct cpu_entry ivy_bridge[IVY_BRIDGE_COUNT] = { 1706 /*1*/ { "eff1", "thresh < .75", 1707 "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1708 efficiency1, 2 }, 1709 /*2*/ { "eff2", "thresh > 1.0", 1710 "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1711 efficiency2, 2 }, 1712 /*3*/ { "itlbmiss", "thresh > .05", 1713 "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1714 itlb_miss, 2 }, 1715 /*4*/ { "icachemiss", "thresh > .05", 1716 "pmcstat -s ICACHE.IFETCH_STALL -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1717 icache_miss, 3 }, 1718 /*5*/ { "lcpstall", "thresh > .05", 1719 "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1720 lcp_stall, 2 }, 1721 /*6*/ { "cache1", "thresh >= .2", 1722 "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1723 cache1ib, 2 }, 1724 /*7*/ { "cache2", "thresh >= .2", 1725 "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1726 cache2ib, 2 }, 1727 /*8*/ { "contested", "thresh >= .05", 1728 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1729 contested, 2 }, 1730 /*9*/ { "datashare", "thresh >= .05", 1731 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1732 datasharing, 2 }, 1733 /*10*/ { "blockstorefwd", "thresh >= .05", 1734 "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1735 blockstoreforward, 2 }, 1736 /*11*/ { "splitload", "thresh >= .1", 1737 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1", 1738 splitloadib, 4 }, 1739 /*12*/ { "splitstore", "thresh >= .01", 1740 "pmcstat -s MEM_UOPS_RETIRED.SPLIT_STORES -s MEM_UOPS_RETIRED.ALL_STORES -w 1", 1741 splitstore, 2 }, 1742 /*13*/ { "aliasing_4k", "thresh >= .1", 1743 "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1744 aliasing, 2 }, 1745 /*14*/ { "dtlbmissload", "thresh >= .1", 1746 "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1747 dtlb_missload , 3}, 1748 /*15*/ { "dtlbmissstore", "thresh >= .05", 1749 "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1750 dtlb_missstore, 3 }, 1751 /*16*/ { "br_miss", "thresh >= .2", 1752 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1", 1753 br_mispredictib, 8 }, 1754 /*17*/ { "clears", "thresh >= .02", 1755 "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1756 clears, 4 }, 1757 /*18*/ { "microassist", "thresh >= .05", 1758 "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1759 microassist, 2 }, 1760 /*19*/ { "fpassist", "look for a excessive value", 1761 "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", 1762 fpassists, 2 }, 1763 /*20*/ { "otherassistavx", "look for a excessive value", 1764 "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1765 otherassistavx , 2}, 1766 /*21*/ { "otherassistsse", "look for a excessive value", 1767 "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1768 otherassistsse, 2 }, 1769 }; 1770 1771 #define HASWELL_COUNT 20 1772 static struct cpu_entry haswell[HASWELL_COUNT] = { 1773 /*1*/ { "eff1", "thresh < .75", 1774 "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1775 efficiency1, 2 }, 1776 /*2*/ { "eff2", "thresh > 1.0", 1777 "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1778 efficiency2, 2 }, 1779 /*3*/ { "itlbmiss", "thresh > .05", 1780 "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1781 itlb_miss, 2 }, 1782 /*4*/ { "icachemiss", "thresh > .05", 1783 "pmcstat -s ICACHE.MISSES -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1784 icache_miss_has, 2 }, 1785 /*5*/ { "lcpstall", "thresh > .05", 1786 "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1787 lcp_stall, 2 }, 1788 /*6*/ { "cache1", "thresh >= .2", 1789 "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1790 cache1ib, 2 }, 1791 /*7*/ { "cache2", "thresh >= .2", 1792 "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1793 cache2has, 4 }, 1794 /*8*/ { "contested", "thresh >= .05", 1795 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1796 contested_has, 2 }, 1797 /*9*/ { "datashare", "thresh >= .05", 1798 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1799 datasharing_has, 2 }, 1800 /*10*/ { "blockstorefwd", "thresh >= .05", 1801 "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1802 blockstoreforward, 2 }, 1803 /*11*/ { "splitload", "thresh >= .1", 1804 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOPS_RETIRED.SPLIT_LOADS -w 1", 1805 splitload , 2}, 1806 /*12*/ { "splitstore", "thresh >= .01", 1807 "pmcstat -s MEM_UOPS_RETIRED.SPLIT_STORES -s MEM_UOPS_RETIRED.ALL_STORES -w 1", 1808 splitstore, 2 }, 1809 /*13*/ { "aliasing_4k", "thresh >= .1", 1810 "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1811 aliasing, 2 }, 1812 /*14*/ { "dtlbmissload", "thresh >= .1", 1813 "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1814 dtlb_missload, 3 }, 1815 /*15*/ { "br_miss", "thresh >= .2", 1816 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1", 1817 br_mispredict, 2 }, 1818 /*16*/ { "clears", "thresh >= .02", 1819 "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1820 clears, 4 }, 1821 /*17*/ { "microassist", "thresh >= .05", 1822 "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1823 microassist, 2 }, 1824 /*18*/ { "fpassist", "look for a excessive value", 1825 "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", 1826 fpassists, 2 }, 1827 /*19*/ { "otherassistavx", "look for a excessive value", 1828 "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1829 otherassistavx, 2 }, 1830 /*20*/ { "otherassistsse", "look for a excessive value", 1831 "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1832 otherassistsse, 2 }, 1833 }; 1834 1835 1836 static void 1837 explain_name_broad(const char *name) 1838 { 1839 const char *mythresh; 1840 if (strcmp(name, "eff1") == 0) { 1841 printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n"); 1842 mythresh = "thresh < .75"; 1843 } else if (strcmp(name, "eff2") == 0) { 1844 printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n"); 1845 mythresh = "thresh > 1.0"; 1846 } else if (strcmp(name, "itlbmiss") == 0) { 1847 printf("Examine (7 * ITLB_MISSES_STLB_HIT_4K + ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n"); 1848 mythresh = "thresh > .05"; 1849 } else if (strcmp(name, "icachemiss") == 0) { 1850 printf("Examine ( 36.0 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P ??? may not be right \n"); 1851 mythresh = "thresh > .05"; 1852 } else if (strcmp(name, "lcpstall") == 0) { 1853 printf("Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n"); 1854 mythresh = "thresh > .05"; 1855 } else if (strcmp(name, "cache1") == 0) { 1856 printf("Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n"); 1857 mythresh = "thresh >= .1"; 1858 } else if (strcmp(name, "cache2") == 0) { 1859 printf("Examine (36.0 * MEM_LOAD_UOPS_RETIRED.L3_HIT / CPU_CLK_UNHALTED.THREAD_P)\n"); 1860 mythresh = "thresh >= .2"; 1861 } else if (strcmp(name, "contested") == 0) { 1862 printf("Examine ((MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS)/ CPU_CLK_UNHALTED.THREAD_P\n"); 1863 mythresh = "thresh >= .05"; 1864 } else if (strcmp(name, "datashare") == 0) { 1865 printf("Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n"); 1866 mythresh = "thresh > .05"; 1867 } else if (strcmp(name, "blockstorefwd") == 0) { 1868 printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n"); 1869 mythresh = "thresh >= .05"; 1870 } else if (strcmp(name, "aliasing_4k") == 0) { 1871 printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 7) / CPU_CLK_UNHALTED.THREAD_P\n"); 1872 mythresh = "thresh >= .1"; 1873 } else if (strcmp(name, "dtlbmissload") == 0) { 1874 printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"); 1875 printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); 1876 mythresh = "thresh >= .1"; 1877 1878 } else if (strcmp(name, "br_miss") == 0) { 1879 printf("Examine BR_MISP_RETIRED.ALL_BRANCHS_PS / (BR_MISP_RETIED.ALL_BRANCHES_PS + MACHINE_CLEARS.COUNT) *\n"); 1880 printf(" (UOPS_ISSUEDF.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) /\n"); 1881 printf("CPU_CLK_UNHALTED.THREAD * 4)\n"); 1882 mythresh = "thresh >= .2"; 1883 } else if (strcmp(name, "clears") == 0) { 1884 printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"); 1885 printf(" MACHINE_CLEARS.SMC + \n"); 1886 printf(" MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n"); 1887 mythresh = "thresh >= .02"; 1888 } else if (strcmp(name, "fpassist") == 0) { 1889 printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n"); 1890 mythresh = "look for a excessive value"; 1891 } else if (strcmp(name, "otherassistavx") == 0) { 1892 printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 1893 mythresh = "look for a excessive value"; 1894 } else if (strcmp(name, "microassist") == 0) { 1895 printf("Examine (UOPS_RETIRED.RETIRE_SLOTS/UOPS_ISSUED.ANY) * (IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"); 1896 printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n"); 1897 mythresh = "thresh >= .05"; 1898 } else { 1899 printf("Unknown name:%s\n", name); 1900 mythresh = "unknown entry"; 1901 } 1902 printf("If the value printed is %s we may have the ability to improve performance\n", mythresh); 1903 } 1904 1905 1906 #define BROADWELL_COUNT 17 1907 static struct cpu_entry broadwell[BROADWELL_COUNT] = { 1908 /*1*/ { "eff1", "thresh < .75", 1909 "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1910 efficiency1, 2 }, 1911 /*2*/ { "eff2", "thresh > 1.0", 1912 "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1913 efficiency2, 2 }, 1914 /*3*/ { "itlbmiss", "thresh > .05", 1915 "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -s ITLB_MISSES.STLB_HIT_4K -w 1", 1916 itlb_miss_broad, 3 }, 1917 /*4*/ { "icachemiss", "thresh > .05", 1918 "pmcstat -s ICACHE.MISSES -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1919 icache_miss_has, 2 }, 1920 /*5*/ { "lcpstall", "thresh > .05", 1921 "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1922 lcp_stall, 2 }, 1923 /*6*/ { "cache1", "thresh >= .1", 1924 "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1925 cache1broad, 2 }, 1926 /*7*/ { "cache2", "thresh >= .2", 1927 "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1928 cache2broad, 2 }, 1929 /*8*/ { "contested", "thresh >= .05", 1930 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS -w 1", 1931 contestedbroad, 2 }, 1932 /*9*/ { "datashare", "thresh >= .05", 1933 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1934 datasharing_has, 2 }, 1935 /*10*/ { "blockstorefwd", "thresh >= .05", 1936 "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1937 blockstoreforward, 2 }, 1938 /*11*/ { "aliasing_4k", "thresh >= .1", 1939 "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1940 aliasing_broad, 2 }, 1941 /*12*/ { "dtlbmissload", "thresh >= .1", 1942 "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT_4K -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1943 dtlb_missload, 3 }, 1944 /*13*/ { "br_miss", "thresh >= .2", 1945 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.CYCLES -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1", 1946 br_mispredict_broad, 7 }, 1947 /*14*/ { "clears", "thresh >= .02", 1948 "pmcstat -s MACHINE_CLEARS.CYCLES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1949 clears_broad, 5 }, 1950 /*15*/ { "fpassist", "look for a excessive value", 1951 "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", 1952 fpassists, 2 }, 1953 /*16*/ { "otherassistavx", "look for a excessive value", 1954 "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1955 otherassistavx, 2 }, 1956 /*17*/ { "microassist", "thresh >= .2", 1957 "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -w 1", 1958 microassist_broad, 4 }, 1959 }; 1960 1961 1962 static void 1963 set_sandybridge(void) 1964 { 1965 strcpy(the_cpu.cputype, "SandyBridge PMC"); 1966 the_cpu.number = SANDY_BRIDGE_COUNT; 1967 the_cpu.ents = sandy_bridge; 1968 the_cpu.explain = explain_name_sb; 1969 } 1970 1971 static void 1972 set_ivybridge(void) 1973 { 1974 strcpy(the_cpu.cputype, "IvyBridge PMC"); 1975 the_cpu.number = IVY_BRIDGE_COUNT; 1976 the_cpu.ents = ivy_bridge; 1977 the_cpu.explain = explain_name_ib; 1978 } 1979 1980 1981 static void 1982 set_haswell(void) 1983 { 1984 strcpy(the_cpu.cputype, "HASWELL PMC"); 1985 the_cpu.number = HASWELL_COUNT; 1986 the_cpu.ents = haswell; 1987 the_cpu.explain = explain_name_has; 1988 } 1989 1990 1991 static void 1992 set_broadwell(void) 1993 { 1994 strcpy(the_cpu.cputype, "HASWELL PMC"); 1995 the_cpu.number = BROADWELL_COUNT; 1996 the_cpu.ents = broadwell; 1997 the_cpu.explain = explain_name_broad; 1998 } 1999 2000 2001 static int 2002 set_expression(const char *name) 2003 { 2004 int found = 0, i; 2005 for(i=0 ; i< the_cpu.number; i++) { 2006 if (strcmp(name, the_cpu.ents[i].name) == 0) { 2007 found = 1; 2008 expression = the_cpu.ents[i].func; 2009 command = the_cpu.ents[i].command; 2010 threshold = the_cpu.ents[i].thresh; 2011 if (the_cpu.ents[i].counters_required > max_pmc_counters) { 2012 printf("Test %s requires that the CPU have %d counters and this CPU has only %d\n", 2013 the_cpu.ents[i].name, 2014 the_cpu.ents[i].counters_required, max_pmc_counters); 2015 printf("Sorry this test can not be run\n"); 2016 if (run_all == 0) { 2017 exit(-1); 2018 } else { 2019 return(-1); 2020 } 2021 } 2022 break; 2023 } 2024 } 2025 if (!found) { 2026 printf("For CPU type %s we have no expression:%s\n", 2027 the_cpu.cputype, name); 2028 exit(-1); 2029 } 2030 return(0); 2031 } 2032 2033 2034 2035 2036 2037 static int 2038 validate_expression(char *name) 2039 { 2040 int i, found; 2041 2042 found = 0; 2043 for(i=0 ; i< the_cpu.number; i++) { 2044 if (strcmp(name, the_cpu.ents[i].name) == 0) { 2045 found = 1; 2046 break; 2047 } 2048 } 2049 if (!found) { 2050 return(-1); 2051 } 2052 return (0); 2053 } 2054 2055 static void 2056 do_expression(struct counters *cpu, int pos) 2057 { 2058 if (expression == NULL) 2059 return; 2060 (*expression)(cpu, pos); 2061 } 2062 2063 static void 2064 process_header(int idx, char *p) 2065 { 2066 struct counters *up; 2067 int i, len, nlen; 2068 /* 2069 * Given header element idx, at p in 2070 * form 's/NN/nameof' 2071 * process the entry to pull out the name and 2072 * the CPU number. 2073 */ 2074 if (strncmp(p, "s/", 2)) { 2075 printf("Check -- invalid header no s/ in %s\n", 2076 p); 2077 return; 2078 } 2079 up = &cnts[idx]; 2080 up->cpu = strtol(&p[2], NULL, 10); 2081 len = strlen(p); 2082 for (i=2; i<len; i++) { 2083 if (p[i] == '/') { 2084 nlen = strlen(&p[(i+1)]); 2085 if (nlen < (MAX_NLEN-1)) { 2086 strcpy(up->counter_name, &p[(i+1)]); 2087 } else { 2088 strncpy(up->counter_name, &p[(i+1)], (MAX_NLEN-1)); 2089 } 2090 } 2091 } 2092 } 2093 2094 static void 2095 build_counters_from_header(FILE *io) 2096 { 2097 char buffer[8192], *p; 2098 int i, len, cnt; 2099 size_t mlen; 2100 2101 /* We have a new start, lets 2102 * setup our headers and cpus. 2103 */ 2104 if (fgets(buffer, sizeof(buffer), io) == NULL) { 2105 printf("First line can't be read from file err:%d\n", errno); 2106 return; 2107 } 2108 /* 2109 * Ok output is an array of counters. Once 2110 * we start to read the values in we must 2111 * put them in there slot to match there CPU and 2112 * counter being updated. We create a mass array 2113 * of the counters, filling in the CPU and 2114 * counter name. 2115 */ 2116 /* How many do we get? */ 2117 len = strlen(buffer); 2118 for (i=0, cnt=0; i<len; i++) { 2119 if (strncmp(&buffer[i], "s/", 2) == 0) { 2120 cnt++; 2121 for(;i<len;i++) { 2122 if (buffer[i] == ' ') 2123 break; 2124 } 2125 } 2126 } 2127 mlen = sizeof(struct counters) * cnt; 2128 cnts = malloc(mlen); 2129 ncnts = cnt; 2130 if (cnts == NULL) { 2131 printf("No memory err:%d\n", errno); 2132 return; 2133 } 2134 memset(cnts, 0, mlen); 2135 for (i=0, cnt=0; i<len; i++) { 2136 if (strncmp(&buffer[i], "s/", 2) == 0) { 2137 p = &buffer[i]; 2138 for(;i<len;i++) { 2139 if (buffer[i] == ' ') { 2140 buffer[i] = 0; 2141 break; 2142 } 2143 } 2144 process_header(cnt, p); 2145 cnt++; 2146 } 2147 } 2148 if (verbose) 2149 printf("We have %d entries\n", cnt); 2150 } 2151 extern int max_to_collect; 2152 int max_to_collect = MAX_COUNTER_SLOTS; 2153 2154 static int 2155 read_a_line(FILE *io) 2156 { 2157 char buffer[8192], *p, *stop; 2158 int pos, i; 2159 2160 if (fgets(buffer, sizeof(buffer), io) == NULL) { 2161 return(0); 2162 } 2163 p = buffer; 2164 for (i=0; i<ncnts; i++) { 2165 pos = cnts[i].pos; 2166 cnts[i].vals[pos] = strtol(p, &stop, 0); 2167 cnts[i].pos++; 2168 cnts[i].sum += cnts[i].vals[pos]; 2169 p = stop; 2170 } 2171 return (1); 2172 } 2173 2174 extern int cpu_count_out; 2175 int cpu_count_out=0; 2176 2177 static void 2178 print_header(void) 2179 { 2180 int i, cnt, printed_cnt; 2181 2182 printf("*********************************\n"); 2183 for(i=0, cnt=0; i<MAX_CPU; i++) { 2184 if (glob_cpu[i]) { 2185 cnt++; 2186 } 2187 } 2188 cpu_count_out = cnt; 2189 for(i=0, printed_cnt=0; i<MAX_CPU; i++) { 2190 if (glob_cpu[i]) { 2191 printf("CPU%d", i); 2192 printed_cnt++; 2193 } 2194 if (printed_cnt == cnt) { 2195 printf("\n"); 2196 break; 2197 } else { 2198 printf("\t"); 2199 } 2200 } 2201 } 2202 2203 static void 2204 lace_cpus_together(void) 2205 { 2206 int i, j, lace_cpu; 2207 struct counters *cpat, *at; 2208 2209 for(i=0; i<ncnts; i++) { 2210 cpat = &cnts[i]; 2211 if (cpat->next_cpu) { 2212 /* Already laced in */ 2213 continue; 2214 } 2215 lace_cpu = cpat->cpu; 2216 if (lace_cpu >= MAX_CPU) { 2217 printf("CPU %d to big\n", lace_cpu); 2218 continue; 2219 } 2220 if (glob_cpu[lace_cpu] == NULL) { 2221 glob_cpu[lace_cpu] = cpat; 2222 } else { 2223 /* Already processed this cpu */ 2224 continue; 2225 } 2226 /* Ok look forward for cpu->cpu and link in */ 2227 for(j=(i+1); j<ncnts; j++) { 2228 at = &cnts[j]; 2229 if (at->next_cpu) { 2230 continue; 2231 } 2232 if (at->cpu == lace_cpu) { 2233 /* Found one */ 2234 cpat->next_cpu = at; 2235 cpat = at; 2236 } 2237 } 2238 } 2239 } 2240 2241 2242 static void 2243 process_file(char *filename) 2244 { 2245 FILE *io; 2246 int i; 2247 int line_at, not_done; 2248 pid_t pid_of_command=0; 2249 2250 if (filename == NULL) { 2251 io = my_popen(command, "r", &pid_of_command); 2252 if (io == NULL) { 2253 printf("Can't popen the command %s\n", command); 2254 return; 2255 } 2256 } else { 2257 io = fopen(filename, "r"); 2258 if (io == NULL) { 2259 printf("Can't process file %s err:%d\n", 2260 filename, errno); 2261 return; 2262 } 2263 } 2264 build_counters_from_header(io); 2265 if (cnts == NULL) { 2266 /* Nothing we can do */ 2267 printf("Nothing to do -- no counters built\n"); 2268 if (filename) { 2269 fclose(io); 2270 } else { 2271 my_pclose(io, pid_of_command); 2272 } 2273 return; 2274 } 2275 lace_cpus_together(); 2276 print_header(); 2277 if (verbose) { 2278 for (i=0; i<ncnts; i++) { 2279 printf("Counter:%s cpu:%d index:%d\n", 2280 cnts[i].counter_name, 2281 cnts[i].cpu, i); 2282 } 2283 } 2284 line_at = 0; 2285 not_done = 1; 2286 while(not_done) { 2287 if (read_a_line(io)) { 2288 line_at++; 2289 } else { 2290 break; 2291 } 2292 if (line_at >= max_to_collect) { 2293 not_done = 0; 2294 } 2295 if (filename == NULL) { 2296 int cnt; 2297 /* For the ones we dynamically open we print now */ 2298 for(i=0, cnt=0; i<MAX_CPU; i++) { 2299 do_expression(glob_cpu[i], (line_at-1)); 2300 cnt++; 2301 if (cnt == cpu_count_out) { 2302 printf("\n"); 2303 break; 2304 } else { 2305 printf("\t"); 2306 } 2307 } 2308 } 2309 } 2310 if (filename) { 2311 fclose(io); 2312 } else { 2313 my_pclose(io, pid_of_command); 2314 } 2315 } 2316 #if defined(__amd64__) 2317 #define cpuid(in,a,b,c,d)\ 2318 asm("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (in)); 2319 2320 static __inline void 2321 do_cpuid(u_int ax, u_int cx, u_int *p) 2322 { 2323 __asm __volatile("cpuid" 2324 : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3]) 2325 : "0" (ax), "c" (cx) ); 2326 } 2327 2328 #else 2329 #define cpuid(in, a, b, c, d) 2330 #define do_cpuid(ax, cx, p) 2331 #endif 2332 2333 static void 2334 get_cpuid_set(void) 2335 { 2336 unsigned long eax, ebx, ecx, edx; 2337 int model; 2338 pid_t pid_of_command=0; 2339 size_t sz, len; 2340 FILE *io; 2341 char linebuf[1024], *str; 2342 u_int reg[4]; 2343 2344 eax = ebx = ecx = edx = 0; 2345 2346 cpuid(0, eax, ebx, ecx, edx); 2347 if (ebx == 0x68747541) { 2348 printf("AMD processors are not supported by this program\n"); 2349 printf("Sorry\n"); 2350 exit(0); 2351 } else if (ebx == 0x6972794) { 2352 printf("Cyrix processors are not supported by this program\n"); 2353 printf("Sorry\n"); 2354 exit(0); 2355 } else if (ebx == 0x756e6547) { 2356 printf("Genuine Intel\n"); 2357 } else { 2358 printf("Unknown processor type 0x%lx Only Intel AMD64 types are supported by this routine!\n", ebx); 2359 exit(0); 2360 } 2361 cpuid(1, eax, ebx, ecx, edx); 2362 model = (((eax & 0xF0000) >> 12) | ((eax & 0xF0) >> 4)); 2363 printf("CPU model is 0x%x id:0x%lx\n", model, eax); 2364 switch (eax & 0xF00) { 2365 case 0x500: /* Pentium family processors */ 2366 printf("Intel Pentium P5\n"); 2367 goto not_supported; 2368 break; 2369 case 0x600: /* Pentium Pro, Celeron, Pentium II & III */ 2370 switch (model) { 2371 case 0x1: 2372 printf("Intel Pentium P6\n"); 2373 goto not_supported; 2374 break; 2375 case 0x3: 2376 case 0x5: 2377 printf("Intel PII\n"); 2378 goto not_supported; 2379 break; 2380 case 0x6: case 0x16: 2381 printf("Intel CL\n"); 2382 goto not_supported; 2383 break; 2384 case 0x7: case 0x8: case 0xA: case 0xB: 2385 printf("Intel PIII\n"); 2386 goto not_supported; 2387 break; 2388 case 0x9: case 0xD: 2389 printf("Intel PM\n"); 2390 goto not_supported; 2391 break; 2392 case 0xE: 2393 printf("Intel CORE\n"); 2394 goto not_supported; 2395 break; 2396 case 0xF: 2397 printf("Intel CORE2\n"); 2398 goto not_supported; 2399 break; 2400 case 0x17: 2401 printf("Intel CORE2EXTREME\n"); 2402 goto not_supported; 2403 break; 2404 case 0x1C: /* Per Intel document 320047-002. */ 2405 printf("Intel ATOM\n"); 2406 goto not_supported; 2407 break; 2408 case 0x1A: 2409 case 0x1E: /* 2410 * Per Intel document 253669-032 9/2009, 2411 * pages A-2 and A-57 2412 */ 2413 case 0x1F: /* 2414 * Per Intel document 253669-032 9/2009, 2415 * pages A-2 and A-57 2416 */ 2417 printf("Intel COREI7\n"); 2418 goto not_supported; 2419 break; 2420 case 0x2E: 2421 printf("Intel NEHALEM\n"); 2422 goto not_supported; 2423 break; 2424 case 0x25: /* Per Intel document 253669-033US 12/2009. */ 2425 case 0x2C: /* Per Intel document 253669-033US 12/2009. */ 2426 printf("Intel WESTMERE\n"); 2427 goto not_supported; 2428 break; 2429 case 0x2F: /* Westmere-EX, seen in wild */ 2430 printf("Intel WESTMERE\n"); 2431 goto not_supported; 2432 break; 2433 case 0x2A: /* Per Intel document 253669-039US 05/2011. */ 2434 printf("Intel SANDYBRIDGE\n"); 2435 set_sandybridge(); 2436 break; 2437 case 0x2D: /* Per Intel document 253669-044US 08/2012. */ 2438 printf("Intel SANDYBRIDGE_XEON\n"); 2439 set_sandybridge(); 2440 break; 2441 case 0x3A: /* Per Intel document 253669-043US 05/2012. */ 2442 printf("Intel IVYBRIDGE\n"); 2443 set_ivybridge(); 2444 break; 2445 case 0x3E: /* Per Intel document 325462-045US 01/2013. */ 2446 printf("Intel IVYBRIDGE_XEON\n"); 2447 set_ivybridge(); 2448 break; 2449 case 0x3F: /* Per Intel document 325462-045US 09/2014. */ 2450 printf("Intel HASWELL (Xeon)\n"); 2451 set_haswell(); 2452 break; 2453 case 0x3C: /* Per Intel document 325462-045US 01/2013. */ 2454 case 0x45: 2455 case 0x46: 2456 printf("Intel HASWELL\n"); 2457 set_haswell(); 2458 break; 2459 2460 case 0x4e: 2461 case 0x5e: 2462 printf("Intel SKY-LAKE\n"); 2463 goto not_supported; 2464 break; 2465 case 0x3D: 2466 case 0x47: 2467 printf("Intel BROADWELL\n"); 2468 set_broadwell(); 2469 break; 2470 case 0x4f: 2471 case 0x56: 2472 printf("Intel BROADWEL (Xeon)\n"); 2473 set_broadwell(); 2474 break; 2475 2476 case 0x4D: 2477 /* Per Intel document 330061-001 01/2014. */ 2478 printf("Intel ATOM_SILVERMONT\n"); 2479 goto not_supported; 2480 break; 2481 default: 2482 printf("Intel model 0x%x is not known -- sorry\n", 2483 model); 2484 goto not_supported; 2485 break; 2486 } 2487 break; 2488 case 0xF00: /* P4 */ 2489 printf("Intel unknown model %d\n", model); 2490 goto not_supported; 2491 break; 2492 } 2493 do_cpuid(0xa, 0, reg); 2494 max_pmc_counters = (reg[3] & 0x0000000f) + 1; 2495 printf("We have %d PMC counters to work with\n", max_pmc_counters); 2496 /* Ok lets load the list of all known PMC's */ 2497 io = my_popen("/usr/sbin/pmccontrol -L", "r", &pid_of_command); 2498 if (valid_pmcs == NULL) { 2499 /* Likely */ 2500 pmc_allocated_cnt = PMC_INITIAL_ALLOC; 2501 sz = sizeof(char *) * pmc_allocated_cnt; 2502 valid_pmcs = malloc(sz); 2503 if (valid_pmcs == NULL) { 2504 printf("No memory allocation fails at startup?\n"); 2505 exit(-1); 2506 } 2507 memset(valid_pmcs, 0, sz); 2508 } 2509 2510 while (fgets(linebuf, sizeof(linebuf), io) != NULL) { 2511 if (linebuf[0] != '\t') { 2512 /* sometimes headers ;-) */ 2513 continue; 2514 } 2515 len = strlen(linebuf); 2516 if (linebuf[(len-1)] == '\n') { 2517 /* Likely */ 2518 linebuf[(len-1)] = 0; 2519 } 2520 str = &linebuf[1]; 2521 len = strlen(str) + 1; 2522 valid_pmcs[valid_pmc_cnt] = malloc(len); 2523 if (valid_pmcs[valid_pmc_cnt] == NULL) { 2524 printf("No memory2 allocation fails at startup?\n"); 2525 exit(-1); 2526 } 2527 memset(valid_pmcs[valid_pmc_cnt], 0, len); 2528 strcpy(valid_pmcs[valid_pmc_cnt], str); 2529 valid_pmc_cnt++; 2530 if (valid_pmc_cnt >= pmc_allocated_cnt) { 2531 /* Got to expand -- unlikely */ 2532 char **more; 2533 2534 sz = sizeof(char *) * (pmc_allocated_cnt * 2); 2535 more = malloc(sz); 2536 if (more == NULL) { 2537 printf("No memory3 allocation fails at startup?\n"); 2538 exit(-1); 2539 } 2540 memset(more, 0, sz); 2541 memcpy(more, valid_pmcs, sz); 2542 pmc_allocated_cnt *= 2; 2543 free(valid_pmcs); 2544 valid_pmcs = more; 2545 } 2546 } 2547 my_pclose(io, pid_of_command); 2548 return; 2549 not_supported: 2550 printf("Not supported\n"); 2551 exit(-1); 2552 } 2553 2554 static void 2555 explain_all(void) 2556 { 2557 int i; 2558 printf("For CPU's of type %s the following expressions are available:\n",the_cpu.cputype); 2559 printf("-------------------------------------------------------------\n"); 2560 for(i=0; i<the_cpu.number; i++){ 2561 printf("For -e %s ", the_cpu.ents[i].name); 2562 (*the_cpu.explain)(the_cpu.ents[i].name); 2563 printf("----------------------------\n"); 2564 } 2565 } 2566 2567 static void 2568 test_for_a_pmc(const char *pmc, int out_so_far) 2569 { 2570 FILE *io; 2571 pid_t pid_of_command=0; 2572 char my_command[1024]; 2573 char line[1024]; 2574 char resp[1024]; 2575 int len, llen, i; 2576 2577 if (out_so_far < 50) { 2578 len = 50 - out_so_far; 2579 for(i=0; i<len; i++) { 2580 printf(" "); 2581 } 2582 } 2583 sprintf(my_command, "/usr/sbin/pmcstat -w .25 -c 0 -s %s", pmc); 2584 io = my_popen(my_command, "r", &pid_of_command); 2585 if (io == NULL) { 2586 printf("Failed -- popen fails\n"); 2587 return; 2588 } 2589 /* Setup what we expect */ 2590 len = sprintf(resp, "%s", pmc); 2591 if (fgets(line, sizeof(line), io) == NULL) { 2592 printf("Failed -- no output from pmstat\n"); 2593 goto out; 2594 } 2595 llen = strlen(line); 2596 if (line[(llen-1)] == '\n') { 2597 line[(llen-1)] = 0; 2598 llen--; 2599 } 2600 for(i=2; i<(llen-len); i++) { 2601 if (strncmp(&line[i], "ERROR", 5) == 0) { 2602 printf("Failed %s\n", line); 2603 goto out; 2604 } else if (strncmp(&line[i], resp, len) == 0) { 2605 int j, k; 2606 2607 if (fgets(line, sizeof(line), io) == NULL) { 2608 printf("Failed -- no second output from pmstat\n"); 2609 goto out; 2610 } 2611 len = strlen(line); 2612 for (j=0; j<len; j++) { 2613 if (line[j] == ' ') { 2614 j++; 2615 } else { 2616 break; 2617 } 2618 } 2619 printf("Pass"); 2620 len = strlen(&line[j]); 2621 if (len < 20) { 2622 for(k=0; k<(20-len); k++) { 2623 printf(" "); 2624 } 2625 } 2626 if (len) { 2627 printf("%s", &line[j]); 2628 } else { 2629 printf("\n"); 2630 } 2631 goto out; 2632 } 2633 } 2634 printf("Failed -- '%s' not '%s'\n", line, resp); 2635 out: 2636 my_pclose(io, pid_of_command); 2637 2638 } 2639 2640 static int 2641 add_it_to(char **vars, int cur_cnt, char *name) 2642 { 2643 int i; 2644 size_t len; 2645 for(i=0; i<cur_cnt; i++) { 2646 if (strcmp(vars[i], name) == 0) { 2647 /* Already have */ 2648 return(0); 2649 } 2650 } 2651 if (vars[cur_cnt] != NULL) { 2652 printf("Cur_cnt:%d filled with %s??\n", 2653 cur_cnt, vars[cur_cnt]); 2654 exit(-1); 2655 } 2656 /* Ok its new */ 2657 len = strlen(name) + 1; 2658 vars[cur_cnt] = malloc(len); 2659 if (vars[cur_cnt] == NULL) { 2660 printf("No memory %s\n", __FUNCTION__); 2661 exit(-1); 2662 } 2663 memset(vars[cur_cnt], 0, len); 2664 strcpy(vars[cur_cnt], name); 2665 return(1); 2666 } 2667 2668 static char * 2669 build_command_for_exp(struct expression *exp) 2670 { 2671 /* 2672 * Build the pmcstat command to handle 2673 * the passed in expression. 2674 * /usr/sbin/pmcstat -w 1 -s NNN -s QQQ 2675 * where NNN and QQQ represent the PMC's in the expression 2676 * uniquely.. 2677 */ 2678 char forming[1024]; 2679 int cnt_pmc, alloced_pmcs, i; 2680 struct expression *at; 2681 char **vars, *cmd; 2682 size_t mal; 2683 2684 alloced_pmcs = cnt_pmc = 0; 2685 /* first how many do we have */ 2686 at = exp; 2687 while (at) { 2688 if (at->type == TYPE_VALUE_PMC) { 2689 cnt_pmc++; 2690 } 2691 at = at->next; 2692 } 2693 if (cnt_pmc == 0) { 2694 printf("No PMC's in your expression -- nothing to do!!\n"); 2695 exit(0); 2696 } 2697 mal = cnt_pmc * sizeof(char *); 2698 vars = malloc(mal); 2699 if (vars == NULL) { 2700 printf("No memory\n"); 2701 exit(-1); 2702 } 2703 memset(vars, 0, mal); 2704 at = exp; 2705 while (at) { 2706 if (at->type == TYPE_VALUE_PMC) { 2707 if(add_it_to(vars, alloced_pmcs, at->name)) { 2708 alloced_pmcs++; 2709 } 2710 } 2711 at = at->next; 2712 } 2713 /* Now we have a unique list in vars so create our command */ 2714 mal = 23; /* "/usr/sbin/pmcstat -w 1" + \0 */ 2715 for(i=0; i<alloced_pmcs; i++) { 2716 mal += strlen(vars[i]) + 4; /* var + " -s " */ 2717 } 2718 cmd = malloc((mal+2)); 2719 if (cmd == NULL) { 2720 printf("%s out of mem\n", __FUNCTION__); 2721 exit(-1); 2722 } 2723 memset(cmd, 0, (mal+2)); 2724 strcpy(cmd, "/usr/sbin/pmcstat -w 1"); 2725 at = exp; 2726 for(i=0; i<alloced_pmcs; i++) { 2727 sprintf(forming, " -s %s", vars[i]); 2728 strcat(cmd, forming); 2729 free(vars[i]); 2730 vars[i] = NULL; 2731 } 2732 free(vars); 2733 return(cmd); 2734 } 2735 2736 static int 2737 user_expr(struct counters *cpu, int pos) 2738 { 2739 int ret; 2740 double res; 2741 struct counters *var; 2742 struct expression *at; 2743 2744 at = master_exp; 2745 while (at) { 2746 if (at->type == TYPE_VALUE_PMC) { 2747 var = find_counter(cpu, at->name); 2748 if (var == NULL) { 2749 printf("%s:Can't find counter %s?\n", __FUNCTION__, at->name); 2750 exit(-1); 2751 } 2752 if (pos != -1) { 2753 at->value = var->vals[pos] * 1.0; 2754 } else { 2755 at->value = var->sum * 1.0; 2756 } 2757 } 2758 at = at->next; 2759 } 2760 res = run_expr(master_exp, 1, NULL); 2761 ret = printf("%1.3f", res); 2762 return(ret); 2763 } 2764 2765 2766 static void 2767 set_manual_exp(struct expression *exp) 2768 { 2769 expression = user_expr; 2770 command = build_command_for_exp(exp); 2771 threshold = "User defined threshold"; 2772 } 2773 2774 static void 2775 run_tests(void) 2776 { 2777 int i, lenout; 2778 printf("Running tests on %d PMC's this may take some time\n", valid_pmc_cnt); 2779 printf("------------------------------------------------------------------------\n"); 2780 for(i=0; i<valid_pmc_cnt; i++) { 2781 lenout = printf("%s", valid_pmcs[i]); 2782 fflush(stdout); 2783 test_for_a_pmc(valid_pmcs[i], lenout); 2784 } 2785 } 2786 static void 2787 list_all(void) 2788 { 2789 int i, cnt, j; 2790 printf("PMC Abbreviation\n"); 2791 printf("--------------------------------------------------------------\n"); 2792 for(i=0; i<valid_pmc_cnt; i++) { 2793 cnt = printf("%s", valid_pmcs[i]); 2794 for(j=cnt; j<52; j++) { 2795 printf(" "); 2796 } 2797 printf("%%%d\n", i); 2798 } 2799 } 2800 2801 2802 int 2803 main(int argc, char **argv) 2804 { 2805 int i, j, cnt; 2806 char *filename=NULL; 2807 const char *name=NULL; 2808 int help_only = 0; 2809 int test_mode = 0; 2810 int test_at = 0; 2811 2812 get_cpuid_set(); 2813 memset(glob_cpu, 0, sizeof(glob_cpu)); 2814 while ((i = getopt(argc, argv, "ALHhvm:i:?e:TE:")) != -1) { 2815 switch (i) { 2816 case 'A': 2817 run_all = 1; 2818 break; 2819 case 'L': 2820 list_all(); 2821 return(0); 2822 case 'H': 2823 printf("**********************************\n"); 2824 explain_all(); 2825 printf("**********************************\n"); 2826 return(0); 2827 break; 2828 case 'T': 2829 test_mode = 1; 2830 break; 2831 case 'E': 2832 master_exp = parse_expression(optarg); 2833 if (master_exp) { 2834 set_manual_exp(master_exp); 2835 } 2836 break; 2837 case 'e': 2838 if (validate_expression(optarg)) { 2839 printf("Unknown expression %s\n", optarg); 2840 return(0); 2841 } 2842 name = optarg; 2843 set_expression(optarg); 2844 break; 2845 case 'm': 2846 max_to_collect = strtol(optarg, NULL, 0); 2847 if (max_to_collect > MAX_COUNTER_SLOTS) { 2848 /* You can't collect more than max in array */ 2849 max_to_collect = MAX_COUNTER_SLOTS; 2850 } 2851 break; 2852 case 'v': 2853 verbose++; 2854 break; 2855 case 'h': 2856 help_only = 1; 2857 break; 2858 case 'i': 2859 filename = optarg; 2860 break; 2861 case '?': 2862 default: 2863 use: 2864 printf("Use %s [ -i inputfile -v -m max_to_collect -e expr -E -h -? -H]\n", 2865 argv[0]); 2866 printf("-i inputfile -- use source as inputfile not stdin (if stdin collect)\n"); 2867 printf("-v -- verbose dump debug type things -- you don't want this\n"); 2868 printf("-m N -- maximum to collect is N measurements\n"); 2869 printf("-e expr-name -- Do expression expr-name\n"); 2870 printf("-E 'your expression' -- Do your expression\n"); 2871 printf("-h -- Don't do the expression I put in -e xxx just explain what it does and exit\n"); 2872 printf("-H -- Don't run anything, just explain all canned expressions\n"); 2873 printf("-T -- Test all PMC's defined by this processor\n"); 2874 printf("-A -- Run all canned tests\n"); 2875 return(0); 2876 break; 2877 } 2878 } 2879 if ((run_all == 0) && (name == NULL) && (filename == NULL) && 2880 (test_mode == 0) && (master_exp == NULL)) { 2881 printf("Without setting an expression we cannot dynamically gather information\n"); 2882 printf("you must supply a filename (and you probably want verbosity)\n"); 2883 goto use; 2884 } 2885 if (run_all && max_to_collect > 10) { 2886 max_to_collect = 3; 2887 } 2888 if (test_mode) { 2889 run_tests(); 2890 return(0); 2891 } 2892 printf("*********************************\n"); 2893 if ((master_exp == NULL) && name) { 2894 (*the_cpu.explain)(name); 2895 } else if (master_exp) { 2896 printf("Examine your expression "); 2897 print_exp(master_exp); 2898 printf("User defined threshold\n"); 2899 } 2900 if (help_only) { 2901 return(0); 2902 } 2903 if (run_all) { 2904 more: 2905 name = the_cpu.ents[test_at].name; 2906 printf("***Test %s (threshold %s)****\n", name, the_cpu.ents[test_at].thresh); 2907 test_at++; 2908 if (set_expression(name) == -1) { 2909 if (test_at >= the_cpu.number) { 2910 goto done; 2911 } else 2912 goto more; 2913 } 2914 2915 } 2916 process_file(filename); 2917 if (verbose >= 2) { 2918 for (i=0; i<ncnts; i++) { 2919 printf("Counter:%s cpu:%d index:%d\n", 2920 cnts[i].counter_name, 2921 cnts[i].cpu, i); 2922 for(j=0; j<cnts[i].pos; j++) { 2923 printf(" val - %ld\n", (long int)cnts[i].vals[j]); 2924 } 2925 printf(" sum - %ld\n", (long int)cnts[i].sum); 2926 } 2927 } 2928 if (expression == NULL) { 2929 return(0); 2930 } 2931 if (max_to_collect > 1) { 2932 for(i=0, cnt=0; i<MAX_CPU; i++) { 2933 if (glob_cpu[i]) { 2934 do_expression(glob_cpu[i], -1); 2935 cnt++; 2936 if (cnt == cpu_count_out) { 2937 printf("\n"); 2938 break; 2939 } else { 2940 printf("\t"); 2941 } 2942 } 2943 } 2944 } 2945 if (run_all && (test_at < the_cpu.number)) { 2946 memset(glob_cpu, 0, sizeof(glob_cpu)); 2947 ncnts = 0; 2948 printf("*********************************\n"); 2949 goto more; 2950 } else if (run_all) { 2951 done: 2952 printf("*********************************\n"); 2953 } 2954 return(0); 2955 } 2956