1 /*- 2 * Copyright (c) 2014, 2015 Netflix Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer, 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 #include <sys/types.h> 29 #include <stdio.h> 30 #include <stdlib.h> 31 #include <unistd.h> 32 #include <string.h> 33 #include <strings.h> 34 #include <sys/errno.h> 35 #include <signal.h> 36 #include <sys/wait.h> 37 #include <getopt.h> 38 #include "eval_expr.h" 39 __FBSDID("$FreeBSD$"); 40 41 static int max_pmc_counters = 1; 42 static int run_all = 0; 43 44 #define MAX_COUNTER_SLOTS 1024 45 #define MAX_NLEN 64 46 #define MAX_CPU 64 47 static int verbose = 0; 48 49 extern char **environ; 50 extern struct expression *master_exp; 51 struct expression *master_exp=NULL; 52 53 #define PMC_INITIAL_ALLOC 512 54 extern char **valid_pmcs; 55 char **valid_pmcs = NULL; 56 extern int valid_pmc_cnt; 57 int valid_pmc_cnt=0; 58 extern int pmc_allocated_cnt; 59 int pmc_allocated_cnt=0; 60 61 /* 62 * The following two varients on popen and pclose with 63 * the cavet that they get you the PID so that you 64 * can supply it to pclose so it can send a SIGTERM 65 * to the process. 66 */ 67 static FILE * 68 my_popen(const char *command, const char *dir, pid_t *p_pid) 69 { 70 FILE *io_out, *io_in; 71 int pdesin[2], pdesout[2]; 72 char *argv[4]; 73 pid_t pid; 74 char cmd[4]; 75 char cmd2[1024]; 76 char arg1[4]; 77 78 if ((strcmp(dir, "r") != 0) && 79 (strcmp(dir, "w") != 0)) { 80 errno = EINVAL; 81 return(NULL); 82 } 83 if (pipe(pdesin) < 0) 84 return (NULL); 85 86 if (pipe(pdesout) < 0) { 87 (void)close(pdesin[0]); 88 (void)close(pdesin[1]); 89 return (NULL); 90 } 91 strcpy(cmd, "sh"); 92 strcpy(arg1, "-c"); 93 strcpy(cmd2, command); 94 argv[0] = cmd; 95 argv[1] = arg1; 96 argv[2] = cmd2; 97 argv[3] = NULL; 98 99 switch (pid = fork()) { 100 case -1: /* Error. */ 101 (void)close(pdesin[0]); 102 (void)close(pdesin[1]); 103 (void)close(pdesout[0]); 104 (void)close(pdesout[1]); 105 return (NULL); 106 /* NOTREACHED */ 107 case 0: /* Child. */ 108 /* Close out un-used sides */ 109 (void)close(pdesin[1]); 110 (void)close(pdesout[0]); 111 /* Now prepare the stdin of the process */ 112 close(0); 113 (void)dup(pdesin[0]); 114 (void)close(pdesin[0]); 115 /* Now prepare the stdout of the process */ 116 close(1); 117 (void)dup(pdesout[1]); 118 /* And lets do stderr just in case */ 119 close(2); 120 (void)dup(pdesout[1]); 121 (void)close(pdesout[1]); 122 /* Now run it */ 123 execve("/bin/sh", argv, environ); 124 exit(127); 125 /* NOTREACHED */ 126 } 127 /* Parent; assume fdopen can't fail. */ 128 /* Store the pid */ 129 *p_pid = pid; 130 if (strcmp(dir, "r") != 0) { 131 io_out = fdopen(pdesin[1], "w"); 132 (void)close(pdesin[0]); 133 (void)close(pdesout[0]); 134 (void)close(pdesout[1]); 135 return(io_out); 136 } else { 137 /* Prepare the input stream */ 138 io_in = fdopen(pdesout[0], "r"); 139 (void)close(pdesout[1]); 140 (void)close(pdesin[0]); 141 (void)close(pdesin[1]); 142 return (io_in); 143 } 144 } 145 146 /* 147 * pclose -- 148 * Pclose returns -1 if stream is not associated with a `popened' command, 149 * if already `pclosed', or waitpid returns an error. 150 */ 151 static void 152 my_pclose(FILE *io, pid_t the_pid) 153 { 154 int pstat; 155 pid_t pid; 156 157 /* 158 * Find the appropriate file pointer and remove it from the list. 159 */ 160 (void)fclose(io); 161 /* Die if you are not dead! */ 162 kill(the_pid, SIGTERM); 163 do { 164 pid = wait4(the_pid, &pstat, 0, (struct rusage *)0); 165 } while (pid == -1 && errno == EINTR); 166 } 167 168 struct counters { 169 struct counters *next_cpu; 170 char counter_name[MAX_NLEN]; /* Name of counter */ 171 int cpu; /* CPU we are on */ 172 int pos; /* Index we are filling to. */ 173 uint64_t vals[MAX_COUNTER_SLOTS]; /* Last 64 entries */ 174 uint64_t sum; /* Summary of entries */ 175 }; 176 177 extern struct counters *glob_cpu[MAX_CPU]; 178 struct counters *glob_cpu[MAX_CPU]; 179 180 extern struct counters *cnts; 181 struct counters *cnts=NULL; 182 183 extern int ncnts; 184 int ncnts=0; 185 186 extern int (*expression)(struct counters *, int); 187 int (*expression)(struct counters *, int); 188 189 static const char *threshold=NULL; 190 static const char *command; 191 192 struct cpu_entry { 193 const char *name; 194 const char *thresh; 195 const char *command; 196 int (*func)(struct counters *, int); 197 int counters_required; 198 }; 199 200 struct cpu_type { 201 char cputype[32]; 202 int number; 203 struct cpu_entry *ents; 204 void (*explain)(const char *name); 205 }; 206 extern struct cpu_type the_cpu; 207 struct cpu_type the_cpu; 208 209 static void 210 explain_name_sb(const char *name) 211 { 212 const char *mythresh; 213 if (strcmp(name, "allocstall1") == 0) { 214 printf("Examine PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW / CPU_CLK_UNHALTED.THREAD_P\n"); 215 mythresh = "thresh > .05"; 216 } else if (strcmp(name, "allocstall2") == 0) { 217 printf("Examine PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P\n"); 218 mythresh = "thresh > .05"; 219 } else if (strcmp(name, "br_miss") == 0) { 220 printf("Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P\n"); 221 mythresh = "thresh >= .2"; 222 } else if (strcmp(name, "splitload") == 0) { 223 printf("Examine MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); 224 mythresh = "thresh >= .1"; 225 } else if (strcmp(name, "splitstore") == 0) { 226 printf("Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n"); 227 mythresh = "thresh >= .01"; 228 } else if (strcmp(name, "contested") == 0) { 229 printf("Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n"); 230 mythresh = "thresh >= .05"; 231 } else if (strcmp(name, "blockstorefwd") == 0) { 232 printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n"); 233 mythresh = "thresh >= .05"; 234 } else if (strcmp(name, "cache2") == 0) { 235 printf("Examine ((MEM_LOAD_RETIRED.L3_HIT * 26) + \n"); 236 printf(" (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) + \n"); 237 printf(" (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P\n"); 238 printf("**Note we have it labeled MEM_LOAD_UOPS_RETIRED.LLC_HIT not MEM_LOAD_RETIRED.L3_HIT\n"); 239 mythresh = "thresh >= .2"; 240 } else if (strcmp(name, "cache1") == 0) { 241 printf("Examine (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P\n"); 242 mythresh = "thresh >= .2"; 243 } else if (strcmp(name, "dtlbmissload") == 0) { 244 printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"); 245 printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); 246 mythresh = "thresh >= .1"; 247 } else if (strcmp(name, "frontendstall") == 0) { 248 printf("Examine IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4)\n"); 249 mythresh = "thresh >= .15"; 250 } else if (strcmp(name, "clears") == 0) { 251 printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"); 252 printf(" MACHINE_CLEARS.SMC + \n"); 253 printf(" MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n"); 254 mythresh = "thresh >= .02"; 255 } else if (strcmp(name, "microassist") == 0) { 256 printf("Examine IDQ.MS_CYCLES / (CPU_CLK_UNHALTED.THREAD_P * 4)\n"); 257 printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n"); 258 mythresh = "thresh >= .05"; 259 } else if (strcmp(name, "aliasing_4k") == 0) { 260 printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); 261 mythresh = "thresh >= .1"; 262 } else if (strcmp(name, "fpassist") == 0) { 263 printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n"); 264 mythresh = "look for a excessive value"; 265 } else if (strcmp(name, "otherassistavx") == 0) { 266 printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 267 mythresh = "look for a excessive value"; 268 } else if (strcmp(name, "otherassistsse") == 0) { 269 printf("Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 270 mythresh = "look for a excessive value"; 271 } else if (strcmp(name, "eff1") == 0) { 272 printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n"); 273 mythresh = "thresh < .9"; 274 } else if (strcmp(name, "eff2") == 0) { 275 printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n"); 276 mythresh = "thresh > 1.0"; 277 } else if (strcmp(name, "dtlbmissstore") == 0) { 278 printf("Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"); 279 printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); 280 mythresh = "thresh >= .05"; 281 } else { 282 printf("Unknown name:%s\n", name); 283 mythresh = "unknown entry"; 284 } 285 printf("If the value printed is %s we may have the ability to improve performance\n", mythresh); 286 } 287 288 static void 289 explain_name_ib(const char *name) 290 { 291 const char *mythresh; 292 if (strcmp(name, "br_miss") == 0) { 293 printf("Examine ((BR_MISP_RETIRED.ALL_BRANCHES /(BR_MISP_RETIRED.ALL_BRANCHES +\n"); 294 printf(" MACHINE_CLEAR.COUNT) * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES)\n"); 295 printf("/ (4 * CPU_CLK_UNHALTED.THREAD))))\n"); 296 mythresh = "thresh >= .2"; 297 } else if (strcmp(name, "eff1") == 0) { 298 printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n"); 299 mythresh = "thresh < .9"; 300 } else if (strcmp(name, "eff2") == 0) { 301 printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n"); 302 mythresh = "thresh > 1.0"; 303 } else if (strcmp(name, "cache1") == 0) { 304 printf("Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n"); 305 mythresh = "thresh >= .2"; 306 } else if (strcmp(name, "cache2") == 0) { 307 printf("Examine (MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P\n"); 308 mythresh = "thresh >= .2"; 309 } else if (strcmp(name, "itlbmiss") == 0) { 310 printf("Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n"); 311 mythresh = "thresh > .05"; 312 } else if (strcmp(name, "icachemiss") == 0) { 313 printf("Examine (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n"); 314 mythresh = "thresh > .05"; 315 } else if (strcmp(name, "lcpstall") == 0) { 316 printf("Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n"); 317 mythresh = "thresh > .05"; 318 } else if (strcmp(name, "datashare") == 0) { 319 printf("Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/CPU_CLK_UNHALTED.THREAD_P\n"); 320 mythresh = "thresh > .05"; 321 } else if (strcmp(name, "blockstorefwd") == 0) { 322 printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n"); 323 mythresh = "thresh >= .05"; 324 } else if (strcmp(name, "splitload") == 0) { 325 printf("Examine ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) *\n"); 326 printf(" LD_BLOCKS.NO_SR)/CPU_CLK_UNHALTED.THREAD_P\n"); 327 mythresh = "thresh >= .1"; 328 } else if (strcmp(name, "splitstore") == 0) { 329 printf("Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n"); 330 mythresh = "thresh >= .01"; 331 } else if (strcmp(name, "aliasing_4k") == 0) { 332 printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); 333 mythresh = "thresh >= .1"; 334 } else if (strcmp(name, "dtlbmissload") == 0) { 335 printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"); 336 printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); 337 mythresh = "thresh >= .1"; 338 } else if (strcmp(name, "dtlbmissstore") == 0) { 339 printf("Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"); 340 printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); 341 mythresh = "thresh >= .05"; 342 } else if (strcmp(name, "contested") == 0) { 343 printf("Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n"); 344 mythresh = "thresh >= .05"; 345 } else if (strcmp(name, "clears") == 0) { 346 printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"); 347 printf(" MACHINE_CLEARS.SMC + \n"); 348 printf(" MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n"); 349 mythresh = "thresh >= .02"; 350 } else if (strcmp(name, "microassist") == 0) { 351 printf("Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"); 352 printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n"); 353 mythresh = "thresh >= .05"; 354 } else if (strcmp(name, "fpassist") == 0) { 355 printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n"); 356 mythresh = "look for a excessive value"; 357 } else if (strcmp(name, "otherassistavx") == 0) { 358 printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 359 mythresh = "look for a excessive value"; 360 } else if (strcmp(name, "otherassistsse") == 0) { 361 printf("Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 362 mythresh = "look for a excessive value"; 363 } else { 364 printf("Unknown name:%s\n", name); 365 mythresh = "unknown entry"; 366 } 367 printf("If the value printed is %s we may have the ability to improve performance\n", mythresh); 368 } 369 370 371 static void 372 explain_name_has(const char *name) 373 { 374 const char *mythresh; 375 if (strcmp(name, "eff1") == 0) { 376 printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n"); 377 mythresh = "thresh < .75"; 378 } else if (strcmp(name, "eff2") == 0) { 379 printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n"); 380 mythresh = "thresh > 1.0"; 381 } else if (strcmp(name, "itlbmiss") == 0) { 382 printf("Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n"); 383 mythresh = "thresh > .05"; 384 } else if (strcmp(name, "icachemiss") == 0) { 385 printf("Examine (36 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P\n"); 386 mythresh = "thresh > .05"; 387 } else if (strcmp(name, "lcpstall") == 0) { 388 printf("Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n"); 389 mythresh = "thresh > .05"; 390 } else if (strcmp(name, "cache1") == 0) { 391 printf("Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n"); 392 mythresh = "thresh >= .2"; 393 } else if (strcmp(name, "cache2") == 0) { 394 printf("Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \n"); 395 printf(" (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) + \n"); 396 printf(" (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))\n"); 397 printf(" / CPU_CLK_UNHALTED.THREAD_P\n"); 398 mythresh = "thresh >= .2"; 399 } else if (strcmp(name, "contested") == 0) { 400 printf("Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P\n"); 401 mythresh = "thresh >= .05"; 402 } else if (strcmp(name, "datashare") == 0) { 403 printf("Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n"); 404 mythresh = "thresh > .05"; 405 } else if (strcmp(name, "blockstorefwd") == 0) { 406 printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n"); 407 mythresh = "thresh >= .05"; 408 } else if (strcmp(name, "splitload") == 0) { 409 printf("Examine (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); 410 mythresh = "thresh >= .1"; 411 } else if (strcmp(name, "splitstore") == 0) { 412 printf("Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n"); 413 mythresh = "thresh >= .01"; 414 } else if (strcmp(name, "aliasing_4k") == 0) { 415 printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); 416 mythresh = "thresh >= .1"; 417 } else if (strcmp(name, "dtlbmissload") == 0) { 418 printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"); 419 printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); 420 mythresh = "thresh >= .1"; 421 } else if (strcmp(name, "br_miss") == 0) { 422 printf("Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD\n"); 423 mythresh = "thresh >= .2"; 424 } else if (strcmp(name, "clears") == 0) { 425 printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"); 426 printf(" MACHINE_CLEARS.SMC + \n"); 427 printf(" MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n"); 428 mythresh = "thresh >= .02"; 429 } else if (strcmp(name, "microassist") == 0) { 430 printf("Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"); 431 printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n"); 432 mythresh = "thresh >= .05"; 433 } else if (strcmp(name, "fpassist") == 0) { 434 printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n"); 435 mythresh = "look for a excessive value"; 436 } else if (strcmp(name, "otherassistavx") == 0) { 437 printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 438 mythresh = "look for a excessive value"; 439 } else if (strcmp(name, "otherassistsse") == 0) { 440 printf("Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 441 mythresh = "look for a excessive value"; 442 } else { 443 printf("Unknown name:%s\n", name); 444 mythresh = "unknown entry"; 445 } 446 printf("If the value printed is %s we may have the ability to improve performance\n", mythresh); 447 } 448 449 450 451 static struct counters * 452 find_counter(struct counters *base, const char *name) 453 { 454 struct counters *at; 455 int len; 456 457 at = base; 458 len = strlen(name); 459 while(at) { 460 if (strncmp(at->counter_name, name, len) == 0) { 461 return(at); 462 } 463 at = at->next_cpu; 464 } 465 printf("Can't find counter %s\n", name); 466 printf("We have:\n"); 467 at = base; 468 while(at) { 469 printf("- %s\n", at->counter_name); 470 at = at->next_cpu; 471 } 472 exit(-1); 473 } 474 475 static int 476 allocstall1(struct counters *cpu, int pos) 477 { 478 /* 1 - PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW/CPU_CLK_UNHALTED.THREAD_P (thresh > .05)*/ 479 int ret; 480 struct counters *partial; 481 struct counters *unhalt; 482 double un, par, res; 483 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 484 partial = find_counter(cpu, "PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW"); 485 if (pos != -1) { 486 par = partial->vals[pos] * 1.0; 487 un = unhalt->vals[pos] * 1.0; 488 } else { 489 par = partial->sum * 1.0; 490 un = unhalt->sum * 1.0; 491 } 492 res = par/un; 493 ret = printf("%1.3f", res); 494 return(ret); 495 } 496 497 static int 498 allocstall2(struct counters *cpu, int pos) 499 { 500 /* 2 - PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */ 501 int ret; 502 struct counters *partial; 503 struct counters *unhalt; 504 double un, par, res; 505 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 506 partial = find_counter(cpu, "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP"); 507 if (pos != -1) { 508 par = partial->vals[pos] * 1.0; 509 un = unhalt->vals[pos] * 1.0; 510 } else { 511 par = partial->sum * 1.0; 512 un = unhalt->sum * 1.0; 513 } 514 res = par/un; 515 ret = printf("%1.3f", res); 516 return(ret); 517 } 518 519 static int 520 br_mispredict(struct counters *cpu, int pos) 521 { 522 struct counters *brctr; 523 struct counters *unhalt; 524 int ret; 525 /* 3 - (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */ 526 double br, un, con, res; 527 con = 20.0; 528 529 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 530 brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES"); 531 if (pos != -1) { 532 br = brctr->vals[pos] * 1.0; 533 un = unhalt->vals[pos] * 1.0; 534 } else { 535 br = brctr->sum * 1.0; 536 un = unhalt->sum * 1.0; 537 } 538 res = (con * br)/un; 539 ret = printf("%1.3f", res); 540 return(ret); 541 } 542 543 static int 544 br_mispredictib(struct counters *cpu, int pos) 545 { 546 struct counters *brctr; 547 struct counters *unhalt; 548 struct counters *clear, *clear2, *clear3; 549 struct counters *uops; 550 struct counters *recv; 551 struct counters *iss; 552 /* "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",*/ 553 int ret; 554 /* 555 * (BR_MISP_RETIRED.ALL_BRANCHES / 556 * (BR_MISP_RETIRED.ALL_BRANCHES + 557 * MACHINE_CLEAR.COUNT) * 558 * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) / (4 * CPU_CLK_UNHALTED.THREAD))) 559 * 560 */ 561 double br, cl, cl2, cl3, uo, re, un, con, res, is; 562 con = 4.0; 563 564 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 565 brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES"); 566 clear = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING"); 567 clear2 = find_counter(cpu, "MACHINE_CLEARS.SMC"); 568 clear3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV"); 569 uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS"); 570 iss = find_counter(cpu, "UOPS_ISSUED.ANY"); 571 recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES"); 572 if (pos != -1) { 573 br = brctr->vals[pos] * 1.0; 574 cl = clear->vals[pos] * 1.0; 575 cl2 = clear2->vals[pos] * 1.0; 576 cl3 = clear3->vals[pos] * 1.0; 577 uo = uops->vals[pos] * 1.0; 578 re = recv->vals[pos] * 1.0; 579 is = iss->vals[pos] * 1.0; 580 un = unhalt->vals[pos] * 1.0; 581 } else { 582 br = brctr->sum * 1.0; 583 cl = clear->sum * 1.0; 584 cl2 = clear2->sum * 1.0; 585 cl3 = clear3->sum * 1.0; 586 uo = uops->sum * 1.0; 587 re = recv->sum * 1.0; 588 is = iss->sum * 1.0; 589 un = unhalt->sum * 1.0; 590 } 591 res = (br/(br + cl + cl2 + cl3) * ((is - uo + con * re) / (con * un))); 592 ret = printf("%1.3f", res); 593 return(ret); 594 } 595 596 597 static int 598 br_mispredict_broad(struct counters *cpu, int pos) 599 { 600 struct counters *brctr; 601 struct counters *unhalt; 602 struct counters *clear; 603 struct counters *uops; 604 struct counters *uops_ret; 605 struct counters *recv; 606 int ret; 607 double br, cl, uo, uo_r, re, con, un, res; 608 609 con = 4.0; 610 611 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 612 brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES"); 613 clear = find_counter(cpu, "MACHINE_CLEARS.CYCLES"); 614 uops = find_counter(cpu, "UOPS_ISSUED.ANY"); 615 uops_ret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS"); 616 recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES"); 617 618 if (pos != -1) { 619 un = unhalt->vals[pos] * 1.0; 620 br = brctr->vals[pos] * 1.0; 621 cl = clear->vals[pos] * 1.0; 622 uo = uops->vals[pos] * 1.0; 623 uo_r = uops_ret->vals[pos] * 1.0; 624 re = recv->vals[pos] * 1.0; 625 } else { 626 un = unhalt->sum * 1.0; 627 br = brctr->sum * 1.0; 628 cl = clear->sum * 1.0; 629 uo = uops->sum * 1.0; 630 uo_r = uops_ret->sum * 1.0; 631 re = recv->sum * 1.0; 632 } 633 res = br / (br + cl) * (uo - uo_r + con * re) / (un * con); 634 ret = printf("%1.3f", res); 635 return(ret); 636 } 637 638 static int 639 splitloadib(struct counters *cpu, int pos) 640 { 641 int ret; 642 struct counters *mem; 643 struct counters *l1d, *ldblock; 644 struct counters *unhalt; 645 double un, memd, res, l1, ldb; 646 /* 647 * ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) * LD_BLOCKS.NO_SR) / CPU_CLK_UNHALTED.THREAD_P 648 * "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1", 649 */ 650 651 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 652 mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L1_MISS"); 653 l1d = find_counter(cpu, "L1D_PEND_MISS.PENDING"); 654 ldblock = find_counter(cpu, "LD_BLOCKS.NO_SR"); 655 if (pos != -1) { 656 memd = mem->vals[pos] * 1.0; 657 l1 = l1d->vals[pos] * 1.0; 658 ldb = ldblock->vals[pos] * 1.0; 659 un = unhalt->vals[pos] * 1.0; 660 } else { 661 memd = mem->sum * 1.0; 662 l1 = l1d->sum * 1.0; 663 ldb = ldblock->sum * 1.0; 664 un = unhalt->sum * 1.0; 665 } 666 res = ((l1 / memd) * ldb)/un; 667 ret = printf("%1.3f", res); 668 return(ret); 669 } 670 671 672 static int 673 splitload(struct counters *cpu, int pos) 674 { 675 int ret; 676 struct counters *mem; 677 struct counters *unhalt; 678 double con, un, memd, res; 679 /* 4 - (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/ 680 681 con = 5.0; 682 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 683 mem = find_counter(cpu, "MEM_UOPS_RETIRED.SPLIT_LOADS"); 684 if (pos != -1) { 685 memd = mem->vals[pos] * 1.0; 686 un = unhalt->vals[pos] * 1.0; 687 } else { 688 memd = mem->sum * 1.0; 689 un = unhalt->sum * 1.0; 690 } 691 res = (memd * con)/un; 692 ret = printf("%1.3f", res); 693 return(ret); 694 } 695 696 697 static int 698 splitload_sb(struct counters *cpu, int pos) 699 { 700 int ret; 701 struct counters *mem; 702 struct counters *unhalt; 703 double con, un, memd, res; 704 /* 4 - (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/ 705 706 con = 5.0; 707 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 708 mem = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_LOADS"); 709 if (pos != -1) { 710 memd = mem->vals[pos] * 1.0; 711 un = unhalt->vals[pos] * 1.0; 712 } else { 713 memd = mem->sum * 1.0; 714 un = unhalt->sum * 1.0; 715 } 716 res = (memd * con)/un; 717 ret = printf("%1.3f", res); 718 return(ret); 719 } 720 721 722 static int 723 splitstore_sb(struct counters *cpu, int pos) 724 { 725 /* 5 - MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES (thresh > 0.01) */ 726 int ret; 727 struct counters *mem_split; 728 struct counters *mem_stores; 729 double memsplit, memstore, res; 730 mem_split = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_STORES"); 731 mem_stores = find_counter(cpu, "MEM_UOP_RETIRED.ALL_STORES"); 732 if (pos != -1) { 733 memsplit = mem_split->vals[pos] * 1.0; 734 memstore = mem_stores->vals[pos] * 1.0; 735 } else { 736 memsplit = mem_split->sum * 1.0; 737 memstore = mem_stores->sum * 1.0; 738 } 739 res = memsplit/memstore; 740 ret = printf("%1.3f", res); 741 return(ret); 742 } 743 744 745 746 static int 747 splitstore(struct counters *cpu, int pos) 748 { 749 /* 5 - MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES (thresh > 0.01) */ 750 int ret; 751 struct counters *mem_split; 752 struct counters *mem_stores; 753 double memsplit, memstore, res; 754 mem_split = find_counter(cpu, "MEM_UOPS_RETIRED.SPLIT_STORES"); 755 mem_stores = find_counter(cpu, "MEM_UOPS_RETIRED.ALL_STORES"); 756 if (pos != -1) { 757 memsplit = mem_split->vals[pos] * 1.0; 758 memstore = mem_stores->vals[pos] * 1.0; 759 } else { 760 memsplit = mem_split->sum * 1.0; 761 memstore = mem_stores->sum * 1.0; 762 } 763 res = memsplit/memstore; 764 ret = printf("%1.3f", res); 765 return(ret); 766 } 767 768 769 static int 770 contested(struct counters *cpu, int pos) 771 { 772 /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */ 773 int ret; 774 struct counters *mem; 775 struct counters *unhalt; 776 double con, un, memd, res; 777 778 con = 60.0; 779 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 780 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); 781 if (pos != -1) { 782 memd = mem->vals[pos] * 1.0; 783 un = unhalt->vals[pos] * 1.0; 784 } else { 785 memd = mem->sum * 1.0; 786 un = unhalt->sum * 1.0; 787 } 788 res = (memd * con)/un; 789 ret = printf("%1.3f", res); 790 return(ret); 791 } 792 793 static int 794 contested_has(struct counters *cpu, int pos) 795 { 796 /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */ 797 int ret; 798 struct counters *mem; 799 struct counters *unhalt; 800 double con, un, memd, res; 801 802 con = 84.0; 803 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 804 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); 805 if (pos != -1) { 806 memd = mem->vals[pos] * 1.0; 807 un = unhalt->vals[pos] * 1.0; 808 } else { 809 memd = mem->sum * 1.0; 810 un = unhalt->sum * 1.0; 811 } 812 res = (memd * con)/un; 813 ret = printf("%1.3f", res); 814 return(ret); 815 } 816 817 static int 818 contestedbroad(struct counters *cpu, int pos) 819 { 820 /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */ 821 int ret; 822 struct counters *mem; 823 struct counters *mem2; 824 struct counters *unhalt; 825 double con, un, memd, memtoo, res; 826 827 con = 84.0; 828 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 829 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); 830 mem2 = find_counter(cpu,"MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS"); 831 832 if (pos != -1) { 833 memd = mem->vals[pos] * 1.0; 834 memtoo = mem2->vals[pos] * 1.0; 835 un = unhalt->vals[pos] * 1.0; 836 } else { 837 memd = mem->sum * 1.0; 838 memtoo = mem2->sum * 1.0; 839 un = unhalt->sum * 1.0; 840 } 841 res = ((memd * con) + memtoo)/un; 842 ret = printf("%1.3f", res); 843 return(ret); 844 } 845 846 847 static int 848 blockstoreforward(struct counters *cpu, int pos) 849 { 850 /* 7 - (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .05)*/ 851 int ret; 852 struct counters *ldb; 853 struct counters *unhalt; 854 double con, un, ld, res; 855 856 con = 13.0; 857 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 858 ldb = find_counter(cpu, "LD_BLOCKS_STORE_FORWARD"); 859 if (pos != -1) { 860 ld = ldb->vals[pos] * 1.0; 861 un = unhalt->vals[pos] * 1.0; 862 } else { 863 ld = ldb->sum * 1.0; 864 un = unhalt->sum * 1.0; 865 } 866 res = (ld * con)/un; 867 ret = printf("%1.3f", res); 868 return(ret); 869 } 870 871 static int 872 cache2(struct counters *cpu, int pos) 873 { 874 /* ** Suspect *** 875 * 8 - ((MEM_LOAD_RETIRED.L3_HIT * 26) + (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) + 876 * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P (thresh >.2) 877 */ 878 int ret; 879 struct counters *mem1, *mem2, *mem3; 880 struct counters *unhalt; 881 double con1, con2, con3, un, me_1, me_2, me_3, res; 882 883 con1 = 26.0; 884 con2 = 43.0; 885 con3 = 60.0; 886 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 887 /* Call for MEM_LOAD_RETIRED.L3_HIT possibly MEM_LOAD_UOPS_RETIRED.LLC_HIT ?*/ 888 mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT"); 889 mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT"); 890 mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); 891 if (pos != -1) { 892 me_1 = mem1->vals[pos] * 1.0; 893 me_2 = mem2->vals[pos] * 1.0; 894 me_3 = mem3->vals[pos] * 1.0; 895 un = unhalt->vals[pos] * 1.0; 896 } else { 897 me_1 = mem1->sum * 1.0; 898 me_2 = mem2->sum * 1.0; 899 me_3 = mem3->sum * 1.0; 900 un = unhalt->sum * 1.0; 901 } 902 res = ((me_1 * con1) + (me_2 * con2) + (me_3 * con3))/un; 903 ret = printf("%1.3f", res); 904 return(ret); 905 } 906 907 static int 908 datasharing(struct counters *cpu, int pos) 909 { 910 /* 911 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2) 912 */ 913 int ret; 914 struct counters *mem; 915 struct counters *unhalt; 916 double con, res, me, un; 917 918 con = 43.0; 919 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 920 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT"); 921 if (pos != -1) { 922 me = mem->vals[pos] * 1.0; 923 un = unhalt->vals[pos] * 1.0; 924 } else { 925 me = mem->sum * 1.0; 926 un = unhalt->sum * 1.0; 927 } 928 res = (me * con)/un; 929 ret = printf("%1.3f", res); 930 return(ret); 931 932 } 933 934 935 static int 936 datasharing_has(struct counters *cpu, int pos) 937 { 938 /* 939 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2) 940 */ 941 int ret; 942 struct counters *mem; 943 struct counters *unhalt; 944 double con, res, me, un; 945 946 con = 72.0; 947 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 948 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT"); 949 if (pos != -1) { 950 me = mem->vals[pos] * 1.0; 951 un = unhalt->vals[pos] * 1.0; 952 } else { 953 me = mem->sum * 1.0; 954 un = unhalt->sum * 1.0; 955 } 956 res = (me * con)/un; 957 ret = printf("%1.3f", res); 958 return(ret); 959 960 } 961 962 963 static int 964 cache2ib(struct counters *cpu, int pos) 965 { 966 /* 967 * (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2) 968 */ 969 int ret; 970 struct counters *mem; 971 struct counters *unhalt; 972 double con, un, me, res; 973 974 con = 29.0; 975 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 976 mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT"); 977 if (pos != -1) { 978 me = mem->vals[pos] * 1.0; 979 un = unhalt->vals[pos] * 1.0; 980 } else { 981 me = mem->sum * 1.0; 982 un = unhalt->sum * 1.0; 983 } 984 res = (con * me)/un; 985 ret = printf("%1.3f", res); 986 return(ret); 987 } 988 989 static int 990 cache2has(struct counters *cpu, int pos) 991 { 992 /* 993 * Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \ 994 * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) + 995 * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84)) 996 * / CPU_CLK_UNHALTED.THREAD_P 997 */ 998 int ret; 999 struct counters *mem1, *mem2, *mem3; 1000 struct counters *unhalt; 1001 double con1, con2, con3, un, me1, me2, me3, res; 1002 1003 con1 = 36.0; 1004 con2 = 72.0; 1005 con3 = 84.0; 1006 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1007 mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT"); 1008 mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT"); 1009 mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); 1010 if (pos != -1) { 1011 me1 = mem1->vals[pos] * 1.0; 1012 me2 = mem2->vals[pos] * 1.0; 1013 me3 = mem3->vals[pos] * 1.0; 1014 un = unhalt->vals[pos] * 1.0; 1015 } else { 1016 me1 = mem1->sum * 1.0; 1017 me2 = mem2->sum * 1.0; 1018 me3 = mem3->sum * 1.0; 1019 un = unhalt->sum * 1.0; 1020 } 1021 res = ((me1 * con1) + (me2 * con2) + (me3 * con3))/un; 1022 ret = printf("%1.3f", res); 1023 return(ret); 1024 } 1025 1026 1027 static int 1028 cache2broad(struct counters *cpu, int pos) 1029 { 1030 /* 1031 * (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2) 1032 */ 1033 int ret; 1034 struct counters *mem; 1035 struct counters *unhalt; 1036 double con, un, me, res; 1037 1038 con = 36.0; 1039 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1040 mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L3_HIT"); 1041 if (pos != -1) { 1042 me = mem->vals[pos] * 1.0; 1043 un = unhalt->vals[pos] * 1.0; 1044 } else { 1045 me = mem->sum * 1.0; 1046 un = unhalt->sum * 1.0; 1047 } 1048 res = (con * me)/un; 1049 ret = printf("%1.3f", res); 1050 return(ret); 1051 } 1052 1053 1054 static int 1055 cache1(struct counters *cpu, int pos) 1056 { 1057 /* 9 - (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */ 1058 int ret; 1059 struct counters *mem; 1060 struct counters *unhalt; 1061 double con, un, me, res; 1062 1063 con = 180.0; 1064 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1065 mem = find_counter(cpu, "MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS"); 1066 if (pos != -1) { 1067 me = mem->vals[pos] * 1.0; 1068 un = unhalt->vals[pos] * 1.0; 1069 } else { 1070 me = mem->sum * 1.0; 1071 un = unhalt->sum * 1.0; 1072 } 1073 res = (me * con)/un; 1074 ret = printf("%1.3f", res); 1075 return(ret); 1076 } 1077 1078 static int 1079 cache1ib(struct counters *cpu, int pos) 1080 { 1081 /* 9 - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */ 1082 int ret; 1083 struct counters *mem; 1084 struct counters *unhalt; 1085 double con, un, me, res; 1086 1087 con = 180.0; 1088 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1089 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM"); 1090 if (pos != -1) { 1091 me = mem->vals[pos] * 1.0; 1092 un = unhalt->vals[pos] * 1.0; 1093 } else { 1094 me = mem->sum * 1.0; 1095 un = unhalt->sum * 1.0; 1096 } 1097 res = (me * con)/un; 1098 ret = printf("%1.3f", res); 1099 return(ret); 1100 } 1101 1102 1103 static int 1104 cache1broad(struct counters *cpu, int pos) 1105 { 1106 /* 9 - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */ 1107 int ret; 1108 struct counters *mem; 1109 struct counters *unhalt; 1110 double con, un, me, res; 1111 1112 con = 180.0; 1113 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1114 mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L3_MISS"); 1115 if (pos != -1) { 1116 me = mem->vals[pos] * 1.0; 1117 un = unhalt->vals[pos] * 1.0; 1118 } else { 1119 me = mem->sum * 1.0; 1120 un = unhalt->sum * 1.0; 1121 } 1122 res = (me * con)/un; 1123 ret = printf("%1.3f", res); 1124 return(ret); 1125 } 1126 1127 1128 static int 1129 dtlb_missload(struct counters *cpu, int pos) 1130 { 1131 /* 10 - ((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P (t >=.1) */ 1132 int ret; 1133 struct counters *dtlb_m, *dtlb_d; 1134 struct counters *unhalt; 1135 double con, un, d1, d2, res; 1136 1137 con = 7.0; 1138 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1139 dtlb_m = find_counter(cpu, "DTLB_LOAD_MISSES.STLB_HIT"); 1140 dtlb_d = find_counter(cpu, "DTLB_LOAD_MISSES.WALK_DURATION"); 1141 if (pos != -1) { 1142 d1 = dtlb_m->vals[pos] * 1.0; 1143 d2 = dtlb_d->vals[pos] * 1.0; 1144 un = unhalt->vals[pos] * 1.0; 1145 } else { 1146 d1 = dtlb_m->sum * 1.0; 1147 d2 = dtlb_d->sum * 1.0; 1148 un = unhalt->sum * 1.0; 1149 } 1150 res = ((d1 * con) + d2)/un; 1151 ret = printf("%1.3f", res); 1152 return(ret); 1153 } 1154 1155 static int 1156 dtlb_missstore(struct counters *cpu, int pos) 1157 { 1158 /* 1159 * ((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION) / 1160 * CPU_CLK_UNHALTED.THREAD_P (t >= .1) 1161 */ 1162 int ret; 1163 struct counters *dtsb_m, *dtsb_d; 1164 struct counters *unhalt; 1165 double con, un, d1, d2, res; 1166 1167 con = 7.0; 1168 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1169 dtsb_m = find_counter(cpu, "DTLB_STORE_MISSES.STLB_HIT"); 1170 dtsb_d = find_counter(cpu, "DTLB_STORE_MISSES.WALK_DURATION"); 1171 if (pos != -1) { 1172 d1 = dtsb_m->vals[pos] * 1.0; 1173 d2 = dtsb_d->vals[pos] * 1.0; 1174 un = unhalt->vals[pos] * 1.0; 1175 } else { 1176 d1 = dtsb_m->sum * 1.0; 1177 d2 = dtsb_d->sum * 1.0; 1178 un = unhalt->sum * 1.0; 1179 } 1180 res = ((d1 * con) + d2)/un; 1181 ret = printf("%1.3f", res); 1182 return(ret); 1183 } 1184 1185 static int 1186 itlb_miss(struct counters *cpu, int pos) 1187 { 1188 /* ITLB_MISSES.WALK_DURATION / CPU_CLK_UNTHREAD_P IB */ 1189 int ret; 1190 struct counters *itlb; 1191 struct counters *unhalt; 1192 double un, d1, res; 1193 1194 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1195 itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION"); 1196 if (pos != -1) { 1197 d1 = itlb->vals[pos] * 1.0; 1198 un = unhalt->vals[pos] * 1.0; 1199 } else { 1200 d1 = itlb->sum * 1.0; 1201 un = unhalt->sum * 1.0; 1202 } 1203 res = d1/un; 1204 ret = printf("%1.3f", res); 1205 return(ret); 1206 } 1207 1208 1209 static int 1210 itlb_miss_broad(struct counters *cpu, int pos) 1211 { 1212 /* (7 * ITLB_MISSES.STLB_HIT_4K + ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNTHREAD_P */ 1213 int ret; 1214 struct counters *itlb; 1215 struct counters *unhalt; 1216 struct counters *four_k; 1217 double un, d1, res, k; 1218 1219 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1220 itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION"); 1221 four_k = find_counter(cpu, "ITLB_MISSES.STLB_HIT_4K"); 1222 if (pos != -1) { 1223 d1 = itlb->vals[pos] * 1.0; 1224 un = unhalt->vals[pos] * 1.0; 1225 k = four_k->vals[pos] * 1.0; 1226 } else { 1227 d1 = itlb->sum * 1.0; 1228 un = unhalt->sum * 1.0; 1229 k = four_k->sum * 1.0; 1230 } 1231 res = (7.0 * k + d1)/un; 1232 ret = printf("%1.3f", res); 1233 return(ret); 1234 } 1235 1236 1237 static int 1238 icache_miss(struct counters *cpu, int pos) 1239 { 1240 /* (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P IB */ 1241 1242 int ret; 1243 struct counters *itlb, *icache; 1244 struct counters *unhalt; 1245 double un, d1, ic, res; 1246 1247 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1248 itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION"); 1249 icache = find_counter(cpu, "ICACHE.IFETCH_STALL"); 1250 if (pos != -1) { 1251 d1 = itlb->vals[pos] * 1.0; 1252 ic = icache->vals[pos] * 1.0; 1253 un = unhalt->vals[pos] * 1.0; 1254 } else { 1255 d1 = itlb->sum * 1.0; 1256 ic = icache->sum * 1.0; 1257 un = unhalt->sum * 1.0; 1258 } 1259 res = (ic-d1)/un; 1260 ret = printf("%1.3f", res); 1261 return(ret); 1262 1263 } 1264 1265 static int 1266 icache_miss_has(struct counters *cpu, int pos) 1267 { 1268 /* (36 * ICACHE.MISSES) / CPU_CLK_UNHALTED.THREAD_P */ 1269 1270 int ret; 1271 struct counters *icache; 1272 struct counters *unhalt; 1273 double un, con, ic, res; 1274 1275 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1276 icache = find_counter(cpu, "ICACHE.MISSES"); 1277 con = 36.0; 1278 if (pos != -1) { 1279 ic = icache->vals[pos] * 1.0; 1280 un = unhalt->vals[pos] * 1.0; 1281 } else { 1282 ic = icache->sum * 1.0; 1283 un = unhalt->sum * 1.0; 1284 } 1285 res = (con * ic)/un; 1286 ret = printf("%1.3f", res); 1287 return(ret); 1288 1289 } 1290 1291 static int 1292 lcp_stall(struct counters *cpu, int pos) 1293 { 1294 /* ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P IB */ 1295 int ret; 1296 struct counters *ild; 1297 struct counters *unhalt; 1298 double un, d1, res; 1299 1300 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1301 ild = find_counter(cpu, "ILD_STALL.LCP"); 1302 if (pos != -1) { 1303 d1 = ild->vals[pos] * 1.0; 1304 un = unhalt->vals[pos] * 1.0; 1305 } else { 1306 d1 = ild->sum * 1.0; 1307 un = unhalt->sum * 1.0; 1308 } 1309 res = d1/un; 1310 ret = printf("%1.3f", res); 1311 return(ret); 1312 1313 } 1314 1315 1316 static int 1317 frontendstall(struct counters *cpu, int pos) 1318 { 1319 /* 12 - IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4) (thresh >= .15) */ 1320 int ret; 1321 struct counters *idq; 1322 struct counters *unhalt; 1323 double con, un, id, res; 1324 1325 con = 4.0; 1326 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1327 idq = find_counter(cpu, "IDQ_UOPS_NOT_DELIVERED.CORE"); 1328 if (pos != -1) { 1329 id = idq->vals[pos] * 1.0; 1330 un = unhalt->vals[pos] * 1.0; 1331 } else { 1332 id = idq->sum * 1.0; 1333 un = unhalt->sum * 1.0; 1334 } 1335 res = id/(un * con); 1336 ret = printf("%1.3f", res); 1337 return(ret); 1338 } 1339 1340 static int 1341 clears(struct counters *cpu, int pos) 1342 { 1343 /* 13 - ((MACHINE_CLEARS.MEMORY_ORDERING + MACHINE_CLEARS.SMC + MACHINE_CLEARS.MASKMOV ) * 100 ) 1344 * / CPU_CLK_UNHALTED.THREAD_P (thresh >= .02)*/ 1345 1346 int ret; 1347 struct counters *clr1, *clr2, *clr3; 1348 struct counters *unhalt; 1349 double con, un, cl1, cl2, cl3, res; 1350 1351 con = 100.0; 1352 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1353 clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING"); 1354 clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC"); 1355 clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV"); 1356 1357 if (pos != -1) { 1358 cl1 = clr1->vals[pos] * 1.0; 1359 cl2 = clr2->vals[pos] * 1.0; 1360 cl3 = clr3->vals[pos] * 1.0; 1361 un = unhalt->vals[pos] * 1.0; 1362 } else { 1363 cl1 = clr1->sum * 1.0; 1364 cl2 = clr2->sum * 1.0; 1365 cl3 = clr3->sum * 1.0; 1366 un = unhalt->sum * 1.0; 1367 } 1368 res = ((cl1 + cl2 + cl3) * con)/un; 1369 ret = printf("%1.3f", res); 1370 return(ret); 1371 } 1372 1373 1374 1375 static int 1376 clears_broad(struct counters *cpu, int pos) 1377 { 1378 int ret; 1379 struct counters *clr1, *clr2, *clr3, *cyc; 1380 struct counters *unhalt; 1381 double con, un, cl1, cl2, cl3, cy, res; 1382 1383 con = 100.0; 1384 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1385 clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING"); 1386 clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC"); 1387 clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV"); 1388 cyc = find_counter(cpu, "MACHINE_CLEARS.CYCLES"); 1389 if (pos != -1) { 1390 cl1 = clr1->vals[pos] * 1.0; 1391 cl2 = clr2->vals[pos] * 1.0; 1392 cl3 = clr3->vals[pos] * 1.0; 1393 cy = cyc->vals[pos] * 1.0; 1394 un = unhalt->vals[pos] * 1.0; 1395 } else { 1396 cl1 = clr1->sum * 1.0; 1397 cl2 = clr2->sum * 1.0; 1398 cl3 = clr3->sum * 1.0; 1399 cy = cyc->sum * 1.0; 1400 un = unhalt->sum * 1.0; 1401 } 1402 /* Formula not listed but extrapulated to add the cy ?? */ 1403 res = ((cl1 + cl2 + cl3 + cy) * con)/un; 1404 ret = printf("%1.3f", res); 1405 return(ret); 1406 } 1407 1408 1409 1410 1411 1412 static int 1413 microassist(struct counters *cpu, int pos) 1414 { 1415 /* 14 - IDQ.MS_CYCLES / CPU_CLK_UNHALTED.THREAD_P (thresh > .05) */ 1416 int ret; 1417 struct counters *idq; 1418 struct counters *unhalt; 1419 double un, id, res, con; 1420 1421 con = 4.0; 1422 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1423 idq = find_counter(cpu, "IDQ.MS_UOPS"); 1424 if (pos != -1) { 1425 id = idq->vals[pos] * 1.0; 1426 un = unhalt->vals[pos] * 1.0; 1427 } else { 1428 id = idq->sum * 1.0; 1429 un = unhalt->sum * 1.0; 1430 } 1431 res = id/(un * con); 1432 ret = printf("%1.3f", res); 1433 return(ret); 1434 } 1435 1436 1437 static int 1438 microassist_broad(struct counters *cpu, int pos) 1439 { 1440 int ret; 1441 struct counters *idq; 1442 struct counters *unhalt; 1443 struct counters *uopiss; 1444 struct counters *uopret; 1445 double un, id, res, con, uoi, uor; 1446 1447 con = 4.0; 1448 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1449 idq = find_counter(cpu, "IDQ.MS_UOPS"); 1450 uopiss = find_counter(cpu, "UOPS_ISSUED.ANY"); 1451 uopret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS"); 1452 if (pos != -1) { 1453 id = idq->vals[pos] * 1.0; 1454 un = unhalt->vals[pos] * 1.0; 1455 uoi = uopiss->vals[pos] * 1.0; 1456 uor = uopret->vals[pos] * 1.0; 1457 } else { 1458 id = idq->sum * 1.0; 1459 un = unhalt->sum * 1.0; 1460 uoi = uopiss->sum * 1.0; 1461 uor = uopret->sum * 1.0; 1462 } 1463 res = (uor/uoi) * (id/(un * con)); 1464 ret = printf("%1.3f", res); 1465 return(ret); 1466 } 1467 1468 1469 static int 1470 aliasing(struct counters *cpu, int pos) 1471 { 1472 /* 15 - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */ 1473 int ret; 1474 struct counters *ld; 1475 struct counters *unhalt; 1476 double un, lds, con, res; 1477 1478 con = 5.0; 1479 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1480 ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS"); 1481 if (pos != -1) { 1482 lds = ld->vals[pos] * 1.0; 1483 un = unhalt->vals[pos] * 1.0; 1484 } else { 1485 lds = ld->sum * 1.0; 1486 un = unhalt->sum * 1.0; 1487 } 1488 res = (lds * con)/un; 1489 ret = printf("%1.3f", res); 1490 return(ret); 1491 } 1492 1493 static int 1494 aliasing_broad(struct counters *cpu, int pos) 1495 { 1496 /* 15 - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */ 1497 int ret; 1498 struct counters *ld; 1499 struct counters *unhalt; 1500 double un, lds, con, res; 1501 1502 con = 7.0; 1503 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1504 ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS"); 1505 if (pos != -1) { 1506 lds = ld->vals[pos] * 1.0; 1507 un = unhalt->vals[pos] * 1.0; 1508 } else { 1509 lds = ld->sum * 1.0; 1510 un = unhalt->sum * 1.0; 1511 } 1512 res = (lds * con)/un; 1513 ret = printf("%1.3f", res); 1514 return(ret); 1515 } 1516 1517 1518 static int 1519 fpassists(struct counters *cpu, int pos) 1520 { 1521 /* 16 - FP_ASSIST.ANY/INST_RETIRED.ANY_P */ 1522 int ret; 1523 struct counters *fp; 1524 struct counters *inst; 1525 double un, fpd, res; 1526 1527 inst = find_counter(cpu, "INST_RETIRED.ANY_P"); 1528 fp = find_counter(cpu, "FP_ASSIST.ANY"); 1529 if (pos != -1) { 1530 fpd = fp->vals[pos] * 1.0; 1531 un = inst->vals[pos] * 1.0; 1532 } else { 1533 fpd = fp->sum * 1.0; 1534 un = inst->sum * 1.0; 1535 } 1536 res = fpd/un; 1537 ret = printf("%1.3f", res); 1538 return(ret); 1539 } 1540 1541 static int 1542 otherassistavx(struct counters *cpu, int pos) 1543 { 1544 /* 17 - (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P thresh .1*/ 1545 int ret; 1546 struct counters *oth; 1547 struct counters *unhalt; 1548 double un, ot, con, res; 1549 1550 con = 75.0; 1551 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1552 oth = find_counter(cpu, "OTHER_ASSISTS.AVX_TO_SSE"); 1553 if (pos != -1) { 1554 ot = oth->vals[pos] * 1.0; 1555 un = unhalt->vals[pos] * 1.0; 1556 } else { 1557 ot = oth->sum * 1.0; 1558 un = unhalt->sum * 1.0; 1559 } 1560 res = (ot * con)/un; 1561 ret = printf("%1.3f", res); 1562 return(ret); 1563 } 1564 1565 static int 1566 otherassistsse(struct counters *cpu, int pos) 1567 { 1568 1569 int ret; 1570 struct counters *oth; 1571 struct counters *unhalt; 1572 double un, ot, con, res; 1573 1574 /* 18 (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P thresh .1*/ 1575 con = 75.0; 1576 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1577 oth = find_counter(cpu, "OTHER_ASSISTS.SSE_TO_AVX"); 1578 if (pos != -1) { 1579 ot = oth->vals[pos] * 1.0; 1580 un = unhalt->vals[pos] * 1.0; 1581 } else { 1582 ot = oth->sum * 1.0; 1583 un = unhalt->sum * 1.0; 1584 } 1585 res = (ot * con)/un; 1586 ret = printf("%1.3f", res); 1587 return(ret); 1588 } 1589 1590 static int 1591 efficiency1(struct counters *cpu, int pos) 1592 { 1593 1594 int ret; 1595 struct counters *uops; 1596 struct counters *unhalt; 1597 double un, ot, con, res; 1598 1599 /* 19 (UOPS_RETIRED.RETIRE_SLOTS/(4*CPU_CLK_UNHALTED.THREAD_P) look if thresh < .9*/ 1600 con = 4.0; 1601 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1602 uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS"); 1603 if (pos != -1) { 1604 ot = uops->vals[pos] * 1.0; 1605 un = unhalt->vals[pos] * 1.0; 1606 } else { 1607 ot = uops->sum * 1.0; 1608 un = unhalt->sum * 1.0; 1609 } 1610 res = ot/(con * un); 1611 ret = printf("%1.3f", res); 1612 return(ret); 1613 } 1614 1615 static int 1616 efficiency2(struct counters *cpu, int pos) 1617 { 1618 1619 int ret; 1620 struct counters *uops; 1621 struct counters *unhalt; 1622 double un, ot, res; 1623 1624 /* 20 - CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P good if > 1. (comp factor)*/ 1625 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1626 uops = find_counter(cpu, "INST_RETIRED.ANY_P"); 1627 if (pos != -1) { 1628 ot = uops->vals[pos] * 1.0; 1629 un = unhalt->vals[pos] * 1.0; 1630 } else { 1631 ot = uops->sum * 1.0; 1632 un = unhalt->sum * 1.0; 1633 } 1634 res = un/ot; 1635 ret = printf("%1.3f", res); 1636 return(ret); 1637 } 1638 1639 #define SANDY_BRIDGE_COUNT 20 1640 static struct cpu_entry sandy_bridge[SANDY_BRIDGE_COUNT] = { 1641 /*01*/ { "allocstall1", "thresh > .05", 1642 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW -w 1", 1643 allocstall1, 2 }, 1644 /* -- not defined for SB right (partial-rat_stalls) 02*/ 1645 { "allocstall2", "thresh > .05", 1646 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP -w 1", 1647 allocstall2, 2 }, 1648 /*03*/ { "br_miss", "thresh >= .2", 1649 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1", 1650 br_mispredict, 2 }, 1651 /*04*/ { "splitload", "thresh >= .1", 1652 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1", 1653 splitload_sb, 2 }, 1654 /* 05*/ { "splitstore", "thresh >= .01", 1655 "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1", 1656 splitstore_sb, 2 }, 1657 /*06*/ { "contested", "thresh >= .05", 1658 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1659 contested, 2 }, 1660 /*07*/ { "blockstorefwd", "thresh >= .05", 1661 "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1662 blockstoreforward, 2 }, 1663 /*08*/ { "cache2", "thresh >= .2", 1664 "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1665 cache2, 4 }, 1666 /*09*/ { "cache1", "thresh >= .2", 1667 "pmcstat -s MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1668 cache1, 2 }, 1669 /*10*/ { "dtlbmissload", "thresh >= .1", 1670 "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1671 dtlb_missload, 3 }, 1672 /*11*/ { "dtlbmissstore", "thresh >= .05", 1673 "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1674 dtlb_missstore, 3 }, 1675 /*12*/ { "frontendstall", "thresh >= .15", 1676 "pmcstat -s IDQ_UOPS_NOT_DELIVERED.CORE -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1677 frontendstall, 2 }, 1678 /*13*/ { "clears", "thresh >= .02", 1679 "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1680 clears, 4 }, 1681 /*14*/ { "microassist", "thresh >= .05", 1682 "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1683 microassist, 2 }, 1684 /*15*/ { "aliasing_4k", "thresh >= .1", 1685 "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1686 aliasing, 2 }, 1687 /*16*/ { "fpassist", "look for a excessive value", 1688 "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", 1689 fpassists, 2 }, 1690 /*17*/ { "otherassistavx", "look for a excessive value", 1691 "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1692 otherassistavx, 2}, 1693 /*18*/ { "otherassistsse", "look for a excessive value", 1694 "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1695 otherassistsse, 2 }, 1696 /*19*/ { "eff1", "thresh < .9", 1697 "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1698 efficiency1, 2 }, 1699 /*20*/ { "eff2", "thresh > 1.0", 1700 "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1701 efficiency2, 2 }, 1702 }; 1703 1704 1705 #define IVY_BRIDGE_COUNT 21 1706 static struct cpu_entry ivy_bridge[IVY_BRIDGE_COUNT] = { 1707 /*1*/ { "eff1", "thresh < .75", 1708 "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1709 efficiency1, 2 }, 1710 /*2*/ { "eff2", "thresh > 1.0", 1711 "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1712 efficiency2, 2 }, 1713 /*3*/ { "itlbmiss", "thresh > .05", 1714 "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1715 itlb_miss, 2 }, 1716 /*4*/ { "icachemiss", "thresh > .05", 1717 "pmcstat -s ICACHE.IFETCH_STALL -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1718 icache_miss, 3 }, 1719 /*5*/ { "lcpstall", "thresh > .05", 1720 "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1721 lcp_stall, 2 }, 1722 /*6*/ { "cache1", "thresh >= .2", 1723 "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1724 cache1ib, 2 }, 1725 /*7*/ { "cache2", "thresh >= .2", 1726 "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1727 cache2ib, 2 }, 1728 /*8*/ { "contested", "thresh >= .05", 1729 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1730 contested, 2 }, 1731 /*9*/ { "datashare", "thresh >= .05", 1732 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1733 datasharing, 2 }, 1734 /*10*/ { "blockstorefwd", "thresh >= .05", 1735 "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1736 blockstoreforward, 2 }, 1737 /*11*/ { "splitload", "thresh >= .1", 1738 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1", 1739 splitloadib, 4 }, 1740 /*12*/ { "splitstore", "thresh >= .01", 1741 "pmcstat -s MEM_UOPS_RETIRED.SPLIT_STORES -s MEM_UOPS_RETIRED.ALL_STORES -w 1", 1742 splitstore, 2 }, 1743 /*13*/ { "aliasing_4k", "thresh >= .1", 1744 "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1745 aliasing, 2 }, 1746 /*14*/ { "dtlbmissload", "thresh >= .1", 1747 "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1748 dtlb_missload , 3}, 1749 /*15*/ { "dtlbmissstore", "thresh >= .05", 1750 "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1751 dtlb_missstore, 3 }, 1752 /*16*/ { "br_miss", "thresh >= .2", 1753 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1", 1754 br_mispredictib, 8 }, 1755 /*17*/ { "clears", "thresh >= .02", 1756 "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1757 clears, 4 }, 1758 /*18*/ { "microassist", "thresh >= .05", 1759 "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1760 microassist, 2 }, 1761 /*19*/ { "fpassist", "look for a excessive value", 1762 "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", 1763 fpassists, 2 }, 1764 /*20*/ { "otherassistavx", "look for a excessive value", 1765 "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1766 otherassistavx , 2}, 1767 /*21*/ { "otherassistsse", "look for a excessive value", 1768 "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1769 otherassistsse, 2 }, 1770 }; 1771 1772 #define HASWELL_COUNT 20 1773 static struct cpu_entry haswell[HASWELL_COUNT] = { 1774 /*1*/ { "eff1", "thresh < .75", 1775 "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1776 efficiency1, 2 }, 1777 /*2*/ { "eff2", "thresh > 1.0", 1778 "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1779 efficiency2, 2 }, 1780 /*3*/ { "itlbmiss", "thresh > .05", 1781 "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1782 itlb_miss, 2 }, 1783 /*4*/ { "icachemiss", "thresh > .05", 1784 "pmcstat -s ICACHE.MISSES -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1785 icache_miss_has, 2 }, 1786 /*5*/ { "lcpstall", "thresh > .05", 1787 "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1788 lcp_stall, 2 }, 1789 /*6*/ { "cache1", "thresh >= .2", 1790 "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1791 cache1ib, 2 }, 1792 /*7*/ { "cache2", "thresh >= .2", 1793 "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1794 cache2has, 4 }, 1795 /*8*/ { "contested", "thresh >= .05", 1796 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1797 contested_has, 2 }, 1798 /*9*/ { "datashare", "thresh >= .05", 1799 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1800 datasharing_has, 2 }, 1801 /*10*/ { "blockstorefwd", "thresh >= .05", 1802 "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1803 blockstoreforward, 2 }, 1804 /*11*/ { "splitload", "thresh >= .1", 1805 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOPS_RETIRED.SPLIT_LOADS -w 1", 1806 splitload , 2}, 1807 /*12*/ { "splitstore", "thresh >= .01", 1808 "pmcstat -s MEM_UOPS_RETIRED.SPLIT_STORES -s MEM_UOPS_RETIRED.ALL_STORES -w 1", 1809 splitstore, 2 }, 1810 /*13*/ { "aliasing_4k", "thresh >= .1", 1811 "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1812 aliasing, 2 }, 1813 /*14*/ { "dtlbmissload", "thresh >= .1", 1814 "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1815 dtlb_missload, 3 }, 1816 /*15*/ { "br_miss", "thresh >= .2", 1817 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1", 1818 br_mispredict, 2 }, 1819 /*16*/ { "clears", "thresh >= .02", 1820 "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1821 clears, 4 }, 1822 /*17*/ { "microassist", "thresh >= .05", 1823 "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1824 microassist, 2 }, 1825 /*18*/ { "fpassist", "look for a excessive value", 1826 "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", 1827 fpassists, 2 }, 1828 /*19*/ { "otherassistavx", "look for a excessive value", 1829 "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1830 otherassistavx, 2 }, 1831 /*20*/ { "otherassistsse", "look for a excessive value", 1832 "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1833 otherassistsse, 2 }, 1834 }; 1835 1836 1837 static void 1838 explain_name_broad(const char *name) 1839 { 1840 const char *mythresh; 1841 if (strcmp(name, "eff1") == 0) { 1842 printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n"); 1843 mythresh = "thresh < .75"; 1844 } else if (strcmp(name, "eff2") == 0) { 1845 printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n"); 1846 mythresh = "thresh > 1.0"; 1847 } else if (strcmp(name, "itlbmiss") == 0) { 1848 printf("Examine (7 * ITLB_MISSES_STLB_HIT_4K + ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n"); 1849 mythresh = "thresh > .05"; 1850 } else if (strcmp(name, "icachemiss") == 0) { 1851 printf("Examine ( 36.0 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P ??? may not be right \n"); 1852 mythresh = "thresh > .05"; 1853 } else if (strcmp(name, "lcpstall") == 0) { 1854 printf("Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n"); 1855 mythresh = "thresh > .05"; 1856 } else if (strcmp(name, "cache1") == 0) { 1857 printf("Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n"); 1858 mythresh = "thresh >= .1"; 1859 } else if (strcmp(name, "cache2") == 0) { 1860 printf("Examine (36.0 * MEM_LOAD_UOPS_RETIRED.L3_HIT / CPU_CLK_UNHALTED.THREAD_P)\n"); 1861 mythresh = "thresh >= .2"; 1862 } else if (strcmp(name, "contested") == 0) { 1863 printf("Examine ((MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS)/ CPU_CLK_UNHALTED.THREAD_P\n"); 1864 mythresh = "thresh >= .05"; 1865 } else if (strcmp(name, "datashare") == 0) { 1866 printf("Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n"); 1867 mythresh = "thresh > .05"; 1868 } else if (strcmp(name, "blockstorefwd") == 0) { 1869 printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n"); 1870 mythresh = "thresh >= .05"; 1871 } else if (strcmp(name, "aliasing_4k") == 0) { 1872 printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 7) / CPU_CLK_UNHALTED.THREAD_P\n"); 1873 mythresh = "thresh >= .1"; 1874 } else if (strcmp(name, "dtlbmissload") == 0) { 1875 printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"); 1876 printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); 1877 mythresh = "thresh >= .1"; 1878 1879 } else if (strcmp(name, "br_miss") == 0) { 1880 printf("Examine BR_MISP_RETIRED.ALL_BRANCHS_PS / (BR_MISP_RETIED.ALL_BRANCHES_PS + MACHINE_CLEARS.COUNT) *\n"); 1881 printf(" (UOPS_ISSUEDF.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) /\n"); 1882 printf("CPU_CLK_UNHALTED.THREAD * 4)\n"); 1883 mythresh = "thresh >= .2"; 1884 } else if (strcmp(name, "clears") == 0) { 1885 printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"); 1886 printf(" MACHINE_CLEARS.SMC + \n"); 1887 printf(" MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n"); 1888 mythresh = "thresh >= .02"; 1889 } else if (strcmp(name, "fpassist") == 0) { 1890 printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n"); 1891 mythresh = "look for a excessive value"; 1892 } else if (strcmp(name, "otherassistavx") == 0) { 1893 printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 1894 mythresh = "look for a excessive value"; 1895 } else if (strcmp(name, "microassist") == 0) { 1896 printf("Examine (UOPS_RETIRED.RETIRE_SLOTS/UOPS_ISSUED.ANY) * (IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"); 1897 printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n"); 1898 mythresh = "thresh >= .05"; 1899 } else { 1900 printf("Unknown name:%s\n", name); 1901 mythresh = "unknown entry"; 1902 } 1903 printf("If the value printed is %s we may have the ability to improve performance\n", mythresh); 1904 } 1905 1906 1907 #define BROADWELL_COUNT 17 1908 static struct cpu_entry broadwell[BROADWELL_COUNT] = { 1909 /*1*/ { "eff1", "thresh < .75", 1910 "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1911 efficiency1, 2 }, 1912 /*2*/ { "eff2", "thresh > 1.0", 1913 "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1914 efficiency2, 2 }, 1915 /*3*/ { "itlbmiss", "thresh > .05", 1916 "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -s ITLB_MISSES.STLB_HIT_4K -w 1", 1917 itlb_miss_broad, 3 }, 1918 /*4*/ { "icachemiss", "thresh > .05", 1919 "pmcstat -s ICACHE.MISSES -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1920 icache_miss_has, 2 }, 1921 /*5*/ { "lcpstall", "thresh > .05", 1922 "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1923 lcp_stall, 2 }, 1924 /*6*/ { "cache1", "thresh >= .1", 1925 "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1926 cache1broad, 2 }, 1927 /*7*/ { "cache2", "thresh >= .2", 1928 "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1929 cache2broad, 2 }, 1930 /*8*/ { "contested", "thresh >= .05", 1931 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS -w 1", 1932 contestedbroad, 2 }, 1933 /*9*/ { "datashare", "thresh >= .05", 1934 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1935 datasharing_has, 2 }, 1936 /*10*/ { "blockstorefwd", "thresh >= .05", 1937 "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1938 blockstoreforward, 2 }, 1939 /*11*/ { "aliasing_4k", "thresh >= .1", 1940 "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1941 aliasing_broad, 2 }, 1942 /*12*/ { "dtlbmissload", "thresh >= .1", 1943 "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT_4K -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1944 dtlb_missload, 3 }, 1945 /*13*/ { "br_miss", "thresh >= .2", 1946 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.CYCLES -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1", 1947 br_mispredict_broad, 7 }, 1948 /*14*/ { "clears", "thresh >= .02", 1949 "pmcstat -s MACHINE_CLEARS.CYCLES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1950 clears_broad, 5 }, 1951 /*15*/ { "fpassist", "look for a excessive value", 1952 "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", 1953 fpassists, 2 }, 1954 /*16*/ { "otherassistavx", "look for a excessive value", 1955 "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1956 otherassistavx, 2 }, 1957 /*17*/ { "microassist", "thresh >= .2", 1958 "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -w 1", 1959 microassist_broad, 4 }, 1960 }; 1961 1962 1963 static void 1964 set_sandybridge(void) 1965 { 1966 strcpy(the_cpu.cputype, "SandyBridge PMC"); 1967 the_cpu.number = SANDY_BRIDGE_COUNT; 1968 the_cpu.ents = sandy_bridge; 1969 the_cpu.explain = explain_name_sb; 1970 } 1971 1972 static void 1973 set_ivybridge(void) 1974 { 1975 strcpy(the_cpu.cputype, "IvyBridge PMC"); 1976 the_cpu.number = IVY_BRIDGE_COUNT; 1977 the_cpu.ents = ivy_bridge; 1978 the_cpu.explain = explain_name_ib; 1979 } 1980 1981 1982 static void 1983 set_haswell(void) 1984 { 1985 strcpy(the_cpu.cputype, "HASWELL PMC"); 1986 the_cpu.number = HASWELL_COUNT; 1987 the_cpu.ents = haswell; 1988 the_cpu.explain = explain_name_has; 1989 } 1990 1991 1992 static void 1993 set_broadwell(void) 1994 { 1995 strcpy(the_cpu.cputype, "HASWELL PMC"); 1996 the_cpu.number = BROADWELL_COUNT; 1997 the_cpu.ents = broadwell; 1998 the_cpu.explain = explain_name_broad; 1999 } 2000 2001 2002 static int 2003 set_expression(const char *name) 2004 { 2005 int found = 0, i; 2006 for(i=0 ; i< the_cpu.number; i++) { 2007 if (strcmp(name, the_cpu.ents[i].name) == 0) { 2008 found = 1; 2009 expression = the_cpu.ents[i].func; 2010 command = the_cpu.ents[i].command; 2011 threshold = the_cpu.ents[i].thresh; 2012 if (the_cpu.ents[i].counters_required > max_pmc_counters) { 2013 printf("Test %s requires that the CPU have %d counters and this CPU has only %d\n", 2014 the_cpu.ents[i].name, 2015 the_cpu.ents[i].counters_required, max_pmc_counters); 2016 printf("Sorry this test can not be run\n"); 2017 if (run_all == 0) { 2018 exit(-1); 2019 } else { 2020 return(-1); 2021 } 2022 } 2023 break; 2024 } 2025 } 2026 if (!found) { 2027 printf("For CPU type %s we have no expression:%s\n", 2028 the_cpu.cputype, name); 2029 exit(-1); 2030 } 2031 return(0); 2032 } 2033 2034 2035 2036 2037 2038 static int 2039 validate_expression(char *name) 2040 { 2041 int i, found; 2042 2043 found = 0; 2044 for(i=0 ; i< the_cpu.number; i++) { 2045 if (strcmp(name, the_cpu.ents[i].name) == 0) { 2046 found = 1; 2047 break; 2048 } 2049 } 2050 if (!found) { 2051 return(-1); 2052 } 2053 return (0); 2054 } 2055 2056 static void 2057 do_expression(struct counters *cpu, int pos) 2058 { 2059 if (expression == NULL) 2060 return; 2061 (*expression)(cpu, pos); 2062 } 2063 2064 static void 2065 process_header(int idx, char *p) 2066 { 2067 struct counters *up; 2068 int i, len, nlen; 2069 /* 2070 * Given header element idx, at p in 2071 * form 's/NN/nameof' 2072 * process the entry to pull out the name and 2073 * the CPU number. 2074 */ 2075 if (strncmp(p, "s/", 2)) { 2076 printf("Check -- invalid header no s/ in %s\n", 2077 p); 2078 return; 2079 } 2080 up = &cnts[idx]; 2081 up->cpu = strtol(&p[2], NULL, 10); 2082 len = strlen(p); 2083 for (i=2; i<len; i++) { 2084 if (p[i] == '/') { 2085 nlen = strlen(&p[(i+1)]); 2086 if (nlen < (MAX_NLEN-1)) { 2087 strcpy(up->counter_name, &p[(i+1)]); 2088 } else { 2089 strncpy(up->counter_name, &p[(i+1)], (MAX_NLEN-1)); 2090 } 2091 } 2092 } 2093 } 2094 2095 static void 2096 build_counters_from_header(FILE *io) 2097 { 2098 char buffer[8192], *p; 2099 int i, len, cnt; 2100 size_t mlen; 2101 2102 /* We have a new start, lets 2103 * setup our headers and cpus. 2104 */ 2105 if (fgets(buffer, sizeof(buffer), io) == NULL) { 2106 printf("First line can't be read from file err:%d\n", errno); 2107 return; 2108 } 2109 /* 2110 * Ok output is an array of counters. Once 2111 * we start to read the values in we must 2112 * put them in there slot to match there CPU and 2113 * counter being updated. We create a mass array 2114 * of the counters, filling in the CPU and 2115 * counter name. 2116 */ 2117 /* How many do we get? */ 2118 len = strlen(buffer); 2119 for (i=0, cnt=0; i<len; i++) { 2120 if (strncmp(&buffer[i], "s/", 2) == 0) { 2121 cnt++; 2122 for(;i<len;i++) { 2123 if (buffer[i] == ' ') 2124 break; 2125 } 2126 } 2127 } 2128 mlen = sizeof(struct counters) * cnt; 2129 cnts = malloc(mlen); 2130 ncnts = cnt; 2131 if (cnts == NULL) { 2132 printf("No memory err:%d\n", errno); 2133 return; 2134 } 2135 memset(cnts, 0, mlen); 2136 for (i=0, cnt=0; i<len; i++) { 2137 if (strncmp(&buffer[i], "s/", 2) == 0) { 2138 p = &buffer[i]; 2139 for(;i<len;i++) { 2140 if (buffer[i] == ' ') { 2141 buffer[i] = 0; 2142 break; 2143 } 2144 } 2145 process_header(cnt, p); 2146 cnt++; 2147 } 2148 } 2149 if (verbose) 2150 printf("We have %d entries\n", cnt); 2151 } 2152 extern int max_to_collect; 2153 int max_to_collect = MAX_COUNTER_SLOTS; 2154 2155 static int 2156 read_a_line(FILE *io) 2157 { 2158 char buffer[8192], *p, *stop; 2159 int pos, i; 2160 2161 if (fgets(buffer, sizeof(buffer), io) == NULL) { 2162 return(0); 2163 } 2164 p = buffer; 2165 for (i=0; i<ncnts; i++) { 2166 pos = cnts[i].pos; 2167 cnts[i].vals[pos] = strtol(p, &stop, 0); 2168 cnts[i].pos++; 2169 cnts[i].sum += cnts[i].vals[pos]; 2170 p = stop; 2171 } 2172 return (1); 2173 } 2174 2175 extern int cpu_count_out; 2176 int cpu_count_out=0; 2177 2178 static void 2179 print_header(void) 2180 { 2181 int i, cnt, printed_cnt; 2182 2183 printf("*********************************\n"); 2184 for(i=0, cnt=0; i<MAX_CPU; i++) { 2185 if (glob_cpu[i]) { 2186 cnt++; 2187 } 2188 } 2189 cpu_count_out = cnt; 2190 for(i=0, printed_cnt=0; i<MAX_CPU; i++) { 2191 if (glob_cpu[i]) { 2192 printf("CPU%d", i); 2193 printed_cnt++; 2194 } 2195 if (printed_cnt == cnt) { 2196 printf("\n"); 2197 break; 2198 } else { 2199 printf("\t"); 2200 } 2201 } 2202 } 2203 2204 static void 2205 lace_cpus_together(void) 2206 { 2207 int i, j, lace_cpu; 2208 struct counters *cpat, *at; 2209 2210 for(i=0; i<ncnts; i++) { 2211 cpat = &cnts[i]; 2212 if (cpat->next_cpu) { 2213 /* Already laced in */ 2214 continue; 2215 } 2216 lace_cpu = cpat->cpu; 2217 if (lace_cpu >= MAX_CPU) { 2218 printf("CPU %d to big\n", lace_cpu); 2219 continue; 2220 } 2221 if (glob_cpu[lace_cpu] == NULL) { 2222 glob_cpu[lace_cpu] = cpat; 2223 } else { 2224 /* Already processed this cpu */ 2225 continue; 2226 } 2227 /* Ok look forward for cpu->cpu and link in */ 2228 for(j=(i+1); j<ncnts; j++) { 2229 at = &cnts[j]; 2230 if (at->next_cpu) { 2231 continue; 2232 } 2233 if (at->cpu == lace_cpu) { 2234 /* Found one */ 2235 cpat->next_cpu = at; 2236 cpat = at; 2237 } 2238 } 2239 } 2240 } 2241 2242 2243 static void 2244 process_file(char *filename) 2245 { 2246 FILE *io; 2247 int i; 2248 int line_at, not_done; 2249 pid_t pid_of_command=0; 2250 2251 if (filename == NULL) { 2252 io = my_popen(command, "r", &pid_of_command); 2253 } else { 2254 io = fopen(filename, "r"); 2255 if (io == NULL) { 2256 printf("Can't process file %s err:%d\n", 2257 filename, errno); 2258 return; 2259 } 2260 } 2261 build_counters_from_header(io); 2262 if (cnts == NULL) { 2263 /* Nothing we can do */ 2264 printf("Nothing to do -- no counters built\n"); 2265 if (io) { 2266 fclose(io); 2267 } 2268 return; 2269 } 2270 lace_cpus_together(); 2271 print_header(); 2272 if (verbose) { 2273 for (i=0; i<ncnts; i++) { 2274 printf("Counter:%s cpu:%d index:%d\n", 2275 cnts[i].counter_name, 2276 cnts[i].cpu, i); 2277 } 2278 } 2279 line_at = 0; 2280 not_done = 1; 2281 while(not_done) { 2282 if (read_a_line(io)) { 2283 line_at++; 2284 } else { 2285 break; 2286 } 2287 if (line_at >= max_to_collect) { 2288 not_done = 0; 2289 } 2290 if (filename == NULL) { 2291 int cnt; 2292 /* For the ones we dynamically open we print now */ 2293 for(i=0, cnt=0; i<MAX_CPU; i++) { 2294 do_expression(glob_cpu[i], (line_at-1)); 2295 cnt++; 2296 if (cnt == cpu_count_out) { 2297 printf("\n"); 2298 break; 2299 } else { 2300 printf("\t"); 2301 } 2302 } 2303 } 2304 } 2305 if (filename) { 2306 fclose(io); 2307 } else { 2308 my_pclose(io, pid_of_command); 2309 } 2310 } 2311 #if defined(__amd64__) 2312 #define cpuid(in,a,b,c,d)\ 2313 asm("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (in)); 2314 2315 static __inline void 2316 do_cpuid(u_int ax, u_int cx, u_int *p) 2317 { 2318 __asm __volatile("cpuid" 2319 : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3]) 2320 : "0" (ax), "c" (cx) ); 2321 } 2322 2323 #else 2324 #define cpuid(in, a, b, c, d) 2325 static __inline void 2326 do_cpuid(u_int ax, u_int cx, u_int *p) 2327 { 2328 } 2329 2330 #endif 2331 2332 static void 2333 get_cpuid_set(void) 2334 { 2335 unsigned long eax, ebx, ecx, edx; 2336 int model; 2337 pid_t pid_of_command=0; 2338 size_t sz, len; 2339 FILE *io; 2340 char linebuf[1024], *str; 2341 u_int reg[4]; 2342 2343 eax = ebx = ecx = edx = 0; 2344 2345 cpuid(0, eax, ebx, ecx, edx); 2346 if (ebx == 0x68747541) { 2347 printf("AMD processors are not supported by this program\n"); 2348 printf("Sorry\n"); 2349 exit(0); 2350 } else if (ebx == 0x6972794) { 2351 printf("Cyrix processors are not supported by this program\n"); 2352 printf("Sorry\n"); 2353 exit(0); 2354 } else if (ebx == 0x756e6547) { 2355 printf("Genuine Intel\n"); 2356 } else { 2357 printf("Unknown processor type 0x%lx Only Intel AMD64 types are supported by this routine!\n", ebx); 2358 exit(0); 2359 } 2360 cpuid(1, eax, ebx, ecx, edx); 2361 model = (((eax & 0xF0000) >> 12) | ((eax & 0xF0) >> 4)); 2362 printf("CPU model is 0x%x id:0x%lx\n", model, eax); 2363 switch (eax & 0xF00) { 2364 case 0x500: /* Pentium family processors */ 2365 printf("Intel Pentium P5\n"); 2366 goto not_supported; 2367 break; 2368 case 0x600: /* Pentium Pro, Celeron, Pentium II & III */ 2369 switch (model) { 2370 case 0x1: 2371 printf("Intel Pentium P6\n"); 2372 goto not_supported; 2373 break; 2374 case 0x3: 2375 case 0x5: 2376 printf("Intel PII\n"); 2377 goto not_supported; 2378 break; 2379 case 0x6: case 0x16: 2380 printf("Intel CL\n"); 2381 goto not_supported; 2382 break; 2383 case 0x7: case 0x8: case 0xA: case 0xB: 2384 printf("Intel PIII\n"); 2385 goto not_supported; 2386 break; 2387 case 0x9: case 0xD: 2388 printf("Intel PM\n"); 2389 goto not_supported; 2390 break; 2391 case 0xE: 2392 printf("Intel CORE\n"); 2393 goto not_supported; 2394 break; 2395 case 0xF: 2396 printf("Intel CORE2\n"); 2397 goto not_supported; 2398 break; 2399 case 0x17: 2400 printf("Intel CORE2EXTREME\n"); 2401 goto not_supported; 2402 break; 2403 case 0x1C: /* Per Intel document 320047-002. */ 2404 printf("Intel ATOM\n"); 2405 goto not_supported; 2406 break; 2407 case 0x1A: 2408 case 0x1E: /* 2409 * Per Intel document 253669-032 9/2009, 2410 * pages A-2 and A-57 2411 */ 2412 case 0x1F: /* 2413 * Per Intel document 253669-032 9/2009, 2414 * pages A-2 and A-57 2415 */ 2416 printf("Intel COREI7\n"); 2417 goto not_supported; 2418 break; 2419 case 0x2E: 2420 printf("Intel NEHALEM\n"); 2421 goto not_supported; 2422 break; 2423 case 0x25: /* Per Intel document 253669-033US 12/2009. */ 2424 case 0x2C: /* Per Intel document 253669-033US 12/2009. */ 2425 printf("Intel WESTMERE\n"); 2426 goto not_supported; 2427 break; 2428 case 0x2F: /* Westmere-EX, seen in wild */ 2429 printf("Intel WESTMERE\n"); 2430 goto not_supported; 2431 break; 2432 case 0x2A: /* Per Intel document 253669-039US 05/2011. */ 2433 printf("Intel SANDYBRIDGE\n"); 2434 set_sandybridge(); 2435 break; 2436 case 0x2D: /* Per Intel document 253669-044US 08/2012. */ 2437 printf("Intel SANDYBRIDGE_XEON\n"); 2438 set_sandybridge(); 2439 break; 2440 case 0x3A: /* Per Intel document 253669-043US 05/2012. */ 2441 printf("Intel IVYBRIDGE\n"); 2442 set_ivybridge(); 2443 break; 2444 case 0x3E: /* Per Intel document 325462-045US 01/2013. */ 2445 printf("Intel IVYBRIDGE_XEON\n"); 2446 set_ivybridge(); 2447 break; 2448 case 0x3F: /* Per Intel document 325462-045US 09/2014. */ 2449 printf("Intel HASWELL (Xeon)\n"); 2450 set_haswell(); 2451 break; 2452 case 0x3C: /* Per Intel document 325462-045US 01/2013. */ 2453 case 0x45: 2454 case 0x46: 2455 printf("Intel HASWELL\n"); 2456 set_haswell(); 2457 break; 2458 2459 case 0x4e: 2460 case 0x5e: 2461 printf("Intel SKY-LAKE\n"); 2462 goto not_supported; 2463 break; 2464 case 0x3D: 2465 case 0x47: 2466 printf("Intel BROADWELL\n"); 2467 set_broadwell(); 2468 break; 2469 case 0x4f: 2470 case 0x56: 2471 printf("Intel BROADWEL (Xeon)\n"); 2472 set_broadwell(); 2473 break; 2474 2475 case 0x4D: 2476 /* Per Intel document 330061-001 01/2014. */ 2477 printf("Intel ATOM_SILVERMONT\n"); 2478 goto not_supported; 2479 break; 2480 default: 2481 printf("Intel model 0x%x is not known -- sorry\n", 2482 model); 2483 goto not_supported; 2484 break; 2485 } 2486 break; 2487 case 0xF00: /* P4 */ 2488 printf("Intel unknown model %d\n", model); 2489 goto not_supported; 2490 break; 2491 } 2492 do_cpuid(0xa, 0, reg); 2493 max_pmc_counters = (reg[3] & 0x0000000f) + 1; 2494 printf("We have %d PMC counters to work with\n", max_pmc_counters); 2495 /* Ok lets load the list of all known PMC's */ 2496 io = my_popen("/usr/sbin/pmccontrol -L", "r", &pid_of_command); 2497 if (valid_pmcs == NULL) { 2498 /* Likely */ 2499 pmc_allocated_cnt = PMC_INITIAL_ALLOC; 2500 sz = sizeof(char *) * pmc_allocated_cnt; 2501 valid_pmcs = malloc(sz); 2502 if (valid_pmcs == NULL) { 2503 printf("No memory allocation fails at startup?\n"); 2504 exit(-1); 2505 } 2506 memset(valid_pmcs, 0, sz); 2507 } 2508 2509 while (fgets(linebuf, sizeof(linebuf), io) != NULL) { 2510 if (linebuf[0] != '\t') { 2511 /* sometimes headers ;-) */ 2512 continue; 2513 } 2514 len = strlen(linebuf); 2515 if (linebuf[(len-1)] == '\n') { 2516 /* Likely */ 2517 linebuf[(len-1)] = 0; 2518 } 2519 str = &linebuf[1]; 2520 len = strlen(str) + 1; 2521 valid_pmcs[valid_pmc_cnt] = malloc(len); 2522 if (valid_pmcs[valid_pmc_cnt] == NULL) { 2523 printf("No memory2 allocation fails at startup?\n"); 2524 exit(-1); 2525 } 2526 memset(valid_pmcs[valid_pmc_cnt], 0, len); 2527 strcpy(valid_pmcs[valid_pmc_cnt], str); 2528 valid_pmc_cnt++; 2529 if (valid_pmc_cnt >= pmc_allocated_cnt) { 2530 /* Got to expand -- unlikely */ 2531 char **more; 2532 2533 sz = sizeof(char *) * (pmc_allocated_cnt * 2); 2534 more = malloc(sz); 2535 if (more == NULL) { 2536 printf("No memory3 allocation fails at startup?\n"); 2537 exit(-1); 2538 } 2539 memset(more, 0, sz); 2540 memcpy(more, valid_pmcs, sz); 2541 pmc_allocated_cnt *= 2; 2542 free(valid_pmcs); 2543 valid_pmcs = more; 2544 } 2545 } 2546 my_pclose(io, pid_of_command); 2547 return; 2548 not_supported: 2549 printf("Not supported\n"); 2550 exit(-1); 2551 } 2552 2553 static void 2554 explain_all(void) 2555 { 2556 int i; 2557 printf("For CPU's of type %s the following expressions are available:\n",the_cpu.cputype); 2558 printf("-------------------------------------------------------------\n"); 2559 for(i=0; i<the_cpu.number; i++){ 2560 printf("For -e %s ", the_cpu.ents[i].name); 2561 (*the_cpu.explain)(the_cpu.ents[i].name); 2562 printf("----------------------------\n"); 2563 } 2564 } 2565 2566 static void 2567 test_for_a_pmc(const char *pmc, int out_so_far) 2568 { 2569 FILE *io; 2570 pid_t pid_of_command=0; 2571 char my_command[1024]; 2572 char line[1024]; 2573 char resp[1024]; 2574 int len, llen, i; 2575 2576 if (out_so_far < 50) { 2577 len = 50 - out_so_far; 2578 for(i=0; i<len; i++) { 2579 printf(" "); 2580 } 2581 } 2582 sprintf(my_command, "/usr/sbin/pmcstat -w .25 -c 0 -s %s", pmc); 2583 io = my_popen(my_command, "r", &pid_of_command); 2584 if (io == NULL) { 2585 printf("Failed -- popen fails\n"); 2586 return; 2587 } 2588 /* Setup what we expect */ 2589 len = sprintf(resp, "%s", pmc); 2590 if (fgets(line, sizeof(line), io) == NULL) { 2591 printf("Failed -- no output from pmstat\n"); 2592 goto out; 2593 } 2594 llen = strlen(line); 2595 if (line[(llen-1)] == '\n') { 2596 line[(llen-1)] = 0; 2597 llen--; 2598 } 2599 for(i=2; i<(llen-len); i++) { 2600 if (strncmp(&line[i], "ERROR", 5) == 0) { 2601 printf("Failed %s\n", line); 2602 goto out; 2603 } else if (strncmp(&line[i], resp, len) == 0) { 2604 int j, k; 2605 2606 if (fgets(line, sizeof(line), io) == NULL) { 2607 printf("Failed -- no second output from pmstat\n"); 2608 goto out; 2609 } 2610 len = strlen(line); 2611 for (j=0; j<len; j++) { 2612 if (line[j] == ' ') { 2613 j++; 2614 } else { 2615 break; 2616 } 2617 } 2618 printf("Pass"); 2619 len = strlen(&line[j]); 2620 if (len < 20) { 2621 for(k=0; k<(20-len); k++) { 2622 printf(" "); 2623 } 2624 } 2625 if (len) { 2626 printf("%s", &line[j]); 2627 } else { 2628 printf("\n"); 2629 } 2630 goto out; 2631 } 2632 } 2633 printf("Failed -- '%s' not '%s'\n", line, resp); 2634 out: 2635 my_pclose(io, pid_of_command); 2636 2637 } 2638 2639 static int 2640 add_it_to(char **vars, int cur_cnt, char *name) 2641 { 2642 int i; 2643 size_t len; 2644 for(i=0; i<cur_cnt; i++) { 2645 if (strcmp(vars[i], name) == 0) { 2646 /* Already have */ 2647 return(0); 2648 } 2649 } 2650 if (vars[cur_cnt] != NULL) { 2651 printf("Cur_cnt:%d filled with %s??\n", 2652 cur_cnt, vars[cur_cnt]); 2653 exit(-1); 2654 } 2655 /* Ok its new */ 2656 len = strlen(name) + 1; 2657 vars[cur_cnt] = malloc(len); 2658 if (vars[cur_cnt] == NULL) { 2659 printf("No memory %s\n", __FUNCTION__); 2660 exit(-1); 2661 } 2662 memset(vars[cur_cnt], 0, len); 2663 strcpy(vars[cur_cnt], name); 2664 return(1); 2665 } 2666 2667 static char * 2668 build_command_for_exp(struct expression *exp) 2669 { 2670 /* 2671 * Build the pmcstat command to handle 2672 * the passed in expression. 2673 * /usr/sbin/pmcstat -w 1 -s NNN -s QQQ 2674 * where NNN and QQQ represent the PMC's in the expression 2675 * uniquely.. 2676 */ 2677 char forming[1024]; 2678 int cnt_pmc, alloced_pmcs, i; 2679 struct expression *at; 2680 char **vars, *cmd; 2681 size_t mal; 2682 2683 alloced_pmcs = cnt_pmc = 0; 2684 /* first how many do we have */ 2685 at = exp; 2686 while (at) { 2687 if (at->type == TYPE_VALUE_PMC) { 2688 cnt_pmc++; 2689 } 2690 at = at->next; 2691 } 2692 if (cnt_pmc == 0) { 2693 printf("No PMC's in your expression -- nothing to do!!\n"); 2694 exit(0); 2695 } 2696 mal = cnt_pmc * sizeof(char *); 2697 vars = malloc(mal); 2698 if (vars == NULL) { 2699 printf("No memory\n"); 2700 exit(-1); 2701 } 2702 memset(vars, 0, mal); 2703 at = exp; 2704 while (at) { 2705 if (at->type == TYPE_VALUE_PMC) { 2706 if(add_it_to(vars, alloced_pmcs, at->name)) { 2707 alloced_pmcs++; 2708 } 2709 } 2710 at = at->next; 2711 } 2712 /* Now we have a unique list in vars so create our command */ 2713 mal = 23; /* "/usr/sbin/pmcstat -w 1" + \0 */ 2714 for(i=0; i<alloced_pmcs; i++) { 2715 mal += strlen(vars[i]) + 4; /* var + " -s " */ 2716 } 2717 cmd = malloc((mal+2)); 2718 if (cmd == NULL) { 2719 printf("%s out of mem\n", __FUNCTION__); 2720 exit(-1); 2721 } 2722 memset(cmd, 0, (mal+2)); 2723 strcpy(cmd, "/usr/sbin/pmcstat -w 1"); 2724 at = exp; 2725 for(i=0; i<alloced_pmcs; i++) { 2726 sprintf(forming, " -s %s", vars[i]); 2727 strcat(cmd, forming); 2728 free(vars[i]); 2729 vars[i] = NULL; 2730 } 2731 free(vars); 2732 return(cmd); 2733 } 2734 2735 static int 2736 user_expr(struct counters *cpu, int pos) 2737 { 2738 int ret; 2739 double res; 2740 struct counters *var; 2741 struct expression *at; 2742 2743 at = master_exp; 2744 while (at) { 2745 if (at->type == TYPE_VALUE_PMC) { 2746 var = find_counter(cpu, at->name); 2747 if (var == NULL) { 2748 printf("%s:Can't find counter %s?\n", __FUNCTION__, at->name); 2749 exit(-1); 2750 } 2751 if (pos != -1) { 2752 at->value = var->vals[pos] * 1.0; 2753 } else { 2754 at->value = var->sum * 1.0; 2755 } 2756 } 2757 at = at->next; 2758 } 2759 res = run_expr(master_exp, 1, NULL); 2760 ret = printf("%1.3f", res); 2761 return(ret); 2762 } 2763 2764 2765 static void 2766 set_manual_exp(struct expression *exp) 2767 { 2768 expression = user_expr; 2769 command = build_command_for_exp(exp); 2770 threshold = "User defined threshold"; 2771 } 2772 2773 static void 2774 run_tests(void) 2775 { 2776 int i, lenout; 2777 printf("Running tests on %d PMC's this may take some time\n", valid_pmc_cnt); 2778 printf("------------------------------------------------------------------------\n"); 2779 for(i=0; i<valid_pmc_cnt; i++) { 2780 lenout = printf("%s", valid_pmcs[i]); 2781 fflush(stdout); 2782 test_for_a_pmc(valid_pmcs[i], lenout); 2783 } 2784 } 2785 static void 2786 list_all(void) 2787 { 2788 int i, cnt, j; 2789 printf("PMC Abbreviation\n"); 2790 printf("--------------------------------------------------------------\n"); 2791 for(i=0; i<valid_pmc_cnt; i++) { 2792 cnt = printf("%s", valid_pmcs[i]); 2793 for(j=cnt; j<52; j++) { 2794 printf(" "); 2795 } 2796 printf("%%%d\n", i); 2797 } 2798 } 2799 2800 2801 int 2802 main(int argc, char **argv) 2803 { 2804 int i, j, cnt; 2805 char *filename=NULL; 2806 const char *name=NULL; 2807 int help_only = 0; 2808 int test_mode = 0; 2809 int test_at = 0; 2810 2811 get_cpuid_set(); 2812 memset(glob_cpu, 0, sizeof(glob_cpu)); 2813 while ((i = getopt(argc, argv, "ALHhvm:i:?e:TE:")) != -1) { 2814 switch (i) { 2815 case 'A': 2816 run_all = 1; 2817 break; 2818 case 'L': 2819 list_all(); 2820 return(0); 2821 case 'H': 2822 printf("**********************************\n"); 2823 explain_all(); 2824 printf("**********************************\n"); 2825 return(0); 2826 break; 2827 case 'T': 2828 test_mode = 1; 2829 break; 2830 case 'E': 2831 master_exp = parse_expression(optarg); 2832 if (master_exp) { 2833 set_manual_exp(master_exp); 2834 } 2835 break; 2836 case 'e': 2837 if (validate_expression(optarg)) { 2838 printf("Unknown expression %s\n", optarg); 2839 return(0); 2840 } 2841 name = optarg; 2842 set_expression(optarg); 2843 break; 2844 case 'm': 2845 max_to_collect = strtol(optarg, NULL, 0); 2846 if (max_to_collect > MAX_COUNTER_SLOTS) { 2847 /* You can't collect more than max in array */ 2848 max_to_collect = MAX_COUNTER_SLOTS; 2849 } 2850 break; 2851 case 'v': 2852 verbose++; 2853 break; 2854 case 'h': 2855 help_only = 1; 2856 break; 2857 case 'i': 2858 filename = optarg; 2859 break; 2860 case '?': 2861 default: 2862 use: 2863 printf("Use %s [ -i inputfile -v -m max_to_collect -e expr -E -h -? -H]\n", 2864 argv[0]); 2865 printf("-i inputfile -- use source as inputfile not stdin (if stdin collect)\n"); 2866 printf("-v -- verbose dump debug type things -- you don't want this\n"); 2867 printf("-m N -- maximum to collect is N measurments\n"); 2868 printf("-e expr-name -- Do expression expr-name\n"); 2869 printf("-E 'your expression' -- Do your expression\n"); 2870 printf("-h -- Don't do the expression I put in -e xxx just explain what it does and exit\n"); 2871 printf("-H -- Don't run anything, just explain all canned expressions\n"); 2872 printf("-T -- Test all PMC's defined by this processor\n"); 2873 printf("-A -- Run all canned tests\n"); 2874 return(0); 2875 break; 2876 }; 2877 } 2878 if ((run_all == 0) && (name == NULL) && (filename == NULL) && 2879 (test_mode == 0) && (master_exp == NULL)) { 2880 printf("Without setting an expression we cannot dynamically gather information\n"); 2881 printf("you must supply a filename (and you probably want verbosity)\n"); 2882 goto use; 2883 } 2884 if (run_all && max_to_collect > 10) { 2885 max_to_collect = 3; 2886 } 2887 if (test_mode) { 2888 run_tests(); 2889 return(0); 2890 } 2891 printf("*********************************\n"); 2892 if ((master_exp == NULL) && name) { 2893 (*the_cpu.explain)(name); 2894 } else if (master_exp) { 2895 printf("Examine your expression "); 2896 print_exp(master_exp); 2897 printf("User defined threshold\n"); 2898 } 2899 if (help_only) { 2900 return(0); 2901 } 2902 if (run_all) { 2903 more: 2904 name = the_cpu.ents[test_at].name; 2905 printf("***Test %s (threshold %s)****\n", name, the_cpu.ents[test_at].thresh); 2906 test_at++; 2907 if (set_expression(name) == -1) { 2908 if (test_at >= the_cpu.number) { 2909 goto done; 2910 } else 2911 goto more; 2912 } 2913 2914 } 2915 process_file(filename); 2916 if (verbose >= 2) { 2917 for (i=0; i<ncnts; i++) { 2918 printf("Counter:%s cpu:%d index:%d\n", 2919 cnts[i].counter_name, 2920 cnts[i].cpu, i); 2921 for(j=0; j<cnts[i].pos; j++) { 2922 printf(" val - %ld\n", (long int)cnts[i].vals[j]); 2923 } 2924 printf(" sum - %ld\n", (long int)cnts[i].sum); 2925 } 2926 } 2927 if (expression == NULL) { 2928 return(0); 2929 } 2930 if (max_to_collect > 1) { 2931 for(i=0, cnt=0; i<MAX_CPU; i++) { 2932 if (glob_cpu[i]) { 2933 do_expression(glob_cpu[i], -1); 2934 cnt++; 2935 if (cnt == cpu_count_out) { 2936 printf("\n"); 2937 break; 2938 } else { 2939 printf("\t"); 2940 } 2941 } 2942 } 2943 } 2944 if (run_all && (test_at < the_cpu.number)) { 2945 memset(glob_cpu, 0, sizeof(glob_cpu)); 2946 ncnts = 0; 2947 printf("*********************************\n"); 2948 goto more; 2949 } else if (run_all) { 2950 done: 2951 printf("*********************************\n"); 2952 } 2953 return(0); 2954 } 2955