1 /*- 2 * Copyright (c) 2014, 2015 Netflix Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer, 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 #include <sys/types.h> 29 #include <stdio.h> 30 #include <stdlib.h> 31 #include <unistd.h> 32 #include <string.h> 33 #include <strings.h> 34 #include <sys/errno.h> 35 #include <signal.h> 36 #include <sys/wait.h> 37 #include <getopt.h> 38 #include "eval_expr.h" 39 __FBSDID("$FreeBSD$"); 40 41 #define MAX_COUNTER_SLOTS 1024 42 #define MAX_NLEN 64 43 #define MAX_CPU 64 44 static int verbose = 0; 45 46 extern char **environ; 47 extern struct expression *master_exp; 48 struct expression *master_exp=NULL; 49 50 #define PMC_INITIAL_ALLOC 512 51 extern char **valid_pmcs; 52 char **valid_pmcs = NULL; 53 extern int valid_pmc_cnt; 54 int valid_pmc_cnt=0; 55 extern int pmc_allocated_cnt; 56 int pmc_allocated_cnt=0; 57 58 /* 59 * The following two varients on popen and pclose with 60 * the cavet that they get you the PID so that you 61 * can supply it to pclose so it can send a SIGTERM 62 * to the process. 63 */ 64 static FILE * 65 my_popen(const char *command, const char *dir, pid_t *p_pid) 66 { 67 FILE *io_out, *io_in; 68 int pdesin[2], pdesout[2]; 69 char *argv[4]; 70 pid_t pid; 71 char cmd[4]; 72 char cmd2[1024]; 73 char arg1[4]; 74 75 if ((strcmp(dir, "r") != 0) && 76 (strcmp(dir, "w") != 0)) { 77 errno = EINVAL; 78 return(NULL); 79 } 80 if (pipe(pdesin) < 0) 81 return (NULL); 82 83 if (pipe(pdesout) < 0) { 84 (void)close(pdesin[0]); 85 (void)close(pdesin[1]); 86 return (NULL); 87 } 88 strcpy(cmd, "sh"); 89 strcpy(arg1, "-c"); 90 strcpy(cmd2, command); 91 argv[0] = cmd; 92 argv[1] = arg1; 93 argv[2] = cmd2; 94 argv[3] = NULL; 95 96 switch (pid = fork()) { 97 case -1: /* Error. */ 98 (void)close(pdesin[0]); 99 (void)close(pdesin[1]); 100 (void)close(pdesout[0]); 101 (void)close(pdesout[1]); 102 return (NULL); 103 /* NOTREACHED */ 104 case 0: /* Child. */ 105 /* Close out un-used sides */ 106 (void)close(pdesin[1]); 107 (void)close(pdesout[0]); 108 /* Now prepare the stdin of the process */ 109 close(0); 110 (void)dup(pdesin[0]); 111 (void)close(pdesin[0]); 112 /* Now prepare the stdout of the process */ 113 close(1); 114 (void)dup(pdesout[1]); 115 /* And lets do stderr just in case */ 116 close(2); 117 (void)dup(pdesout[1]); 118 (void)close(pdesout[1]); 119 /* Now run it */ 120 execve("/bin/sh", argv, environ); 121 exit(127); 122 /* NOTREACHED */ 123 } 124 /* Parent; assume fdopen can't fail. */ 125 /* Store the pid */ 126 *p_pid = pid; 127 if (strcmp(dir, "r") != 0) { 128 io_out = fdopen(pdesin[1], "w"); 129 (void)close(pdesin[0]); 130 (void)close(pdesout[0]); 131 (void)close(pdesout[1]); 132 return(io_out); 133 } else { 134 /* Prepare the input stream */ 135 io_in = fdopen(pdesout[0], "r"); 136 (void)close(pdesout[1]); 137 (void)close(pdesin[0]); 138 (void)close(pdesin[1]); 139 return (io_in); 140 } 141 } 142 143 /* 144 * pclose -- 145 * Pclose returns -1 if stream is not associated with a `popened' command, 146 * if already `pclosed', or waitpid returns an error. 147 */ 148 static void 149 my_pclose(FILE *io, pid_t the_pid) 150 { 151 int pstat; 152 pid_t pid; 153 154 /* 155 * Find the appropriate file pointer and remove it from the list. 156 */ 157 (void)fclose(io); 158 /* Die if you are not dead! */ 159 kill(the_pid, SIGTERM); 160 do { 161 pid = wait4(the_pid, &pstat, 0, (struct rusage *)0); 162 } while (pid == -1 && errno == EINTR); 163 } 164 165 struct counters { 166 struct counters *next_cpu; 167 char counter_name[MAX_NLEN]; /* Name of counter */ 168 int cpu; /* CPU we are on */ 169 int pos; /* Index we are filling to. */ 170 uint64_t vals[MAX_COUNTER_SLOTS]; /* Last 64 entries */ 171 uint64_t sum; /* Summary of entries */ 172 }; 173 174 extern struct counters *glob_cpu[MAX_CPU]; 175 struct counters *glob_cpu[MAX_CPU]; 176 177 extern struct counters *cnts; 178 struct counters *cnts=NULL; 179 180 extern int ncnts; 181 int ncnts=0; 182 183 extern int (*expression)(struct counters *, int); 184 int (*expression)(struct counters *, int); 185 186 static const char *threshold=NULL; 187 static const char *command; 188 189 struct cpu_entry { 190 const char *name; 191 const char *thresh; 192 const char *command; 193 int (*func)(struct counters *, int); 194 }; 195 196 197 struct cpu_type { 198 char cputype[32]; 199 int number; 200 struct cpu_entry *ents; 201 void (*explain)(const char *name); 202 }; 203 extern struct cpu_type the_cpu; 204 struct cpu_type the_cpu; 205 206 static void 207 explain_name_sb(const char *name) 208 { 209 const char *mythresh; 210 if (strcmp(name, "allocstall1") == 0) { 211 printf("Examine PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW / CPU_CLK_UNHALTED.THREAD_P\n"); 212 mythresh = "thresh > .05"; 213 } else if (strcmp(name, "allocstall2") == 0) { 214 printf("Examine PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P\n"); 215 mythresh = "thresh > .05"; 216 } else if (strcmp(name, "br_miss") == 0) { 217 printf("Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P\n"); 218 mythresh = "thresh >= .2"; 219 } else if (strcmp(name, "splitload") == 0) { 220 printf("Examine MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); 221 mythresh = "thresh >= .1"; 222 } else if (strcmp(name, "splitstore") == 0) { 223 printf("Examine MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES\n"); 224 mythresh = "thresh >= .01"; 225 } else if (strcmp(name, "contested") == 0) { 226 printf("Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n"); 227 mythresh = "thresh >= .05"; 228 } else if (strcmp(name, "blockstorefwd") == 0) { 229 printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n"); 230 mythresh = "thresh >= .05"; 231 } else if (strcmp(name, "cache2") == 0) { 232 printf("Examine ((MEM_LOAD_RETIRED.L3_HIT * 26) + \n"); 233 printf(" (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) + \n"); 234 printf(" (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P\n"); 235 printf("**Note we have it labeled MEM_LOAD_UOPS_RETIRED.LLC_HIT not MEM_LOAD_RETIRED.L3_HIT\n"); 236 mythresh = "thresh >= .2"; 237 } else if (strcmp(name, "cache1") == 0) { 238 printf("Examine (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P\n"); 239 mythresh = "thresh >= .2"; 240 } else if (strcmp(name, "dtlbmissload") == 0) { 241 printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"); 242 printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); 243 mythresh = "thresh >= .1"; 244 } else if (strcmp(name, "frontendstall") == 0) { 245 printf("Examine IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4)\n"); 246 mythresh = "thresh >= .15"; 247 } else if (strcmp(name, "clears") == 0) { 248 printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"); 249 printf(" MACHINE_CLEARS.SMC + \n"); 250 printf(" MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n"); 251 mythresh = "thresh >= .02"; 252 } else if (strcmp(name, "microassist") == 0) { 253 printf("Examine IDQ.MS_CYCLES / (CPU_CLK_UNHALTED.THREAD_P * 4)\n"); 254 printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n"); 255 mythresh = "thresh >= .05"; 256 } else if (strcmp(name, "aliasing_4k") == 0) { 257 printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); 258 mythresh = "thresh >= .1"; 259 } else if (strcmp(name, "fpassist") == 0) { 260 printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n"); 261 mythresh = "look for a excessive value"; 262 } else if (strcmp(name, "otherassistavx") == 0) { 263 printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 264 mythresh = "look for a excessive value"; 265 } else if (strcmp(name, "otherassistsse") == 0) { 266 printf("Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 267 mythresh = "look for a excessive value"; 268 } else if (strcmp(name, "eff1") == 0) { 269 printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n"); 270 mythresh = "thresh < .9"; 271 } else if (strcmp(name, "eff2") == 0) { 272 printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n"); 273 mythresh = "thresh > 1.0"; 274 } else if (strcmp(name, "dtlbmissstore") == 0) { 275 printf("Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"); 276 printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); 277 mythresh = "thresh >= .05"; 278 } else { 279 printf("Unknown name:%s\n", name); 280 mythresh = "unknown entry"; 281 } 282 printf("If the value printed is %s we may have the ability to improve performance\n", mythresh); 283 } 284 285 static void 286 explain_name_ib(const char *name) 287 { 288 const char *mythresh; 289 if (strcmp(name, "br_miss") == 0) { 290 printf("Examine ((BR_MISP_RETIRED.ALL_BRANCHES /(BR_MISP_RETIRED.ALL_BRANCHES +\n"); 291 printf(" MACHINE_CLEAR.COUNT) * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES)\n"); 292 printf("/ (4 * CPU_CLK_UNHALTED.THREAD))))\n"); 293 mythresh = "thresh >= .2"; 294 } else if (strcmp(name, "eff1") == 0) { 295 printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n"); 296 mythresh = "thresh < .9"; 297 } else if (strcmp(name, "eff2") == 0) { 298 printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n"); 299 mythresh = "thresh > 1.0"; 300 } else if (strcmp(name, "cache1") == 0) { 301 printf("Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n"); 302 mythresh = "thresh >= .2"; 303 } else if (strcmp(name, "cache2") == 0) { 304 printf("Examine (MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P\n"); 305 mythresh = "thresh >= .2"; 306 } else if (strcmp(name, "itlbmiss") == 0) { 307 printf("Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n"); 308 mythresh = "thresh > .05"; 309 } else if (strcmp(name, "icachemiss") == 0) { 310 printf("Examine (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n"); 311 mythresh = "thresh > .05"; 312 } else if (strcmp(name, "lcpstall") == 0) { 313 printf("Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n"); 314 mythresh = "thresh > .05"; 315 } else if (strcmp(name, "datashare") == 0) { 316 printf("Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/CPU_CLK_UNHALTED.THREAD_P\n"); 317 mythresh = "thresh > .05"; 318 } else if (strcmp(name, "blockstorefwd") == 0) { 319 printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n"); 320 mythresh = "thresh >= .05"; 321 } else if (strcmp(name, "splitload") == 0) { 322 printf("Examine ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) *\n"); 323 printf(" LD_BLOCKS.NO_SR)/CPU_CLK_UNHALTED.THREAD_P\n"); 324 mythresh = "thresh >= .1"; 325 } else if (strcmp(name, "splitstore") == 0) { 326 printf("Examine MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES\n"); 327 mythresh = "thresh >= .01"; 328 } else if (strcmp(name, "aliasing_4k") == 0) { 329 printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); 330 mythresh = "thresh >= .1"; 331 } else if (strcmp(name, "dtlbmissload") == 0) { 332 printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"); 333 printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); 334 mythresh = "thresh >= .1"; 335 } else if (strcmp(name, "dtlbmissstore") == 0) { 336 printf("Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"); 337 printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); 338 mythresh = "thresh >= .05"; 339 } else if (strcmp(name, "contested") == 0) { 340 printf("Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n"); 341 mythresh = "thresh >= .05"; 342 } else if (strcmp(name, "clears") == 0) { 343 printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"); 344 printf(" MACHINE_CLEARS.SMC + \n"); 345 printf(" MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n"); 346 mythresh = "thresh >= .02"; 347 } else if (strcmp(name, "microassist") == 0) { 348 printf("Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"); 349 printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n"); 350 mythresh = "thresh >= .05"; 351 } else if (strcmp(name, "fpassist") == 0) { 352 printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n"); 353 mythresh = "look for a excessive value"; 354 } else if (strcmp(name, "otherassistavx") == 0) { 355 printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 356 mythresh = "look for a excessive value"; 357 } else if (strcmp(name, "otherassistsse") == 0) { 358 printf("Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 359 mythresh = "look for a excessive value"; 360 } else { 361 printf("Unknown name:%s\n", name); 362 mythresh = "unknown entry"; 363 } 364 printf("If the value printed is %s we may have the ability to improve performance\n", mythresh); 365 } 366 367 368 static void 369 explain_name_has(const char *name) 370 { 371 const char *mythresh; 372 if (strcmp(name, "eff1") == 0) { 373 printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n"); 374 mythresh = "thresh < .75"; 375 } else if (strcmp(name, "eff2") == 0) { 376 printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n"); 377 mythresh = "thresh > 1.0"; 378 } else if (strcmp(name, "itlbmiss") == 0) { 379 printf("Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n"); 380 mythresh = "thresh > .05"; 381 } else if (strcmp(name, "icachemiss") == 0) { 382 printf("Examine (36 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P\n"); 383 mythresh = "thresh > .05"; 384 } else if (strcmp(name, "lcpstall") == 0) { 385 printf("Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n"); 386 mythresh = "thresh > .05"; 387 } else if (strcmp(name, "cache1") == 0) { 388 printf("Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n"); 389 mythresh = "thresh >= .2"; 390 } else if (strcmp(name, "cache2") == 0) { 391 printf("Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \n"); 392 printf(" (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) + \n"); 393 printf(" (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))\n"); 394 printf(" / CPU_CLK_UNHALTED.THREAD_P\n"); 395 mythresh = "thresh >= .2"; 396 } else if (strcmp(name, "contested") == 0) { 397 printf("Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P\n"); 398 mythresh = "thresh >= .05"; 399 } else if (strcmp(name, "datashare") == 0) { 400 printf("Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n"); 401 mythresh = "thresh > .05"; 402 } else if (strcmp(name, "blockstorefwd") == 0) { 403 printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n"); 404 mythresh = "thresh >= .05"; 405 } else if (strcmp(name, "splitload") == 0) { 406 printf("Examine (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); 407 mythresh = "thresh >= .1"; 408 } else if (strcmp(name, "splitstore") == 0) { 409 printf("Examine MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES\n"); 410 mythresh = "thresh >= .01"; 411 } else if (strcmp(name, "aliasing_4k") == 0) { 412 printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); 413 mythresh = "thresh >= .1"; 414 } else if (strcmp(name, "dtlbmissload") == 0) { 415 printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"); 416 printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); 417 mythresh = "thresh >= .1"; 418 } else if (strcmp(name, "br_miss") == 0) { 419 printf("Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD\n"); 420 mythresh = "thresh >= .2"; 421 } else if (strcmp(name, "clears") == 0) { 422 printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"); 423 printf(" MACHINE_CLEARS.SMC + \n"); 424 printf(" MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n"); 425 mythresh = "thresh >= .02"; 426 } else if (strcmp(name, "microassist") == 0) { 427 printf("Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"); 428 printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n"); 429 mythresh = "thresh >= .05"; 430 } else if (strcmp(name, "fpassist") == 0) { 431 printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n"); 432 mythresh = "look for a excessive value"; 433 } else if (strcmp(name, "otherassistavx") == 0) { 434 printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 435 mythresh = "look for a excessive value"; 436 } else if (strcmp(name, "otherassistsse") == 0) { 437 printf("Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 438 mythresh = "look for a excessive value"; 439 } else { 440 printf("Unknown name:%s\n", name); 441 mythresh = "unknown entry"; 442 } 443 printf("If the value printed is %s we may have the ability to improve performance\n", mythresh); 444 } 445 446 447 static struct counters * 448 find_counter(struct counters *base, const char *name) 449 { 450 struct counters *at; 451 int len; 452 453 at = base; 454 len = strlen(name); 455 while(at) { 456 if (strncmp(at->counter_name, name, len) == 0) { 457 return(at); 458 } 459 at = at->next_cpu; 460 } 461 printf("Can't find counter %s\n", name); 462 printf("We have:\n"); 463 at = base; 464 while(at) { 465 printf("- %s\n", at->counter_name); 466 at = at->next_cpu; 467 } 468 exit(-1); 469 } 470 471 static int 472 allocstall1(struct counters *cpu, int pos) 473 { 474 /* 1 - PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW/CPU_CLK_UNHALTED.THREAD_P (thresh > .05)*/ 475 int ret; 476 struct counters *partial; 477 struct counters *unhalt; 478 double un, par, res; 479 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 480 partial = find_counter(cpu, "PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW"); 481 if (pos != -1) { 482 par = partial->vals[pos] * 1.0; 483 un = unhalt->vals[pos] * 1.0; 484 } else { 485 par = partial->sum * 1.0; 486 un = unhalt->sum * 1.0; 487 } 488 res = par/un; 489 ret = printf("%1.3f", res); 490 return(ret); 491 } 492 493 static int 494 allocstall2(struct counters *cpu, int pos) 495 { 496 /* 2 - PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */ 497 int ret; 498 struct counters *partial; 499 struct counters *unhalt; 500 double un, par, res; 501 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 502 partial = find_counter(cpu, "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP"); 503 if (pos != -1) { 504 par = partial->vals[pos] * 1.0; 505 un = unhalt->vals[pos] * 1.0; 506 } else { 507 par = partial->sum * 1.0; 508 un = unhalt->sum * 1.0; 509 } 510 res = par/un; 511 ret = printf("%1.3f", res); 512 return(ret); 513 } 514 515 static int 516 br_mispredict(struct counters *cpu, int pos) 517 { 518 struct counters *brctr; 519 struct counters *unhalt; 520 int ret; 521 /* 3 - (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */ 522 double br, un, con, res; 523 con = 20.0; 524 525 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 526 brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES"); 527 if (pos != -1) { 528 br = brctr->vals[pos] * 1.0; 529 un = unhalt->vals[pos] * 1.0; 530 } else { 531 br = brctr->sum * 1.0; 532 un = unhalt->sum * 1.0; 533 } 534 res = (con * br)/un; 535 ret = printf("%1.3f", res); 536 return(ret); 537 } 538 539 static int 540 br_mispredictib(struct counters *cpu, int pos) 541 { 542 struct counters *brctr; 543 struct counters *unhalt; 544 struct counters *clear, *clear2, *clear3; 545 struct counters *uops; 546 struct counters *recv; 547 struct counters *iss; 548 /* "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",*/ 549 int ret; 550 /* 551 * (BR_MISP_RETIRED.ALL_BRANCHES / 552 * (BR_MISP_RETIRED.ALL_BRANCHES + 553 * MACHINE_CLEAR.COUNT) * 554 * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) / (4 * CPU_CLK_UNHALTED.THREAD))) 555 * 556 */ 557 double br, cl, cl2, cl3, uo, re, un, con, res, is; 558 con = 4.0; 559 560 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 561 brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES"); 562 clear = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING"); 563 clear2 = find_counter(cpu, "MACHINE_CLEARS.SMC"); 564 clear3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV"); 565 uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS"); 566 iss = find_counter(cpu, "UOPS_ISSUED.ANY"); 567 recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES"); 568 if (pos != -1) { 569 br = brctr->vals[pos] * 1.0; 570 cl = clear->vals[pos] * 1.0; 571 cl2 = clear2->vals[pos] * 1.0; 572 cl3 = clear3->vals[pos] * 1.0; 573 uo = uops->vals[pos] * 1.0; 574 re = recv->vals[pos] * 1.0; 575 is = iss->vals[pos] * 1.0; 576 un = unhalt->vals[pos] * 1.0; 577 } else { 578 br = brctr->sum * 1.0; 579 cl = clear->sum * 1.0; 580 cl2 = clear2->sum * 1.0; 581 cl3 = clear3->sum * 1.0; 582 uo = uops->sum * 1.0; 583 re = recv->sum * 1.0; 584 is = iss->sum * 1.0; 585 un = unhalt->sum * 1.0; 586 } 587 res = (br/(br + cl + cl2 + cl3) * ((is - uo + con * re) / (con * un))); 588 ret = printf("%1.3f", res); 589 return(ret); 590 } 591 592 static int 593 splitloadib(struct counters *cpu, int pos) 594 { 595 int ret; 596 struct counters *mem; 597 struct counters *l1d, *ldblock; 598 struct counters *unhalt; 599 double un, memd, res, l1, ldb; 600 /* 601 * ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) * LD_BLOCKS.NO_SR) / CPU_CLK_UNHALTED.THREAD_P 602 * "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1", 603 */ 604 605 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 606 mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L1_MISS"); 607 l1d = find_counter(cpu, "L1D_PEND_MISS.PENDING"); 608 ldblock = find_counter(cpu, "LD_BLOCKS.NO_SR"); 609 if (pos != -1) { 610 memd = mem->vals[pos] * 1.0; 611 l1 = l1d->vals[pos] * 1.0; 612 ldb = ldblock->vals[pos] * 1.0; 613 un = unhalt->vals[pos] * 1.0; 614 } else { 615 memd = mem->sum * 1.0; 616 l1 = l1d->sum * 1.0; 617 ldb = ldblock->sum * 1.0; 618 un = unhalt->sum * 1.0; 619 } 620 res = ((l1 / memd) * ldb)/un; 621 ret = printf("%1.3f", res); 622 return(ret); 623 } 624 625 static int 626 splitload(struct counters *cpu, int pos) 627 { 628 int ret; 629 struct counters *mem; 630 struct counters *unhalt; 631 double con, un, memd, res; 632 /* 4 - (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/ 633 634 con = 5.0; 635 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 636 mem = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_LOADS"); 637 if (pos != -1) { 638 memd = mem->vals[pos] * 1.0; 639 un = unhalt->vals[pos] * 1.0; 640 } else { 641 memd = mem->sum * 1.0; 642 un = unhalt->sum * 1.0; 643 } 644 res = (memd * con)/un; 645 ret = printf("%1.3f", res); 646 return(ret); 647 } 648 649 static int 650 splitstore(struct counters *cpu, int pos) 651 { 652 /* 5 - MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES (thresh > 0.01) */ 653 int ret; 654 struct counters *mem_split; 655 struct counters *mem_stores; 656 double memsplit, memstore, res; 657 mem_split = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_STORES"); 658 mem_stores = find_counter(cpu, "MEM_UOP_RETIRED.ALL_STORES"); 659 if (pos != -1) { 660 memsplit = mem_split->vals[pos] * 1.0; 661 memstore = mem_stores->vals[pos] * 1.0; 662 } else { 663 memsplit = mem_split->sum * 1.0; 664 memstore = mem_stores->sum * 1.0; 665 } 666 res = memsplit/memstore; 667 ret = printf("%1.3f", res); 668 return(ret); 669 } 670 671 672 static int 673 contested(struct counters *cpu, int pos) 674 { 675 /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */ 676 int ret; 677 struct counters *mem; 678 struct counters *unhalt; 679 double con, un, memd, res; 680 681 con = 60.0; 682 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 683 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); 684 if (pos != -1) { 685 memd = mem->vals[pos] * 1.0; 686 un = unhalt->vals[pos] * 1.0; 687 } else { 688 memd = mem->sum * 1.0; 689 un = unhalt->sum * 1.0; 690 } 691 res = (memd * con)/un; 692 ret = printf("%1.3f", res); 693 return(ret); 694 } 695 696 static int 697 contested_has(struct counters *cpu, int pos) 698 { 699 /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */ 700 int ret; 701 struct counters *mem; 702 struct counters *unhalt; 703 double con, un, memd, res; 704 705 con = 84.0; 706 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 707 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); 708 if (pos != -1) { 709 memd = mem->vals[pos] * 1.0; 710 un = unhalt->vals[pos] * 1.0; 711 } else { 712 memd = mem->sum * 1.0; 713 un = unhalt->sum * 1.0; 714 } 715 res = (memd * con)/un; 716 ret = printf("%1.3f", res); 717 return(ret); 718 } 719 720 721 static int 722 blockstoreforward(struct counters *cpu, int pos) 723 { 724 /* 7 - (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .05)*/ 725 int ret; 726 struct counters *ldb; 727 struct counters *unhalt; 728 double con, un, ld, res; 729 730 con = 13.0; 731 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 732 ldb = find_counter(cpu, "LD_BLOCKS_STORE_FORWARD"); 733 if (pos != -1) { 734 ld = ldb->vals[pos] * 1.0; 735 un = unhalt->vals[pos] * 1.0; 736 } else { 737 ld = ldb->sum * 1.0; 738 un = unhalt->sum * 1.0; 739 } 740 res = (ld * con)/un; 741 ret = printf("%1.3f", res); 742 return(ret); 743 } 744 745 static int 746 cache2(struct counters *cpu, int pos) 747 { 748 /* ** Suspect *** 749 * 8 - ((MEM_LOAD_RETIRED.L3_HIT * 26) + (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) + 750 * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P (thresh >.2) 751 */ 752 int ret; 753 struct counters *mem1, *mem2, *mem3; 754 struct counters *unhalt; 755 double con1, con2, con3, un, me_1, me_2, me_3, res; 756 757 con1 = 26.0; 758 con2 = 43.0; 759 con3 = 60.0; 760 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 761 /* Call for MEM_LOAD_RETIRED.L3_HIT possibly MEM_LOAD_UOPS_RETIRED.LLC_HIT ?*/ 762 mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT"); 763 mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT"); 764 mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); 765 if (pos != -1) { 766 me_1 = mem1->vals[pos] * 1.0; 767 me_2 = mem2->vals[pos] * 1.0; 768 me_3 = mem3->vals[pos] * 1.0; 769 un = unhalt->vals[pos] * 1.0; 770 } else { 771 me_1 = mem1->sum * 1.0; 772 me_2 = mem2->sum * 1.0; 773 me_3 = mem3->sum * 1.0; 774 un = unhalt->sum * 1.0; 775 } 776 res = ((me_1 * con1) + (me_2 * con2) + (me_3 * con3))/un; 777 ret = printf("%1.3f", res); 778 return(ret); 779 } 780 781 static int 782 datasharing(struct counters *cpu, int pos) 783 { 784 /* 785 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2) 786 */ 787 int ret; 788 struct counters *mem; 789 struct counters *unhalt; 790 double con, res, me, un; 791 792 con = 43.0; 793 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 794 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT"); 795 if (pos != -1) { 796 me = mem->vals[pos] * 1.0; 797 un = unhalt->vals[pos] * 1.0; 798 } else { 799 me = mem->sum * 1.0; 800 un = unhalt->sum * 1.0; 801 } 802 res = (me * con)/un; 803 ret = printf("%1.3f", res); 804 return(ret); 805 806 } 807 808 809 static int 810 datasharing_has(struct counters *cpu, int pos) 811 { 812 /* 813 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2) 814 */ 815 int ret; 816 struct counters *mem; 817 struct counters *unhalt; 818 double con, res, me, un; 819 820 con = 72.0; 821 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 822 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT"); 823 if (pos != -1) { 824 me = mem->vals[pos] * 1.0; 825 un = unhalt->vals[pos] * 1.0; 826 } else { 827 me = mem->sum * 1.0; 828 un = unhalt->sum * 1.0; 829 } 830 res = (me * con)/un; 831 ret = printf("%1.3f", res); 832 return(ret); 833 834 } 835 836 837 static int 838 cache2ib(struct counters *cpu, int pos) 839 { 840 /* 841 * (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2) 842 */ 843 int ret; 844 struct counters *mem; 845 struct counters *unhalt; 846 double con, un, me, res; 847 848 con = 29.0; 849 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 850 mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT"); 851 if (pos != -1) { 852 me = mem->vals[pos] * 1.0; 853 un = unhalt->vals[pos] * 1.0; 854 } else { 855 me = mem->sum * 1.0; 856 un = unhalt->sum * 1.0; 857 } 858 res = (con * me)/un; 859 ret = printf("%1.3f", res); 860 return(ret); 861 } 862 863 static int 864 cache2has(struct counters *cpu, int pos) 865 { 866 /* 867 * Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \ 868 * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) + 869 * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84)) 870 * / CPU_CLK_UNHALTED.THREAD_P 871 */ 872 int ret; 873 struct counters *mem1, *mem2, *mem3; 874 struct counters *unhalt; 875 double con1, con2, con3, un, me1, me2, me3, res; 876 877 con1 = 36.0; 878 con2 = 72.0; 879 con3 = 84.0; 880 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 881 mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT"); 882 mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT"); 883 mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); 884 if (pos != -1) { 885 me1 = mem1->vals[pos] * 1.0; 886 me2 = mem2->vals[pos] * 1.0; 887 me3 = mem3->vals[pos] * 1.0; 888 un = unhalt->vals[pos] * 1.0; 889 } else { 890 me1 = mem1->sum * 1.0; 891 me2 = mem2->sum * 1.0; 892 me3 = mem3->sum * 1.0; 893 un = unhalt->sum * 1.0; 894 } 895 res = ((me1 * con1) + (me2 * con2) + (me3 * con3))/un; 896 ret = printf("%1.3f", res); 897 return(ret); 898 } 899 900 static int 901 cache1(struct counters *cpu, int pos) 902 { 903 /* 9 - (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */ 904 int ret; 905 struct counters *mem; 906 struct counters *unhalt; 907 double con, un, me, res; 908 909 con = 180.0; 910 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 911 mem = find_counter(cpu, "MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS"); 912 if (pos != -1) { 913 me = mem->vals[pos] * 1.0; 914 un = unhalt->vals[pos] * 1.0; 915 } else { 916 me = mem->sum * 1.0; 917 un = unhalt->sum * 1.0; 918 } 919 res = (me * con)/un; 920 ret = printf("%1.3f", res); 921 return(ret); 922 } 923 924 static int 925 cache1ib(struct counters *cpu, int pos) 926 { 927 /* 9 - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */ 928 int ret; 929 struct counters *mem; 930 struct counters *unhalt; 931 double con, un, me, res; 932 933 con = 180.0; 934 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 935 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM"); 936 if (pos != -1) { 937 me = mem->vals[pos] * 1.0; 938 un = unhalt->vals[pos] * 1.0; 939 } else { 940 me = mem->sum * 1.0; 941 un = unhalt->sum * 1.0; 942 } 943 res = (me * con)/un; 944 ret = printf("%1.3f", res); 945 return(ret); 946 } 947 948 949 static int 950 dtlb_missload(struct counters *cpu, int pos) 951 { 952 /* 10 - ((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P (t >=.1) */ 953 int ret; 954 struct counters *dtlb_m, *dtlb_d; 955 struct counters *unhalt; 956 double con, un, d1, d2, res; 957 958 con = 7.0; 959 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 960 dtlb_m = find_counter(cpu, "DTLB_LOAD_MISSES.STLB_HIT"); 961 dtlb_d = find_counter(cpu, "DTLB_LOAD_MISSES.WALK_DURATION"); 962 if (pos != -1) { 963 d1 = dtlb_m->vals[pos] * 1.0; 964 d2 = dtlb_d->vals[pos] * 1.0; 965 un = unhalt->vals[pos] * 1.0; 966 } else { 967 d1 = dtlb_m->sum * 1.0; 968 d2 = dtlb_d->sum * 1.0; 969 un = unhalt->sum * 1.0; 970 } 971 res = ((d1 * con) + d2)/un; 972 ret = printf("%1.3f", res); 973 return(ret); 974 } 975 976 static int 977 dtlb_missstore(struct counters *cpu, int pos) 978 { 979 /* 980 * ((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION) / 981 * CPU_CLK_UNHALTED.THREAD_P (t >= .1) 982 */ 983 int ret; 984 struct counters *dtsb_m, *dtsb_d; 985 struct counters *unhalt; 986 double con, un, d1, d2, res; 987 988 con = 7.0; 989 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 990 dtsb_m = find_counter(cpu, "DTLB_STORE_MISSES.STLB_HIT"); 991 dtsb_d = find_counter(cpu, "DTLB_STORE_MISSES.WALK_DURATION"); 992 if (pos != -1) { 993 d1 = dtsb_m->vals[pos] * 1.0; 994 d2 = dtsb_d->vals[pos] * 1.0; 995 un = unhalt->vals[pos] * 1.0; 996 } else { 997 d1 = dtsb_m->sum * 1.0; 998 d2 = dtsb_d->sum * 1.0; 999 un = unhalt->sum * 1.0; 1000 } 1001 res = ((d1 * con) + d2)/un; 1002 ret = printf("%1.3f", res); 1003 return(ret); 1004 } 1005 1006 static int 1007 itlb_miss(struct counters *cpu, int pos) 1008 { 1009 /* ITLB_MISSES.WALK_DURATION / CPU_CLK_UNTHREAD_P IB */ 1010 int ret; 1011 struct counters *itlb; 1012 struct counters *unhalt; 1013 double un, d1, res; 1014 1015 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1016 itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION"); 1017 if (pos != -1) { 1018 d1 = itlb->vals[pos] * 1.0; 1019 un = unhalt->vals[pos] * 1.0; 1020 } else { 1021 d1 = itlb->sum * 1.0; 1022 un = unhalt->sum * 1.0; 1023 } 1024 res = d1/un; 1025 ret = printf("%1.3f", res); 1026 return(ret); 1027 } 1028 1029 static int 1030 icache_miss(struct counters *cpu, int pos) 1031 { 1032 /* (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P IB */ 1033 1034 int ret; 1035 struct counters *itlb, *icache; 1036 struct counters *unhalt; 1037 double un, d1, ic, res; 1038 1039 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1040 itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION"); 1041 icache = find_counter(cpu, "ICACHE.IFETCH_STALL"); 1042 if (pos != -1) { 1043 d1 = itlb->vals[pos] * 1.0; 1044 ic = icache->vals[pos] * 1.0; 1045 un = unhalt->vals[pos] * 1.0; 1046 } else { 1047 d1 = itlb->sum * 1.0; 1048 ic = icache->sum * 1.0; 1049 un = unhalt->sum * 1.0; 1050 } 1051 res = (ic-d1)/un; 1052 ret = printf("%1.3f", res); 1053 return(ret); 1054 1055 } 1056 1057 static int 1058 icache_miss_has(struct counters *cpu, int pos) 1059 { 1060 /* (36 * ICACHE.MISSES) / CPU_CLK_UNHALTED.THREAD_P */ 1061 1062 int ret; 1063 struct counters *icache; 1064 struct counters *unhalt; 1065 double un, con, ic, res; 1066 1067 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1068 icache = find_counter(cpu, "ICACHE.MISSES"); 1069 con = 36.0; 1070 if (pos != -1) { 1071 ic = icache->vals[pos] * 1.0; 1072 un = unhalt->vals[pos] * 1.0; 1073 } else { 1074 ic = icache->sum * 1.0; 1075 un = unhalt->sum * 1.0; 1076 } 1077 res = (con * ic)/un; 1078 ret = printf("%1.3f", res); 1079 return(ret); 1080 1081 } 1082 1083 static int 1084 lcp_stall(struct counters *cpu, int pos) 1085 { 1086 /* ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P IB */ 1087 int ret; 1088 struct counters *ild; 1089 struct counters *unhalt; 1090 double un, d1, res; 1091 1092 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1093 ild = find_counter(cpu, "ILD_STALL.LCP"); 1094 if (pos != -1) { 1095 d1 = ild->vals[pos] * 1.0; 1096 un = unhalt->vals[pos] * 1.0; 1097 } else { 1098 d1 = ild->sum * 1.0; 1099 un = unhalt->sum * 1.0; 1100 } 1101 res = d1/un; 1102 ret = printf("%1.3f", res); 1103 return(ret); 1104 1105 } 1106 1107 1108 static int 1109 frontendstall(struct counters *cpu, int pos) 1110 { 1111 /* 12 - IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4) (thresh >= .15) */ 1112 int ret; 1113 struct counters *idq; 1114 struct counters *unhalt; 1115 double con, un, id, res; 1116 1117 con = 4.0; 1118 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1119 idq = find_counter(cpu, "IDQ_UOPS_NOT_DELIVERED.CORE"); 1120 if (pos != -1) { 1121 id = idq->vals[pos] * 1.0; 1122 un = unhalt->vals[pos] * 1.0; 1123 } else { 1124 id = idq->sum * 1.0; 1125 un = unhalt->sum * 1.0; 1126 } 1127 res = id/(un * con); 1128 ret = printf("%1.3f", res); 1129 return(ret); 1130 } 1131 1132 static int 1133 clears(struct counters *cpu, int pos) 1134 { 1135 /* 13 - ((MACHINE_CLEARS.MEMORY_ORDERING + MACHINE_CLEARS.SMC + MACHINE_CLEARS.MASKMOV ) * 100 ) 1136 * / CPU_CLK_UNHALTED.THREAD_P (thresh >= .02)*/ 1137 1138 int ret; 1139 struct counters *clr1, *clr2, *clr3; 1140 struct counters *unhalt; 1141 double con, un, cl1, cl2, cl3, res; 1142 1143 con = 100.0; 1144 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1145 clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING"); 1146 clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC"); 1147 clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV"); 1148 1149 if (pos != -1) { 1150 cl1 = clr1->vals[pos] * 1.0; 1151 cl2 = clr2->vals[pos] * 1.0; 1152 cl3 = clr3->vals[pos] * 1.0; 1153 un = unhalt->vals[pos] * 1.0; 1154 } else { 1155 cl1 = clr1->sum * 1.0; 1156 cl2 = clr2->sum * 1.0; 1157 cl3 = clr3->sum * 1.0; 1158 un = unhalt->sum * 1.0; 1159 } 1160 res = ((cl1 + cl2 + cl3) * con)/un; 1161 ret = printf("%1.3f", res); 1162 return(ret); 1163 } 1164 1165 static int 1166 microassist(struct counters *cpu, int pos) 1167 { 1168 /* 14 - IDQ.MS_CYCLES / CPU_CLK_UNHALTED.THREAD_P (thresh > .05) */ 1169 int ret; 1170 struct counters *idq; 1171 struct counters *unhalt; 1172 double un, id, res, con; 1173 1174 con = 4.0; 1175 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1176 idq = find_counter(cpu, "IDQ.MS_UOPS"); 1177 if (pos != -1) { 1178 id = idq->vals[pos] * 1.0; 1179 un = unhalt->vals[pos] * 1.0; 1180 } else { 1181 id = idq->sum * 1.0; 1182 un = unhalt->sum * 1.0; 1183 } 1184 res = id/(un * con); 1185 ret = printf("%1.3f", res); 1186 return(ret); 1187 } 1188 1189 1190 static int 1191 aliasing(struct counters *cpu, int pos) 1192 { 1193 /* 15 - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */ 1194 int ret; 1195 struct counters *ld; 1196 struct counters *unhalt; 1197 double un, lds, con, res; 1198 1199 con = 5.0; 1200 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1201 ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS"); 1202 if (pos != -1) { 1203 lds = ld->vals[pos] * 1.0; 1204 un = unhalt->vals[pos] * 1.0; 1205 } else { 1206 lds = ld->sum * 1.0; 1207 un = unhalt->sum * 1.0; 1208 } 1209 res = (lds * con)/un; 1210 ret = printf("%1.3f", res); 1211 return(ret); 1212 } 1213 1214 static int 1215 fpassists(struct counters *cpu, int pos) 1216 { 1217 /* 16 - FP_ASSIST.ANY/INST_RETIRED.ANY_P */ 1218 int ret; 1219 struct counters *fp; 1220 struct counters *inst; 1221 double un, fpd, res; 1222 1223 inst = find_counter(cpu, "INST_RETIRED.ANY_P"); 1224 fp = find_counter(cpu, "FP_ASSIST.ANY"); 1225 if (pos != -1) { 1226 fpd = fp->vals[pos] * 1.0; 1227 un = inst->vals[pos] * 1.0; 1228 } else { 1229 fpd = fp->sum * 1.0; 1230 un = inst->sum * 1.0; 1231 } 1232 res = fpd/un; 1233 ret = printf("%1.3f", res); 1234 return(ret); 1235 } 1236 1237 static int 1238 otherassistavx(struct counters *cpu, int pos) 1239 { 1240 /* 17 - (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P thresh .1*/ 1241 int ret; 1242 struct counters *oth; 1243 struct counters *unhalt; 1244 double un, ot, con, res; 1245 1246 con = 75.0; 1247 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1248 oth = find_counter(cpu, "OTHER_ASSISTS.AVX_TO_SSE"); 1249 if (pos != -1) { 1250 ot = oth->vals[pos] * 1.0; 1251 un = unhalt->vals[pos] * 1.0; 1252 } else { 1253 ot = oth->sum * 1.0; 1254 un = unhalt->sum * 1.0; 1255 } 1256 res = (ot * con)/un; 1257 ret = printf("%1.3f", res); 1258 return(ret); 1259 } 1260 1261 static int 1262 otherassistsse(struct counters *cpu, int pos) 1263 { 1264 1265 int ret; 1266 struct counters *oth; 1267 struct counters *unhalt; 1268 double un, ot, con, res; 1269 1270 /* 18 (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P thresh .1*/ 1271 con = 75.0; 1272 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1273 oth = find_counter(cpu, "OTHER_ASSISTS.SSE_TO_AVX"); 1274 if (pos != -1) { 1275 ot = oth->vals[pos] * 1.0; 1276 un = unhalt->vals[pos] * 1.0; 1277 } else { 1278 ot = oth->sum * 1.0; 1279 un = unhalt->sum * 1.0; 1280 } 1281 res = (ot * con)/un; 1282 ret = printf("%1.3f", res); 1283 return(ret); 1284 } 1285 1286 static int 1287 efficiency1(struct counters *cpu, int pos) 1288 { 1289 1290 int ret; 1291 struct counters *uops; 1292 struct counters *unhalt; 1293 double un, ot, con, res; 1294 1295 /* 19 (UOPS_RETIRED.RETIRE_SLOTS/(4*CPU_CLK_UNHALTED.THREAD_P) look if thresh < .9*/ 1296 con = 4.0; 1297 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1298 uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS"); 1299 if (pos != -1) { 1300 ot = uops->vals[pos] * 1.0; 1301 un = unhalt->vals[pos] * 1.0; 1302 } else { 1303 ot = uops->sum * 1.0; 1304 un = unhalt->sum * 1.0; 1305 } 1306 res = ot/(con * un); 1307 ret = printf("%1.3f", res); 1308 return(ret); 1309 } 1310 1311 static int 1312 efficiency2(struct counters *cpu, int pos) 1313 { 1314 1315 int ret; 1316 struct counters *uops; 1317 struct counters *unhalt; 1318 double un, ot, res; 1319 1320 /* 20 - CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P good if > 1. (comp factor)*/ 1321 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1322 uops = find_counter(cpu, "INST_RETIRED.ANY_P"); 1323 if (pos != -1) { 1324 ot = uops->vals[pos] * 1.0; 1325 un = unhalt->vals[pos] * 1.0; 1326 } else { 1327 ot = uops->sum * 1.0; 1328 un = unhalt->sum * 1.0; 1329 } 1330 res = un/ot; 1331 ret = printf("%1.3f", res); 1332 return(ret); 1333 } 1334 1335 #define SANDY_BRIDGE_COUNT 20 1336 static struct cpu_entry sandy_bridge[SANDY_BRIDGE_COUNT] = { 1337 /*01*/ { "allocstall1", "thresh > .05", 1338 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW -w 1", 1339 allocstall1 }, 1340 /*02*/ { "allocstall2", "thresh > .05", 1341 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES -w 1", 1342 allocstall2 }, 1343 /*03*/ { "br_miss", "thresh >= .2", 1344 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1", 1345 br_mispredict }, 1346 /*04*/ { "splitload", "thresh >= .1", 1347 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1", 1348 splitload }, 1349 /*05*/ { "splitstore", "thresh >= .01", 1350 "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1", 1351 splitstore }, 1352 /*06*/ { "contested", "thresh >= .05", 1353 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1354 contested }, 1355 /*07*/ { "blockstorefwd", "thresh >= .05", 1356 "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1357 blockstoreforward }, 1358 /*08*/ { "cache2", "thresh >= .2", 1359 "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1360 cache2 }, 1361 /*09*/ { "cache1", "thresh >= .2", 1362 "pmcstat -s MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1363 cache1 }, 1364 /*10*/ { "dtlbmissload", "thresh >= .1", 1365 "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1366 dtlb_missload }, 1367 /*11*/ { "dtlbmissstore", "thresh >= .05", 1368 "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1369 dtlb_missstore }, 1370 /*12*/ { "frontendstall", "thresh >= .15", 1371 "pmcstat -s IDQ_UOPS_NOT_DELIVERED.CORE -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1372 frontendstall }, 1373 /*13*/ { "clears", "thresh >= .02", 1374 "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1375 clears }, 1376 /*14*/ { "microassist", "thresh >= .05", 1377 "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1378 microassist }, 1379 /*15*/ { "aliasing_4k", "thresh >= .1", 1380 "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1381 aliasing }, 1382 /*16*/ { "fpassist", "look for a excessive value", 1383 "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", 1384 fpassists }, 1385 /*17*/ { "otherassistavx", "look for a excessive value", 1386 "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1387 otherassistavx }, 1388 /*18*/ { "otherassistsse", "look for a excessive value", 1389 "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1390 otherassistsse }, 1391 /*19*/ { "eff1", "thresh < .9", 1392 "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1393 efficiency1 }, 1394 /*20*/ { "eff2", "thresh > 1.0", 1395 "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1396 efficiency2 }, 1397 }; 1398 1399 1400 #define IVY_BRIDGE_COUNT 21 1401 static struct cpu_entry ivy_bridge[IVY_BRIDGE_COUNT] = { 1402 /*1*/ { "eff1", "thresh < .75", 1403 "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1404 efficiency1 }, 1405 /*2*/ { "eff2", "thresh > 1.0", 1406 "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1407 efficiency2 }, 1408 /*3*/ { "itlbmiss", "thresh > .05", 1409 "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1410 itlb_miss }, 1411 /*4*/ { "icachemiss", "thresh > .05", 1412 "pmcstat -s ICACHE.IFETCH_STALL -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1413 icache_miss }, 1414 /*5*/ { "lcpstall", "thresh > .05", 1415 "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1416 lcp_stall }, 1417 /*6*/ { "cache1", "thresh >= .2", 1418 "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1419 cache1ib }, 1420 /*7*/ { "cache2", "thresh >= .2", 1421 "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1422 cache2ib }, 1423 /*8*/ { "contested", "thresh >= .05", 1424 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1425 contested }, 1426 /*9*/ { "datashare", "thresh >= .05", 1427 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1428 datasharing }, 1429 /*10*/ { "blockstorefwd", "thresh >= .05", 1430 "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1431 blockstoreforward }, 1432 /*11*/ { "splitload", "thresh >= .1", 1433 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1", 1434 splitloadib }, 1435 /*12*/ { "splitstore", "thresh >= .01", 1436 "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1", 1437 splitstore }, 1438 /*13*/ { "aliasing_4k", "thresh >= .1", 1439 "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1440 aliasing }, 1441 /*14*/ { "dtlbmissload", "thresh >= .1", 1442 "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1443 dtlb_missload }, 1444 /*15*/ { "dtlbmissstore", "thresh >= .05", 1445 "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1446 dtlb_missstore }, 1447 /*16*/ { "br_miss", "thresh >= .2", 1448 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1", 1449 br_mispredictib }, 1450 /*17*/ { "clears", "thresh >= .02", 1451 "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1452 clears }, 1453 /*18*/ { "microassist", "thresh >= .05", 1454 "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1455 microassist }, 1456 /*19*/ { "fpassist", "look for a excessive value", 1457 "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", 1458 fpassists }, 1459 /*20*/ { "otherassistavx", "look for a excessive value", 1460 "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1461 otherassistavx }, 1462 /*21*/ { "otherassistsse", "look for a excessive value", 1463 "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1464 otherassistsse }, 1465 }; 1466 1467 #define HASWELL_COUNT 20 1468 static struct cpu_entry haswell[HASWELL_COUNT] = { 1469 /*1*/ { "eff1", "thresh < .75", 1470 "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1471 efficiency1 }, 1472 /*2*/ { "eff2", "thresh > 1.0", 1473 "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1474 efficiency2 }, 1475 /*3*/ { "itlbmiss", "thresh > .05", 1476 "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1477 itlb_miss }, 1478 /*4*/ { "icachemiss", "thresh > .05", 1479 "pmcstat -s ICACHE.MISSES --s CPU_CLK_UNHALTED.THREAD_P -w 1", 1480 icache_miss_has }, 1481 /*5*/ { "lcpstall", "thresh > .05", 1482 "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1483 lcp_stall }, 1484 /*6*/ { "cache1", "thresh >= .2", 1485 "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1486 cache1ib }, 1487 /*7*/ { "cache2", "thresh >= .2", 1488 "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1489 cache2has }, 1490 /*8*/ { "contested", "thresh >= .05", 1491 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1492 contested_has }, 1493 /*9*/ { "datashare", "thresh >= .05", 1494 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1495 datasharing_has }, 1496 /*10*/ { "blockstorefwd", "thresh >= .05", 1497 "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1498 blockstoreforward }, 1499 /*11*/ { "splitload", "thresh >= .1", 1500 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1", 1501 splitload }, 1502 /*12*/ { "splitstore", "thresh >= .01", 1503 "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1", 1504 splitstore }, 1505 /*13*/ { "aliasing_4k", "thresh >= .1", 1506 "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1507 aliasing }, 1508 /*14*/ { "dtlbmissload", "thresh >= .1", 1509 "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1510 dtlb_missload }, 1511 /*15*/ { "br_miss", "thresh >= .2", 1512 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1", 1513 br_mispredict }, 1514 /*16*/ { "clears", "thresh >= .02", 1515 "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1516 clears }, 1517 /*17*/ { "microassist", "thresh >= .05", 1518 "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1519 microassist }, 1520 /*18*/ { "fpassist", "look for a excessive value", 1521 "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", 1522 fpassists }, 1523 /*19*/ { "otherassistavx", "look for a excessive value", 1524 "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1525 otherassistavx }, 1526 /*20*/ { "otherassistsse", "look for a excessive value", 1527 "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1528 otherassistsse }, 1529 }; 1530 1531 1532 static void 1533 set_sandybridge(void) 1534 { 1535 strcpy(the_cpu.cputype, "SandyBridge PMC"); 1536 the_cpu.number = SANDY_BRIDGE_COUNT; 1537 the_cpu.ents = sandy_bridge; 1538 the_cpu.explain = explain_name_sb; 1539 } 1540 1541 static void 1542 set_ivybridge(void) 1543 { 1544 strcpy(the_cpu.cputype, "IvyBridge PMC"); 1545 the_cpu.number = IVY_BRIDGE_COUNT; 1546 the_cpu.ents = ivy_bridge; 1547 the_cpu.explain = explain_name_ib; 1548 } 1549 1550 1551 static void 1552 set_haswell(void) 1553 { 1554 strcpy(the_cpu.cputype, "HASWELL PMC"); 1555 the_cpu.number = HASWELL_COUNT; 1556 the_cpu.ents = haswell; 1557 the_cpu.explain = explain_name_has; 1558 } 1559 1560 static void 1561 set_expression(char *name) 1562 { 1563 int found = 0, i; 1564 for(i=0 ; i< the_cpu.number; i++) { 1565 if (strcmp(name, the_cpu.ents[i].name) == 0) { 1566 found = 1; 1567 expression = the_cpu.ents[i].func; 1568 command = the_cpu.ents[i].command; 1569 threshold = the_cpu.ents[i].thresh; 1570 break; 1571 } 1572 } 1573 if (!found) { 1574 printf("For CPU type %s we have no expression:%s\n", 1575 the_cpu.cputype, name); 1576 exit(-1); 1577 } 1578 } 1579 1580 1581 1582 1583 1584 static int 1585 validate_expression(char *name) 1586 { 1587 int i, found; 1588 1589 found = 0; 1590 for(i=0 ; i< the_cpu.number; i++) { 1591 if (strcmp(name, the_cpu.ents[i].name) == 0) { 1592 found = 1; 1593 break; 1594 } 1595 } 1596 if (!found) { 1597 return(-1); 1598 } 1599 return (0); 1600 } 1601 1602 static void 1603 do_expression(struct counters *cpu, int pos) 1604 { 1605 if (expression == NULL) 1606 return; 1607 (*expression)(cpu, pos); 1608 } 1609 1610 static void 1611 process_header(int idx, char *p) 1612 { 1613 struct counters *up; 1614 int i, len, nlen; 1615 /* 1616 * Given header element idx, at p in 1617 * form 's/NN/nameof' 1618 * process the entry to pull out the name and 1619 * the CPU number. 1620 */ 1621 if (strncmp(p, "s/", 2)) { 1622 printf("Check -- invalid header no s/ in %s\n", 1623 p); 1624 return; 1625 } 1626 up = &cnts[idx]; 1627 up->cpu = strtol(&p[2], NULL, 10); 1628 len = strlen(p); 1629 for (i=2; i<len; i++) { 1630 if (p[i] == '/') { 1631 nlen = strlen(&p[(i+1)]); 1632 if (nlen < (MAX_NLEN-1)) { 1633 strcpy(up->counter_name, &p[(i+1)]); 1634 } else { 1635 strncpy(up->counter_name, &p[(i+1)], (MAX_NLEN-1)); 1636 } 1637 } 1638 } 1639 } 1640 1641 static void 1642 build_counters_from_header(FILE *io) 1643 { 1644 char buffer[8192], *p; 1645 int i, len, cnt; 1646 size_t mlen; 1647 1648 /* We have a new start, lets 1649 * setup our headers and cpus. 1650 */ 1651 if (fgets(buffer, sizeof(buffer), io) == NULL) { 1652 printf("First line can't be read from file err:%d\n", errno); 1653 return; 1654 } 1655 /* 1656 * Ok output is an array of counters. Once 1657 * we start to read the values in we must 1658 * put them in there slot to match there CPU and 1659 * counter being updated. We create a mass array 1660 * of the counters, filling in the CPU and 1661 * counter name. 1662 */ 1663 /* How many do we get? */ 1664 len = strlen(buffer); 1665 for (i=0, cnt=0; i<len; i++) { 1666 if (strncmp(&buffer[i], "s/", 2) == 0) { 1667 cnt++; 1668 for(;i<len;i++) { 1669 if (buffer[i] == ' ') 1670 break; 1671 } 1672 } 1673 } 1674 mlen = sizeof(struct counters) * cnt; 1675 cnts = malloc(mlen); 1676 ncnts = cnt; 1677 if (cnts == NULL) { 1678 printf("No memory err:%d\n", errno); 1679 return; 1680 } 1681 memset(cnts, 0, mlen); 1682 for (i=0, cnt=0; i<len; i++) { 1683 if (strncmp(&buffer[i], "s/", 2) == 0) { 1684 p = &buffer[i]; 1685 for(;i<len;i++) { 1686 if (buffer[i] == ' ') { 1687 buffer[i] = 0; 1688 break; 1689 } 1690 } 1691 process_header(cnt, p); 1692 cnt++; 1693 } 1694 } 1695 if (verbose) 1696 printf("We have %d entries\n", cnt); 1697 } 1698 extern int max_to_collect; 1699 int max_to_collect = MAX_COUNTER_SLOTS; 1700 1701 static int 1702 read_a_line(FILE *io) 1703 { 1704 char buffer[8192], *p, *stop; 1705 int pos, i; 1706 1707 if (fgets(buffer, sizeof(buffer), io) == NULL) { 1708 return(0); 1709 } 1710 p = buffer; 1711 for (i=0; i<ncnts; i++) { 1712 pos = cnts[i].pos; 1713 cnts[i].vals[pos] = strtol(p, &stop, 0); 1714 cnts[i].pos++; 1715 cnts[i].sum += cnts[i].vals[pos]; 1716 p = stop; 1717 } 1718 return (1); 1719 } 1720 1721 extern int cpu_count_out; 1722 int cpu_count_out=0; 1723 1724 static void 1725 print_header(void) 1726 { 1727 int i, cnt, printed_cnt; 1728 1729 printf("*********************************\n"); 1730 for(i=0, cnt=0; i<MAX_CPU; i++) { 1731 if (glob_cpu[i]) { 1732 cnt++; 1733 } 1734 } 1735 cpu_count_out = cnt; 1736 for(i=0, printed_cnt=0; i<MAX_CPU; i++) { 1737 if (glob_cpu[i]) { 1738 printf("CPU%d", i); 1739 printed_cnt++; 1740 } 1741 if (printed_cnt == cnt) { 1742 printf("\n"); 1743 break; 1744 } else { 1745 printf("\t"); 1746 } 1747 } 1748 } 1749 1750 static void 1751 lace_cpus_together(void) 1752 { 1753 int i, j, lace_cpu; 1754 struct counters *cpat, *at; 1755 1756 for(i=0; i<ncnts; i++) { 1757 cpat = &cnts[i]; 1758 if (cpat->next_cpu) { 1759 /* Already laced in */ 1760 continue; 1761 } 1762 lace_cpu = cpat->cpu; 1763 if (lace_cpu >= MAX_CPU) { 1764 printf("CPU %d to big\n", lace_cpu); 1765 continue; 1766 } 1767 if (glob_cpu[lace_cpu] == NULL) { 1768 glob_cpu[lace_cpu] = cpat; 1769 } else { 1770 /* Already processed this cpu */ 1771 continue; 1772 } 1773 /* Ok look forward for cpu->cpu and link in */ 1774 for(j=(i+1); j<ncnts; j++) { 1775 at = &cnts[j]; 1776 if (at->next_cpu) { 1777 continue; 1778 } 1779 if (at->cpu == lace_cpu) { 1780 /* Found one */ 1781 cpat->next_cpu = at; 1782 cpat = at; 1783 } 1784 } 1785 } 1786 } 1787 1788 1789 static void 1790 process_file(char *filename) 1791 { 1792 FILE *io; 1793 int i; 1794 int line_at, not_done; 1795 pid_t pid_of_command=0; 1796 1797 if (filename == NULL) { 1798 io = my_popen(command, "r", &pid_of_command); 1799 if (io == NULL) { 1800 printf("Can't popen the command %s\n", command); 1801 return; 1802 } 1803 } else { 1804 io = fopen(filename, "r"); 1805 if (io == NULL) { 1806 printf("Can't process file %s err:%d\n", 1807 filename, errno); 1808 return; 1809 } 1810 } 1811 build_counters_from_header(io); 1812 if (cnts == NULL) { 1813 /* Nothing we can do */ 1814 printf("Nothing to do -- no counters built\n"); 1815 if (filename) { 1816 fclose(io); 1817 } else { 1818 my_pclose(io, pid_of_command); 1819 } 1820 return; 1821 } 1822 lace_cpus_together(); 1823 print_header(); 1824 if (verbose) { 1825 for (i=0; i<ncnts; i++) { 1826 printf("Counter:%s cpu:%d index:%d\n", 1827 cnts[i].counter_name, 1828 cnts[i].cpu, i); 1829 } 1830 } 1831 line_at = 0; 1832 not_done = 1; 1833 while(not_done) { 1834 if (read_a_line(io)) { 1835 line_at++; 1836 } else { 1837 break; 1838 } 1839 if (line_at >= max_to_collect) { 1840 not_done = 0; 1841 } 1842 if (filename == NULL) { 1843 int cnt; 1844 /* For the ones we dynamically open we print now */ 1845 for(i=0, cnt=0; i<MAX_CPU; i++) { 1846 do_expression(glob_cpu[i], (line_at-1)); 1847 cnt++; 1848 if (cnt == cpu_count_out) { 1849 printf("\n"); 1850 break; 1851 } else { 1852 printf("\t"); 1853 } 1854 } 1855 } 1856 } 1857 if (filename) { 1858 fclose(io); 1859 } else { 1860 my_pclose(io, pid_of_command); 1861 } 1862 } 1863 #if defined(__amd64__) 1864 #define cpuid(in,a,b,c,d)\ 1865 asm("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (in)); 1866 #else 1867 #define cpuid(in, a, b, c, d) 1868 #endif 1869 1870 static void 1871 get_cpuid_set(void) 1872 { 1873 unsigned long eax, ebx, ecx, edx; 1874 int model; 1875 pid_t pid_of_command=0; 1876 size_t sz, len; 1877 FILE *io; 1878 char linebuf[1024], *str; 1879 1880 eax = ebx = ecx = edx = 0; 1881 1882 cpuid(0, eax, ebx, ecx, edx); 1883 if (ebx == 0x68747541) { 1884 printf("AMD processors are not supported by this program\n"); 1885 printf("Sorry\n"); 1886 exit(0); 1887 } else if (ebx == 0x6972794) { 1888 printf("Cyrix processors are not supported by this program\n"); 1889 printf("Sorry\n"); 1890 exit(0); 1891 } else if (ebx == 0x756e6547) { 1892 printf("Genuine Intel\n"); 1893 } else { 1894 printf("Unknown processor type 0x%lx Only Intel AMD64 types are supported by this routine!\n", ebx); 1895 exit(0); 1896 } 1897 cpuid(1, eax, ebx, ecx, edx); 1898 model = (((eax & 0xF0000) >> 12) | ((eax & 0xF0) >> 4)); 1899 printf("CPU model is 0x%x id:0x%lx\n", model, eax); 1900 switch (eax & 0xF00) { 1901 case 0x500: /* Pentium family processors */ 1902 printf("Intel Pentium P5\n"); 1903 goto not_supported; 1904 break; 1905 case 0x600: /* Pentium Pro, Celeron, Pentium II & III */ 1906 switch (model) { 1907 case 0x1: 1908 printf("Intel Pentium P6\n"); 1909 goto not_supported; 1910 break; 1911 case 0x3: 1912 case 0x5: 1913 printf("Intel PII\n"); 1914 goto not_supported; 1915 break; 1916 case 0x6: case 0x16: 1917 printf("Intel CL\n"); 1918 goto not_supported; 1919 break; 1920 case 0x7: case 0x8: case 0xA: case 0xB: 1921 printf("Intel PIII\n"); 1922 goto not_supported; 1923 break; 1924 case 0x9: case 0xD: 1925 printf("Intel PM\n"); 1926 goto not_supported; 1927 break; 1928 case 0xE: 1929 printf("Intel CORE\n"); 1930 goto not_supported; 1931 break; 1932 case 0xF: 1933 printf("Intel CORE2\n"); 1934 goto not_supported; 1935 break; 1936 case 0x17: 1937 printf("Intel CORE2EXTREME\n"); 1938 goto not_supported; 1939 break; 1940 case 0x1C: /* Per Intel document 320047-002. */ 1941 printf("Intel ATOM\n"); 1942 goto not_supported; 1943 break; 1944 case 0x1A: 1945 case 0x1E: /* 1946 * Per Intel document 253669-032 9/2009, 1947 * pages A-2 and A-57 1948 */ 1949 case 0x1F: /* 1950 * Per Intel document 253669-032 9/2009, 1951 * pages A-2 and A-57 1952 */ 1953 printf("Intel COREI7\n"); 1954 goto not_supported; 1955 break; 1956 case 0x2E: 1957 printf("Intel NEHALEM\n"); 1958 goto not_supported; 1959 break; 1960 case 0x25: /* Per Intel document 253669-033US 12/2009. */ 1961 case 0x2C: /* Per Intel document 253669-033US 12/2009. */ 1962 printf("Intel WESTMERE\n"); 1963 goto not_supported; 1964 break; 1965 case 0x2F: /* Westmere-EX, seen in wild */ 1966 printf("Intel WESTMERE\n"); 1967 goto not_supported; 1968 break; 1969 case 0x2A: /* Per Intel document 253669-039US 05/2011. */ 1970 printf("Intel SANDYBRIDGE\n"); 1971 set_sandybridge(); 1972 break; 1973 case 0x2D: /* Per Intel document 253669-044US 08/2012. */ 1974 printf("Intel SANDYBRIDGE_XEON\n"); 1975 set_sandybridge(); 1976 break; 1977 case 0x3A: /* Per Intel document 253669-043US 05/2012. */ 1978 printf("Intel IVYBRIDGE\n"); 1979 set_ivybridge(); 1980 break; 1981 case 0x3E: /* Per Intel document 325462-045US 01/2013. */ 1982 printf("Intel IVYBRIDGE_XEON\n"); 1983 set_ivybridge(); 1984 break; 1985 case 0x3F: /* Per Intel document 325462-045US 09/2014. */ 1986 printf("Intel HASWELL (Xeon)\n"); 1987 set_haswell(); 1988 break; 1989 case 0x3C: /* Per Intel document 325462-045US 01/2013. */ 1990 case 0x45: 1991 case 0x46: 1992 printf("Intel HASWELL\n"); 1993 set_haswell(); 1994 break; 1995 case 0x4D: 1996 /* Per Intel document 330061-001 01/2014. */ 1997 printf("Intel ATOM_SILVERMONT\n"); 1998 goto not_supported; 1999 break; 2000 default: 2001 printf("Intel model 0x%x is not known -- sorry\n", 2002 model); 2003 goto not_supported; 2004 break; 2005 } 2006 break; 2007 case 0xF00: /* P4 */ 2008 printf("Intel unknown model %d\n", model); 2009 goto not_supported; 2010 break; 2011 } 2012 /* Ok lets load the list of all known PMC's */ 2013 io = my_popen("/usr/sbin/pmccontrol -L", "r", &pid_of_command); 2014 if (valid_pmcs == NULL) { 2015 /* Likely */ 2016 pmc_allocated_cnt = PMC_INITIAL_ALLOC; 2017 sz = sizeof(char *) * pmc_allocated_cnt; 2018 valid_pmcs = malloc(sz); 2019 if (valid_pmcs == NULL) { 2020 printf("No memory allocation fails at startup?\n"); 2021 exit(-1); 2022 } 2023 memset(valid_pmcs, 0, sz); 2024 } 2025 2026 while (fgets(linebuf, sizeof(linebuf), io) != NULL) { 2027 if (linebuf[0] != '\t') { 2028 /* sometimes headers ;-) */ 2029 continue; 2030 } 2031 len = strlen(linebuf); 2032 if (linebuf[(len-1)] == '\n') { 2033 /* Likely */ 2034 linebuf[(len-1)] = 0; 2035 } 2036 str = &linebuf[1]; 2037 len = strlen(str) + 1; 2038 valid_pmcs[valid_pmc_cnt] = malloc(len); 2039 if (valid_pmcs[valid_pmc_cnt] == NULL) { 2040 printf("No memory2 allocation fails at startup?\n"); 2041 exit(-1); 2042 } 2043 memset(valid_pmcs[valid_pmc_cnt], 0, len); 2044 strcpy(valid_pmcs[valid_pmc_cnt], str); 2045 valid_pmc_cnt++; 2046 if (valid_pmc_cnt >= pmc_allocated_cnt) { 2047 /* Got to expand -- unlikely */ 2048 char **more; 2049 2050 sz = sizeof(char *) * (pmc_allocated_cnt * 2); 2051 more = malloc(sz); 2052 if (more == NULL) { 2053 printf("No memory3 allocation fails at startup?\n"); 2054 exit(-1); 2055 } 2056 memset(more, 0, sz); 2057 memcpy(more, valid_pmcs, sz); 2058 pmc_allocated_cnt *= 2; 2059 free(valid_pmcs); 2060 valid_pmcs = more; 2061 } 2062 } 2063 my_pclose(io, pid_of_command); 2064 return; 2065 not_supported: 2066 printf("Not supported\n"); 2067 exit(-1); 2068 } 2069 2070 static void 2071 explain_all(void) 2072 { 2073 int i; 2074 printf("For CPU's of type %s the following expressions are available:\n",the_cpu.cputype); 2075 printf("-------------------------------------------------------------\n"); 2076 for(i=0; i<the_cpu.number; i++){ 2077 printf("For -e %s ", the_cpu.ents[i].name); 2078 (*the_cpu.explain)(the_cpu.ents[i].name); 2079 printf("----------------------------\n"); 2080 } 2081 } 2082 2083 static void 2084 test_for_a_pmc(const char *pmc, int out_so_far) 2085 { 2086 FILE *io; 2087 pid_t pid_of_command=0; 2088 char my_command[1024]; 2089 char line[1024]; 2090 char resp[1024]; 2091 int len, llen, i; 2092 2093 if (out_so_far < 50) { 2094 len = 50 - out_so_far; 2095 for(i=0; i<len; i++) { 2096 printf(" "); 2097 } 2098 } 2099 sprintf(my_command, "/usr/sbin/pmcstat -w .25 -c 0 -s %s", pmc); 2100 io = my_popen(my_command, "r", &pid_of_command); 2101 if (io == NULL) { 2102 printf("Failed -- popen fails\n"); 2103 return; 2104 } 2105 /* Setup what we expect */ 2106 len = sprintf(resp, "%s", pmc); 2107 if (fgets(line, sizeof(line), io) == NULL) { 2108 printf("Failed -- no output from pmstat\n"); 2109 goto out; 2110 } 2111 llen = strlen(line); 2112 if (line[(llen-1)] == '\n') { 2113 line[(llen-1)] = 0; 2114 llen--; 2115 } 2116 for(i=2; i<(llen-len); i++) { 2117 if (strncmp(&line[i], "ERROR", 5) == 0) { 2118 printf("Failed %s\n", line); 2119 goto out; 2120 } else if (strncmp(&line[i], resp, len) == 0) { 2121 int j, k; 2122 2123 if (fgets(line, sizeof(line), io) == NULL) { 2124 printf("Failed -- no second output from pmstat\n"); 2125 goto out; 2126 } 2127 len = strlen(line); 2128 for (j=0; j<len; j++) { 2129 if (line[j] == ' ') { 2130 j++; 2131 } else { 2132 break; 2133 } 2134 } 2135 printf("Pass"); 2136 len = strlen(&line[j]); 2137 if (len < 20) { 2138 for(k=0; k<(20-len); k++) { 2139 printf(" "); 2140 } 2141 } 2142 if (len) { 2143 printf("%s", &line[j]); 2144 } else { 2145 printf("\n"); 2146 } 2147 goto out; 2148 } 2149 } 2150 printf("Failed -- '%s' not '%s'\n", line, resp); 2151 out: 2152 my_pclose(io, pid_of_command); 2153 2154 } 2155 2156 static int 2157 add_it_to(char **vars, int cur_cnt, char *name) 2158 { 2159 int i; 2160 size_t len; 2161 for(i=0; i<cur_cnt; i++) { 2162 if (strcmp(vars[i], name) == 0) { 2163 /* Already have */ 2164 return(0); 2165 } 2166 } 2167 if (vars[cur_cnt] != NULL) { 2168 printf("Cur_cnt:%d filled with %s??\n", 2169 cur_cnt, vars[cur_cnt]); 2170 exit(-1); 2171 } 2172 /* Ok its new */ 2173 len = strlen(name) + 1; 2174 vars[cur_cnt] = malloc(len); 2175 if (vars[cur_cnt] == NULL) { 2176 printf("No memory %s\n", __FUNCTION__); 2177 exit(-1); 2178 } 2179 memset(vars[cur_cnt], 0, len); 2180 strcpy(vars[cur_cnt], name); 2181 return(1); 2182 } 2183 2184 static char * 2185 build_command_for_exp(struct expression *exp) 2186 { 2187 /* 2188 * Build the pmcstat command to handle 2189 * the passed in expression. 2190 * /usr/sbin/pmcstat -w 1 -s NNN -s QQQ 2191 * where NNN and QQQ represent the PMC's in the expression 2192 * uniquely.. 2193 */ 2194 char forming[1024]; 2195 int cnt_pmc, alloced_pmcs, i; 2196 struct expression *at; 2197 char **vars, *cmd; 2198 size_t mal; 2199 2200 alloced_pmcs = cnt_pmc = 0; 2201 /* first how many do we have */ 2202 at = exp; 2203 while (at) { 2204 if (at->type == TYPE_VALUE_PMC) { 2205 cnt_pmc++; 2206 } 2207 at = at->next; 2208 } 2209 if (cnt_pmc == 0) { 2210 printf("No PMC's in your expression -- nothing to do!!\n"); 2211 exit(0); 2212 } 2213 mal = cnt_pmc * sizeof(char *); 2214 vars = malloc(mal); 2215 if (vars == NULL) { 2216 printf("No memory\n"); 2217 exit(-1); 2218 } 2219 memset(vars, 0, mal); 2220 at = exp; 2221 while (at) { 2222 if (at->type == TYPE_VALUE_PMC) { 2223 if(add_it_to(vars, alloced_pmcs, at->name)) { 2224 alloced_pmcs++; 2225 } 2226 } 2227 at = at->next; 2228 } 2229 /* Now we have a unique list in vars so create our command */ 2230 mal = 23; /* "/usr/sbin/pmcstat -w 1" + \0 */ 2231 for(i=0; i<alloced_pmcs; i++) { 2232 mal += strlen(vars[i]) + 4; /* var + " -s " */ 2233 } 2234 cmd = malloc((mal+2)); 2235 if (cmd == NULL) { 2236 printf("%s out of mem\n", __FUNCTION__); 2237 exit(-1); 2238 } 2239 memset(cmd, 0, (mal+2)); 2240 strcpy(cmd, "/usr/sbin/pmcstat -w 1"); 2241 at = exp; 2242 for(i=0; i<alloced_pmcs; i++) { 2243 sprintf(forming, " -s %s", vars[i]); 2244 strcat(cmd, forming); 2245 free(vars[i]); 2246 vars[i] = NULL; 2247 } 2248 free(vars); 2249 return(cmd); 2250 } 2251 2252 static int 2253 user_expr(struct counters *cpu, int pos) 2254 { 2255 int ret; 2256 double res; 2257 struct counters *var; 2258 struct expression *at; 2259 2260 at = master_exp; 2261 while (at) { 2262 if (at->type == TYPE_VALUE_PMC) { 2263 var = find_counter(cpu, at->name); 2264 if (var == NULL) { 2265 printf("%s:Can't find counter %s?\n", __FUNCTION__, at->name); 2266 exit(-1); 2267 } 2268 if (pos != -1) { 2269 at->value = var->vals[pos] * 1.0; 2270 } else { 2271 at->value = var->sum * 1.0; 2272 } 2273 } 2274 at = at->next; 2275 } 2276 res = run_expr(master_exp, 1, NULL); 2277 ret = printf("%1.3f", res); 2278 return(ret); 2279 } 2280 2281 2282 static void 2283 set_manual_exp(struct expression *exp) 2284 { 2285 expression = user_expr; 2286 command = build_command_for_exp(exp); 2287 threshold = "User defined threshold"; 2288 } 2289 2290 static void 2291 run_tests(void) 2292 { 2293 int i, lenout; 2294 printf("Running tests on %d PMC's this may take some time\n", valid_pmc_cnt); 2295 printf("------------------------------------------------------------------------\n"); 2296 for(i=0; i<valid_pmc_cnt; i++) { 2297 lenout = printf("%s", valid_pmcs[i]); 2298 fflush(stdout); 2299 test_for_a_pmc(valid_pmcs[i], lenout); 2300 } 2301 } 2302 static void 2303 list_all(void) 2304 { 2305 int i, cnt, j; 2306 printf("PMC Abbreviation\n"); 2307 printf("--------------------------------------------------------------\n"); 2308 for(i=0; i<valid_pmc_cnt; i++) { 2309 cnt = printf("%s", valid_pmcs[i]); 2310 for(j=cnt; j<52; j++) { 2311 printf(" "); 2312 } 2313 printf("%%%d\n", i); 2314 } 2315 } 2316 2317 2318 int 2319 main(int argc, char **argv) 2320 { 2321 int i, j, cnt; 2322 char *filename=NULL; 2323 char *name=NULL; 2324 int help_only = 0; 2325 int test_mode = 0; 2326 2327 get_cpuid_set(); 2328 memset(glob_cpu, 0, sizeof(glob_cpu)); 2329 while ((i = getopt(argc, argv, "LHhvm:i:?e:TE:")) != -1) { 2330 switch (i) { 2331 case 'L': 2332 list_all(); 2333 return(0); 2334 case 'H': 2335 printf("**********************************\n"); 2336 explain_all(); 2337 printf("**********************************\n"); 2338 return(0); 2339 break; 2340 case 'T': 2341 test_mode = 1; 2342 break; 2343 case 'E': 2344 master_exp = parse_expression(optarg); 2345 if (master_exp) { 2346 set_manual_exp(master_exp); 2347 } 2348 break; 2349 case 'e': 2350 if (validate_expression(optarg)) { 2351 printf("Unknown expression %s\n", optarg); 2352 return(0); 2353 } 2354 name = optarg; 2355 set_expression(optarg); 2356 break; 2357 case 'm': 2358 max_to_collect = strtol(optarg, NULL, 0); 2359 if (max_to_collect > MAX_COUNTER_SLOTS) { 2360 /* You can't collect more than max in array */ 2361 max_to_collect = MAX_COUNTER_SLOTS; 2362 } 2363 break; 2364 case 'v': 2365 verbose++; 2366 break; 2367 case 'h': 2368 help_only = 1; 2369 break; 2370 case 'i': 2371 filename = optarg; 2372 break; 2373 case '?': 2374 default: 2375 use: 2376 printf("Use %s [ -i inputfile -v -m max_to_collect -e expr -E -h -? -H]\n", 2377 argv[0]); 2378 printf("-i inputfile -- use source as inputfile not stdin (if stdin collect)\n"); 2379 printf("-v -- verbose dump debug type things -- you don't want this\n"); 2380 printf("-m N -- maximum to collect is N measurments\n"); 2381 printf("-e expr-name -- Do expression expr-name\n"); 2382 printf("-E 'your expression' -- Do your expression\n"); 2383 printf("-h -- Don't do the expression I put in -e xxx just explain what it does and exit\n"); 2384 printf("-H -- Don't run anything, just explain all canned expressions\n"); 2385 printf("-T -- Test all PMC's defined by this processor\n"); 2386 return(0); 2387 break; 2388 }; 2389 } 2390 if ((name == NULL) && (filename == NULL) && (test_mode == 0) && (master_exp == NULL)) { 2391 printf("Without setting an expression we cannot dynamically gather information\n"); 2392 printf("you must supply a filename (and you probably want verbosity)\n"); 2393 goto use; 2394 } 2395 if (test_mode) { 2396 run_tests(); 2397 return(0); 2398 } 2399 printf("*********************************\n"); 2400 if (master_exp == NULL) { 2401 (*the_cpu.explain)(name); 2402 } else { 2403 printf("Examine your expression "); 2404 print_exp(master_exp); 2405 printf("User defined threshold\n"); 2406 } 2407 if (help_only) { 2408 return(0); 2409 } 2410 process_file(filename); 2411 if (verbose >= 2) { 2412 for (i=0; i<ncnts; i++) { 2413 printf("Counter:%s cpu:%d index:%d\n", 2414 cnts[i].counter_name, 2415 cnts[i].cpu, i); 2416 for(j=0; j<cnts[i].pos; j++) { 2417 printf(" val - %ld\n", (long int)cnts[i].vals[j]); 2418 } 2419 printf(" sum - %ld\n", (long int)cnts[i].sum); 2420 } 2421 } 2422 if (expression == NULL) { 2423 return(0); 2424 } 2425 for(i=0, cnt=0; i<MAX_CPU; i++) { 2426 if (glob_cpu[i]) { 2427 do_expression(glob_cpu[i], -1); 2428 cnt++; 2429 if (cnt == cpu_count_out) { 2430 printf("\n"); 2431 break; 2432 } else { 2433 printf("\t"); 2434 } 2435 } 2436 } 2437 return(0); 2438 } 2439