/*-
 * Copyright (c) 2014, 2015 Netflix Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer,
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <sys/types.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <strings.h>
#include <sys/errno.h>
#include <signal.h>
#include <sys/wait.h>
#include <getopt.h>
#include "eval_expr.h"
__FBSDID("$FreeBSD$");

static int max_pmc_counters = 1;
static int run_all = 0;

#define MAX_COUNTER_SLOTS 1024
#define MAX_NLEN 64
#define MAX_CPU 64
static int verbose = 0;

extern char **environ;
extern struct expression *master_exp;
struct expression *master_exp=NULL;

#define PMC_INITIAL_ALLOC 512
extern char **valid_pmcs;
char **valid_pmcs = NULL;
extern int valid_pmc_cnt;
int valid_pmc_cnt=0;
extern int pmc_allocated_cnt;
int pmc_allocated_cnt=0;

/*
 * The following two varients on popen and pclose with
 * the cavet that they get you the PID so that you
 * can supply it to pclose so it can send a SIGTERM 
 *  to the process.
 */
static FILE *
my_popen(const char *command, const char *dir, pid_t *p_pid)
{
	FILE *io_out, *io_in;
	int pdesin[2], pdesout[2];
	char *argv[4];
	pid_t pid;
	char cmd[4];
	char cmd2[1024];
	char arg1[4];

	if ((strcmp(dir, "r") != 0) &&
	    (strcmp(dir, "w") != 0)) {
		errno = EINVAL;
		return(NULL);
	}
	if (pipe(pdesin) < 0)
		return (NULL);

	if (pipe(pdesout) < 0) {
		(void)close(pdesin[0]);
		(void)close(pdesin[1]);
		return (NULL);
	}
	strcpy(cmd, "sh");
	strcpy(arg1, "-c");
	strcpy(cmd2, command);
	argv[0] = cmd;
	argv[1] = arg1;
	argv[2] = cmd2;
	argv[3] = NULL;

	switch (pid = fork()) {
	case -1:			/* Error. */
		(void)close(pdesin[0]);
		(void)close(pdesin[1]);
		(void)close(pdesout[0]);
		(void)close(pdesout[1]);
		return (NULL);
		/* NOTREACHED */
	case 0:				/* Child. */
		/* Close out un-used sides */
		(void)close(pdesin[1]);
		(void)close(pdesout[0]);
		/* Now prepare the stdin of the process */
		close(0);
		(void)dup(pdesin[0]);
		(void)close(pdesin[0]);
		/* Now prepare the stdout of the process */
		close(1);
		(void)dup(pdesout[1]);
		/* And lets do stderr just in case */
		close(2);
		(void)dup(pdesout[1]);
		(void)close(pdesout[1]);
		/* Now run it */
		execve("/bin/sh", argv, environ);
		exit(127);
		/* NOTREACHED */
	}
	/* Parent; assume fdopen can't fail. */
	/* Store the pid */
	*p_pid = pid;
	if (strcmp(dir, "r") != 0) {
		io_out = fdopen(pdesin[1], "w");
		(void)close(pdesin[0]);
		(void)close(pdesout[0]);
		(void)close(pdesout[1]);
		return(io_out);
 	} else {
		/* Prepare the input stream */
		io_in = fdopen(pdesout[0], "r");
		(void)close(pdesout[1]);
		(void)close(pdesin[0]);
		(void)close(pdesin[1]);
		return (io_in);
	}
}

/*
 * pclose --
 *	Pclose returns -1 if stream is not associated with a `popened' command,
 *	if already `pclosed', or waitpid returns an error.
 */
static void
my_pclose(FILE *io, pid_t the_pid)
{
	int pstat;
	pid_t pid;

	/*
	 * Find the appropriate file pointer and remove it from the list.
	 */
	(void)fclose(io);
	/* Die if you are not dead! */
	kill(the_pid, SIGTERM);
	do {
		pid = wait4(the_pid, &pstat, 0, (struct rusage *)0);
	} while (pid == -1 && errno == EINTR);
}

struct counters {
	struct counters *next_cpu;
	char counter_name[MAX_NLEN];		/* Name of counter */
	int cpu;				/* CPU we are on */
	int pos;				/* Index we are filling to. */
	uint64_t vals[MAX_COUNTER_SLOTS];	/* Last 64 entries */
	uint64_t sum;				/* Summary of entries */
};

extern struct counters *glob_cpu[MAX_CPU];
struct counters *glob_cpu[MAX_CPU];

extern struct counters *cnts;
struct counters *cnts=NULL;

extern int ncnts;
int ncnts=0;

extern int (*expression)(struct counters *, int);
int (*expression)(struct counters *, int);

static const char *threshold=NULL;
static const char *command;

struct cpu_entry {
	const char *name;
	const char *thresh;
	const char *command;
	int (*func)(struct counters *, int);
	int counters_required;
};

struct cpu_type {
	char cputype[32];
	int number;
	struct cpu_entry *ents;
	void (*explain)(const char *name);
};
extern struct cpu_type the_cpu;
struct cpu_type the_cpu;

static void
explain_name_sb(const char *name)
{
	const char *mythresh;
	if (strcmp(name, "allocstall1") == 0) {
		printf("Examine PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh > .05";
	} else if (strcmp(name, "allocstall2") == 0) {
		printf("Examine PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh > .05";
	} else if (strcmp(name, "br_miss") == 0) {
		printf("Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .2";
	} else if (strcmp(name, "splitload") == 0) {
		printf("Examine MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .1";
	} else if (strcmp(name, "splitstore") == 0) {
		printf("Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n");
		mythresh = "thresh >= .01";
	} else if (strcmp(name, "contested") == 0) {
		printf("Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .05";
	} else if (strcmp(name, "blockstorefwd") == 0) {
		printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .05";
	} else if (strcmp(name, "cache2") == 0) {
		printf("Examine ((MEM_LOAD_RETIRED.L3_HIT * 26) + \n");
		printf("         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) + \n");
		printf("         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P\n");
		printf("**Note we have it labeled MEM_LOAD_UOPS_RETIRED.LLC_HIT not MEM_LOAD_RETIRED.L3_HIT\n");
		mythresh = "thresh >= .2";
	} else if (strcmp(name, "cache1") == 0) {
		printf("Examine (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .2";
	} else if (strcmp(name, "dtlbmissload") == 0) {
		printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n");
		printf("         / CPU_CLK_UNHALTED.THREAD_P)\n");
		mythresh = "thresh >= .1";
	} else if (strcmp(name, "frontendstall") == 0) {
		printf("Examine IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4)\n");
		mythresh = "thresh >= .15";
	} else if (strcmp(name, "clears") == 0) {
		printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n");
		printf("          MACHINE_CLEARS.SMC + \n");
		printf("          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .02";
	} else if (strcmp(name, "microassist") == 0) {
		printf("Examine IDQ.MS_CYCLES / (CPU_CLK_UNHALTED.THREAD_P * 4)\n");
		printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n");
		mythresh = "thresh >= .05";
	} else if (strcmp(name, "aliasing_4k") == 0) {
		printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .1";
	} else if (strcmp(name, "fpassist") == 0) {
		printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n");
		mythresh = "look for a excessive value";
	} else if (strcmp(name, "otherassistavx") == 0) {
		printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "look for a excessive value";
	} else if (strcmp(name, "otherassistsse") == 0) {
		printf("Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "look for a excessive value";
	} else if (strcmp(name, "eff1") == 0) {
		printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n");
		mythresh = "thresh < .9";
	} else if (strcmp(name, "eff2") == 0) {
		printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n");
		mythresh = "thresh > 1.0";
	} else if (strcmp(name, "dtlbmissstore") == 0) {
		printf("Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n");
		printf("         / CPU_CLK_UNHALTED.THREAD_P)\n");
		mythresh = "thresh >= .05";
	} else {
		printf("Unknown name:%s\n", name);
		mythresh = "unknown entry";
        }
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}

static void
explain_name_ib(const char *name)
{
	const char *mythresh;
	if (strcmp(name, "br_miss") == 0) {
		printf("Examine ((BR_MISP_RETIRED.ALL_BRANCHES /(BR_MISP_RETIRED.ALL_BRANCHES +\n");
		printf("         MACHINE_CLEAR.COUNT) * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES)\n");
		printf("/ (4 * CPU_CLK_UNHALTED.THREAD))))\n");
		mythresh = "thresh >= .2";
	} else if (strcmp(name, "eff1") == 0) {
		printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n");
		mythresh = "thresh < .9";
	} else if (strcmp(name, "eff2") == 0) {
		printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n");
		mythresh = "thresh > 1.0";
	} else if (strcmp(name, "cache1") == 0) {
		printf("Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .2";
	} else if (strcmp(name, "cache2") == 0) {
		printf("Examine (MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .2";
	} else if (strcmp(name, "itlbmiss") == 0) {
		printf("Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh > .05"; 
	} else if (strcmp(name, "icachemiss") == 0) {
		printf("Examine (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh > .05";
	} else if (strcmp(name, "lcpstall") == 0) {
		printf("Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh > .05";
	} else if (strcmp(name, "datashare") == 0) {
		printf("Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh > .05";
	} else if (strcmp(name, "blockstorefwd") == 0) {
		printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .05";
	} else if (strcmp(name, "splitload") == 0) {
		printf("Examine  ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) *\n");
		printf("         LD_BLOCKS.NO_SR)/CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .1";
	} else if (strcmp(name, "splitstore") == 0) {
		printf("Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n");
		mythresh = "thresh >= .01";
	} else if (strcmp(name, "aliasing_4k") == 0) {
		printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .1";
	} else if (strcmp(name, "dtlbmissload") == 0) {
		printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n");
		printf("         / CPU_CLK_UNHALTED.THREAD_P)\n");
		mythresh = "thresh >= .1";
	} else if (strcmp(name, "dtlbmissstore") == 0) {
		printf("Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n");
		printf("         / CPU_CLK_UNHALTED.THREAD_P)\n");
		mythresh = "thresh >= .05";
	} else if (strcmp(name, "contested") == 0) {
		printf("Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .05";
	} else if (strcmp(name, "clears") == 0) {
		printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n");
		printf("          MACHINE_CLEARS.SMC + \n");
		printf("          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .02";
	} else if (strcmp(name, "microassist") == 0) {
		printf("Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n");
		printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n");
		mythresh = "thresh >= .05";
	} else if (strcmp(name, "fpassist") == 0) {
		printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n");
		mythresh = "look for a excessive value";
	} else if (strcmp(name, "otherassistavx") == 0) {
		printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "look for a excessive value";
	} else if (strcmp(name, "otherassistsse") == 0) {
		printf("Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "look for a excessive value";
	} else {
		printf("Unknown name:%s\n", name);
		mythresh = "unknown entry";
        }
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}


static void
explain_name_has(const char *name)
{
	const char *mythresh;
	if (strcmp(name, "eff1") == 0) {
		printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n");
		mythresh = "thresh < .75";
	} else if (strcmp(name, "eff2") == 0) {
		printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n");
		mythresh = "thresh > 1.0";
	} else if (strcmp(name, "itlbmiss") == 0) {
		printf("Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh > .05"; 
	} else if (strcmp(name, "icachemiss") == 0) {
		printf("Examine (36 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh > .05";
	} else if (strcmp(name, "lcpstall") == 0) {
		printf("Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh > .05";
	} else if (strcmp(name, "cache1") == 0) {
		printf("Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .2";
	} else if (strcmp(name, "cache2") == 0) {
		printf("Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \n");
		printf("         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) + \n");
		printf("         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))\n");
		printf("          / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .2";
	} else if (strcmp(name, "contested") == 0) {
		printf("Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .05";
	} else if (strcmp(name, "datashare") == 0) {
		printf("Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh > .05";
	} else if (strcmp(name, "blockstorefwd") == 0) {
		printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .05";
	} else if (strcmp(name, "splitload") == 0) {
		printf("Examine  (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .1";
	} else if (strcmp(name, "splitstore") == 0) {
		printf("Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n");
		mythresh = "thresh >= .01";
	} else if (strcmp(name, "aliasing_4k") == 0) {
		printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .1";
	} else if (strcmp(name, "dtlbmissload") == 0) {
		printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n");
		printf("         / CPU_CLK_UNHALTED.THREAD_P)\n");
		mythresh = "thresh >= .1";
	} else if (strcmp(name, "br_miss") == 0) {
		printf("Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD\n");
		mythresh = "thresh >= .2";
	} else if (strcmp(name, "clears") == 0) {
		printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n");
		printf("          MACHINE_CLEARS.SMC + \n");
		printf("          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .02";
	} else if (strcmp(name, "microassist") == 0) {
		printf("Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n");
		printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n");
		mythresh = "thresh >= .05";
	} else if (strcmp(name, "fpassist") == 0) {
		printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n");
		mythresh = "look for a excessive value";
	} else if (strcmp(name, "otherassistavx") == 0) {
		printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "look for a excessive value";
	} else if (strcmp(name, "otherassistsse") == 0) {
		printf("Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "look for a excessive value";
	} else {
		printf("Unknown name:%s\n", name);
		mythresh = "unknown entry";
        }
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}



static struct counters *
find_counter(struct counters *base, const char *name)
{
	struct counters *at;
	int len;

	at = base;
	len = strlen(name);
	while(at) {
		if (strncmp(at->counter_name, name, len) == 0) {
			return(at);
		}
		at = at->next_cpu;
	}
	printf("Can't find counter %s\n", name);
	printf("We have:\n");
	at = base;
	while(at) {
		printf("- %s\n", at->counter_name);
		at = at->next_cpu;
	}
	exit(-1);
}

static int
allocstall1(struct counters *cpu, int pos)
{
/*  1  - PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW/CPU_CLK_UNHALTED.THREAD_P (thresh > .05)*/
	int ret;
	struct counters *partial;
	struct counters *unhalt;
	double un, par, res;
	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	partial = find_counter(cpu, "PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW");
	if (pos != -1) {
		par = partial->vals[pos] * 1.0;
		un = unhalt->vals[pos] * 1.0;
	} else {
		par = partial->sum * 1.0;
		un = unhalt->sum * 1.0;
	}
	res = par/un;
	ret = printf("%1.3f", res);
	return(ret);
}

static int
allocstall2(struct counters *cpu, int pos)
{
/*  2  - PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
	int ret;
	struct counters *partial;
	struct counters *unhalt;
	double un, par, res;
	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	partial = find_counter(cpu, "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP");
	if (pos != -1) {
		par = partial->vals[pos] * 1.0;
		un = unhalt->vals[pos] * 1.0;
	} else {
		par = partial->sum * 1.0;
		un = unhalt->sum * 1.0;
	}
	res = par/un;
	ret = printf("%1.3f", res);
	return(ret);
}

static int
br_mispredict(struct counters *cpu, int pos)
{
	struct counters *brctr;
	struct counters *unhalt;
	int ret;
/*  3  - (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
	double br, un, con, res;
	con = 20.0;
	
	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
        brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
	if (pos != -1) {
		br = brctr->vals[pos] * 1.0;
		un = unhalt->vals[pos] * 1.0;
	} else {
		br = brctr->sum * 1.0;
		un = unhalt->sum * 1.0;
	}
	res = (con * br)/un;
 	ret = printf("%1.3f", res);
	return(ret);
}

static int
br_mispredictib(struct counters *cpu, int pos)
{
	struct counters *brctr;
	struct counters *unhalt;
	struct counters *clear, *clear2, *clear3;
	struct counters *uops;
	struct counters *recv;	
	struct counters *iss;
/*	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",*/
	int ret;
        /*  
	 * (BR_MISP_RETIRED.ALL_BRANCHES / 
	 *         (BR_MISP_RETIRED.ALL_BRANCHES +
	 *          MACHINE_CLEAR.COUNT) * 
	 *	   ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) / (4 * CPU_CLK_UNHALTED.THREAD)))
	 *
	 */
	double br, cl, cl2, cl3, uo, re, un, con, res, is;
	con = 4.0;
	
	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
        brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
	clear = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
	clear2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
	clear3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
	uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
	iss = find_counter(cpu, "UOPS_ISSUED.ANY");
	recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES");
	if (pos != -1) {
		br = brctr->vals[pos] * 1.0;
		cl = clear->vals[pos] * 1.0;
		cl2 = clear2->vals[pos] * 1.0;
		cl3 = clear3->vals[pos] * 1.0;
		uo = uops->vals[pos] * 1.0;
		re = recv->vals[pos] * 1.0;
		is = iss->vals[pos] * 1.0;
		un = unhalt->vals[pos] * 1.0;
	} else {
		br = brctr->sum * 1.0;
		cl = clear->sum * 1.0;
		cl2 = clear2->sum * 1.0;
		cl3 = clear3->sum * 1.0;
		uo = uops->sum * 1.0;
		re = recv->sum * 1.0;
		is = iss->sum * 1.0;
		un = unhalt->sum * 1.0;
	}
	res = (br/(br + cl + cl2 + cl3) * ((is - uo + con * re) / (con * un)));
 	ret = printf("%1.3f", res);
	return(ret);
}


static int
br_mispredict_broad(struct counters *cpu, int pos)
{
	struct counters *brctr;
	struct counters *unhalt;
	struct counters *clear;
	struct counters *uops;
	struct counters *uops_ret;
	struct counters *recv;
	int ret;
	double br, cl, uo, uo_r, re, con, un, res;

	con = 4.0;
	
	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
        brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
	clear = find_counter(cpu, "MACHINE_CLEARS.CYCLES");
	uops = find_counter(cpu, "UOPS_ISSUED.ANY");
	uops_ret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
	recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES");

	if (pos != -1) {
		un = unhalt->vals[pos] * 1.0;
		br = brctr->vals[pos] * 1.0;
		cl = clear->vals[pos] * 1.0;
		uo = uops->vals[pos] * 1.0;
		uo_r = uops_ret->vals[pos] * 1.0;
		re = recv->vals[pos] * 1.0;
	} else {
		un = unhalt->sum * 1.0;
		br = brctr->sum * 1.0;
		cl = clear->sum * 1.0;
		uo = uops->sum * 1.0;
		uo_r = uops_ret->sum * 1.0;
		re = recv->sum * 1.0;
	}
	res = br / (br + cl) * (uo - uo_r + con * re) / (un * con);
 	ret = printf("%1.3f", res);
	return(ret);
}

static int
splitloadib(struct counters *cpu, int pos)
{
	int ret;
	struct counters *mem;
	struct counters *l1d, *ldblock;
	struct counters *unhalt;
	double un, memd, res, l1, ldb;
        /*  
	 * ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) * LD_BLOCKS.NO_SR) / CPU_CLK_UNHALTED.THREAD_P
	 * "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
	 */

	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L1_MISS");
	l1d = find_counter(cpu, "L1D_PEND_MISS.PENDING");
	ldblock = find_counter(cpu, "LD_BLOCKS.NO_SR");
	if (pos != -1) {
		memd = mem->vals[pos] * 1.0;
		l1 = l1d->vals[pos] * 1.0;
		ldb = ldblock->vals[pos] * 1.0;
		un = unhalt->vals[pos] * 1.0;
	} else {
		memd = mem->sum * 1.0;
		l1 = l1d->sum * 1.0;
		ldb = ldblock->sum * 1.0;
		un = unhalt->sum * 1.0;
	}
	res = ((l1 / memd) * ldb)/un;
	ret = printf("%1.3f", res);
	return(ret);
}


static int
splitload(struct counters *cpu, int pos)
{
	int ret;
	struct counters *mem;
	struct counters *unhalt;
	double con, un, memd, res;
/*  4  - (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/

	con = 5.0;
	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	mem = find_counter(cpu, "MEM_UOPS_RETIRED.SPLIT_LOADS");
	if (pos != -1) {
		memd = mem->vals[pos] * 1.0;
		un = unhalt->vals[pos] * 1.0;
	} else {
		memd = mem->sum * 1.0;
		un = unhalt->sum * 1.0;
	}
	res = (memd * con)/un;
	ret = printf("%1.3f", res);
	return(ret);
}


static int
splitload_sb(struct counters *cpu, int pos)
{
	int ret;
	struct counters *mem;
	struct counters *unhalt;
	double con, un, memd, res;
/*  4  - (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/

	con = 5.0;
	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	mem = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_LOADS");
	if (pos != -1) {
		memd = mem->vals[pos] * 1.0;
		un = unhalt->vals[pos] * 1.0;
	} else {
		memd = mem->sum * 1.0;
		un = unhalt->sum * 1.0;
	}
	res = (memd * con)/un;
	ret = printf("%1.3f", res);
	return(ret);
}


static int
splitstore_sb(struct counters *cpu, int pos)
{
        /*  5  - MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES (thresh > 0.01) */
	int ret;
	struct counters *mem_split;
	struct counters *mem_stores;
	double memsplit, memstore, res;
	mem_split = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_STORES");
	mem_stores = find_counter(cpu, "MEM_UOP_RETIRED.ALL_STORES");
	if (pos != -1) {
		memsplit = mem_split->vals[pos] * 1.0;
		memstore = mem_stores->vals[pos] * 1.0;
	} else {
		memsplit = mem_split->sum * 1.0;
		memstore = mem_stores->sum * 1.0;
	}
	res = memsplit/memstore;
	ret = printf("%1.3f", res);
	return(ret);
}



static int
splitstore(struct counters *cpu, int pos)
{
        /*  5  - MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES (thresh > 0.01) */
	int ret;
	struct counters *mem_split;
	struct counters *mem_stores;
	double memsplit, memstore, res;
	mem_split = find_counter(cpu, "MEM_UOPS_RETIRED.SPLIT_STORES");
	mem_stores = find_counter(cpu, "MEM_UOPS_RETIRED.ALL_STORES");
	if (pos != -1) {
		memsplit = mem_split->vals[pos] * 1.0;
		memstore = mem_stores->vals[pos] * 1.0;
	} else {
		memsplit = mem_split->sum * 1.0;
		memstore = mem_stores->sum * 1.0;
	}
	res = memsplit/memstore;
	ret = printf("%1.3f", res);
	return(ret);
}


static int
contested(struct counters *cpu, int pos)
{
        /*  6  - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
	int ret;
	struct counters *mem;
	struct counters *unhalt;
	double con, un, memd, res;

	con = 60.0;
	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
	if (pos != -1) {
		memd = mem->vals[pos] * 1.0;
		un = unhalt->vals[pos] * 1.0;
	} else {
		memd = mem->sum * 1.0;
		un = unhalt->sum * 1.0;
	}
	res = (memd * con)/un;
	ret = printf("%1.3f", res);
	return(ret);
}

static int
contested_has(struct counters *cpu, int pos)
{
        /*  6  - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
	int ret;
	struct counters *mem;
	struct counters *unhalt;
	double con, un, memd, res;

	con = 84.0;
	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
	if (pos != -1) {
		memd = mem->vals[pos] * 1.0;
		un = unhalt->vals[pos] * 1.0;
	} else {
		memd = mem->sum * 1.0;
		un = unhalt->sum * 1.0;
	}
	res = (memd * con)/un;
	ret = printf("%1.3f", res);
	return(ret);
}

static int
contestedbroad(struct counters *cpu, int pos)
{
        /*  6  - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
	int ret;
	struct counters *mem;
	struct counters *mem2;
	struct counters *unhalt;
	double con, un, memd, memtoo, res;

	con = 84.0;
	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
	mem2 = find_counter(cpu,"MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS");

	if (pos != -1) {
		memd = mem->vals[pos] * 1.0;
		memtoo = mem2->vals[pos] * 1.0;
		un = unhalt->vals[pos] * 1.0;
	} else {
		memd = mem->sum * 1.0;
		memtoo = mem2->sum * 1.0;
		un = unhalt->sum * 1.0;
	}
	res = ((memd * con) + memtoo)/un;
	ret = printf("%1.3f", res);
	return(ret);
}


static int
blockstoreforward(struct counters *cpu, int pos)
{
        /*  7  - (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .05)*/
	int ret;
	struct counters *ldb;
	struct counters *unhalt;
	double con, un, ld, res;

	con = 13.0;
	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	ldb = find_counter(cpu, "LD_BLOCKS_STORE_FORWARD");
	if (pos != -1) {
		ld = ldb->vals[pos] * 1.0;
		un = unhalt->vals[pos] * 1.0;
	} else {
		ld = ldb->sum * 1.0;
		un = unhalt->sum * 1.0;
	}
	res = (ld * con)/un;
	ret = printf("%1.3f", res);
	return(ret);
}

static int
cache2(struct counters *cpu, int pos)
{
	/* ** Suspect ***
	 *  8  - ((MEM_LOAD_RETIRED.L3_HIT * 26) + (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) +
	 *        (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
	 */
	int ret;
	struct counters *mem1, *mem2, *mem3;
	struct counters *unhalt;
	double con1, con2, con3, un, me_1, me_2, me_3, res;

	con1 = 26.0;
	con2 = 43.0;
	con3 = 60.0;
	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
/* Call for MEM_LOAD_RETIRED.L3_HIT possibly MEM_LOAD_UOPS_RETIRED.LLC_HIT ?*/
	mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
	mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
	mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
	if (pos != -1) {
		me_1 = mem1->vals[pos] * 1.0;
		me_2 = mem2->vals[pos] * 1.0;
		me_3 = mem3->vals[pos] * 1.0;
		un = unhalt->vals[pos] * 1.0;
	} else {
		me_1 = mem1->sum * 1.0;
		me_2 = mem2->sum * 1.0;
		me_3 = mem3->sum * 1.0;
		un = unhalt->sum * 1.0;
	}
	res = ((me_1 * con1) + (me_2 * con2) + (me_3 * con3))/un;
	ret = printf("%1.3f", res);
	return(ret);
}

static int
datasharing(struct counters *cpu, int pos)
{
	/* 
	 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
	 */
	int ret;
	struct counters *mem;
	struct counters *unhalt;
	double con, res, me, un;

	con = 43.0;
	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
	if (pos != -1) {
		me = mem->vals[pos] * 1.0;
		un = unhalt->vals[pos] * 1.0;
	} else {
		me = mem->sum * 1.0;
		un = unhalt->sum * 1.0;
	}
	res = (me * con)/un;
	ret = printf("%1.3f", res);
	return(ret);

}


static int
datasharing_has(struct counters *cpu, int pos)
{
	/* 
	 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
	 */
	int ret;
	struct counters *mem;
	struct counters *unhalt;
	double con, res, me, un;

	con = 72.0;
	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
	if (pos != -1) {
		me = mem->vals[pos] * 1.0;
		un = unhalt->vals[pos] * 1.0;
	} else {
		me = mem->sum * 1.0;
		un = unhalt->sum * 1.0;
	}
	res = (me * con)/un;
	ret = printf("%1.3f", res);
	return(ret);

}


static int
cache2ib(struct counters *cpu, int pos)
{
        /*
	 *  (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
	 */
	int ret;
	struct counters *mem;
	struct counters *unhalt;
	double con, un, me, res;

	con = 29.0;
	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
	if (pos != -1) {
		me = mem->vals[pos] * 1.0;
		un = unhalt->vals[pos] * 1.0;
	} else {
		me = mem->sum * 1.0;
		un = unhalt->sum * 1.0;
	}
	res = (con * me)/un; 
	ret = printf("%1.3f", res);
	return(ret);
}

static int
cache2has(struct counters *cpu, int pos)
{
	/*
	 * Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \
	 *          (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) +
	 *          (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))
	 *           / CPU_CLK_UNHALTED.THREAD_P
	 */
	int ret;
	struct counters *mem1, *mem2, *mem3;
	struct counters *unhalt;
	double con1, con2, con3, un, me1, me2, me3, res;

	con1 = 36.0;
	con2 = 72.0;
	con3 = 84.0;
	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
	mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
	mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
	if (pos != -1) {
		me1 = mem1->vals[pos] * 1.0;
		me2 = mem2->vals[pos] * 1.0;
		me3 = mem3->vals[pos] * 1.0;
		un = unhalt->vals[pos] * 1.0;
	} else {
		me1 = mem1->sum * 1.0;
		me2 = mem2->sum * 1.0;
		me3 = mem3->sum * 1.0;
		un = unhalt->sum * 1.0;
	}
	res = ((me1 * con1) + (me2 * con2) + (me3 * con3))/un;
	ret = printf("%1.3f", res);
	return(ret);
}


static int
cache2broad(struct counters *cpu, int pos)
{
        /*
	 *  (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
	 */
	int ret;
	struct counters *mem;
	struct counters *unhalt;
	double con, un, me, res;

	con = 36.0;
	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L3_HIT");
	if (pos != -1) {
		me = mem->vals[pos] * 1.0;
		un = unhalt->vals[pos] * 1.0;
	} else {
		me = mem->sum * 1.0;
		un = unhalt->sum * 1.0;
	}
	res = (con * me)/un; 
	ret = printf("%1.3f", res);
	return(ret);
}


static int
cache1(struct counters *cpu, int pos)
{
	/*  9  - (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
	int ret;
	struct counters *mem;
	struct counters *unhalt;
	double con, un, me, res;

	con = 180.0;
	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	mem = find_counter(cpu, "MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS");
	if (pos != -1) {
		me = mem->vals[pos] * 1.0;
		un = unhalt->vals[pos] * 1.0;
	} else {
		me = mem->sum * 1.0;
		un = unhalt->sum * 1.0;
	}
	res = (me * con)/un;
	ret = printf("%1.3f", res);
	return(ret);
}

static int
cache1ib(struct counters *cpu, int pos)
{
	/*  9  - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
	int ret;
	struct counters *mem;
	struct counters *unhalt;
	double con, un, me, res;

	con = 180.0;
	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM");
	if (pos != -1) {
		me = mem->vals[pos] * 1.0;
		un = unhalt->vals[pos] * 1.0;
	} else {
		me = mem->sum * 1.0;
		un = unhalt->sum * 1.0;
	}
	res = (me * con)/un;
	ret = printf("%1.3f", res);
	return(ret);
}


static int
cache1broad(struct counters *cpu, int pos)
{
	/*  9  - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
	int ret;
	struct counters *mem;
	struct counters *unhalt;
	double con, un, me, res;

	con = 180.0;
	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L3_MISS");
	if (pos != -1) {
		me = mem->vals[pos] * 1.0;
		un = unhalt->vals[pos] * 1.0;
	} else {
		me = mem->sum * 1.0;
		un = unhalt->sum * 1.0;
	}
	res = (me * con)/un;
	ret = printf("%1.3f", res);
	return(ret);
}


static int
dtlb_missload(struct counters *cpu, int pos)
{
	/* 10  - ((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P (t >=.1) */
	int ret;
	struct counters *dtlb_m, *dtlb_d;
	struct counters *unhalt;
	double con, un, d1, d2, res;

	con = 7.0;
	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	dtlb_m = find_counter(cpu, "DTLB_LOAD_MISSES.STLB_HIT");
	dtlb_d = find_counter(cpu, "DTLB_LOAD_MISSES.WALK_DURATION");
	if (pos != -1) {
		d1 = dtlb_m->vals[pos] * 1.0;
		d2 = dtlb_d->vals[pos] * 1.0;
		un = unhalt->vals[pos] * 1.0;
	} else {
		d1 = dtlb_m->sum * 1.0;
		d2 = dtlb_d->sum * 1.0;
		un = unhalt->sum * 1.0;
	}
	res = ((d1 * con) + d2)/un;
	ret = printf("%1.3f", res);
	return(ret);
}

static int
dtlb_missstore(struct counters *cpu, int pos)
{
        /* 
	 * ((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION) / 
	 * CPU_CLK_UNHALTED.THREAD_P (t >= .1) 
	 */
        int ret;
        struct counters *dtsb_m, *dtsb_d;
        struct counters *unhalt;
        double con, un, d1, d2, res;

        con = 7.0;
        unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
        dtsb_m = find_counter(cpu, "DTLB_STORE_MISSES.STLB_HIT");
        dtsb_d = find_counter(cpu, "DTLB_STORE_MISSES.WALK_DURATION");
        if (pos != -1) {
                d1 = dtsb_m->vals[pos] * 1.0;
                d2 = dtsb_d->vals[pos] * 1.0;
                un = unhalt->vals[pos] * 1.0;
        } else {
                d1 = dtsb_m->sum * 1.0;
                d2 = dtsb_d->sum * 1.0;
                un = unhalt->sum * 1.0;
        }
        res = ((d1 * con) + d2)/un;
        ret = printf("%1.3f", res);
        return(ret);
}

static int
itlb_miss(struct counters *cpu, int pos)
{
	/* ITLB_MISSES.WALK_DURATION / CPU_CLK_UNTHREAD_P  IB */
	int ret;
	struct counters *itlb;
	struct counters *unhalt;
	double un, d1, res;

	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
	if (pos != -1) {
		d1 = itlb->vals[pos] * 1.0;
		un = unhalt->vals[pos] * 1.0;
	} else {
		d1 = itlb->sum * 1.0;
		un = unhalt->sum * 1.0;
	}
	res = d1/un;
	ret = printf("%1.3f", res);
	return(ret);
}


static int
itlb_miss_broad(struct counters *cpu, int pos)
{
	/* (7 * ITLB_MISSES.STLB_HIT_4K + ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNTHREAD_P   */
	int ret;
	struct counters *itlb;
	struct counters *unhalt;
	struct counters *four_k;
	double un, d1, res, k;

	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
	four_k = find_counter(cpu, "ITLB_MISSES.STLB_HIT_4K");
	if (pos != -1) {
		d1 = itlb->vals[pos] * 1.0;
		un = unhalt->vals[pos] * 1.0;
		k = four_k->vals[pos] * 1.0;
	} else {
		d1 = itlb->sum * 1.0;
		un = unhalt->sum * 1.0;
		k = four_k->sum * 1.0;
	}
	res = (7.0 * k + d1)/un;
	ret = printf("%1.3f", res);
	return(ret);
}


static int
icache_miss(struct counters *cpu, int pos)
{
	/* (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P IB */

	int ret;
	struct counters *itlb, *icache;
	struct counters *unhalt;
	double un, d1, ic, res;

	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
	icache = find_counter(cpu, "ICACHE.IFETCH_STALL");
	if (pos != -1) {
		d1 = itlb->vals[pos] * 1.0;
		ic = icache->vals[pos] * 1.0;
		un = unhalt->vals[pos] * 1.0;
	} else {
		d1 = itlb->sum * 1.0;
		ic = icache->sum * 1.0;
		un = unhalt->sum * 1.0;
	}
	res = (ic-d1)/un;
	ret = printf("%1.3f", res);
	return(ret);

}

static int
icache_miss_has(struct counters *cpu, int pos)
{
	/* (36 * ICACHE.MISSES) / CPU_CLK_UNHALTED.THREAD_P */

	int ret;
	struct counters *icache;
	struct counters *unhalt;
	double un, con, ic, res;

	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	icache = find_counter(cpu, "ICACHE.MISSES");
	con = 36.0;
	if (pos != -1) {
		ic = icache->vals[pos] * 1.0;
		un = unhalt->vals[pos] * 1.0;
	} else {
		ic = icache->sum * 1.0;
		un = unhalt->sum * 1.0;
	}
	res = (con * ic)/un;
	ret = printf("%1.3f", res);
	return(ret);

}

static int
lcp_stall(struct counters *cpu, int pos)
{
         /* ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P IB */
	int ret;
	struct counters *ild;
	struct counters *unhalt;
	double un, d1, res;

	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	ild = find_counter(cpu, "ILD_STALL.LCP");
	if (pos != -1) {
		d1 = ild->vals[pos] * 1.0;
		un = unhalt->vals[pos] * 1.0;
	} else {
		d1 = ild->sum * 1.0;
		un = unhalt->sum * 1.0;
	}
	res = d1/un;
	ret = printf("%1.3f", res);
	return(ret);

}


static int
frontendstall(struct counters *cpu, int pos)
{
      /* 12  -  IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4) (thresh >= .15) */
	int ret;
	struct counters *idq;
	struct counters *unhalt;
	double con, un, id, res;

	con = 4.0;
	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	idq = find_counter(cpu, "IDQ_UOPS_NOT_DELIVERED.CORE");
	if (pos != -1) {
		id = idq->vals[pos] * 1.0;
		un = unhalt->vals[pos] * 1.0;
	} else {
		id = idq->sum * 1.0;
		un = unhalt->sum * 1.0;
	}
	res = id/(un * con);
	ret = printf("%1.3f", res);
	return(ret);
}

static int
clears(struct counters *cpu, int pos)
{
	/* 13  - ((MACHINE_CLEARS.MEMORY_ORDERING + MACHINE_CLEARS.SMC + MACHINE_CLEARS.MASKMOV ) * 100 )  
	 *         / CPU_CLK_UNHALTED.THREAD_P (thresh  >= .02)*/
	
	int ret;
	struct counters *clr1, *clr2, *clr3;
	struct counters *unhalt;
	double con, un, cl1, cl2, cl3, res;

	con = 100.0;
	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
	clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
	clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
	
	if (pos != -1) {
		cl1 = clr1->vals[pos] * 1.0;
		cl2 = clr2->vals[pos] * 1.0;
		cl3 = clr3->vals[pos] * 1.0;
		un = unhalt->vals[pos] * 1.0;
	} else {
		cl1 = clr1->sum * 1.0;
		cl2 = clr2->sum * 1.0;
		cl3 = clr3->sum * 1.0;
		un = unhalt->sum * 1.0;
	}
	res = ((cl1 + cl2 + cl3) * con)/un;
	ret = printf("%1.3f", res);
	return(ret);
}



static int
clears_broad(struct counters *cpu, int pos)
{
	int ret;
	struct counters *clr1, *clr2, *clr3, *cyc;
	struct counters *unhalt;
	double con, un, cl1, cl2, cl3, cy, res;

	con = 100.0;
	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
	clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
	clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
	cyc = find_counter(cpu, "MACHINE_CLEARS.CYCLES");
	if (pos != -1) {
		cl1 = clr1->vals[pos] * 1.0;
		cl2 = clr2->vals[pos] * 1.0;
		cl3 = clr3->vals[pos] * 1.0;
		cy = cyc->vals[pos] * 1.0;
		un = unhalt->vals[pos] * 1.0;
	} else {
		cl1 = clr1->sum * 1.0;
		cl2 = clr2->sum * 1.0;
		cl3 = clr3->sum * 1.0;
		cy = cyc->sum * 1.0;
		un = unhalt->sum * 1.0;
	}
	/* Formula not listed but extrapulated to add the cy ?? */
	res = ((cl1 + cl2 + cl3 + cy) * con)/un;
	ret = printf("%1.3f", res);
	return(ret);
}





static int
microassist(struct counters *cpu, int pos)
{
	/* 14  - IDQ.MS_CYCLES / CPU_CLK_UNHALTED.THREAD_P (thresh > .05) */
	int ret;
	struct counters *idq;
	struct counters *unhalt;
	double un, id, res, con;

	con = 4.0;
	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	idq = find_counter(cpu, "IDQ.MS_UOPS");
	if (pos != -1) {
		id = idq->vals[pos] * 1.0;
		un = unhalt->vals[pos] * 1.0;
	} else {
		id = idq->sum * 1.0;
		un = unhalt->sum * 1.0;
	}
	res = id/(un * con);
	ret = printf("%1.3f", res);
	return(ret);
}


static int
microassist_broad(struct counters *cpu, int pos)
{
	int ret;
	struct counters *idq;
	struct counters *unhalt;
	struct counters *uopiss;
	struct counters *uopret;
	double un, id, res, con, uoi, uor;

	con = 4.0;
	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	idq = find_counter(cpu, "IDQ.MS_UOPS");
	uopiss = find_counter(cpu, "UOPS_ISSUED.ANY");
	uopret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
	if (pos != -1) {
		id = idq->vals[pos] * 1.0;
		un = unhalt->vals[pos] * 1.0;
		uoi = uopiss->vals[pos] * 1.0;
		uor = uopret->vals[pos] * 1.0;
	} else {
		id = idq->sum * 1.0;
		un = unhalt->sum * 1.0;
		uoi = uopiss->sum * 1.0;
		uor = uopret->sum * 1.0;
	}
	res = (uor/uoi) * (id/(un * con));
	ret = printf("%1.3f", res);
	return(ret);
}


static int
aliasing(struct counters *cpu, int pos)
{
	/* 15  - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */
	int ret;	
	struct counters *ld;
	struct counters *unhalt;
	double un, lds, con, res;

	con = 5.0;
	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS");
	if (pos != -1) {
		lds = ld->vals[pos] * 1.0;
		un = unhalt->vals[pos] * 1.0;
	} else {
		lds = ld->sum * 1.0;
		un = unhalt->sum * 1.0;
	}
	res = (lds * con)/un;
	ret = printf("%1.3f", res);
	return(ret);
}

static int
aliasing_broad(struct counters *cpu, int pos)
{
	/* 15  - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */
	int ret;	
	struct counters *ld;
	struct counters *unhalt;
	double un, lds, con, res;

	con = 7.0;
	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS");
	if (pos != -1) {
		lds = ld->vals[pos] * 1.0;
		un = unhalt->vals[pos] * 1.0;
	} else {
		lds = ld->sum * 1.0;
		un = unhalt->sum * 1.0;
	}
	res = (lds * con)/un;
	ret = printf("%1.3f", res);
	return(ret);
}


static int
fpassists(struct counters *cpu, int pos)
{
	/* 16  - FP_ASSIST.ANY/INST_RETIRED.ANY_P */
	int ret;	
	struct counters *fp;
	struct counters *inst;
	double un, fpd, res;

	inst = find_counter(cpu, "INST_RETIRED.ANY_P");
	fp = find_counter(cpu, "FP_ASSIST.ANY");
	if (pos != -1) {
		fpd = fp->vals[pos] * 1.0;
		un = inst->vals[pos] * 1.0;
	} else {
		fpd = fp->sum * 1.0;
		un = inst->sum * 1.0;
	}
	res = fpd/un;
	ret = printf("%1.3f", res);
	return(ret);
}

static int
otherassistavx(struct counters *cpu, int pos)
{
	/* 17  - (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P thresh  .1*/
	int ret;	
	struct counters *oth;
	struct counters *unhalt;
	double un, ot, con, res;

	con = 75.0;
	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	oth = find_counter(cpu, "OTHER_ASSISTS.AVX_TO_SSE");
	if (pos != -1) {
		ot = oth->vals[pos] * 1.0;
		un = unhalt->vals[pos] * 1.0;
	} else {
		ot = oth->sum * 1.0;
		un = unhalt->sum * 1.0;
	}
	res = (ot * con)/un;
	ret = printf("%1.3f", res);
	return(ret);
}

static int
otherassistsse(struct counters *cpu, int pos)
{

	int ret;	
	struct counters *oth;
	struct counters *unhalt;
	double un, ot, con, res;

	/* 18     (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P  thresh .1*/
	con = 75.0;
	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	oth = find_counter(cpu, "OTHER_ASSISTS.SSE_TO_AVX");
	if (pos != -1) {
		ot = oth->vals[pos] * 1.0;
		un = unhalt->vals[pos] * 1.0;
	} else {
		ot = oth->sum * 1.0;
		un = unhalt->sum * 1.0;
	}
	res = (ot * con)/un;
	ret = printf("%1.3f", res);
	return(ret);
}

static int
efficiency1(struct counters *cpu, int pos)
{

	int ret;	
	struct counters *uops;
	struct counters *unhalt;
	double un, ot, con, res;

        /* 19 (UOPS_RETIRED.RETIRE_SLOTS/(4*CPU_CLK_UNHALTED.THREAD_P) look if thresh < .9*/
	con = 4.0;
	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
	if (pos != -1) {
		ot = uops->vals[pos] * 1.0;
		un = unhalt->vals[pos] * 1.0;
	} else {
		ot = uops->sum * 1.0;
		un = unhalt->sum * 1.0;
	}
	res = ot/(con * un);
	ret = printf("%1.3f", res);
	return(ret);
}

static int
efficiency2(struct counters *cpu, int pos)
{

	int ret;	
	struct counters *uops;
	struct counters *unhalt;
	double un, ot, res;

        /* 20  - CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P good if > 1. (comp factor)*/
	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	uops = find_counter(cpu, "INST_RETIRED.ANY_P");
	if (pos != -1) {
		ot = uops->vals[pos] * 1.0;
		un = unhalt->vals[pos] * 1.0;
	} else {
		ot = uops->sum * 1.0;
		un = unhalt->sum * 1.0;
	}
	res = un/ot;
	ret = printf("%1.3f", res);
	return(ret);
}

#define SANDY_BRIDGE_COUNT 20	
static struct cpu_entry sandy_bridge[SANDY_BRIDGE_COUNT] = {
/*01*/	{ "allocstall1", "thresh > .05", 
	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW -w 1",
	  allocstall1, 2 },
/* -- not defined for SB right (partial-rat_stalls) 02*/
        { "allocstall2", "thresh > .05", 
	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP -w 1",
	  allocstall2, 2 },
/*03*/	{ "br_miss", "thresh >= .2", 
	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
	  br_mispredict, 2 },
/*04*/	{ "splitload", "thresh >= .1", 
	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1",
	  splitload_sb, 2 },
/* 05*/	{ "splitstore", "thresh >= .01", 
	  "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1",
	  splitstore_sb, 2 },
/*06*/	{ "contested", "thresh >= .05", 
	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  contested, 2 },
/*07*/	{ "blockstorefwd", "thresh >= .05", 
	  "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  blockstoreforward, 2 },
/*08*/	{ "cache2", "thresh >= .2", 
	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  cache2, 4 },
/*09*/	{ "cache1", "thresh >= .2", 
	  "pmcstat -s MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  cache1, 2 },
/*10*/	{ "dtlbmissload", "thresh >= .1", 
	  "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  dtlb_missload, 3 },
/*11*/	{ "dtlbmissstore", "thresh >= .05", 
	  "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  dtlb_missstore, 3 },
/*12*/	{ "frontendstall", "thresh >= .15", 
	  "pmcstat -s IDQ_UOPS_NOT_DELIVERED.CORE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  frontendstall, 2 },
/*13*/	{ "clears", "thresh >= .02", 
	  "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  clears, 4 },
/*14*/	{ "microassist", "thresh >= .05", 
	  "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  microassist, 2 },
/*15*/	{ "aliasing_4k", "thresh >= .1", 
	  "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  aliasing, 2 },
/*16*/	{ "fpassist", "look for a excessive value", 
	  "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
	  fpassists, 2 },
/*17*/	{ "otherassistavx", "look for a excessive value", 
	  "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  otherassistavx, 2},
/*18*/	{ "otherassistsse", "look for a excessive value", 
	  "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  otherassistsse, 2 },
/*19*/	{ "eff1", "thresh < .9", 
	  "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  efficiency1, 2 },
/*20*/	{ "eff2", "thresh > 1.0", 
	  "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  efficiency2, 2 },
};


#define IVY_BRIDGE_COUNT 21
static struct cpu_entry ivy_bridge[IVY_BRIDGE_COUNT] = {
/*1*/	{ "eff1", "thresh < .75", 
	  "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  efficiency1, 2 },
/*2*/	{ "eff2", "thresh > 1.0", 
	  "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  efficiency2, 2 },
/*3*/	{ "itlbmiss", "thresh > .05", 
	  "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  itlb_miss, 2 },
/*4*/	{ "icachemiss", "thresh > .05", 
	  "pmcstat -s ICACHE.IFETCH_STALL -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  icache_miss, 3 },
/*5*/	{ "lcpstall", "thresh > .05", 
	  "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  lcp_stall, 2 },
/*6*/	{ "cache1", "thresh >= .2", 
	  "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  cache1ib, 2 },
/*7*/	{ "cache2", "thresh >= .2", 
	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  cache2ib, 2 },
/*8*/	{ "contested", "thresh >= .05", 
	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  contested, 2 },
/*9*/	{ "datashare", "thresh >= .05",
	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  datasharing, 2 },
/*10*/	{ "blockstorefwd", "thresh >= .05", 
	  "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  blockstoreforward, 2 },
/*11*/	{ "splitload", "thresh >= .1", 
	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
	  splitloadib, 4 },
/*12*/	{ "splitstore", "thresh >= .01", 
	  "pmcstat -s MEM_UOPS_RETIRED.SPLIT_STORES -s MEM_UOPS_RETIRED.ALL_STORES -w 1",
	  splitstore, 2 },
/*13*/	{ "aliasing_4k", "thresh >= .1", 
	  "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  aliasing, 2 },
/*14*/	{ "dtlbmissload", "thresh >= .1", 
	  "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  dtlb_missload , 3},
/*15*/	{ "dtlbmissstore", "thresh >= .05", 
	  "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  dtlb_missstore, 3 },
/*16*/	{ "br_miss", "thresh >= .2", 
	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",
	  br_mispredictib, 8 },
/*17*/	{ "clears", "thresh >= .02", 
	  "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  clears, 4 },
/*18*/	{ "microassist", "thresh >= .05", 
	  "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  microassist, 2 },
/*19*/	{ "fpassist", "look for a excessive value", 
	  "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
	  fpassists, 2 },
/*20*/	{ "otherassistavx", "look for a excessive value", 
	  "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  otherassistavx , 2},
/*21*/	{ "otherassistsse", "look for a excessive value", 
	  "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  otherassistsse, 2 },
};

#define HASWELL_COUNT 20
static struct cpu_entry haswell[HASWELL_COUNT] = {
/*1*/	{ "eff1", "thresh < .75", 
	  "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  efficiency1, 2 },
/*2*/	{ "eff2", "thresh > 1.0", 
	  "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  efficiency2, 2 },
/*3*/	{ "itlbmiss", "thresh > .05", 
	  "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  itlb_miss, 2 },
/*4*/	{ "icachemiss", "thresh > .05", 
	  "pmcstat -s ICACHE.MISSES -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  icache_miss_has, 2 },
/*5*/	{ "lcpstall", "thresh > .05", 
	  "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  lcp_stall, 2 },
/*6*/	{ "cache1", "thresh >= .2", 
	  "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  cache1ib, 2 },
/*7*/	{ "cache2", "thresh >= .2", 
	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT  -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  cache2has, 4 },
/*8*/	{ "contested", "thresh >= .05", 
	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  contested_has, 2 },
/*9*/	{ "datashare", "thresh >= .05",
	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  datasharing_has, 2 },
/*10*/	{ "blockstorefwd", "thresh >= .05", 
	  "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  blockstoreforward, 2 },
/*11*/	{ "splitload", "thresh >= .1", 
	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOPS_RETIRED.SPLIT_LOADS -w 1",
	  splitload , 2},
/*12*/	{ "splitstore", "thresh >= .01", 
	  "pmcstat -s MEM_UOPS_RETIRED.SPLIT_STORES -s MEM_UOPS_RETIRED.ALL_STORES -w 1",
	  splitstore, 2 },
/*13*/	{ "aliasing_4k", "thresh >= .1", 
	  "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  aliasing, 2 },
/*14*/	{ "dtlbmissload", "thresh >= .1", 
	  "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  dtlb_missload, 3 },
/*15*/	{ "br_miss", "thresh >= .2", 
	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
	  br_mispredict, 2 },
/*16*/	{ "clears", "thresh >= .02", 
	  "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  clears, 4 },
/*17*/	{ "microassist", "thresh >= .05", 
	  "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  microassist, 2 },
/*18*/	{ "fpassist", "look for a excessive value", 
	  "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
	  fpassists, 2 },
/*19*/	{ "otherassistavx", "look for a excessive value", 
	  "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  otherassistavx, 2 },
/*20*/	{ "otherassistsse", "look for a excessive value", 
	  "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  otherassistsse, 2 },
};


static void
explain_name_broad(const char *name)
{
	const char *mythresh;
	if (strcmp(name, "eff1") == 0) {
		printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n");
		mythresh = "thresh < .75";
	} else if (strcmp(name, "eff2") == 0) {
		printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n");
		mythresh = "thresh > 1.0";
	} else if (strcmp(name, "itlbmiss") == 0) {
		printf("Examine (7 * ITLB_MISSES_STLB_HIT_4K + ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh > .05"; 
	} else if (strcmp(name, "icachemiss") == 0) {
		printf("Examine ( 36.0 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P ??? may not be right \n");
		mythresh = "thresh > .05";
	} else if (strcmp(name, "lcpstall") == 0) {
		printf("Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh > .05";
	} else if (strcmp(name, "cache1") == 0) {
		printf("Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .1";
	} else if (strcmp(name, "cache2") == 0) {
		printf("Examine (36.0 * MEM_LOAD_UOPS_RETIRED.L3_HIT / CPU_CLK_UNHALTED.THREAD_P)\n");
		mythresh = "thresh >= .2";
	} else if (strcmp(name, "contested") == 0) {
		printf("Examine ((MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) +  MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS)/ CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .05";
	} else if (strcmp(name, "datashare") == 0) {
		printf("Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh > .05";
	} else if (strcmp(name, "blockstorefwd") == 0) {
		printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .05";
	} else if (strcmp(name, "aliasing_4k") == 0) {
		printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 7) / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .1";
	} else if (strcmp(name, "dtlbmissload") == 0) {
		printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n");
		printf("         / CPU_CLK_UNHALTED.THREAD_P)\n");
		mythresh = "thresh >= .1";

	} else if (strcmp(name, "br_miss") == 0) {
		printf("Examine BR_MISP_RETIRED.ALL_BRANCHS_PS / (BR_MISP_RETIED.ALL_BRANCHES_PS + MACHINE_CLEARS.COUNT) *\n");
		printf(" (UOPS_ISSUEDF.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) /\n");
		printf("CPU_CLK_UNHALTED.THREAD * 4)\n");
		mythresh = "thresh >= .2";
	} else if (strcmp(name, "clears") == 0) {
		printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n");
		printf("          MACHINE_CLEARS.SMC + \n");
		printf("          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .02";
	} else if (strcmp(name, "fpassist") == 0) {
		printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n");
		mythresh = "look for a excessive value";
	} else if (strcmp(name, "otherassistavx") == 0) {
		printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "look for a excessive value";
	} else if (strcmp(name, "microassist") == 0) {
		printf("Examine (UOPS_RETIRED.RETIRE_SLOTS/UOPS_ISSUED.ANY) * (IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n");
		printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n");
		mythresh = "thresh >= .05";
	} else {
		printf("Unknown name:%s\n", name);
		mythresh = "unknown entry";
        }
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}


#define BROADWELL_COUNT 17
static struct cpu_entry broadwell[BROADWELL_COUNT] = {
/*1*/	{ "eff1", "thresh < .75", 
	  "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  efficiency1, 2 }, 
/*2*/	{ "eff2", "thresh > 1.0", 
	  "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  efficiency2, 2 },
/*3*/	{ "itlbmiss", "thresh > .05", 
	  "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -s ITLB_MISSES.STLB_HIT_4K -w 1",
	  itlb_miss_broad, 3 },
/*4*/	{ "icachemiss", "thresh > .05", 
	  "pmcstat -s ICACHE.MISSES -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  icache_miss_has, 2 },
/*5*/	{ "lcpstall", "thresh > .05", 
	  "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  lcp_stall, 2 },
/*6*/	{ "cache1", "thresh >= .1", 
	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_MISS  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  cache1broad, 2 },
/*7*/	{ "cache2", "thresh >= .2", 
	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_HIT  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  cache2broad, 2 },
/*8*/	{ "contested", "thresh >= .05", 
	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P  -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS -w 1",
	  contestedbroad, 2 },
/*9*/	{ "datashare", "thresh >= .05",
	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  datasharing_has, 2 },
/*10*/	{ "blockstorefwd", "thresh >= .05", 
	  "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  blockstoreforward, 2 },
/*11*/	{ "aliasing_4k", "thresh >= .1", 
	  "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  aliasing_broad, 2 }, 
/*12*/	{ "dtlbmissload", "thresh >= .1", 
	  "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT_4K -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  dtlb_missload, 3 },
/*13*/	{ "br_miss", "thresh >= .2", 
	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.CYCLES -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",
	  br_mispredict_broad, 7 },
/*14*/	{ "clears", "thresh >= .02", 
	  "pmcstat -s MACHINE_CLEARS.CYCLES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  clears_broad, 5 },
/*15*/	{ "fpassist", "look for a excessive value", 
	  "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
	  fpassists, 2 },
/*16*/	{ "otherassistavx", "look for a excessive value", 
	  "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  otherassistavx, 2 },
/*17*/	{ "microassist", "thresh >= .2", 
	  "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS  -w 1",
	  microassist_broad, 4 },
};


static void
set_sandybridge(void)
{
	strcpy(the_cpu.cputype, "SandyBridge PMC");
	the_cpu.number = SANDY_BRIDGE_COUNT;
	the_cpu.ents = sandy_bridge;
	the_cpu.explain = explain_name_sb;
}

static void
set_ivybridge(void)
{
	strcpy(the_cpu.cputype, "IvyBridge PMC");
	the_cpu.number = IVY_BRIDGE_COUNT;
	the_cpu.ents = ivy_bridge;
	the_cpu.explain = explain_name_ib;
}


static void
set_haswell(void)
{
	strcpy(the_cpu.cputype, "HASWELL PMC");
	the_cpu.number = HASWELL_COUNT;
	the_cpu.ents = haswell;
	the_cpu.explain = explain_name_has;
}


static void
set_broadwell(void)
{
	strcpy(the_cpu.cputype, "HASWELL PMC");
	the_cpu.number = BROADWELL_COUNT;
	the_cpu.ents = broadwell;
	the_cpu.explain = explain_name_broad;
}


static int
set_expression(const char *name)
{
	int found = 0, i;
	for(i=0 ; i< the_cpu.number; i++) {
		if (strcmp(name, the_cpu.ents[i].name) == 0) {
			found = 1;
			expression = the_cpu.ents[i].func;
			command = the_cpu.ents[i].command;
			threshold = the_cpu.ents[i].thresh;
			if  (the_cpu.ents[i].counters_required > max_pmc_counters) {
				printf("Test %s requires that the CPU have %d counters and this CPU has only %d\n",
				       the_cpu.ents[i].name,
				       the_cpu.ents[i].counters_required, max_pmc_counters);
				printf("Sorry this test can not be run\n");
				if (run_all == 0) {
					exit(-1);
				} else {
					return(-1);
				}
			}
			break;
		}
	}
	if (!found) {
		printf("For CPU type %s we have no expression:%s\n",
		       the_cpu.cputype, name);
		exit(-1);
	}
	return(0);
}





static int
validate_expression(char *name) 
{
	int i, found;

	found = 0;
	for(i=0 ; i< the_cpu.number; i++) {
		if (strcmp(name, the_cpu.ents[i].name) == 0) {
			found = 1;
			break;
		}
	}
	if (!found) {
		return(-1);
	}
	return (0);
}

static void
do_expression(struct counters *cpu, int pos)
{
	if (expression == NULL) 
		return;
	(*expression)(cpu, pos);
}

static void
process_header(int idx, char *p)
{
	struct counters *up;
	int i, len, nlen;
	/* 
	 * Given header element idx, at p in
	 * form 's/NN/nameof'
	 * process the entry to pull out the name and
	 * the CPU number.
	 */
	if (strncmp(p, "s/", 2)) {
		printf("Check -- invalid header no s/ in %s\n",
		       p);
		return;
	}
	up = &cnts[idx];
	up->cpu = strtol(&p[2], NULL, 10);
	len = strlen(p);
	for (i=2; i<len; i++) {
		if (p[i] == '/') {
			nlen = strlen(&p[(i+1)]);
			if (nlen < (MAX_NLEN-1)) {
				strcpy(up->counter_name, &p[(i+1)]);
			} else {
				strncpy(up->counter_name, &p[(i+1)], (MAX_NLEN-1));
			}
		}
	}
}

static void
build_counters_from_header(FILE *io)
{
	char buffer[8192], *p;
	int i, len, cnt;
	size_t mlen;

	/* We have a new start, lets 
	 * setup our headers and cpus.
	 */
	if (fgets(buffer, sizeof(buffer), io) == NULL) {
		printf("First line can't be read from file err:%d\n", errno);
		return;
	}
	/*
	 * Ok output is an array of counters. Once
	 * we start to read the values in we must
	 * put them in there slot to match there CPU and 
	 * counter being updated. We create a mass array
	 * of the counters, filling in the CPU and 
	 * counter name. 
	 */
	/* How many do we get? */
	len = strlen(buffer);
	for (i=0, cnt=0; i<len; i++) {
		if (strncmp(&buffer[i], "s/", 2) == 0) {
			cnt++;
			for(;i<len;i++) {
				if (buffer[i] == ' ')
					break;
			}
		}
	}
	mlen = sizeof(struct counters) * cnt;
	cnts = malloc(mlen);
	ncnts = cnt;
	if (cnts == NULL) {
		printf("No memory err:%d\n", errno);
		return;
	}
	memset(cnts, 0, mlen);
	for (i=0, cnt=0; i<len; i++) {
		if (strncmp(&buffer[i], "s/", 2) == 0) {
			p = &buffer[i];
			for(;i<len;i++) {
				if (buffer[i] == ' ') {
					buffer[i] = 0;
					break;
				}
			}
			process_header(cnt, p);
			cnt++;
		}
	}
	if (verbose)
		printf("We have %d entries\n", cnt);	
}
extern int max_to_collect;
int max_to_collect = MAX_COUNTER_SLOTS;

static int
read_a_line(FILE *io) 
{
	char buffer[8192], *p, *stop;	
	int pos, i;

	if (fgets(buffer, sizeof(buffer), io) == NULL) {
		return(0);
	}
	p = buffer;
	for (i=0; i<ncnts; i++) {
		pos = cnts[i].pos;
		cnts[i].vals[pos] = strtol(p, &stop, 0);
		cnts[i].pos++;
		cnts[i].sum += cnts[i].vals[pos];
		p = stop;
	}
	return (1);
}

extern int cpu_count_out;
int cpu_count_out=0;

static void
print_header(void)
{
	int i, cnt, printed_cnt;

	printf("*********************************\n");
	for(i=0, cnt=0; i<MAX_CPU; i++) {
		if (glob_cpu[i]) {
			cnt++;
		}
	}	
	cpu_count_out = cnt;
	for(i=0, printed_cnt=0; i<MAX_CPU; i++) {
		if (glob_cpu[i]) {
			printf("CPU%d", i);
			printed_cnt++;
		}
		if (printed_cnt == cnt) {
			printf("\n");
			break;
		} else {
			printf("\t");
		}
	}
}

static void
lace_cpus_together(void)
{
	int i, j, lace_cpu;
	struct counters *cpat, *at;

	for(i=0; i<ncnts; i++) {
		cpat = &cnts[i];
		if (cpat->next_cpu) {
			/* Already laced in */
			continue;
		}
		lace_cpu = cpat->cpu;
		if (lace_cpu >= MAX_CPU) {
			printf("CPU %d to big\n", lace_cpu);
			continue;
		}
		if (glob_cpu[lace_cpu] == NULL) {
			glob_cpu[lace_cpu] = cpat;
		} else {
			/* Already processed this cpu */
			continue;
		}
		/* Ok look forward for cpu->cpu and link in */
		for(j=(i+1); j<ncnts; j++) {
			at = &cnts[j];
			if (at->next_cpu) {
				continue;
			}
			if (at->cpu == lace_cpu) {
				/* Found one */
				cpat->next_cpu = at;
				cpat = at;
			}
		}
	}
}


static void
process_file(char *filename)
{
	FILE *io;
	int i;
	int line_at, not_done;
	pid_t pid_of_command=0;

	if (filename ==  NULL) {
		io = my_popen(command, "r", &pid_of_command);
	} else {
		io = fopen(filename, "r");
		if (io == NULL) {
			printf("Can't process file %s err:%d\n",
			       filename, errno);
			return;
		}
	}
	build_counters_from_header(io);
	if (cnts == NULL) {
		/* Nothing we can do */
		printf("Nothing to do -- no counters built\n");
		if (io) {
			fclose(io);
		}
		return;
	}
	lace_cpus_together();
	print_header();
	if (verbose) {
		for (i=0; i<ncnts; i++) {
			printf("Counter:%s cpu:%d index:%d\n",
			       cnts[i].counter_name,
			       cnts[i].cpu, i);
		}
	}
	line_at = 0;
	not_done = 1;
	while(not_done) {
		if (read_a_line(io)) {
			line_at++;
		} else {
			break;
		}
		if (line_at >= max_to_collect) {
			not_done = 0;
		}
		if (filename == NULL) {
			int cnt;
			/* For the ones we dynamically open we print now */
			for(i=0, cnt=0; i<MAX_CPU; i++) {
				do_expression(glob_cpu[i], (line_at-1));
				cnt++;
				if (cnt == cpu_count_out) {
					printf("\n");
					break;
				} else {
					printf("\t");
				}
			}
		}
	}
	if (filename) {
		fclose(io);
	} else {
		my_pclose(io, pid_of_command);
	}
}
#if defined(__amd64__)
#define cpuid(in,a,b,c,d)\
  asm("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (in));

static __inline void
do_cpuid(u_int ax, u_int cx, u_int *p)
{
	__asm __volatile("cpuid"
			 : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3])
			 :  "0" (ax), "c" (cx) );
}

#else
#define cpuid(in, a, b, c, d) 
#define do_cpuid(ax, cx, p)
#endif

static void
get_cpuid_set(void)
{
	unsigned long eax, ebx, ecx, edx;
	int model;
	pid_t pid_of_command=0;
	size_t sz, len;
	FILE *io;
	char linebuf[1024], *str;
	u_int reg[4];

	eax = ebx = ecx = edx = 0;

	cpuid(0, eax, ebx, ecx, edx);
	if (ebx == 0x68747541) {
		printf("AMD processors are not supported by this program\n");
		printf("Sorry\n");
		exit(0);
	} else if (ebx == 0x6972794) {
		printf("Cyrix processors are not supported by this program\n");
		printf("Sorry\n");
		exit(0);
	} else if (ebx == 0x756e6547) {
		printf("Genuine Intel\n");
	} else {
		printf("Unknown processor type 0x%lx Only Intel AMD64 types are supported by this routine!\n", ebx);
		exit(0);
	}
	cpuid(1, eax, ebx, ecx, edx);
	model = (((eax & 0xF0000) >> 12) | ((eax & 0xF0) >> 4));
	printf("CPU model is 0x%x id:0x%lx\n", model, eax);
	switch (eax & 0xF00) {
	case 0x500:		/* Pentium family processors */
		printf("Intel Pentium P5\n");
		goto not_supported;
		break;
	case 0x600:		/* Pentium Pro, Celeron, Pentium II & III */
		switch (model) {
		case 0x1:
			printf("Intel Pentium P6\n");
			goto not_supported;
			break;
		case 0x3: 
		case 0x5:
			printf("Intel PII\n");
			goto not_supported;
			break;
		case 0x6: case 0x16:
			printf("Intel CL\n");
			goto not_supported;
			break;
		case 0x7: case 0x8: case 0xA: case 0xB:
			printf("Intel PIII\n");
			goto not_supported;
			break;
		case 0x9: case 0xD:
			printf("Intel PM\n");
			goto not_supported;
			break;
		case 0xE:
			printf("Intel CORE\n");
			goto not_supported;
			break;
		case 0xF:
			printf("Intel CORE2\n");
			goto not_supported;
			break;
		case 0x17:
			printf("Intel CORE2EXTREME\n");
			goto not_supported;
			break;
		case 0x1C:	/* Per Intel document 320047-002. */
			printf("Intel ATOM\n");
			goto not_supported;
			break;
		case 0x1A:
		case 0x1E:	/*
				 * Per Intel document 253669-032 9/2009,
				 * pages A-2 and A-57
				 */
		case 0x1F:	/*
				 * Per Intel document 253669-032 9/2009,
				 * pages A-2 and A-57
				 */
			printf("Intel COREI7\n");
			goto not_supported;
			break;
		case 0x2E:
			printf("Intel NEHALEM\n");
			goto not_supported;
			break;
		case 0x25:	/* Per Intel document 253669-033US 12/2009. */
		case 0x2C:	/* Per Intel document 253669-033US 12/2009. */
			printf("Intel WESTMERE\n");
			goto not_supported;
			break;
		case 0x2F:	/* Westmere-EX, seen in wild */
			printf("Intel WESTMERE\n");
			goto not_supported;
			break;
		case 0x2A:	/* Per Intel document 253669-039US 05/2011. */
			printf("Intel SANDYBRIDGE\n");
			set_sandybridge();
			break;
		case 0x2D:	/* Per Intel document 253669-044US 08/2012. */
			printf("Intel SANDYBRIDGE_XEON\n");
			set_sandybridge();
			break;
		case 0x3A:	/* Per Intel document 253669-043US 05/2012. */
			printf("Intel IVYBRIDGE\n");
			set_ivybridge();
			break;
		case 0x3E:	/* Per Intel document 325462-045US 01/2013. */
			printf("Intel IVYBRIDGE_XEON\n");
			set_ivybridge();
			break;
		case 0x3F:	/* Per Intel document 325462-045US 09/2014. */
			printf("Intel HASWELL (Xeon)\n");
			set_haswell();
			break;
		case 0x3C:	/* Per Intel document 325462-045US 01/2013. */
		case 0x45:
		case 0x46:
			printf("Intel HASWELL\n");
			set_haswell();
			break;

		case 0x4e:
		case 0x5e:
			printf("Intel SKY-LAKE\n");
			goto not_supported;
			break;
		case 0x3D:
		case 0x47:
			printf("Intel BROADWELL\n");
			set_broadwell();
			break;
		case 0x4f:
		case 0x56:
			printf("Intel BROADWEL (Xeon)\n");
			set_broadwell();
			break;

		case 0x4D:
			/* Per Intel document 330061-001 01/2014. */
			printf("Intel ATOM_SILVERMONT\n");
			goto not_supported;
			break;
		default:
			printf("Intel model 0x%x is not known -- sorry\n",
			       model);
			goto not_supported;
			break;
		}
		break;
	case 0xF00:		/* P4 */
		printf("Intel unknown model %d\n", model);
		goto not_supported;
		break;
	}
	do_cpuid(0xa, 0, reg);
	max_pmc_counters = (reg[3] & 0x0000000f) + 1;
	printf("We have %d PMC counters to work with\n", max_pmc_counters);
	/* Ok lets load the list of all known PMC's */
	io = my_popen("/usr/sbin/pmccontrol -L", "r", &pid_of_command);
	if (valid_pmcs == NULL) {
		/* Likely */
		pmc_allocated_cnt = PMC_INITIAL_ALLOC;
		sz = sizeof(char *) * pmc_allocated_cnt;
		valid_pmcs = malloc(sz);
		if (valid_pmcs == NULL) {
			printf("No memory allocation fails at startup?\n");	
			exit(-1);
		}
		memset(valid_pmcs, 0, sz);
	}
	
	while (fgets(linebuf, sizeof(linebuf), io) != NULL) {
		if (linebuf[0] != '\t') {
			/* sometimes headers ;-) */
			continue;
		}
		len = strlen(linebuf);
		if (linebuf[(len-1)] == '\n') {
			/* Likely */
			linebuf[(len-1)] = 0;
		}
		str = &linebuf[1];
		len = strlen(str) + 1;
		valid_pmcs[valid_pmc_cnt] = malloc(len);
		if (valid_pmcs[valid_pmc_cnt] == NULL) {
			printf("No memory2 allocation fails at startup?\n");	
			exit(-1);
		}
		memset(valid_pmcs[valid_pmc_cnt], 0, len);
		strcpy(valid_pmcs[valid_pmc_cnt], str);
		valid_pmc_cnt++;
		if (valid_pmc_cnt >= pmc_allocated_cnt) {
			/* Got to expand -- unlikely */
			char **more;

			sz = sizeof(char *) * (pmc_allocated_cnt * 2);
			more = malloc(sz);
			if (more == NULL) {
				printf("No memory3 allocation fails at startup?\n");	
				exit(-1);
			}
			memset(more, 0, sz);
			memcpy(more, valid_pmcs, sz);
			pmc_allocated_cnt *= 2;
			free(valid_pmcs);
			valid_pmcs = more;
		}
	}
	my_pclose(io, pid_of_command);	
	return;
not_supported:
	printf("Not supported\n");	
	exit(-1);
}

static void
explain_all(void)
{
	int i;
	printf("For CPU's of type %s the following expressions are available:\n",the_cpu.cputype);
	printf("-------------------------------------------------------------\n");
	for(i=0; i<the_cpu.number; i++){
		printf("For -e %s ", the_cpu.ents[i].name);
		(*the_cpu.explain)(the_cpu.ents[i].name);
		printf("----------------------------\n");
	}
}

static void
test_for_a_pmc(const char *pmc, int out_so_far)
{
	FILE *io;
	pid_t pid_of_command=0;	
	char my_command[1024];
	char line[1024];
	char resp[1024];
	int len, llen, i;

	if (out_so_far < 50) {
		len = 50 - out_so_far;
		for(i=0; i<len; i++) {
			printf(" ");
		}
	}
	sprintf(my_command, "/usr/sbin/pmcstat -w .25 -c 0 -s %s", pmc);
	io = my_popen(my_command, "r", &pid_of_command);	
	if (io == NULL) {
		printf("Failed -- popen fails\n");
		return;
	}
	/* Setup what we expect */
	len = sprintf(resp, "%s", pmc);
	if (fgets(line, sizeof(line), io) == NULL) {
		printf("Failed -- no output from pmstat\n");
		goto out;
	}
	llen = strlen(line);
	if (line[(llen-1)] == '\n') {
		line[(llen-1)] = 0;
		llen--;
	}
	for(i=2; i<(llen-len); i++) {
		if (strncmp(&line[i], "ERROR", 5) == 0) {
			printf("Failed %s\n", line);
			goto out;
		} else if (strncmp(&line[i], resp, len) == 0) {
			int j, k;

			if (fgets(line, sizeof(line), io) == NULL) {
				printf("Failed -- no second output from pmstat\n");
				goto out;
			}
			len = strlen(line);
			for (j=0; j<len; j++) {
				if (line[j] == ' ') {
					j++; 
				} else {
					break;
				}
			}
			printf("Pass");
			len = strlen(&line[j]);
			if (len < 20) {
				for(k=0; k<(20-len); k++) {
					printf(" ");
				}
			}
			if (len) {
				printf("%s", &line[j]);
			} else {
				printf("\n");
			}
			goto out;
		}
	}
	printf("Failed -- '%s' not '%s'\n", line, resp);
out:
	my_pclose(io, pid_of_command);		
	
}

static int
add_it_to(char **vars, int cur_cnt, char *name)
{
	int i;
	size_t len;
	for(i=0; i<cur_cnt; i++) {
		if (strcmp(vars[i], name) == 0) {
			/* Already have */
			return(0);
		}
	}
	if (vars[cur_cnt] != NULL) {
		printf("Cur_cnt:%d filled with %s??\n", 
		       cur_cnt, vars[cur_cnt]);
		exit(-1);
	}
	/* Ok its new */
	len = strlen(name) + 1;
	vars[cur_cnt] = malloc(len);
	if (vars[cur_cnt] == NULL) {
		printf("No memory %s\n", __FUNCTION__);
		exit(-1);
	}
	memset(vars[cur_cnt], 0, len);
	strcpy(vars[cur_cnt], name);
	return(1);
}

static char *
build_command_for_exp(struct expression *exp)
{
	/*
	 * Build the pmcstat command to handle
	 * the passed in expression.
	 * /usr/sbin/pmcstat -w 1 -s NNN -s QQQ
	 * where NNN and QQQ represent the PMC's in the expression
	 * uniquely..
	 */
	char forming[1024];
	int cnt_pmc, alloced_pmcs, i;
	struct expression *at;
	char **vars, *cmd;
	size_t mal;

	alloced_pmcs = cnt_pmc = 0;
	/* first how many do we have */
	at = exp;
	while (at) {
		if (at->type == TYPE_VALUE_PMC) {
			cnt_pmc++;
		}
		at = at->next;
	}
	if (cnt_pmc == 0) {
		printf("No PMC's in your expression -- nothing to do!!\n");
		exit(0);
	}
	mal = cnt_pmc * sizeof(char *);
	vars = malloc(mal);
	if (vars == NULL) {
		printf("No memory\n");
		exit(-1);
	}
	memset(vars, 0, mal);
	at = exp;
	while (at) {
		if (at->type == TYPE_VALUE_PMC) {
			if(add_it_to(vars, alloced_pmcs, at->name)) {
				alloced_pmcs++;
			}
		}
		at = at->next;
	}
	/* Now we have a unique list in vars so create our command */
	mal = 23; /*	"/usr/sbin/pmcstat -w 1"  + \0 */
	for(i=0; i<alloced_pmcs; i++) {
		mal += strlen(vars[i]) + 4;	/* var + " -s " */
	}
	cmd = malloc((mal+2));
	if (cmd == NULL) {
		printf("%s out of mem\n", __FUNCTION__);
		exit(-1);
	}
	memset(cmd, 0, (mal+2));
	strcpy(cmd, "/usr/sbin/pmcstat -w 1");
	at = exp;
	for(i=0; i<alloced_pmcs; i++) {
		sprintf(forming, " -s %s", vars[i]);
		strcat(cmd, forming);
		free(vars[i]);
		vars[i] = NULL;
	}
	free(vars);
	return(cmd);
}

static int
user_expr(struct counters *cpu, int pos)
{
	int ret;	
	double res;
	struct counters *var;
	struct expression *at;

	at = master_exp;
	while (at) {
		if (at->type == TYPE_VALUE_PMC) {
			var = find_counter(cpu, at->name);
			if (var == NULL) {
				printf("%s:Can't find counter %s?\n", __FUNCTION__, at->name);
				exit(-1);
			}
			if (pos != -1) {
				at->value = var->vals[pos] * 1.0;
			} else {
				at->value = var->sum * 1.0;
			}
		}
		at = at->next;
	}
	res = run_expr(master_exp, 1, NULL);
	ret = printf("%1.3f", res);
	return(ret);
}


static void
set_manual_exp(struct expression *exp)
{
	expression = user_expr;
	command = build_command_for_exp(exp);
	threshold = "User defined threshold";
}

static void
run_tests(void)
{
	int i, lenout;
	printf("Running tests on %d PMC's this may take some time\n", valid_pmc_cnt);
	printf("------------------------------------------------------------------------\n");
	for(i=0; i<valid_pmc_cnt; i++) {
		lenout = printf("%s", valid_pmcs[i]);
		fflush(stdout);
		test_for_a_pmc(valid_pmcs[i], lenout);
	}
}
static void
list_all(void)
{
	int i, cnt, j;
	printf("PMC                                               Abbreviation\n");
	printf("--------------------------------------------------------------\n");
	for(i=0; i<valid_pmc_cnt; i++) {
		cnt = printf("%s", valid_pmcs[i]);
		for(j=cnt; j<52; j++) {
			printf(" ");
		}
		printf("%%%d\n", i);
	}
}


int
main(int argc, char **argv)
{
	int i, j, cnt;
	char *filename=NULL;
	const char *name=NULL;
	int help_only = 0;
	int test_mode = 0;
	int test_at = 0;

	get_cpuid_set();
	memset(glob_cpu, 0, sizeof(glob_cpu));
	while ((i = getopt(argc, argv, "ALHhvm:i:?e:TE:")) != -1) {
		switch (i) {
		case 'A':
			run_all = 1;
			break;
		case 'L':
			list_all();
			return(0);
		case 'H':
			printf("**********************************\n");
			explain_all();
			printf("**********************************\n");
			return(0);
			break;
		case 'T':
			test_mode = 1;
			break;
		case 'E':
			master_exp = parse_expression(optarg);
			if (master_exp) {
				set_manual_exp(master_exp);
			}
			break;
		case 'e':
			if (validate_expression(optarg)) {
				printf("Unknown expression %s\n", optarg);
				return(0);
			}
			name = optarg;
			set_expression(optarg);
			break;
		case 'm':
			max_to_collect = strtol(optarg, NULL, 0);
			if (max_to_collect > MAX_COUNTER_SLOTS) {
				/* You can't collect more than max in array */
				max_to_collect = MAX_COUNTER_SLOTS;
			}
			break;
		case 'v':
			verbose++;
			break;
		case 'h':
			help_only = 1;
			break;
		case 'i':
			filename = optarg;
			break;
		case '?':
		default:
		use:
			printf("Use %s [ -i inputfile -v -m max_to_collect -e expr -E -h -? -H]\n",
			       argv[0]);
			printf("-i inputfile -- use source as inputfile not stdin (if stdin collect)\n");
			printf("-v -- verbose dump debug type things -- you don't want this\n");
			printf("-m N -- maximum to collect is N measurments\n");
			printf("-e expr-name -- Do expression expr-name\n");
			printf("-E 'your expression' -- Do your expression\n");
			printf("-h -- Don't do the expression I put in -e xxx just explain what it does and exit\n");
			printf("-H -- Don't run anything, just explain all canned expressions\n");
			printf("-T -- Test all PMC's defined by this processor\n");
			printf("-A -- Run all canned tests\n");
			return(0);
			break;
		}
	}
	if ((run_all == 0) && (name == NULL) && (filename == NULL) &&
	    (test_mode == 0) && (master_exp == NULL)) {
		printf("Without setting an expression we cannot dynamically gather information\n");
		printf("you must supply a filename (and you probably want verbosity)\n");
		goto use;
	}
	if (run_all && max_to_collect > 10) {
		max_to_collect = 3;
	}
	if (test_mode) {
		run_tests();
		return(0);
	}
	printf("*********************************\n");
	if ((master_exp == NULL) && name) {
		(*the_cpu.explain)(name);
	} else if (master_exp) {
		printf("Examine your expression ");
		print_exp(master_exp);
		printf("User defined threshold\n");
	}
	if (help_only) {
		return(0);
	}
	if (run_all) {
	more:
		name = the_cpu.ents[test_at].name;
		printf("***Test %s (threshold %s)****\n", name, the_cpu.ents[test_at].thresh);
		test_at++;
		if (set_expression(name) == -1) {
			if (test_at >= the_cpu.number) {
				goto done;
			} else
				goto more;
		}

	}
	process_file(filename);
	if (verbose >= 2) {
		for (i=0; i<ncnts; i++) {
			printf("Counter:%s cpu:%d index:%d\n",
			       cnts[i].counter_name,
			       cnts[i].cpu, i);
			for(j=0; j<cnts[i].pos; j++) {
				printf(" val - %ld\n", (long int)cnts[i].vals[j]);
			}
			printf(" sum - %ld\n", (long int)cnts[i].sum);
		}
	}
	if (expression == NULL) {
		return(0);
	}
	if (max_to_collect > 1) {
		for(i=0, cnt=0; i<MAX_CPU; i++) {
			if (glob_cpu[i]) {
				do_expression(glob_cpu[i], -1);
				cnt++;
				if (cnt == cpu_count_out) {
					printf("\n");
					break;
				} else {
					printf("\t");
				}
			}
		}
	}
	if (run_all && (test_at < the_cpu.number)) {
		memset(glob_cpu, 0, sizeof(glob_cpu));
		ncnts = 0;
		printf("*********************************\n");
		goto more;
	} else if (run_all) {
	done:
		printf("*********************************\n");
	}
	return(0);	
}