1843e1988Sjohnlev /* 2*29e54759SJoshua M. Clulow * This file and its contents are supplied under the terms of the 3*29e54759SJoshua M. Clulow * Common Development and Distribution License ("CDDL"), version 1.0. 4*29e54759SJoshua M. Clulow * You may only use this file in accordance with the terms of version 5*29e54759SJoshua M. Clulow * 1.0 of the CDDL. 6843e1988Sjohnlev * 7*29e54759SJoshua M. Clulow * A full copy of the text of the CDDL should have accompanied this 8*29e54759SJoshua M. Clulow * source. A copy of the CDDL is also available via the Internet at 9*29e54759SJoshua M. Clulow * http://www.illumos.org/license/CDDL. 10843e1988Sjohnlev */ 11*29e54759SJoshua M. Clulow 12843e1988Sjohnlev /* 13*29e54759SJoshua M. Clulow * Copyright 2015, Joyent, Inc. 14843e1988Sjohnlev */ 15843e1988Sjohnlev 16*29e54759SJoshua M. Clulow /* 17*29e54759SJoshua M. Clulow * The microfind() routine is used to calibrate the delay provided by 18*29e54759SJoshua M. Clulow * tenmicrosec(). Early in boot gethrtime() is not yet configured and 19*29e54759SJoshua M. Clulow * available for accurate delays, but some drivers still need to be able to 20*29e54759SJoshua M. Clulow * pause execution for rough increments of ten microseconds. To that end, 21*29e54759SJoshua M. Clulow * microfind() will measure the wall time elapsed during a simple delay loop 22*29e54759SJoshua M. Clulow * using the Intel 8254 Programmable Interval Timer (PIT), and attempt to find 23*29e54759SJoshua M. Clulow * a loop count that approximates a ten microsecond delay. 24*29e54759SJoshua M. Clulow * 25*29e54759SJoshua M. Clulow * This mechanism is accurate enough when running unvirtualised on real CPUs, 26*29e54759SJoshua M. Clulow * but is somewhat less efficacious in a virtual machine. In a virtualised 27*29e54759SJoshua M. Clulow * guest the relationship between instruction completion and elapsed wall time 28*29e54759SJoshua M. Clulow * is, at best, variable; on such machines the calibration is merely a rough 29*29e54759SJoshua M. Clulow * guess. 30*29e54759SJoshua M. Clulow */ 31843e1988Sjohnlev 32843e1988Sjohnlev #include <sys/types.h> 33843e1988Sjohnlev #include <sys/dl.h> 34843e1988Sjohnlev #include <sys/param.h> 35843e1988Sjohnlev #include <sys/pit.h> 36843e1988Sjohnlev #include <sys/inline.h> 37843e1988Sjohnlev #include <sys/machlock.h> 38843e1988Sjohnlev #include <sys/avintr.h> 39843e1988Sjohnlev #include <sys/smp_impldefs.h> 40843e1988Sjohnlev #include <sys/archsystm.h> 41843e1988Sjohnlev #include <sys/systm.h> 42843e1988Sjohnlev #include <sys/machsystm.h> 43843e1988Sjohnlev 44843e1988Sjohnlev /* 45843e1988Sjohnlev * Loop count for 10 microsecond wait. MUST be initialized for those who 46843e1988Sjohnlev * insist on calling "tenmicrosec" before the clock has been initialized. 47843e1988Sjohnlev */ 48843e1988Sjohnlev unsigned int microdata = 50; 49843e1988Sjohnlev 50*29e54759SJoshua M. Clulow /* 51*29e54759SJoshua M. Clulow * These values, used later in microfind(), are stored in globals to allow them 52*29e54759SJoshua M. Clulow * to be adjusted more easily via kmdb. 53*29e54759SJoshua M. Clulow */ 54*29e54759SJoshua M. Clulow unsigned int microdata_trial_count = 7; 55*29e54759SJoshua M. Clulow unsigned int microdata_allowed_failures = 3; 56*29e54759SJoshua M. Clulow 57*29e54759SJoshua M. Clulow 58*29e54759SJoshua M. Clulow static void 59*29e54759SJoshua M. Clulow microfind_pit_reprogram_for_bios(void) 60*29e54759SJoshua M. Clulow { 61*29e54759SJoshua M. Clulow /* 62*29e54759SJoshua M. Clulow * Restore PIT counter 0 for BIOS use in mode 3 -- "Square Wave 63*29e54759SJoshua M. Clulow * Generator". 64*29e54759SJoshua M. Clulow */ 65*29e54759SJoshua M. Clulow outb(PITCTL_PORT, PIT_C0 | PIT_LOADMODE | PIT_SQUAREMODE); 66*29e54759SJoshua M. Clulow 67*29e54759SJoshua M. Clulow /* 68*29e54759SJoshua M. Clulow * Load an initial counter value of zero. 69*29e54759SJoshua M. Clulow */ 70*29e54759SJoshua M. Clulow outb(PITCTR0_PORT, 0); 71*29e54759SJoshua M. Clulow outb(PITCTR0_PORT, 0); 72*29e54759SJoshua M. Clulow } 73*29e54759SJoshua M. Clulow 74*29e54759SJoshua M. Clulow /* 75*29e54759SJoshua M. Clulow * Measure the run time of tenmicrosec() using the Intel 8254 Programmable 76*29e54759SJoshua M. Clulow * Interval Timer. The timer operates at 1.193182 Mhz, so each timer tick 77*29e54759SJoshua M. Clulow * represents 0.8381 microseconds of wall time. This function returns the 78*29e54759SJoshua M. Clulow * number of such ticks that passed while tenmicrosec() was running, or 79*29e54759SJoshua M. Clulow * -1 if the delay was too long to measure with the PIT. 80*29e54759SJoshua M. Clulow */ 81*29e54759SJoshua M. Clulow static int 82*29e54759SJoshua M. Clulow microfind_pit_delta(void) 83*29e54759SJoshua M. Clulow { 84*29e54759SJoshua M. Clulow unsigned char status; 85*29e54759SJoshua M. Clulow int count; 86*29e54759SJoshua M. Clulow 87*29e54759SJoshua M. Clulow /* 88*29e54759SJoshua M. Clulow * Configure PIT counter 0 in mode 0 -- "Interrupt On Terminal Count". 89*29e54759SJoshua M. Clulow * In this mode, the PIT will count down from the loaded value and 90*29e54759SJoshua M. Clulow * set its output bit high once it reaches zero. The PIT will pause 91*29e54759SJoshua M. Clulow * until we write the low byte and then the high byte to the counter 92*29e54759SJoshua M. Clulow * port. 93*29e54759SJoshua M. Clulow */ 94*29e54759SJoshua M. Clulow outb(PITCTL_PORT, PIT_LOADMODE); 95*29e54759SJoshua M. Clulow 96*29e54759SJoshua M. Clulow /* 97*29e54759SJoshua M. Clulow * Load the maximum counter value, 0xffff, into the counter port. 98*29e54759SJoshua M. Clulow */ 99*29e54759SJoshua M. Clulow outb(PITCTR0_PORT, 0xff); 100*29e54759SJoshua M. Clulow outb(PITCTR0_PORT, 0xff); 101*29e54759SJoshua M. Clulow 102*29e54759SJoshua M. Clulow /* 103*29e54759SJoshua M. Clulow * Run the delay function. 104*29e54759SJoshua M. Clulow */ 105*29e54759SJoshua M. Clulow tenmicrosec(); 106*29e54759SJoshua M. Clulow 107*29e54759SJoshua M. Clulow /* 108*29e54759SJoshua M. Clulow * Latch the counter value and status for counter 0 with the read 109*29e54759SJoshua M. Clulow * back command. 110*29e54759SJoshua M. Clulow */ 111*29e54759SJoshua M. Clulow outb(PITCTL_PORT, PIT_READBACK | PIT_READBACKC0); 112*29e54759SJoshua M. Clulow 113*29e54759SJoshua M. Clulow /* 114*29e54759SJoshua M. Clulow * In read back mode, three values are read from the counter port 115*29e54759SJoshua M. Clulow * in order: the status byte, followed by the low byte and high 116*29e54759SJoshua M. Clulow * byte of the counter value. 117*29e54759SJoshua M. Clulow */ 118*29e54759SJoshua M. Clulow status = inb(PITCTR0_PORT); 119*29e54759SJoshua M. Clulow count = inb(PITCTR0_PORT); 120*29e54759SJoshua M. Clulow count |= inb(PITCTR0_PORT) << 8; 121*29e54759SJoshua M. Clulow 122*29e54759SJoshua M. Clulow /* 123*29e54759SJoshua M. Clulow * Verify that the counter started counting down. The null count 124*29e54759SJoshua M. Clulow * flag in the status byte is set when we load a value, and cleared 125*29e54759SJoshua M. Clulow * when counting operation begins. 126*29e54759SJoshua M. Clulow */ 127*29e54759SJoshua M. Clulow if (status & (1 << PITSTAT_NULLCNT)) { 128*29e54759SJoshua M. Clulow /* 129*29e54759SJoshua M. Clulow * The counter did not begin. This means the loop count 130*29e54759SJoshua M. Clulow * used by tenmicrosec is too small for this CPU. We return 131*29e54759SJoshua M. Clulow * a zero count to represent that the delay was too small 132*29e54759SJoshua M. Clulow * to measure. 133*29e54759SJoshua M. Clulow */ 134*29e54759SJoshua M. Clulow return (0); 135*29e54759SJoshua M. Clulow } 136*29e54759SJoshua M. Clulow 137*29e54759SJoshua M. Clulow /* 138*29e54759SJoshua M. Clulow * Verify that the counter did not wrap around. The output pin is 139*29e54759SJoshua M. Clulow * reset when we load a new counter value, and set once the counter 140*29e54759SJoshua M. Clulow * reaches zero. 141*29e54759SJoshua M. Clulow */ 142*29e54759SJoshua M. Clulow if (status & (1 << PITSTAT_OUTPUT)) { 143*29e54759SJoshua M. Clulow /* 144*29e54759SJoshua M. Clulow * The counter reached zero before we were able to read the 145*29e54759SJoshua M. Clulow * value. This means the loop count used by tenmicrosec is too 146*29e54759SJoshua M. Clulow * large for this CPU. 147*29e54759SJoshua M. Clulow */ 148*29e54759SJoshua M. Clulow return (-1); 149*29e54759SJoshua M. Clulow } 150*29e54759SJoshua M. Clulow 151*29e54759SJoshua M. Clulow /* 152*29e54759SJoshua M. Clulow * The PIT counts from our initial load value of 0xffff down to zero. 153*29e54759SJoshua M. Clulow * Return the number of timer ticks that passed while tenmicrosec was 154*29e54759SJoshua M. Clulow * running. 155*29e54759SJoshua M. Clulow */ 156*29e54759SJoshua M. Clulow VERIFY(count <= 0xffff); 157*29e54759SJoshua M. Clulow return (0xffff - count); 158*29e54759SJoshua M. Clulow } 159*29e54759SJoshua M. Clulow 160*29e54759SJoshua M. Clulow static int 161*29e54759SJoshua M. Clulow microfind_pit_delta_avg(int trials, int allowed_failures) 162*29e54759SJoshua M. Clulow { 163*29e54759SJoshua M. Clulow int tc = 0; 164*29e54759SJoshua M. Clulow int failures = 0; 165*29e54759SJoshua M. Clulow long long int total = 0; 166*29e54759SJoshua M. Clulow 167*29e54759SJoshua M. Clulow while (tc < trials) { 168*29e54759SJoshua M. Clulow int d; 169*29e54759SJoshua M. Clulow 170*29e54759SJoshua M. Clulow if ((d = microfind_pit_delta()) < 0) { 171*29e54759SJoshua M. Clulow /* 172*29e54759SJoshua M. Clulow * If the counter wrapped, we cannot use this 173*29e54759SJoshua M. Clulow * data point in the average. Record the failure 174*29e54759SJoshua M. Clulow * and try again. 175*29e54759SJoshua M. Clulow */ 176*29e54759SJoshua M. Clulow if (++failures > allowed_failures) { 177*29e54759SJoshua M. Clulow /* 178*29e54759SJoshua M. Clulow * Too many failures. 179*29e54759SJoshua M. Clulow */ 180*29e54759SJoshua M. Clulow return (-1); 181*29e54759SJoshua M. Clulow } 182*29e54759SJoshua M. Clulow continue; 183*29e54759SJoshua M. Clulow } 184*29e54759SJoshua M. Clulow 185*29e54759SJoshua M. Clulow total += d; 186*29e54759SJoshua M. Clulow tc++; 187*29e54759SJoshua M. Clulow } 188*29e54759SJoshua M. Clulow 189*29e54759SJoshua M. Clulow return (total / tc); 190*29e54759SJoshua M. Clulow } 191*29e54759SJoshua M. Clulow 192843e1988Sjohnlev void 193843e1988Sjohnlev microfind(void) 194843e1988Sjohnlev { 195*29e54759SJoshua M. Clulow int ticks = -1; 196843e1988Sjohnlev ulong_t s; 197843e1988Sjohnlev 198843e1988Sjohnlev /* 199*29e54759SJoshua M. Clulow * Disable interrupts while we measure the speed of the CPU. 200*29e54759SJoshua M. Clulow */ 201*29e54759SJoshua M. Clulow s = clear_int_flag(); 202*29e54759SJoshua M. Clulow 203*29e54759SJoshua M. Clulow /* 204*29e54759SJoshua M. Clulow * Start at the smallest loop count, i.e. 1, and keep doubling 205*29e54759SJoshua M. Clulow * until a delay of ~10ms can be measured. 206843e1988Sjohnlev */ 207843e1988Sjohnlev microdata = 1; 208*29e54759SJoshua M. Clulow for (;;) { 209*29e54759SJoshua M. Clulow int ticksprev = ticks; 210843e1988Sjohnlev 211*29e54759SJoshua M. Clulow /* 212*29e54759SJoshua M. Clulow * We use a trial count of 7 to attempt to smooth out jitter 213*29e54759SJoshua M. Clulow * caused by the scheduling of virtual machines. We only allow 214*29e54759SJoshua M. Clulow * three failures, as each failure represents a wrapped counter 215*29e54759SJoshua M. Clulow * and an expired wall time of at least ~55ms. 216*29e54759SJoshua M. Clulow */ 217*29e54759SJoshua M. Clulow if ((ticks = microfind_pit_delta_avg(microdata_trial_count, 218*29e54759SJoshua M. Clulow microdata_allowed_failures)) < 0) { 219*29e54759SJoshua M. Clulow /* 220*29e54759SJoshua M. Clulow * The counter wrapped. Halve the counter, restore the 221*29e54759SJoshua M. Clulow * previous ticks count and break out of the loop. 222*29e54759SJoshua M. Clulow */ 223*29e54759SJoshua M. Clulow if (microdata <= 1) { 224*29e54759SJoshua M. Clulow /* 225*29e54759SJoshua M. Clulow * If the counter wrapped on the first try, 226*29e54759SJoshua M. Clulow * then we have some serious problems. 227*29e54759SJoshua M. Clulow */ 228*29e54759SJoshua M. Clulow panic("microfind: pit counter always wrapped"); 229*29e54759SJoshua M. Clulow } 230*29e54759SJoshua M. Clulow microdata = microdata >> 1; 231*29e54759SJoshua M. Clulow ticks = ticksprev; 232843e1988Sjohnlev break; 233843e1988Sjohnlev } 234843e1988Sjohnlev 235*29e54759SJoshua M. Clulow if (ticks > 0x3000) { 236843e1988Sjohnlev /* 237*29e54759SJoshua M. Clulow * The loop ran for at least ~10ms worth of 0.8381us 238*29e54759SJoshua M. Clulow * PIT ticks. 239843e1988Sjohnlev */ 240843e1988Sjohnlev break; 241*29e54759SJoshua M. Clulow } else if (microdata > (UINT_MAX >> 1)) { 242843e1988Sjohnlev /* 243*29e54759SJoshua M. Clulow * Doubling the loop count again would cause an 244*29e54759SJoshua M. Clulow * overflow. Use what we have. 245843e1988Sjohnlev */ 246*29e54759SJoshua M. Clulow break; 247*29e54759SJoshua M. Clulow } else { 248*29e54759SJoshua M. Clulow /* 249*29e54759SJoshua M. Clulow * Double and try again. 250*29e54759SJoshua M. Clulow */ 251*29e54759SJoshua M. Clulow microdata = microdata << 1; 252*29e54759SJoshua M. Clulow } 253*29e54759SJoshua M. Clulow } 254*29e54759SJoshua M. Clulow 255*29e54759SJoshua M. Clulow if (ticks < 1) { 256*29e54759SJoshua M. Clulow /* 257*29e54759SJoshua M. Clulow * If we were unable to measure a positive PIT tick count, then 258*29e54759SJoshua M. Clulow * we will be unable to scale the value of "microdata" 259*29e54759SJoshua M. Clulow * correctly. 260*29e54759SJoshua M. Clulow */ 261*29e54759SJoshua M. Clulow panic("microfind: could not calibrate delay loop"); 262843e1988Sjohnlev } 263843e1988Sjohnlev 264843e1988Sjohnlev /* 265*29e54759SJoshua M. Clulow * Calculate the loop count based on the final PIT tick count and the 266*29e54759SJoshua M. Clulow * loop count. Each PIT tick represents a duration of ~0.8381us, so we 267*29e54759SJoshua M. Clulow * want to adjust microdata to represent a duration of 12 ticks, or 268*29e54759SJoshua M. Clulow * ~10us. 269843e1988Sjohnlev */ 270*29e54759SJoshua M. Clulow microdata = (long long)microdata * 12LL / (long long)ticks; 271843e1988Sjohnlev 272843e1988Sjohnlev /* 273*29e54759SJoshua M. Clulow * Try and leave things as we found them. 274843e1988Sjohnlev */ 275*29e54759SJoshua M. Clulow microfind_pit_reprogram_for_bios(); 276843e1988Sjohnlev 277843e1988Sjohnlev /* 278*29e54759SJoshua M. Clulow * Restore previous interrupt state. 279843e1988Sjohnlev */ 280*29e54759SJoshua M. Clulow restore_int_flag(s); 281843e1988Sjohnlev } 282