xref: /titanic_52/usr/src/uts/i86pc/io/microfind.c (revision 29e54759b5b8e21fb481d44ee504a764aa7b6ea2)
1843e1988Sjohnlev /*
2*29e54759SJoshua M. Clulow  * This file and its contents are supplied under the terms of the
3*29e54759SJoshua M. Clulow  * Common Development and Distribution License ("CDDL"), version 1.0.
4*29e54759SJoshua M. Clulow  * You may only use this file in accordance with the terms of version
5*29e54759SJoshua M. Clulow  * 1.0 of the CDDL.
6843e1988Sjohnlev  *
7*29e54759SJoshua M. Clulow  * A full copy of the text of the CDDL should have accompanied this
8*29e54759SJoshua M. Clulow  * source.  A copy of the CDDL is also available via the Internet at
9*29e54759SJoshua M. Clulow  * http://www.illumos.org/license/CDDL.
10843e1988Sjohnlev  */
11*29e54759SJoshua M. Clulow 
12843e1988Sjohnlev /*
13*29e54759SJoshua M. Clulow  * Copyright 2015, Joyent, Inc.
14843e1988Sjohnlev  */
15843e1988Sjohnlev 
16*29e54759SJoshua M. Clulow /*
17*29e54759SJoshua M. Clulow  * The microfind() routine is used to calibrate the delay provided by
18*29e54759SJoshua M. Clulow  * tenmicrosec().  Early in boot gethrtime() is not yet configured and
19*29e54759SJoshua M. Clulow  * available for accurate delays, but some drivers still need to be able to
20*29e54759SJoshua M. Clulow  * pause execution for rough increments of ten microseconds.  To that end,
21*29e54759SJoshua M. Clulow  * microfind() will measure the wall time elapsed during a simple delay loop
22*29e54759SJoshua M. Clulow  * using the Intel 8254 Programmable Interval Timer (PIT), and attempt to find
23*29e54759SJoshua M. Clulow  * a loop count that approximates a ten microsecond delay.
24*29e54759SJoshua M. Clulow  *
25*29e54759SJoshua M. Clulow  * This mechanism is accurate enough when running unvirtualised on real CPUs,
26*29e54759SJoshua M. Clulow  * but is somewhat less efficacious in a virtual machine.  In a virtualised
27*29e54759SJoshua M. Clulow  * guest the relationship between instruction completion and elapsed wall time
28*29e54759SJoshua M. Clulow  * is, at best, variable; on such machines the calibration is merely a rough
29*29e54759SJoshua M. Clulow  * guess.
30*29e54759SJoshua M. Clulow  */
31843e1988Sjohnlev 
32843e1988Sjohnlev #include <sys/types.h>
33843e1988Sjohnlev #include <sys/dl.h>
34843e1988Sjohnlev #include <sys/param.h>
35843e1988Sjohnlev #include <sys/pit.h>
36843e1988Sjohnlev #include <sys/inline.h>
37843e1988Sjohnlev #include <sys/machlock.h>
38843e1988Sjohnlev #include <sys/avintr.h>
39843e1988Sjohnlev #include <sys/smp_impldefs.h>
40843e1988Sjohnlev #include <sys/archsystm.h>
41843e1988Sjohnlev #include <sys/systm.h>
42843e1988Sjohnlev #include <sys/machsystm.h>
43843e1988Sjohnlev 
/*
 * Loop count for 10 microsecond wait.  MUST be initialized for those who
 * insist on calling "tenmicrosec" before the clock has been initialized.
 *
 * The initial value of 50 only stands in until calibration: microfind()
 * overwrites this variable, first with each trial loop count it measures and
 * finally with the calibrated result.
 */
unsigned int microdata = 50;
49843e1988Sjohnlev 
/*
 * These values, used later in microfind(), are stored in globals to allow them
 * to be adjusted more easily via kmdb.
 *
 * microdata_trial_count is the number of successful PIT measurements that
 * are averaged for each candidate loop count.  microdata_allowed_failures is
 * the number of wrapped (unusable) measurements tolerated for a candidate
 * loop count before the average is abandoned.  Both are handed to
 * microfind_pit_delta_avg() as int arguments.
 */
unsigned int microdata_trial_count = 7;
unsigned int microdata_allowed_failures = 3;
56*29e54759SJoshua M. Clulow 
57*29e54759SJoshua M. Clulow 
58*29e54759SJoshua M. Clulow static void
59*29e54759SJoshua M. Clulow microfind_pit_reprogram_for_bios(void)
60*29e54759SJoshua M. Clulow {
61*29e54759SJoshua M. Clulow 	/*
62*29e54759SJoshua M. Clulow 	 * Restore PIT counter 0 for BIOS use in mode 3 -- "Square Wave
63*29e54759SJoshua M. Clulow 	 * Generator".
64*29e54759SJoshua M. Clulow 	 */
65*29e54759SJoshua M. Clulow 	outb(PITCTL_PORT, PIT_C0 | PIT_LOADMODE | PIT_SQUAREMODE);
66*29e54759SJoshua M. Clulow 
67*29e54759SJoshua M. Clulow 	/*
68*29e54759SJoshua M. Clulow 	 * Load an initial counter value of zero.
69*29e54759SJoshua M. Clulow 	 */
70*29e54759SJoshua M. Clulow 	outb(PITCTR0_PORT, 0);
71*29e54759SJoshua M. Clulow 	outb(PITCTR0_PORT, 0);
72*29e54759SJoshua M. Clulow }
73*29e54759SJoshua M. Clulow 
74*29e54759SJoshua M. Clulow /*
75*29e54759SJoshua M. Clulow  * Measure the run time of tenmicrosec() using the Intel 8254 Programmable
76*29e54759SJoshua M. Clulow  * Interval Timer.  The timer operates at 1.193182 Mhz, so each timer tick
77*29e54759SJoshua M. Clulow  * represents 0.8381 microseconds of wall time.  This function returns the
78*29e54759SJoshua M. Clulow  * number of such ticks that passed while tenmicrosec() was running, or
79*29e54759SJoshua M. Clulow  * -1 if the delay was too long to measure with the PIT.
80*29e54759SJoshua M. Clulow  */
81*29e54759SJoshua M. Clulow static int
82*29e54759SJoshua M. Clulow microfind_pit_delta(void)
83*29e54759SJoshua M. Clulow {
84*29e54759SJoshua M. Clulow 	unsigned char status;
85*29e54759SJoshua M. Clulow 	int count;
86*29e54759SJoshua M. Clulow 
87*29e54759SJoshua M. Clulow 	/*
88*29e54759SJoshua M. Clulow 	 * Configure PIT counter 0 in mode 0 -- "Interrupt On Terminal Count".
89*29e54759SJoshua M. Clulow 	 * In this mode, the PIT will count down from the loaded value and
90*29e54759SJoshua M. Clulow 	 * set its output bit high once it reaches zero.  The PIT will pause
91*29e54759SJoshua M. Clulow 	 * until we write the low byte and then the high byte to the counter
92*29e54759SJoshua M. Clulow 	 * port.
93*29e54759SJoshua M. Clulow 	 */
94*29e54759SJoshua M. Clulow 	outb(PITCTL_PORT, PIT_LOADMODE);
95*29e54759SJoshua M. Clulow 
96*29e54759SJoshua M. Clulow 	/*
97*29e54759SJoshua M. Clulow 	 * Load the maximum counter value, 0xffff, into the counter port.
98*29e54759SJoshua M. Clulow 	 */
99*29e54759SJoshua M. Clulow 	outb(PITCTR0_PORT, 0xff);
100*29e54759SJoshua M. Clulow 	outb(PITCTR0_PORT, 0xff);
101*29e54759SJoshua M. Clulow 
102*29e54759SJoshua M. Clulow 	/*
103*29e54759SJoshua M. Clulow 	 * Run the delay function.
104*29e54759SJoshua M. Clulow 	 */
105*29e54759SJoshua M. Clulow 	tenmicrosec();
106*29e54759SJoshua M. Clulow 
107*29e54759SJoshua M. Clulow 	/*
108*29e54759SJoshua M. Clulow 	 * Latch the counter value and status for counter 0 with the read
109*29e54759SJoshua M. Clulow 	 * back command.
110*29e54759SJoshua M. Clulow 	 */
111*29e54759SJoshua M. Clulow 	outb(PITCTL_PORT, PIT_READBACK | PIT_READBACKC0);
112*29e54759SJoshua M. Clulow 
113*29e54759SJoshua M. Clulow 	/*
114*29e54759SJoshua M. Clulow 	 * In read back mode, three values are read from the counter port
115*29e54759SJoshua M. Clulow 	 * in order: the status byte, followed by the low byte and high
116*29e54759SJoshua M. Clulow 	 * byte of the counter value.
117*29e54759SJoshua M. Clulow 	 */
118*29e54759SJoshua M. Clulow 	status = inb(PITCTR0_PORT);
119*29e54759SJoshua M. Clulow 	count = inb(PITCTR0_PORT);
120*29e54759SJoshua M. Clulow 	count |= inb(PITCTR0_PORT) << 8;
121*29e54759SJoshua M. Clulow 
122*29e54759SJoshua M. Clulow 	/*
123*29e54759SJoshua M. Clulow 	 * Verify that the counter started counting down.  The null count
124*29e54759SJoshua M. Clulow 	 * flag in the status byte is set when we load a value, and cleared
125*29e54759SJoshua M. Clulow 	 * when counting operation begins.
126*29e54759SJoshua M. Clulow 	 */
127*29e54759SJoshua M. Clulow 	if (status & (1 << PITSTAT_NULLCNT)) {
128*29e54759SJoshua M. Clulow 		/*
129*29e54759SJoshua M. Clulow 		 * The counter did not begin.  This means the loop count
130*29e54759SJoshua M. Clulow 		 * used by tenmicrosec is too small for this CPU.  We return
131*29e54759SJoshua M. Clulow 		 * a zero count to represent that the delay was too small
132*29e54759SJoshua M. Clulow 		 * to measure.
133*29e54759SJoshua M. Clulow 		 */
134*29e54759SJoshua M. Clulow 		return (0);
135*29e54759SJoshua M. Clulow 	}
136*29e54759SJoshua M. Clulow 
137*29e54759SJoshua M. Clulow 	/*
138*29e54759SJoshua M. Clulow 	 * Verify that the counter did not wrap around.  The output pin is
139*29e54759SJoshua M. Clulow 	 * reset when we load a new counter value, and set once the counter
140*29e54759SJoshua M. Clulow 	 * reaches zero.
141*29e54759SJoshua M. Clulow 	 */
142*29e54759SJoshua M. Clulow 	if (status & (1 << PITSTAT_OUTPUT)) {
143*29e54759SJoshua M. Clulow 		/*
144*29e54759SJoshua M. Clulow 		 * The counter reached zero before we were able to read the
145*29e54759SJoshua M. Clulow 		 * value.  This means the loop count used by tenmicrosec is too
146*29e54759SJoshua M. Clulow 		 * large for this CPU.
147*29e54759SJoshua M. Clulow 		 */
148*29e54759SJoshua M. Clulow 		return (-1);
149*29e54759SJoshua M. Clulow 	}
150*29e54759SJoshua M. Clulow 
151*29e54759SJoshua M. Clulow 	/*
152*29e54759SJoshua M. Clulow 	 * The PIT counts from our initial load value of 0xffff down to zero.
153*29e54759SJoshua M. Clulow 	 * Return the number of timer ticks that passed while tenmicrosec was
154*29e54759SJoshua M. Clulow 	 * running.
155*29e54759SJoshua M. Clulow 	 */
156*29e54759SJoshua M. Clulow 	VERIFY(count <= 0xffff);
157*29e54759SJoshua M. Clulow 	return (0xffff - count);
158*29e54759SJoshua M. Clulow }
159*29e54759SJoshua M. Clulow 
/*
 * Average the PIT tick counts reported by microfind_pit_delta() over
 * "trials" successful measurements.  A measurement for which
 * microfind_pit_delta() returns a negative value (the counter wrapped) is
 * left out of the average; it is counted as a failure and the measurement is
 * retried.
 *
 * Returns the truncated integer average of the successful measurements, or
 * -1 as soon as more than "allowed_failures" measurements have failed.
 *
 * A "trials" value below one is treated as one.  The trial count originates
 * from a global that is intended to be adjusted with kmdb; without this
 * clamp, a zero value (or one that is negative after conversion to int)
 * would skip the loop entirely and divide by zero when computing the average.
 */
static int
microfind_pit_delta_avg(int trials, int allowed_failures)
{
	int tc = 0;
	int failures = 0;
	long long int total = 0;

	if (trials < 1) {
		trials = 1;
	}

	while (tc < trials) {
		int d;

		if ((d = microfind_pit_delta()) < 0) {
			/*
			 * If the counter wrapped, we cannot use this
			 * data point in the average.  Record the failure
			 * and try again.
			 */
			if (++failures > allowed_failures) {
				/*
				 * Too many failures.
				 */
				return (-1);
			}
			continue;
		}

		total += d;
		tc++;
	}

	/*
	 * tc is at least one here because trials was clamped above.
	 */
	return ((int)(total / tc));
}
191*29e54759SJoshua M. Clulow 
192843e1988Sjohnlev void
193843e1988Sjohnlev microfind(void)
194843e1988Sjohnlev {
195*29e54759SJoshua M. Clulow 	int ticks = -1;
196843e1988Sjohnlev 	ulong_t s;
197843e1988Sjohnlev 
198843e1988Sjohnlev 	/*
199*29e54759SJoshua M. Clulow 	 * Disable interrupts while we measure the speed of the CPU.
200*29e54759SJoshua M. Clulow 	 */
201*29e54759SJoshua M. Clulow 	s = clear_int_flag();
202*29e54759SJoshua M. Clulow 
203*29e54759SJoshua M. Clulow 	/*
204*29e54759SJoshua M. Clulow 	 * Start at the smallest loop count, i.e. 1, and keep doubling
205*29e54759SJoshua M. Clulow 	 * until a delay of ~10ms can be measured.
206843e1988Sjohnlev 	 */
207843e1988Sjohnlev 	microdata = 1;
208*29e54759SJoshua M. Clulow 	for (;;) {
209*29e54759SJoshua M. Clulow 		int ticksprev = ticks;
210843e1988Sjohnlev 
211*29e54759SJoshua M. Clulow 		/*
212*29e54759SJoshua M. Clulow 		 * We use a trial count of 7 to attempt to smooth out jitter
213*29e54759SJoshua M. Clulow 		 * caused by the scheduling of virtual machines.  We only allow
214*29e54759SJoshua M. Clulow 		 * three failures, as each failure represents a wrapped counter
215*29e54759SJoshua M. Clulow 		 * and an expired wall time of at least ~55ms.
216*29e54759SJoshua M. Clulow 		 */
217*29e54759SJoshua M. Clulow 		if ((ticks = microfind_pit_delta_avg(microdata_trial_count,
218*29e54759SJoshua M. Clulow 		    microdata_allowed_failures)) < 0) {
219*29e54759SJoshua M. Clulow 			/*
220*29e54759SJoshua M. Clulow 			 * The counter wrapped.  Halve the counter, restore the
221*29e54759SJoshua M. Clulow 			 * previous ticks count and break out of the loop.
222*29e54759SJoshua M. Clulow 			 */
223*29e54759SJoshua M. Clulow 			if (microdata <= 1) {
224*29e54759SJoshua M. Clulow 				/*
225*29e54759SJoshua M. Clulow 				 * If the counter wrapped on the first try,
226*29e54759SJoshua M. Clulow 				 * then we have some serious problems.
227*29e54759SJoshua M. Clulow 				 */
228*29e54759SJoshua M. Clulow 				panic("microfind: pit counter always wrapped");
229*29e54759SJoshua M. Clulow 			}
230*29e54759SJoshua M. Clulow 			microdata = microdata >> 1;
231*29e54759SJoshua M. Clulow 			ticks = ticksprev;
232843e1988Sjohnlev 			break;
233843e1988Sjohnlev 		}
234843e1988Sjohnlev 
235*29e54759SJoshua M. Clulow 		if (ticks > 0x3000) {
236843e1988Sjohnlev 			/*
237*29e54759SJoshua M. Clulow 			 * The loop ran for at least ~10ms worth of 0.8381us
238*29e54759SJoshua M. Clulow 			 * PIT ticks.
239843e1988Sjohnlev 			 */
240843e1988Sjohnlev 			break;
241*29e54759SJoshua M. Clulow 		} else if (microdata > (UINT_MAX >> 1)) {
242843e1988Sjohnlev 			/*
243*29e54759SJoshua M. Clulow 			 * Doubling the loop count again would cause an
244*29e54759SJoshua M. Clulow 			 * overflow.  Use what we have.
245843e1988Sjohnlev 			 */
246*29e54759SJoshua M. Clulow 			break;
247*29e54759SJoshua M. Clulow 		} else {
248*29e54759SJoshua M. Clulow 			/*
249*29e54759SJoshua M. Clulow 			 * Double and try again.
250*29e54759SJoshua M. Clulow 			 */
251*29e54759SJoshua M. Clulow 			microdata = microdata << 1;
252*29e54759SJoshua M. Clulow 		}
253*29e54759SJoshua M. Clulow 	}
254*29e54759SJoshua M. Clulow 
255*29e54759SJoshua M. Clulow 	if (ticks < 1) {
256*29e54759SJoshua M. Clulow 		/*
257*29e54759SJoshua M. Clulow 		 * If we were unable to measure a positive PIT tick count, then
258*29e54759SJoshua M. Clulow 		 * we will be unable to scale the value of "microdata"
259*29e54759SJoshua M. Clulow 		 * correctly.
260*29e54759SJoshua M. Clulow 		 */
261*29e54759SJoshua M. Clulow 		panic("microfind: could not calibrate delay loop");
262843e1988Sjohnlev 	}
263843e1988Sjohnlev 
264843e1988Sjohnlev 	/*
265*29e54759SJoshua M. Clulow 	 * Calculate the loop count based on the final PIT tick count and the
266*29e54759SJoshua M. Clulow 	 * loop count.  Each PIT tick represents a duration of ~0.8381us, so we
267*29e54759SJoshua M. Clulow 	 * want to adjust microdata to represent a duration of 12 ticks, or
268*29e54759SJoshua M. Clulow 	 * ~10us.
269843e1988Sjohnlev 	 */
270*29e54759SJoshua M. Clulow 	microdata = (long long)microdata * 12LL / (long long)ticks;
271843e1988Sjohnlev 
272843e1988Sjohnlev 	/*
273*29e54759SJoshua M. Clulow 	 * Try and leave things as we found them.
274843e1988Sjohnlev 	 */
275*29e54759SJoshua M. Clulow 	microfind_pit_reprogram_for_bios();
276843e1988Sjohnlev 
277843e1988Sjohnlev 	/*
278*29e54759SJoshua M. Clulow 	 * Restore previous interrupt state.
279843e1988Sjohnlev 	 */
280*29e54759SJoshua M. Clulow 	restore_int_flag(s);
281843e1988Sjohnlev }
282