xref: /linux/arch/x86/kernel/cpu/perfctr-watchdog.c (revision 98366c20a275e957416e9516db5dcb7195b4e101)
/* Local APIC based NMI watchdog for various CPUs.
   This file also handles reservation of performance counters for coordination
   with other users (like oprofile).

   Note that these events normally don't tick when the CPU idles. This means
   the frequency varies with CPU load.

   Original code for K7/P6 written by Keith Owens */

#include <linux/percpu.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/bitops.h>
#include <linux/smp.h>
#include <linux/nmi.h>
#include <asm/apic.h>
#include <asm/intel_arch_perfmon.h>

struct nmi_watchdog_ctlblk {
	unsigned int cccr_msr;
	unsigned int perfctr_msr;  /* the MSR to reset in NMI handler */
	unsigned int evntsel_msr;  /* the MSR to select the events to handle */
};

/* Interface defining a CPU specific perfctr watchdog */
struct wd_ops {
	int (*reserve)(void);		/* claim the perfctr/evntsel MSRs */
	void (*unreserve)(void);	/* give them back */
	int (*setup)(unsigned nmi_hz);	/* program the counter on this CPU */
	void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz);
	void (*stop)(void);
	unsigned perfctr;		/* performance counter MSR */
	unsigned evntsel;		/* event select MSR */
	u64 checkbit;			/* bit that stays set while the counter
					   is still counting up towards overflow
					   (usually the counter's MSB) */
};

static const struct wd_ops *wd_ops;

/*
 * This number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its
 * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now)
 */
#define NMI_MAX_COUNTER_BITS 66

/*
 * perfctr_nmi_owner tracks the ownership of the perfctr registers;
 * evntsel_nmi_owner tracks the ownership of the event selection registers.
 * Different performance counters / event selection registers may be reserved
 * by different subsystems; this reservation system just tries to coordinate
 * things a little.
 */
static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS);
static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS);

static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);

/* converts an MSR to the appropriate reservation bit */
static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
{
	/* returns the bit offset of the performance counter register */
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		return msr - MSR_K7_PERFCTR0;
	case X86_VENDOR_INTEL:
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
			return msr - MSR_ARCH_PERFMON_PERFCTR0;

		switch (boot_cpu_data.x86) {
		case 6:
			return msr - MSR_P6_PERFCTR0;
		case 15:
			return msr - MSR_P4_BPU_PERFCTR0;
		}
	}
	return 0;
}

/* converts an MSR to the appropriate reservation bit */
static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
{
	/* returns the bit offset of the event selection register */
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		return msr - MSR_K7_EVNTSEL0;
	case X86_VENDOR_INTEL:
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
			return msr - MSR_ARCH_PERFMON_EVENTSEL0;

		switch (boot_cpu_data.x86) {
		case 6:
			return msr - MSR_P6_EVNTSEL0;
		case 15:
			return msr - MSR_P4_BSU_ESCR0;
		}
	}
	return 0;
}

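/*
 * Example of the mapping (illustrative): on AMD, MSR_K7_PERFCTR2 maps to
 * reservation bit 2, and MSR_K7_EVNTSEL2 likewise maps to bit 2 in
 * evntsel_nmi_owner. The two bitmaps are indexed independently, so a user
 * must reserve both the counter and its event select register.
 */
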
/* checks availability of a reservation bit (hack for oprofile) */
int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
{
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	return !test_bit(counter, perfctr_nmi_owner);
}

/* checks an MSR for availability */
int avail_to_resrv_perfctr_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_perfctr_msr_to_bit(msr);
	BUG_ON(counter > NMI_MAX_COUNTER_BITS);

	return !test_bit(counter, perfctr_nmi_owner);
}

/*
 * Returns 1 on success (the register was free, or it is not managed by
 * the allocator at all) and 0 if somebody else has reserved it already.
 */
int reserve_perfctr_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_perfctr_msr_to_bit(msr);
	/* register not managed by the allocator? */
	if (counter > NMI_MAX_COUNTER_BITS)
		return 1;

	if (!test_and_set_bit(counter, perfctr_nmi_owner))
		return 1;
	return 0;
}

void release_perfctr_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_perfctr_msr_to_bit(msr);
	/* register not managed by the allocator? */
	if (counter > NMI_MAX_COUNTER_BITS)
		return;

	clear_bit(counter, perfctr_nmi_owner);
}

int reserve_evntsel_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_evntsel_msr_to_bit(msr);
	/* register not managed by the allocator? */
	if (counter > NMI_MAX_COUNTER_BITS)
		return 1;

	if (!test_and_set_bit(counter, evntsel_nmi_owner))
		return 1;
	return 0;
}

void release_evntsel_nmi(unsigned int msr)
{
	unsigned int counter;

	counter = nmi_evntsel_msr_to_bit(msr);
	/* register not managed by the allocator? */
	if (counter > NMI_MAX_COUNTER_BITS)
		return;

	clear_bit(counter, evntsel_nmi_owner);
}

EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
EXPORT_SYMBOL(reserve_perfctr_nmi);
EXPORT_SYMBOL(release_perfctr_nmi);
EXPORT_SYMBOL(reserve_evntsel_nmi);
EXPORT_SYMBOL(release_evntsel_nmi);

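/*
 * Sketch of how another perfctr user such as oprofile is expected to use
 * the exported reservation API (the MSR names are illustrative only):
 *
 *	if (!reserve_perfctr_nmi(MSR_K7_PERFCTR3))
 *		return -EBUSY;
 *	if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL3)) {
 *		release_perfctr_nmi(MSR_K7_PERFCTR3);
 *		return -EBUSY;
 *	}
 *	... program and use the counter ...
 *	release_evntsel_nmi(MSR_K7_EVNTSEL3);
 *	release_perfctr_nmi(MSR_K7_PERFCTR3);
 */
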
void disable_lapic_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

	if (atomic_read(&nmi_active) <= 0)
		return;

	on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
	wd_ops->unreserve();

	BUG_ON(atomic_read(&nmi_active) != 0);
}

void enable_lapic_nmi_watchdog(void)
{
	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

	/* are we already enabled? */
	if (atomic_read(&nmi_active) != 0)
		return;

	/* are we lapic aware? */
	if (!wd_ops)
		return;
	if (!wd_ops->reserve()) {
		printk(KERN_ERR "NMI watchdog: cannot reserve perfctrs\n");
		return;
	}

	on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
	touch_nmi_watchdog();
}

/*
 * Activate the NMI watchdog via the local APIC.
 */

static unsigned int adjust_for_32bit_ctr(unsigned int hz)
{
	u64 counter_val;
	unsigned int retval = hz;

	/*
	 * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter
	 * are writable, with higher bits sign extending from bit 31.
	 * So we can only program the counter with 31-bit values; bit 31
	 * must be set so that the sign extension makes the programmed
	 * value negative. Pick the lowest nmi_hz for which the count per
	 * period still fits in 31 bits.
	 */
	counter_val = (u64)cpu_khz * 1000;
	do_div(counter_val, retval);
	if (counter_val > 0x7fffffffULL) {
		u64 count = (u64)cpu_khz * 1000;
		do_div(count, 0x7fffffffUL);
		retval = count + 1;
	}
	return retval;
}

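/*
 * Worked example with illustrative numbers: on a 3 GHz CPU, cpu_khz is
 * 3000000, so one second's worth of cycles is 3,000,000,000, which does
 * not fit in 31 bits. For nmi_hz == 1 the code above picks
 * 3000000000 / 0x7fffffff + 1 == 2, and the counter is then programmed
 * with -(1,500,000,000), which does fit.
 */
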
/*
 * Program the perfctr MSR with the negative of the count for one watchdog
 * period, so that it overflows (and raises the PMI as an NMI) after
 * cpu_khz * 1000 / nmi_hz event ticks.
 */
static void
write_watchdog_counter(unsigned int perfctr_msr, const char *descr, unsigned nmi_hz)
{
	u64 count = (u64)cpu_khz * 1000;

	do_div(count, nmi_hz);
	if (descr)
		Dprintk("setting %s to -0x%08Lx\n", descr, count);
	wrmsrl(perfctr_msr, 0 - count);
}

/* same, but for counters where only the low 32 bits are writable */
static void write_watchdog_counter32(unsigned int perfctr_msr,
		const char *descr, unsigned nmi_hz)
{
	u64 count = (u64)cpu_khz * 1000;

	do_div(count, nmi_hz);
	if (descr)
		Dprintk("setting %s to -0x%08Lx\n", descr, count);
	wrmsr(perfctr_msr, (u32)(-count), 0);
}

/* AMD K7/K8/Family10h/Family11h support. AMD keeps this interface
   nicely stable so there is not much variety */

#define K7_EVNTSEL_ENABLE	(1 << 22)
#define K7_EVNTSEL_INT		(1 << 20)
#define K7_EVNTSEL_OS		(1 << 17)
#define K7_EVNTSEL_USR		(1 << 16)
#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING	0x76
#define K7_NMI_EVENT		K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING

static int setup_k7_watchdog(unsigned nmi_hz)
{
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	perfctr_msr = wd_ops->perfctr;
	evntsel_msr = wd_ops->evntsel;

	wrmsrl(perfctr_msr, 0UL);

	evntsel = K7_EVNTSEL_INT
		| K7_EVNTSEL_OS
		| K7_EVNTSEL_USR
		| K7_NMI_EVENT;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
	write_watchdog_counter(perfctr_msr, "K7_PERFCTR0", nmi_hz);
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= K7_EVNTSEL_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;		/* unused */
	return 1;
}

static void single_msr_stop_watchdog(void)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	wrmsr(wd->evntsel_msr, 0, 0);
}

static int single_msr_reserve(void)
{
	if (!reserve_perfctr_nmi(wd_ops->perfctr))
		return 0;

	if (!reserve_evntsel_nmi(wd_ops->evntsel)) {
		release_perfctr_nmi(wd_ops->perfctr);
		return 0;
	}
	return 1;
}

static void single_msr_unreserve(void)
{
	release_evntsel_nmi(wd_ops->evntsel);
	release_perfctr_nmi(wd_ops->perfctr);
}

static void single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
{
	/* start the cycle over again */
	write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
}

static const struct wd_ops k7_wd_ops = {
	.reserve = single_msr_reserve,
	.unreserve = single_msr_unreserve,
	.setup = setup_k7_watchdog,
	.rearm = single_msr_rearm,
	.stop = single_msr_stop_watchdog,
	.perfctr = MSR_K7_PERFCTR0,
	.evntsel = MSR_K7_EVNTSEL0,
	.checkbit = 1ULL << 47,
};

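/*
 * Note on .checkbit: the K7/K8 performance counters are 48 bits wide.
 * The counter is programmed with a negative value and counts up, so bit
 * 47 (the MSB) stays set until the counter wraps; lapic_wd_event() tests
 * this bit to decide whether the counter is still running.
 */
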
/* Intel Model 6 (PPro+, P2, P3, P-M, Core1) */

#define P6_EVNTSEL0_ENABLE	(1 << 22)
#define P6_EVNTSEL_INT		(1 << 20)
#define P6_EVNTSEL_OS		(1 << 17)
#define P6_EVNTSEL_USR		(1 << 16)
#define P6_EVENT_CPU_CLOCKS_NOT_HALTED	0x79
#define P6_NMI_EVENT		P6_EVENT_CPU_CLOCKS_NOT_HALTED

static int setup_p6_watchdog(unsigned nmi_hz)
{
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	perfctr_msr = wd_ops->perfctr;
	evntsel_msr = wd_ops->evntsel;

	/* KVM doesn't implement this MSR */
	if (wrmsr_safe(perfctr_msr, 0, 0) < 0)
		return 0;

	evntsel = P6_EVNTSEL_INT
		| P6_EVNTSEL_OS
		| P6_EVNTSEL_USR
		| P6_NMI_EVENT;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
	nmi_hz = adjust_for_32bit_ctr(nmi_hz);
	write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0", nmi_hz);
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= P6_EVNTSEL0_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;		/* unused */
	return 1;
}

static void p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
{
	/*
	 * P6-based Pentium M needs to re-unmask the apic vector, but it
	 * doesn't hurt the other P6 variants. ArchPerfmon/Core Duo also
	 * needs this.
	 */
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	/* P6/ARCH_PERFMON only supports 32-bit counter writes */
	write_watchdog_counter32(wd->perfctr_msr, NULL, nmi_hz);
}

static const struct wd_ops p6_wd_ops = {
	.reserve = single_msr_reserve,
	.unreserve = single_msr_unreserve,
	.setup = setup_p6_watchdog,
	.rearm = p6_rearm,
	.stop = single_msr_stop_watchdog,
	.perfctr = MSR_P6_PERFCTR0,
	.evntsel = MSR_P6_EVNTSEL0,
	.checkbit = 1ULL << 39,
};

/* Intel P4 performance counters. By far the most complicated of all. */

#define MSR_P4_MISC_ENABLE_PERF_AVAIL	(1<<7)
#define P4_ESCR_EVENT_SELECT(N)	((N)<<25)
#define P4_ESCR_OS		(1<<3)
#define P4_ESCR_USR		(1<<2)
#define P4_CCCR_OVF_PMI0	(1<<26)
#define P4_CCCR_OVF_PMI1	(1<<27)
#define P4_CCCR_THRESHOLD(N)	((N)<<20)
#define P4_CCCR_COMPLEMENT	(1<<19)
#define P4_CCCR_COMPARE		(1<<18)
#define P4_CCCR_REQUIRED	(3<<16)
#define P4_CCCR_ESCR_SELECT(N)	((N)<<13)
#define P4_CCCR_ENABLE		(1<<12)
#define P4_CCCR_OVF		(1U<<31)

/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
   CRU_ESCR0 (with any non-null event selector) through a complemented
   max threshold. [IA32-Vol3, Section 14.9.9] */

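/*
 * Why this behaves like a clock: with COMPARE and COMPLEMENT set and a
 * threshold of 15, the CCCR counts cycles in which the selected ESCR
 * event count is <= 15, which is true on every cycle, so IQ_COUNTER0
 * simply increments once per clock. (See the SDM section cited above.)
 */
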
static int setup_p4_watchdog(unsigned nmi_hz)
{
	unsigned int perfctr_msr, evntsel_msr, cccr_msr;
	unsigned int evntsel, cccr_val;
	unsigned int misc_enable, dummy;
	unsigned int ht_num;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
	if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
		return 0;

#ifdef CONFIG_SMP
	/* detect which hyperthread we are on */
	if (smp_num_siblings == 2) {
		unsigned int ebx, apicid;

		ebx = cpuid_ebx(1);
		apicid = (ebx >> 24) & 0xff;
		ht_num = apicid & 1;
	} else
#endif
		ht_num = 0;

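	/*
	 * Illustrative note: the initial APIC ID lives in bits 24-31 of
	 * CPUID(1) EBX. With two siblings per core their APIC IDs differ
	 * in the low bit, so e.g. APIC IDs 0 and 1 on the first core map
	 * to ht_num 0 and 1.
	 */
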
	/*
	 * Performance counters are shared resources; assign each
	 * hyperthread its own set. (Re-using the ESCR0 register seems
	 * safe and keeps the cccr_val the same.)
	 */
	if (!ht_num) {
		/* logical cpu 0 */
		perfctr_msr = MSR_P4_IQ_PERFCTR0;
		evntsel_msr = MSR_P4_CRU_ESCR0;
		cccr_msr = MSR_P4_IQ_CCCR0;
		cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
	} else {
		/* logical cpu 1 */
		perfctr_msr = MSR_P4_IQ_PERFCTR1;
		evntsel_msr = MSR_P4_CRU_ESCR0;
		cccr_msr = MSR_P4_IQ_CCCR1;
		cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
	}

	evntsel = P4_ESCR_EVENT_SELECT(0x3F)
		| P4_ESCR_OS
		| P4_ESCR_USR;

	cccr_val |= P4_CCCR_THRESHOLD(15)
		 | P4_CCCR_COMPLEMENT
		 | P4_CCCR_COMPARE
		 | P4_CCCR_REQUIRED;

	wrmsr(evntsel_msr, evntsel, 0);
	wrmsr(cccr_msr, cccr_val, 0);
	write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz);
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	cccr_val |= P4_CCCR_ENABLE;
	wrmsr(cccr_msr, cccr_val, 0);
	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = cccr_msr;
	return 1;
}

static void stop_p4_watchdog(void)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	wrmsr(wd->cccr_msr, 0, 0);
	wrmsr(wd->evntsel_msr, 0, 0);
}

static int p4_reserve(void)
{
	if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0))
		return 0;
#ifdef CONFIG_SMP
	if (smp_num_siblings > 1 && !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1))
		goto fail1;
#endif
	if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0))
		goto fail2;
	/* RED-PEN why is ESCR1 not reserved here? */
	return 1;
 fail2:
#ifdef CONFIG_SMP
	if (smp_num_siblings > 1)
		release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
 fail1:
#endif
	release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
	return 0;
}

static void p4_unreserve(void)
{
#ifdef CONFIG_SMP
	if (smp_num_siblings > 1)
		release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
#endif
	release_evntsel_nmi(MSR_P4_CRU_ESCR0);
	release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
}

static void p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
{
	u64 cccr;
	/*
	 * P4 quirks:
	 * - An overflown perfctr will assert its interrupt
	 *   until the OVF flag in its CCCR is cleared.
	 * - LVTPC is masked on interrupt and must be
	 *   unmasked by the LVTPC handler.
	 */
	rdmsrl(wd->cccr_msr, cccr);
	cccr &= ~(u64)P4_CCCR_OVF;
	wrmsrl(wd->cccr_msr, cccr);
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	/* start the cycle over again */
	write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
}

static const struct wd_ops p4_wd_ops = {
	.reserve = p4_reserve,
	.unreserve = p4_unreserve,
	.setup = setup_p4_watchdog,
	.rearm = p4_rearm,
	.stop = stop_p4_watchdog,
	/* RED-PEN this is wrong for the other sibling */
	.perfctr = MSR_P4_BPU_PERFCTR0,
	.evntsel = MSR_P4_BSU_ESCR0,
	.checkbit = 1ULL << 39,
};

/* Watchdog using the Intel architected PerfMon. Used for Core2 and hopefully
   all future Intel CPUs. */

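/*
 * Detection sketch: CPUID leaf 0xA describes the architected PerfMon.
 * EAX carries the version, the number of counters, their bit width and
 * the length of the event-availability mask; a 0 bit in EBX means the
 * corresponding architectural event (here: unhalted core cycles) is
 * supported. setup_intel_arch_watchdog() below relies on exactly this.
 */
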
#define ARCH_PERFMON_NMI_EVENT_SEL	ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
#define ARCH_PERFMON_NMI_EVENT_UMASK	ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK

static struct wd_ops intel_arch_wd_ops;

static int setup_intel_arch_watchdog(unsigned nmi_hz)
{
	unsigned int ebx;
	union cpuid10_eax eax;
	unsigned int unused;
	unsigned int perfctr_msr, evntsel_msr;
	unsigned int evntsel;
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	/*
	 * Check whether the Architectural PerfMon supports
	 * the Unhalted Core Cycles event or not.
	 * NOTE: a corresponding bit of 0 in ebx indicates that the
	 * event is present.
	 */
	cpuid(10, &(eax.full), &ebx, &unused, &unused);
	if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
	    (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
		return 0;

	perfctr_msr = wd_ops->perfctr;
	evntsel_msr = wd_ops->evntsel;

	wrmsrl(perfctr_msr, 0UL);

	evntsel = ARCH_PERFMON_EVENTSEL_INT
		| ARCH_PERFMON_EVENTSEL_OS
		| ARCH_PERFMON_EVENTSEL_USR
		| ARCH_PERFMON_NMI_EVENT_SEL
		| ARCH_PERFMON_NMI_EVENT_UMASK;

	/* setup the timer */
	wrmsr(evntsel_msr, evntsel, 0);
	nmi_hz = adjust_for_32bit_ctr(nmi_hz);
	write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz);
	apic_write(APIC_LVTPC, APIC_DM_NMI);
	evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
	wrmsr(evntsel_msr, evntsel, 0);

	wd->perfctr_msr = perfctr_msr;
	wd->evntsel_msr = evntsel_msr;
	wd->cccr_msr = 0;		/* unused */
	intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
	return 1;
}

static struct wd_ops intel_arch_wd_ops __read_mostly = {
	.reserve = single_msr_reserve,
	.unreserve = single_msr_unreserve,
	.setup = setup_intel_arch_watchdog,
	.rearm = p6_rearm,
	.stop = single_msr_stop_watchdog,
	.perfctr = MSR_ARCH_PERFMON_PERFCTR1,
	.evntsel = MSR_ARCH_PERFMON_EVENTSEL1,
};

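/*
 * The architected watchdog uses counter/event-select pair 1 above,
 * presumably so that counter 0 stays free for other perfctr users such
 * as oprofile (an assumption; the code does not state the reason). The
 * Core Duo variant below must use pair 0 instead: errata AE49 leaves
 * perfctr1 without a working enable bit (see probe_nmi_watchdog()).
 */
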
static struct wd_ops coreduo_wd_ops = {
	.reserve = single_msr_reserve,
	.unreserve = single_msr_unreserve,
	.setup = setup_intel_arch_watchdog,
	.rearm = p6_rearm,
	.stop = single_msr_stop_watchdog,
	.perfctr = MSR_ARCH_PERFMON_PERFCTR0,
	.evntsel = MSR_ARCH_PERFMON_EVENTSEL0,
};

static void probe_nmi_watchdog(void)
{
	switch (boot_cpu_data.x86_vendor) {
	case X86_VENDOR_AMD:
		if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 &&
		    boot_cpu_data.x86 != 16)
			return;
		wd_ops = &k7_wd_ops;
		break;
	case X86_VENDOR_INTEL:
		/* Work around Core Duo (Yonah) errata AE49 where perfctr1
		   doesn't have a working enable bit. */
		if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) {
			wd_ops = &coreduo_wd_ops;
			break;
		}
		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
			wd_ops = &intel_arch_wd_ops;
			break;
		}
		switch (boot_cpu_data.x86) {
		case 6:
			if (boot_cpu_data.x86_model > 0xd)
				return;

			wd_ops = &p6_wd_ops;
			break;
		case 15:
			if (boot_cpu_data.x86_model > 0x4)
				return;

			wd_ops = &p4_wd_ops;
			break;
		default:
			return;
		}
		break;
	}
}

/* Interface to nmi.c */

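/*
 * Rough contract, as used by nmi.c: lapic_watchdog_init() is called on
 * each CPU to probe for a suitable wd_ops and program the counter,
 * lapic_wd_event() is called from the NMI handler to check for and
 * rearm a watchdog tick, and lapic_watchdog_stop() disables the event
 * on the current CPU.
 */
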
int lapic_watchdog_init(unsigned nmi_hz)
{
	if (!wd_ops) {
		probe_nmi_watchdog();
		if (!wd_ops)
			return -1;

		if (!wd_ops->reserve()) {
			printk(KERN_ERR
				"NMI watchdog: cannot reserve perfctrs\n");
			return -1;
		}
	}

	if (!(wd_ops->setup(nmi_hz))) {
		printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n",
		       raw_smp_processor_id());
		return -1;
	}

	return 0;
}

void lapic_watchdog_stop(void)
{
	if (wd_ops)
		wd_ops->stop();
}

unsigned lapic_adjust_nmi_hz(unsigned hz)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

	if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
	    wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1)
		hz = adjust_for_32bit_ctr(hz);
	return hz;
}

/*
 * Returns 1 if the overflow of our perfctr caused this NMI (and rearms
 * the counter), 0 if the counter is still running and the NMI came from
 * somewhere else.
 */
int lapic_wd_event(unsigned nmi_hz)
{
	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
	u64 ctr;

	rdmsrl(wd->perfctr_msr, ctr);
	if (ctr & wd_ops->checkbit)	/* perfctr still running? */
		return 0;

	wd_ops->rearm(wd, nmi_hz);
	return 1;
}

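/*
 * Sketch of the expected caller pattern in the NMI handler (simplified;
 * the real logic lives in nmi.c):
 *
 *	if (lapic_wd_event(nmi_hz)) {
 *		... the counter overflowed: this was our watchdog tick,
 *		    check whether the CPU has locked up ...
 *	} else {
 *		... the NMI came from somewhere else ...
 *	}
 */
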
int lapic_watchdog_ok(void)
{
	return wd_ops != NULL;
}