xref: /freebsd/lib/libpmc/pmc.3 (revision 7afc53b8dfcc7d5897920ce6cc7e842fbb4ab813)
1.\" Copyright (c) 2003 Joseph Koshy.  All rights reserved.
2.\"
3.\" Redistribution and use in source and binary forms, with or without
4.\" modification, are permitted provided that the following conditions
5.\" are met:
6.\" 1. Redistributions of source code must retain the above copyright
7.\"    notice, this list of conditions and the following disclaimer.
8.\" 2. Redistributions in binary form must reproduce the above copyright
9.\"    notice, this list of conditions and the following disclaimer in the
10.\"    documentation and/or other materials provided with the distribution.
11.\"
12.\" This software is provided by Joseph Koshy ``as is'' and
13.\" any express or implied warranties, including, but not limited to, the
14.\" implied warranties of merchantability and fitness for a particular purpose
15.\" are disclaimed.  in no event shall Joseph Koshy be liable
16.\" for any direct, indirect, incidental, special, exemplary, or consequential
17.\" damages (including, but not limited to, procurement of substitute goods
18.\" or services; loss of use, data, or profits; or business interruption)
19.\" however caused and on any theory of liability, whether in contract, strict
20.\" liability, or tort (including negligence or otherwise) arising in any way
21.\" out of the use of this software, even if advised of the possibility of
22.\" such damage.
23.\"
24.\" $FreeBSD$
25.\"
26.Dd April 15, 2005
27.Os
28.Dt PMC 3
29.Sh NAME
30.Nm pmc_allocate ,
31.Nm pmc_attach ,
32.Nm pmc_capabilities ,
33.Nm pmc_configure_logfile ,
34.Nm pmc_cpuinfo ,
35.Nm pmc_detach ,
36.Nm pmc_disable ,
37.Nm pmc_enable ,
38.Nm pmc_event_names_of_class ,
39.Nm pmc_get_driver_stats ,
40.Nm pmc_init ,
41.Nm pmc_name_of_capability ,
42.Nm pmc_name_of_class ,
43.Nm pmc_name_of_cputype ,
44.Nm pmc_name_of_event ,
45.Nm pmc_name_of_mode ,
46.Nm pmc_name_of_state ,
47.Nm pmc_ncpu ,
48.Nm pmc_npmc ,
49.Nm pmc_pmcinfo ,
50.Nm pmc_read ,
51.Nm pmc_release ,
52.Nm pmc_rw ,
53.Nm pmc_set ,
54.Nm pmc_start ,
55.Nm pmc_stop ,
56.Nm pmc_write ,
57.Nm pmc_width ,
58.Nm pmc_x86_get_msr
59.Nd programming API for using hardware performance monitoring counters
60.Sh LIBRARY
61.Lb libpmc
62.Sh SYNOPSIS
63.In pmc.h
64.Ft int
65.Fo pmc_allocate
66.Fa "const char *eventspecifier"
67.Fa "enum pmc_mode mode"
68.Fa "uint32_t flags"
69.Fa "uint32_t cpu"
70.Fa "pmc_id_t *pmcid"
71.Fc
72.Ft int
73.Fo pmc_attach
74.Fa "pmc_id_t pmcid"
75.Fa "pid_t pid"
76.Fc
77.Ft int
78.Fn pmc_capabilities "pmc_id_t pmc" "uint32_t *caps"
79.Ft int
80.Fn pmc_configure_logfile "int fd"
81.Ft int
82.Fn pmc_cpuinfo "const struct pmc_op_getcpuinfo **cpu_info"
83.Ft int
84.Fo pmc_detach
85.Fa "pmc_id_t pmcid"
86.Fa "pid_t pid"
87.Fc
88.Ft int
89.Fn pmc_disable "uint32_t cpu" "int pmc"
90.Ft int
91.Fn pmc_enable "uint32_t cpu" "int pmc"
92.Ft int
93.Fo pmc_event_names_of_class
94.Fa "enum pmc_class cl"
95.Fa "const char ***eventnames"
96.Fa "int *nevents"
97.Fc
98.Ft int
99.Fn pmc_get_driver_stats "struct pmc_op_getdriverstats *gms"
100.Ft int
101.Fn pmc_init "void"
102.Ft "const char *"
103.Fn pmc_name_of_capability "enum pmc_caps pc"
104.Ft "const char *"
105.Fn pmc_name_of_class "enum pmc_class pc"
106.Ft "const char *"
107.Fn pmc_name_of_cputype "enum pmc_cputype ct"
108.Ft "const char *"
109.Fn pmc_name_of_disposition "enum pmc_disp pd"
110.Ft "const char *"
111.Fn pmc_name_of_event "enum pmc_event pe"
112.Ft "const char *"
113.Fn pmc_name_of_mode "enum pmc_mode pm"
114.Ft "const char *"
115.Fn pmc_name_of_state "enum pmc_state ps"
116.Ft int
117.Fn pmc_ncpu "void"
118.Ft int
119.Fn pmc_npmc "uint32_t cpu"
120.Ft int
121.Fn pmc_pmcinfo "uint32_t cpu" "struct pmc_op_getpmcinfo **pmc_info"
122.Ft int
123.Fn pmc_read "pmc_id_t pmc" "pmc_value_t *value"
124.Ft int
125.Fn pmc_release "pmc_id_t pmc"
126.Ft int
127.Fn pmc_rw "pmc_id_t pmc" "pmc_value_t newvalue" "pmc_value_t *oldvaluep"
128.Ft int
129.Fn pmc_set "pmc_id_t pmc" "pmc_value_t value"
130.Ft int
131.Fn pmc_start "pmc_id_t pmc"
132.Ft int
133.Fn pmc_stop "pmc_id_t pmc"
134.Ft int
135.Fn pmc_write "pmc_id_t pmc" "pmc_value_t value"
136.Ft int
137.Fn pmc_width "pmc_id_t pmc" "uint32_t *width"
138.Ft int
139.Fn pmc_x86_get_msr "int pmc" "uint32_t *msr"
140.Sh DESCRIPTION
141These functions implement a high-level library for using the
142system's hardware performance counters.
143.Pp
144PMCs are allocated using
145.Fn pmc_allocate ,
146released using
147.Fn pmc_release
148and read using
149.Fn pmc_read .
150Allocated PMCs may be started or stopped at any time using
151.Fn pmc_start
152and
153.Fn pmc_stop
154respectively.
155An allocated PMC may be of
156.Qq global
157scope, meaning that the PMC measures system-wide events, or
158.Qq process-private
159scope, meaning that the PMC only counts hardware events when
160the allocating process (or, optionally, its children)
161are active.
162.Pp
163PMCs may further be in
164.Qq "counting mode" ,
165or in
166.Qq "sampling mode" .
167Sampling mode PMCs deliver an interrupt to the CPU after
168a configured number of hardware events have been seen.
169A process-private sampling mode PMC will cause its owner
170process to get periodic
171.Sy SIGPROF
172interrupts, while a global sampling mode PMC is used to
173do system-wide statistical sampling (see
174.Xr hwpmc 4 ) .
175The sampling rate desired of a sampling-mode PMC is set using
176.Fn pmc_set .
177Counting mode PMCs do not interrupt the CPU; their values
178can be read using
179.Fn pmc_read .
180.Pp
181System-wide statistical sampling is configured by allocating
182at least one sampling mode PMC with
183global scope, and configuring a log file using
184.Fn pmc_configure_logfile .
185The
186.Xr hwpmc 4
187driver manages system-wide statistical sampling; for more
188information please see
189.Xr hwpmc 4 .
190.Ss APPLICATION PROGRAMMING INTERFACE
191.Fn pmc_init
192initializes the
193.Xr pmc 3
194library.
195This function must be called first, before any of the other
196functions in the library.
197.Pp
198.Fn pmc_allocate
199allocates a counter that counts the events named by
200.Fa eventspecifier ,
201and writes the allocated counter id to
202.Fa *pmcid .
203Argument
204.Fa eventspecifier
205comprises a PMC event name followed by an optional comma-separated
206list of keywords and qualifiers.
207The allowed syntax for
208.Fa eventspecifier
209is processor architecture specific and is listed in section
210.Sx "EVENT SPECIFIERS"
211below.
212The desired PMC mode is specified by
213.Fa mode ,
214and any mode specific modifiers are specified using
215.Fa flags .
216The
217.Fa cpu
218argument is the value
219.Li PMC_CPU_ANY ,
220or names the cpu the allocation is to be on.
221Requesting a specific CPU only makes sense for global PMCs;
222process-private PMC allocations should always specify
223.Li PMC_CPU_ANY .
224.Pp
225By default a PMC configured in process-virtual counting mode is set up
226to profile its owner process.
227The function
228.Fn pmc_attach
229may be used to attach the PMC to a different process.
230.Fn pmc_attach
231needs to be called before the counter is first started
232with
233.Fn pmc_start .
234The function
235.Fn pmc_detach
236may be used to detach a PMC from a process it was attached to
237using a prior call to
238.Fn pmc_attach .
239.Pp
240.Fn pmc_release
241releases a PMC previously allocated with
242.Fn pmc_allocate .
243This function call implicitly detaches the PMC from all its target
244processes.
245.Pp
246An allocated PMC may be started and stopped using
247.Fn pmc_start
248and
249.Fn pmc_stop
250respectively.
251.Pp
252The current value of a PMC may be read with
253.Fn pmc_read
254and written using
255.Fn pmc_write ,
256provided the underlying hardware supports these operations on
257the allocated PMC.
258The read and write operation may be combined using
259.Fn pmc_rw .
260.Pp
261The function
262.Fn pmc_capabilities
263sets argument
264.Fa caps
265to a bitmask of capabilities supported by the PMC denoted by
266argument
267.Fa pmc .
268The function
269.Fn pmc_width
270sets argument
271.Fa width
272to the width of the PMC denoted by argument
273.Fa pmc .
274.Pp
275The
276.Fn pmc_configure_logfile
277function causes the
278.Xr hwpmc 4
279driver to log system-wide performance data to the file corresponding
280to the process' file handle
281.Fa fd .
282.Pp
283.Fn pmc_set
284configures a sampling PMC
285.Fa pmc
286to interrupt every
287.Fa value
288events.
289For counting PMCs,
290.Fn pmc_set
291sets the initial value of the PMC to
292.Fa value .
293.Pp
294.Fn pmc_get_driver_stats
295copies a snapshot of the usage statistics maintained by
296.Xr hwpmc 4
297into the memory area pointed to by argument
298.Fa gms .
299.Ss SIGNAL HANDLING REQUIREMENTS
300Applications using PMCs are required to handle the following signals:
301.Bl -tag -width indent
302.It SIGBUS
303When the
304.Xr hwpmc 4
305module is unloaded using
306.Xr kldunload 8 ,
307processes that have PMCs allocated to them will be sent a
308SIGBUS signal.
309.It SIGIO
310Attempting to read a PMC that is not currently attached to a running
311process will cause a SIGIO signal to be sent to the reader.
312.El
313.Ss CONVENIENCE FUNCTIONS
314.Fn pmc_ncpu
315returns the number of CPUs present in the system.
316.Pp
317.Fn pmc_npmc
318returns the number of PMCs supported on CPU
319.Fa cpu .
320.Fn pmc_cpuinfo
321sets argument
322.Fa cpu_info
323to point to a structure with information about the system's CPUs.
324.Fn pmc_pmcinfo
325returns information about the current state of CPU
326.Fa cpu Ap s
327PMCs.
328.Pp
329The functions
330.Fn pmc_name_of_capability ,
331.Fn pmc_name_of_class ,
332.Fn pmc_name_of_cputype ,
333.Fn pmc_name_of_disposition ,
334.Fn pmc_name_of_event ,
335.Fn pmc_name_of_mode
336and
337.Fn pmc_name_of_state
338are useful for code wanting to print error messages.
339They return
340.Ft "const char *"
341pointers to human-readable representations of their arguments.
342These return values should not be freed using
343.Xr free 3 .
344.Pp
345.Fn pmc_event_names_of_class
346returns a list of event names supported by a given PMC class
347.Fa cl .
348On successful return, an array of
349.Ft "const char *"
350pointers to the names of valid events supported by class
351.Fa cl
352is allocated by the library using
353.Xr malloc 3 ,
354and a pointer to this array is returned in the location pointed to by
355.Fa eventnames .
356The number of pointers allocated is returned in the location pointed
357to by
358.Fa nevents .
359.Ss ADMINISTRATION
360Individual PMCs may be enabled or disabled on a given CPU using
361.Fn pmc_enable
362and
363.Fn pmc_disable
364respectively.
365For these functions,
366.Fa cpu
367is the CPU number, and
368.Fa pmc
369is the index of the PMC to be operated on.
370Only the super-user is allowed to enable and disable PMCs.
371.Ss X86 ARCHITECTURE SPECIFIC API
372The
373.Fn pmc_x86_get_msr
374function returns the processor model specific register number
375associated with
376.Fa pmc .
377Applications may use the x86
378.Sy RDPMC
379instruction to directly read the contents of the PMC.
380.Sh EVENT SPECIFIERS
381Event specifiers are strings comprising an event name, followed by
382optional parameters modifying the semantics of the hardware event
383being probed.
384Event names are PMC architecture dependent, but the
385.Xr pmc 3
386library defines machine independent aliases for commonly used
387events.
388.Ss Event Name Aliases
389Event name aliases are CPU architecture independent names for commonly
390used events.
391The following aliases are known to this version of the
392.Xr pmc 3
393library:
394.Bl -tag -width indent
395.It Li branches
396Measure the number of branches retired.
397.It Li branch-mispredicts
398Measure the number of retired branches that were mispredicted.
399.It Li cycles
400Measure processor cycles.
401This event is implemented using the processor's Time Stamp Counter
402register.
403.It Li dc-misses
404Measure the number of data cache misses.
405.It Li ic-misses
406Measure the number of instruction cache misses.
407.It Li instructions
408Measure the number of instructions retired.
409.It Li interrupts
410Measure the number of interrupts seen.
411.El
412.Ss Time Stamp Counter (TSC)
413The timestamp counter is a monotonically non-decreasing counter that
414counts processor cycles.
415.Pp
416In the i386 architecture this counter may
417be selected by requesting an event with eventspecifier
418.Ic tsc .
419The
420.Ic tsc
421event does not support any further qualifiers.
422It can only be allocated in system-wide counting mode,
423and is a read-only counter.
424Multiple processes are allowed to allocate the TSC.
425Once allocated, it may be read using the
426.Fn pmc_read
427function, or by using the RDTSC instruction.
428.Ss AMD (K7) PMCs
429These PMCs are present in the
430.Tn "AMD Athlon"
431series of CPUs and are documented in:
432.Rs
433.%B "AMD Athlon Processor x86 Code Optimization Guide"
434.%N "Publication No. 22007"
435.%D "February 2002"
436.%Q "Advanced Micro Devices, Inc."
437.Re
438.Pp
439Event specifiers for AMD K7 PMCs can have the following optional
440qualifiers:
441.Bl -tag -width indent
442.It Li count= Ns Ar value
443Configure the counter to increment only if the number of configured
444events measured in a cycle is greater than or equal to
445.Ar value .
446.It Li edge
447Configure the counter to only count negated-to-asserted transitions
448of the conditions expressed by the other qualifiers.
449In other words, the counter will increment only once whenever a given
450condition becomes true, irrespective of the number of clocks during
451which the condition remains true.
452.It Li inv
453Invert the sense of comparison when the
454.Li count
455qualifier is present, making the counter increment when the
456number of events per cycle is less than the value specified by
457the
458.Li count
459qualifier.
460.It Li os
461Configure the PMC to count events happening at privilege level 0.
462.It Li unitmask= Ns Ar mask
463This qualifier is used to further qualify a select few events,
464.Li k7-dc-refills-from-l2 ,
465.Li k7-dc-refills-from-system
466and
467.Li k7-dc-writebacks .
468Here
469.Ar mask
470is a string of the following characters optionally separated by
471.Li "+"
472characters:
473.Bl -tag -width indent -compact
474.It Li m
475Count operations for lines in the
476.Dq Modified
477state.
478.It Li o
479Count operations for lines in the
480.Dq Owner
481state.
482.It Li e
483Count operations for lines in the
484.Dq Exclusive
485state.
486.It Li s
487Count operations for lines in the
488.Dq Shared
489state.
490.It Li i
491Count operations for lines in the
492.Dq Invalid
493state.
494.El
495If no
496.Ar unitmask
497qualifier is specified, the default is to count events for cache
498lines in any of the above states.
499.It Li usr
500Configure the PMC to count events occurring at privilege levels 1, 2
501or 3.
502.El
503If neither of the
504.Li os
505or
506.Li usr
507qualifiers were specified, the default is to enable both.
508.Pp
509The event specifiers supported on AMD K7 PMCs are:
510.Bl -tag -width indent
511.It Li k7-dc-accesses
512Count data cache accesses.
513.It Li k7-dc-misses
514Count data cache misses.
515.It Li k7-dc-refills-from-l2 Op Li ,unitmask= Ns Ar mask
516Count data cache refills from L2 cache.
517This event may be further qualified using the
518.Li unitmask
519qualifier.
520.It Li k7-dc-refills-from-system Op Li ,unitmask= Ns Ar mask
521Count data cache refills from system memory.
522This event may be further qualified using the
523.Li unitmask
524qualifier.
525.It Li k7-dc-writebacks Op Li ,unitmask= Ns Ar mask
526Count data cache writebacks.
527This event may be further qualified using the
528.Li unitmask
529qualifier.
530.It Li k7-l1-dtlb-miss-and-l2-dtlb-hits
531Count L1 DTLB misses and L2 DTLB hits.
532.It Li k7-l1-and-l2-dtlb-misses
533Count L1 and L2 DTLB misses.
534.It Li k7-misaligned-references
535Count misaligned data references.
536.It Li k7-ic-fetches
537Count instruction cache fetches.
538.It Li k7-ic-misses
539Count instruction cache misses.
540.It Li k7-l1-itlb-misses
541Count L1 ITLB misses that are L2 ITLB hits.
542.It Li k7-l1-l2-itlb-misses
543Count L1 (and L2) ITLB misses.
544.It Li k7-retired-instructions
545Count all retired instructions.
546.It Li k7-retired-ops
547Count retired ops.
548.It Li k7-retired-branches
549Count all retired branches (conditional, unconditional, exceptions
550and interrupts).
551.It Li k7-retired-branches-mispredicted
552Count all mispredicted retired branches.
553.It Li k7-retired-taken-branches
554Count retired taken branches.
555.It Li k7-retired-taken-branches-mispredicted
556Count mispredicted taken branches that were retired.
557.It Li k7-retired-far-control-transfers
558Count retired far control transfers.
559.It Li k7-retired-resync-branches
560Count retired resync branches (non control transfer branches).
561.It Li k7-interrupts-masked-cycles
562Count the number of cycles when the processor's
563.Li IF
564flag was zero.
565.It Li k7-interrupts-masked-while-pending-cycles
566Count the number of cycles interrupts were masked while pending due
567to the processor's
568.Li IF
569flag being zero.
570.It Li k7-hardware-interrupts
571Count the number of taken hardware interrupts.
572.El
573.Ss AMD (K8) PMCs
574These PMCs are present in the
575.Tn "AMD Athlon64"
576and
577.Tn "AMD Opteron"
578series of CPUs.
579They are documented in:
580.Rs
581.%B "BIOS and Kernel Developer's Guide for the AMD Athlon(tm) 64 and AMD Opteron Processors"
582.%N "Publication No. 26094"
583.%D "April 2004"
584.%Q "Advanced Micro Devices, Inc."
585.Re
586.Pp
587Event specifiers for AMD K8 PMCs can have the following optional
588qualifiers:
589.Bl -tag -width indent
590.It Li count= Ns Ar value
591Configure the counter to increment only if the number of configured
592events measured in a cycle is greater than or equal to
593.Ar value .
594.It Li edge
595Configure the counter to only count negated-to-asserted transitions
596of the conditions expressed by the other fields.
597In other words, the counter will increment only once whenever a given
598condition becomes true, irrespective of the number of clocks during
599which the condition remains true.
600.It Li inv
601Invert the sense of comparison when the
602.Li count
603qualifier is present, making the counter increment when the
604number of events per cycle is less than the value specified by
605the
606.Li count
607qualifier.
608.It Li mask= Ns Ar qualifier
609Many event specifiers for AMD K8 PMCs need to be additionally
610qualified using a mask qualifier.
611These additional qualifiers are event-specific and are documented
612along with their associated event specifiers below.
613.It Li os
614Configure the PMC to count events happening at privilege level 0.
615.It Li usr
616Configure the PMC to count events occurring at privilege levels 1, 2
617or 3.
618.El
619If neither of the
620.Li os
621or
622.Li usr
623qualifiers were specified, the default is to enable both.
624.Pp
625The event specifiers supported on AMD K8 PMCs are:
626.Bl -tag -width indent
627.It Li k8-bu-cpu-clk-unhalted
628Count the number of clock cycles when the CPU is not in the HLT or
629STPCLK states.
630.It Li k8-bu-fill-request-l2-miss Op Li ,mask= Ns Ar qualifier
631Count fill requests that missed in the L2 cache.
632This event may be further qualified using
633.Ar qualifier ,
634which is a
635.Li + Ns - Ns
636separated set of the following keywords:
637.Bl -tag -width "XXXXXXXXXX" -compact
638.It Li dc-fill
639Count data cache fill requests.
640.It Li ic-fill
641Count instruction cache fill requests.
642.It Li tlb-reload
643Count TLB reloads.
644.El
645The default is to count all types of requests.
646.It Li k8-bu-internal-l2-request Op Li ,mask= Ns Ar qualifier
647Count internally generated requests to the L2 cache.
648This event may be further qualified using
649.Ar qualifier ,
650which is a
651.Li "+" Ns - Ns
652separated set of the following keywords:
653.Bl -tag -width "XXXXXXXXXX" -compact
654.It Li cancelled
655Count cancelled requests.
656.It Li dc-fill
657Count data cache fill requests.
658.It Li ic-fill
659Count instruction cache fill requests.
660.It Li tag-snoop
661Count tag snoop requests.
662.It Li tlb-reload
663Count TLB reloads.
664.El
665The default is to count all types of requests.
666.It Li k8-dc-access
667Count data cache accesses including microcode scratchpad accesses.
668.It Li k8-dc-copyback Op Li ,mask= Ns Ar qualifier
669Count data cache copyback operations.
670This event may be further qualified using
671.Ar qualifier ,
672which is a
673.Li "+" Ns - Ns
674separated set of the following keywords:
675.Bl -tag -width "exclusive" -compact
676.It Li exclusive
677Count operations for lines in the
678.Dq exclusive
679state.
680.It Li invalid
681Count operations for lines in the
682.Dq invalid
683state.
684.It Li modified
685Count operations for lines in the
686.Dq modified
687state.
688.It Li owner
689Count operations for lines in the
690.Dq owner
691state.
692.It Li shared
693Count operations for lines in the
694.Dq shared
695state.
696.El
697The default is to count operations for lines in all the
698above states.
699.It Li k8-dc-dcache-accesses-by-locks Op Li ,mask= Ns Ar qualifier
700Count data cache accesses by lock instructions.
701This event is only available on processors of revision C or later
702vintage.
703This event may be further qualified using
704.Ar qualifier ,
705which is a
706.Li "+" Ns - Ns
707separated set of the following keywords:
708.Bl -tag -width "exclusive" -compact
709.It Li accesses
710Count data cache accesses by lock instructions.
711.It Li misses
712Count data cache misses by lock instructions.
713.El
714The default is to count all accesses.
715.It Li k8-dc-dispatched-prefetch-instructions Op Li ,mask= Ns Ar qualifier
716Count the number of dispatched prefetch instructions.
717This event may be further qualified using
718.Ar qualifier ,
719which is a
720.Li "+" Ns - Ns
721separated set of the following keywords:
722.Bl -tag -width "exclusive" -compact
723.It Li load
724Count load operations.
725.It Li nta
726Count non-temporal operations.
727.It Li store
728Count store operations.
729.El
730The default is to count all operations.
731.It Li k8-dc-l1-dtlb-miss-and-l2-dtlb-hit
732Count L1 DTLB misses that are L2 DTLB hits.
733.It Li k8-dc-l1-dtlb-miss-and-l2-dtlb-miss
734Count L1 DTLB misses that are also misses in the L2 DTLB.
735.It Li k8-dc-microarchitectural-early-cancel-of-an-access
736Count microarchitectural early cancels of data cache accesses.
737.It Li k8-dc-microarchitectural-late-cancel-of-an-access
738Count microarchitectural late cancels of data cache accesses.
739.It Li k8-dc-misaligned-data-reference
740Count misaligned data references.
741.It Li k8-dc-miss
742Count data cache misses.
743.It Li k8-dc-one-bit-ecc-error Op Li ,mask= Ns Ar qualifier
744Count one bit ECC errors found by the scrubber.
745This event may be further qualified using
746.Ar qualifier ,
747which is a
748.Li "+" Ns - Ns
749separated set of the following keywords:
750.Bl -tag -width "piggyback" -compact
751.It Li scrubber
752Count scrubber detected errors.
753.It Li piggyback
754Count piggyback scrubber errors.
755.El
756The default is to count both kinds of errors.
757.It Li k8-dc-refill-from-l2 Op Li ,mask= Ns Ar qualifier
758Count data cache refills from L2 cache.
759This event may be further qualified using
760.Ar qualifier ,
761which is a
762.Li "+" Ns - Ns
763separated set of the following keywords:
764.Bl -tag -width "exclusive" -compact
765.It Li exclusive
766Count operations for lines in the
767.Dq exclusive
768state.
769.It Li invalid
770Count operations for lines in the
771.Dq invalid
772state.
773.It Li modified
774Count operations for lines in the
775.Dq modified
776state.
777.It Li owner
778Count operations for lines in the
779.Dq owner
780state.
781.It Li shared
782Count operations for lines in the
783.Dq shared
784state.
785.El
786The default is to count operations for lines in all the
787above states.
788.It Li k8-dc-refill-from-system Op Li ,mask= Ns Ar qualifier
789Count data cache refills from system memory.
790This event may be further qualified using
791.Ar qualifier ,
792which is a
793.Li "+" Ns - Ns
794separated set of the following keywords:
795.Bl -tag -width "exclusive" -compact
796.It Li exclusive
797Count operations for lines in the
798.Dq exclusive
799state.
800.It Li invalid
801Count operations for lines in the
802.Dq invalid
803state.
804.It Li modified
805Count operations for lines in the
806.Dq modified
807state.
808.It Li owner
809Count operations for lines in the
810.Dq owner
811state.
812.It Li shared
813Count operations for lines in the
814.Dq shared
815state.
816.El
817The default is to count operations for lines in all the
818above states.
819.It Li k8-fp-dispatched-fpu-ops Op Li ,mask= Ns Ar qualifier
820Count the number of dispatched FPU ops.
821This event is supported in revision B and later CPUs.
822This event may be further qualified using
823.Ar qualifier ,
824which is a
825.Li "+" Ns - Ns
826separated set of the following keywords:
827.Bl -tag -width "XXXXXXXXXX" -compact
828.It Li add-pipe-excluding-junk-ops
829Count add pipe ops excluding junk ops.
830.It Li add-pipe-junk-ops
831Count junk ops in the add pipe.
832.It Li multiply-pipe-excluding-junk-ops
833Count multiply pipe ops excluding junk ops.
834.It Li multiply-pipe-junk-ops
835Count junk ops in the multiply pipe.
836.It Li store-pipe-excluding-junk-ops
837Count store pipe ops excluding junk ops.
838.It Li store-pipe-junk-ops
839Count junk ops in the store pipe.
840.El
841The default is to count all types of ops.
842.It Li k8-fp-cycles-with-no-fpu-ops-retired
843Count cycles when no FPU ops were retired.
844This event is supported in revision B and later CPUs.
845.It Li k8-fp-dispatched-fpu-fast-flag-ops
846Count dispatched FPU ops that use the fast flag interface.
847This event is supported in revision B and later CPUs.
848.It Li k8-fr-decoder-empty
849Count cycles when there was nothing to dispatch (i.e., the decoder
850was empty).
851.It Li k8-fr-dispatch-stalls
852Count all dispatch stalls.
853.It Li k8-fr-dispatch-stall-for-segment-load
854Count dispatch stalls for segment loads.
855.It Li k8-fr-dispatch-stall-for-serialization
856Count dispatch stalls for serialization.
857.It Li k8-fr-dispatch-stall-from-branch-abort-to-retire
858Count dispatch stalls from branch abort to retiral.
859.It Li k8-fr-dispatch-stall-when-fpu-is-full
860Count dispatch stalls when the FPU is full.
861.It Li k8-fr-dispatch-stall-when-ls-is-full
862Count dispatch stalls when the load/store unit is full.
863.It Li k8-fr-dispatch-stall-when-reorder-buffer-is-full
864Count dispatch stalls when the reorder buffer is full.
865.It Li k8-fr-dispatch-stall-when-reservation-stations-are-full
866Count dispatch stalls when reservation stations are full.
867.It Li k8-fr-dispatch-stall-when-waiting-for-all-to-be-quiet
868Count dispatch stalls when waiting for all to be quiet.
869.\" XXX What does "waiting for all to be quiet" mean?
870.It Li k8-fr-dispatch-stall-when-waiting-far-xfer-or-resync-branch-pending
871Count dispatch stalls when a far control transfer or a resync branch
872is pending.
873.It Li k8-fr-fpu-exceptions Op Li ,mask= Ns Ar qualifier
874Count FPU exceptions.
875This event is supported in revision B and later CPUs.
876This event may be further qualified using
877.Ar qualifier ,
878which is a
879.Li "+" Ns - Ns
880separated set of the following keywords:
881.Bl -tag -width "XXXXXXXXXX" -compact
882.It Li sse-and-x87-microtraps
883Count SSE and x87 microtraps.
884.It Li sse-reclass-microfaults
885Count SSE reclass microfaults.
886.It Li sse-retype-microfaults
887Count SSE retype microfaults.
888.It Li x87-reclass-microfaults
889Count x87 reclass microfaults.
890.El
891The default is to count all types of exceptions.
892.It Li k8-fr-interrupts-masked-cycles
893Count cycles when interrupts were masked (i.e., when CPU RFLAGS field IF was zero).
894.It Li k8-fr-interrupts-masked-while-pending-cycles
895Count cycles while interrupts were masked while pending (i.e., cycles
896when INTR was asserted while CPU RFLAGS field IF was zero).
897.It Li k8-fr-number-of-breakpoints-for-dr0
898Count the number of breakpoints for DR0.
899.It Li k8-fr-number-of-breakpoints-for-dr1
900Count the number of breakpoints for DR1.
901.It Li k8-fr-number-of-breakpoints-for-dr2
902Count the number of breakpoints for DR2.
903.It Li k8-fr-number-of-breakpoints-for-dr3
904Count the number of breakpoints for DR3.
905.It Li k8-fr-retired-branches
906Count retired branches including exceptions and interrupts.
907.It Li k8-fr-retired-branches-mispredicted
908Count mispredicted retired branches.
909.It Li k8-fr-retired-far-control-transfers
910Count retired far control transfers (which are always mispredicted).
911.It Li k8-fr-retired-fastpath-double-op-instructions Op Li ,mask= Ns Ar qualifier
912Count retired fastpath double op instructions.
913This event is supported in revision B and later CPUs.
914This event may be further qualified using
915.Ar qualifier ,
916which is a
917.Li "+" Ns - Ns
918separated set of the following keywords:
919.Bl -tag -width "XXXXXXXXXXXX" -compact
920.It Li low-op-pos-0
921Count instructions with the low op in position 0.
922.It Li low-op-pos-1
923Count instructions with the low op in position 1.
924.It Li low-op-pos-2
925Count instructions with the low op in position 2.
926.El
927The default is to count all types of instructions.
928.It Li k8-fr-retired-fpu-instructions Op Li ,mask= Ns Ar qualifier
929Count retired FPU instructions.
930This event is supported in revision B and later CPUs.
931This event may be further qualified using
932.Ar qualifier ,
933which is a
934.Li "+" Ns - Ns
935separated set of the following keywords:
936.Bl -tag -width "XXXXXXXXXX" -compact
937.It Li mmx-3dnow
938Count MMX and 3DNow! instructions.
939.It Li packed-sse-sse2
940Count packed SSE and SSE2 instructions.
941.It Li scalar-sse-sse2
942Count scalar SSE and SSE2 instructions.
943.It Li x87
944Count x87 instructions.
945.El
946The default is to count all types of instructions.
947.It Li k8-fr-retired-near-returns
948Count retired near returns.
949.It Li k8-fr-retired-near-returns-mispredicted
950Count mispredicted near returns.
951.It Li k8-fr-retired-resyncs
952Count retired resyncs (non-control transfer branches).
953.It Li k8-fr-retired-taken-hardware-interrupts
954Count retired taken hardware interrupts.
955.It Li k8-fr-retired-taken-branches
956Count retired taken branches.
957.It Li k8-fr-retired-taken-branches-mispredicted
958Count retired taken branches that were mispredicted.
959.It Li k8-fr-retired-taken-branches-mispredicted-by-addr-miscompare
960Count retired taken branches that were mispredicted only due to an
961address miscompare.
962.It Li k8-fr-retired-uops
963Count retired uops.
964.It Li k8-fr-retired-x86-instructions
965Count retired x86 instructions including exceptions and interrupts.
966.It Li k8-ic-fetch
967Count instruction cache fetches.
968.It Li k8-ic-instruction-fetch-stall
969Count cycles in stalls due to instruction fetch.
970.It Li k8-ic-l1-itlb-miss-and-l2-itlb-hit
971Count L1 ITLB misses that are L2 ITLB hits.
972.It Li k8-ic-l1-itlb-miss-and-l2-itlb-miss
973Count ITLB misses that miss in both L1 and L2 ITLBs.
974.It Li k8-ic-microarchitectural-resync-by-snoop
975Count microarchitectural resyncs caused by snoops.
976.It Li k8-ic-miss
977Count instruction cache misses.
978.It Li k8-ic-refill-from-l2
979Count instruction cache refills from L2 cache.
980.It Li k8-ic-refill-from-system
981Count instruction cache refills from system memory.
982.It Li k8-ic-return-stack-hits
983Count hits to the return stack.
984.It Li k8-ic-return-stack-overflow
985Count overflows of the return stack.
986.It Li k8-ls-buffer2-full
987Count load/store buffer2 full events.
988.It Li k8-ls-locked-operation Op Li ,mask= Ns Ar qualifier
989Count locked operations.
990For revision C and later CPUs, the following qualifiers are supported:
991.Bl -tag -width "XXXXXXXXXXXXX" -compact
992.It Li cycles-in-request
993Count the number of cycles in the lock request/grant stage.
994.It Li cycles-to-complete
995Count the number of cycles a lock takes to complete once it is
996non-speculative and is the oldest load/store operation.
997.It Li locked-instructions
998Count the number of lock instructions executed.
999.El
1000The default is to count the number of lock instructions executed.
1001.It Li k8-ls-microarchitectural-late-cancel
1002Count microarchitectural late cancels of operations in the load/store
1003unit.
1004.It Li k8-ls-microarchitectural-resync-by-self-modifying-code
1005Count microarchitectural resyncs caused by self-modifying code.
1006.It Li k8-ls-microarchitectural-resync-by-snoop
1007Count microarchitectural resyncs caused by snoops.
1008.It Li k8-ls-retired-cflush-instructions
1009Count retired CLFLUSH instructions.
1010.It Li k8-ls-retired-cpuid-instructions
1011Count retired CPUID instructions.
1012.It Li k8-ls-segment-register-load Op Li ,mask= Ns Ar qualifier
1013Count segment register loads.
1014This event may be further qualified using
1015.Ar qualifier ,
1016which is a
1017.Li "+" Ns - Ns
1018separated set of the following keywords:
1019.Bl -tag -width "XX" -compact
1020.It Li cs
1021Count CS register loads.
1022.It Li ds
1023Count DS register loads.
1024.It Li es
1025Count ES register loads.
1026.It Li fs
1027Count FS register loads.
1028.It Li gs
1029Count GS register loads.
1030.\" .It Ic hs
1031.\" Count HS register loads.
1032.\" XXX "HS" register?
1033.It Li ss
1034Count SS register loads.
1035.El
1036The default is to count all types of loads.
1037.It Li k8-nb-memory-controller-bypass-saturation Op Li ,mask= Ns Ar qualifier
1038Count memory controller bypass counter saturation events.
1039This event may be further qualified using
1040.Ar qualifier ,
1041which is a
1042.Li "+" Ns - Ns
1043separated set of the following keywords:
1044.Bl -tag -width "XXXXXXXXXX" -compact
1045.It Li dram-controller-interface-bypass
1046Count DRAM controller interface bypass.
1047.It Li dram-controller-queue-bypass
1048Count DRAM controller queue bypass.
1049.It Li memory-controller-hi-pri-bypass
1050Count memory controller high priority bypasses.
1051.It Li memory-controller-lo-pri-bypass
1052Count memory controller low priority bypasses.
1053.El
1054.It Li k8-nb-memory-controller-dram-slots-missed
1055Count memory controller DRAM command slots missed (in MemClks).
1056.It Li k8-nb-memory-controller-page-access-event Op Li ,mask= Ns Ar qualifier
1057Count memory controller page access events.
1058This event may be further qualified using
1059.Ar qualifier ,
1060which is a
1061.Li "+" Ns - Ns
1062separated set of the following keywords:
1063.Bl -tag -width "XXXXXXXXXX" -compact
1064.It Li page-conflict
1065Count page conflicts.
1066.It Li page-hit
1067Count page hits.
1068.It Li page-miss
1069Count page misses.
1070.El
1071The default is to count all types of events.
1072.It Li k8-nb-memory-controller-page-table-overflow
1073Count memory controller page table overflow events.
1074.It Li k8-nb-probe-result Op Li ,mask= Ns Ar qualifier
1075Count probe events.
1076This event may be further qualified using
1077.Ar qualifier ,
1078which is a
1079.Li "+" Ns - Ns
1080separated set of the following keywords:
1081.Bl -tag -width "exclusive" -compact
1082.It Li probe-hit
1083Count all probe hits.
1084.It Li probe-hit-dirty-no-memory-cancel
1085Count probe hits without memory cancels.
1086.It Li probe-hit-dirty-with-memory-cancel
1087Count probe hits with memory cancels.
1088.It Li probe-miss
1089Count probe misses.
1090.El
1091.It Li k8-nb-sized-commands Op Li ,mask= Ns Ar qualifier
1092Count sized commands issued.
1093This event may be further qualified using
1094.Ar qualifier ,
1095which is a
1096.Li "+" Ns - Ns
1097separated set of the following keywords:
1098.Bl -tag -width "exclusive" -compact
1099.It Li nonpostwrszbyte
1100.It Li nonpostwrszdword
1101.It Li postwrszbyte
1102.It Li postwrszdword
1103.It Li rdszbyte
1104.It Li rdszdword
1105.It Li rdmodwr
1106.El
1107The default is to count all types of commands.
1108.It Li k8-nb-memory-controller-turnaround Op Li ,mask= Ns Ar qualifier
1109Count memory controller turnaround events.
1110This event may be further qualified using
1111.Ar qualifier ,
1112which is a
1113.Li "+" Ns - Ns
1114separated set of the following keywords:
1115.Bl -tag -width "XXXXXXXXXX" -compact
1116.\" XXX doc is unclear whether these are cycle counts or event counts
1117.It Li dimm-turnaround
1118Count DIMM turnarounds.
1119.It Li read-to-write-turnaround
1120Count read to write turnarounds.
1121.It Li write-to-read-turnaround
1122Count write to read turnarounds.
1123.El
1124The default is to count all types of events.
1125.It Li k8-nb-ht-bus0-bandwidth Op Li ,mask= Ns Ar qualifier
1126.It Li k8-nb-ht-bus1-bandwidth Op Li ,mask= Ns Ar qualifier
1127.It Li k8-nb-ht-bus2-bandwidth Op Li ,mask= Ns Ar qualifier
1128Count events on the HyperTransport(tm) buses.
1129These events may be further qualified using
1130.Ar qualifier ,
1131which is a
1132.Li "+" Ns - Ns
1133separated set of the following keywords:
1134.Bl -tag -width "XXXXXXXXXX" -compact
1135.It Li buffer-release
1136Count buffer release messages sent.
1137.It Li command
1138Count command messages sent.
1139.It Li data
1140Count data messages sent.
1141.It Li nop
1142Count nop messages sent.
1143.El
1144The default is to count all types of messages.
1145.El
1146.Ss Intel P6 PMCS
1147Intel P6 PMCs are present in Intel
1148.Tn "Pentium Pro" ,
1149.Tn "Pentium II" ,
1150.Tn "Celeron" ,
1151.Tn "Pentium III"
1152and
1153.Tn "Pentium M"
1154processors.
1155.Pp
1156These CPUs have two counters.
1157Some events may only be used on specific counters and some events are
1158defined only on specific processor models.
1159.Pp
1160These PMCs are documented in
1161.Rs
1162.%B "IA-32 Intel(R) Architecture Software Developer's Manual"
1163.%T "Volume 3: System Programming Guide"
1164.%N "Order Number 245472-012"
1165.%D 2003
1166.%Q "Intel Corporation"
1167.Re
1168.Pp
1169Some of these events are affected by processor errata described in
1170.Rs
1171.%B "Intel(R) Pentium(R) III Processor Specification Update"
1172.%N "Document Number: 244453-054"
1173.%D "April 2005"
1174.%Q "Intel Corporation"
1175.Re
1176.Pp
1177Event specifiers for Intel P6 PMCs can have the following common
1178qualifiers:
1179.Bl -tag -width indent
1180.It Li cmask= Ns Ar value
1181Configure the PMC to increment only if the number of configured
1182events measured in a cycle is greater than or equal to
1183.Ar value .
1184.It Li edge
1185Configure the PMC to count the number of deasserted to asserted
1186transitions of the conditions expressed by the other qualifiers.
1187If specified, the counter will increment only once whenever a
1188condition becomes true, irrespective of the number of clocks during
1189which the condition remains true.
1190.It Li inv
1191Invert the sense of comparison when the
1192.Ar cmask
1193qualifier is present, making the counter increment when the number of
1194events per cycle is less than the value specified by the
1195.Ar cmask
1196qualifier.
1197.It Li os
1198Configure the PMC to count events happening at processor privilege
1199level 0.
1200.It Li umask= Ns Ar value
1201This qualifier is used to further qualify the event selected (see
1202below).
1203.It Li usr
1204Configure the PMC to count events occurring at privilege levels 1, 2
1205or 3.
1206.El
1207If neither of the
1208.Li os
1209or
1210.Li usr
1211qualifiers are specified, the default is to enable both.
1212.Pp
1213The event specifiers supported by Intel P6 PMCs are:
1214.Bl -tag -width indent
1215.It Li p6-baclears
1216Count the number of times a static branch prediction was made by the
1217branch decoder because the BTB did not have a prediction.
1218.It Li p6-br-bac-missp-exec
1219.Pq Tn "Pentium M"
1220Count the number of branch instructions executed that were
1221mispredicted at the Front End (BAC).
1222.It Li p6-br-bogus
1223Count the number of bogus branches.
1224.It Li p6-br-call-exec
1225.Pq Tn "Pentium M"
1226Count the number of call instructions executed.
1227.It Li p6-br-call-missp-exec
1228.Pq Tn "Pentium M"
1229Count the number of call instructions executed that were mispredicted.
1230.It Li p6-br-cnd-exec
1231.Pq Tn "Pentium M"
1232Count the number of conditional branch instructions executed.
1233.It Li p6-br-cnd-missp-exec
1234.Pq Tn "Pentium M"
1235Count the number of conditional branch instructions executed that were
1236mispredicted.
1237.It Li p6-br-ind-call-exec
1238.Pq Tn "Pentium M"
1239Count the number of indirect call instructions executed.
1240.It Li p6-br-ind-exec
1241.Pq Tn "Pentium M"
1242Count the number of indirect branch instructions executed.
1243.It Li p6-br-ind-missp-exec
1244.Pq Tn "Pentium M"
1245Count the number of indirect branch instructions executed that were
1246mispredicted.
1247.It Li p6-br-inst-decoded
1248Count the number of branch instructions decoded.
1249.It Li p6-br-inst-exec
1250.Pq Tn "Pentium M"
1251Count the number of branch instructions executed but not necessarily retired.
1252.It Li p6-br-inst-retired
1253Count the number of branch instructions retired.
1254.It Li p6-br-miss-pred-retired
1255Count the number of mispredicted branch instructions retired.
1256.It Li p6-br-miss-pred-taken-ret
1257Count the number of taken mispredicted branches retired.
1258.It Li p6-br-missp-exec
1259.Pq Tn "Pentium M"
1260Count the number of branch instructions executed that were
1261mispredicted at execution.
1262.It Li p6-br-ret-bac-missp-exec
1263.Pq Tn "Pentium M"
1264Count the number of return instructions executed that were
1265mispredicted at the Front End (BAC).
1266.It Li p6-br-ret-exec
1267.Pq Tn "Pentium M"
1268Count the number of return instructions executed.
1269.It Li p6-br-ret-missp-exec
1270.Pq Tn "Pentium M"
1271Count the number of return instructions executed that were
1272mispredicted at execution.
1273.It Li p6-br-taken-retired
1274Count the number of taken branches retired.
1275.It Li p6-btb-misses
1276Count the number of branches for which the BTB did not produce a
1277prediction.
1278.It Li p6-bus-bnr-drv
1279Count the number of bus clock cycles during which this processor is
1280driving the BNR# pin.
1281.It Li p6-bus-data-rcv
1282Count the number of bus clock cycles during which this processor is
1283receiving data.
1284.It Li p6-bus-drdy-clocks Op Li ,umask= Ns Ar qualifier
1285Count the number of clocks during which DRDY# is asserted.
1286An additional qualifier may be specified, and comprises one of the
1287following keywords:
1288.Bl -tag -width indent -compact
1289.It Li any
1290Count transactions generated by any agent on the bus.
1291.It Li self
1292Count transactions generated by this processor.
1293.El
1294The default is to count operations generated by this processor.
1295.It Li p6-bus-hit-drv
1296Count the number of bus clock cycles during which this processor is
1297driving the HIT# pin.
1298.It Li p6-bus-hitm-drv
1299Count the number of bus clock cycles during which this processor is
1300driving the HITM# pin.
1301.It Li p6-bus-lock-clocks Op Li ,umask= Ns Ar qualifier
1302Count the number of clocks during which LOCK# is asserted on the
1303external system bus.
1304An additional qualifier may be specified and comprises one of the following
1305keywords:
1306.Bl -tag -width indent -compact
1307.It Li any
1308Count transactions generated by any agent on the bus.
1309.It Li self
1310Count transactions generated by this processor.
1311.El
1312The default is to count operations generated by this processor.
1313.It Li p6-bus-req-outstanding
1314Count the number of bus requests outstanding in any given cycle.
1315.It Li p6-bus-snoop-stall
1316Count the number of clock cycles during which the bus is snoop stalled.
1317.It Li p6-bus-tran-any Op Li ,umask= Ns Ar qualifier
1318Count the number of completed bus transactions of any kind.
1319An additional qualifier may be specified and comprises one of the following
1320keywords:
1321.Bl -tag -width indent -compact
1322.It Li any
1323Count transactions generated by any agent on the bus.
1324.It Li self
1325Count transactions generated by this processor.
1326.El
1327The default is to count operations generated by this processor.
1328.It Li p6-bus-tran-brd Op Li ,umask= Ns Ar qualifier
1329Count the number of burst read transactions.
1330An additional qualifier may be specified and comprises one of the following
1331keywords:
1332.Bl -tag -width indent -compact
1333.It Li any
1334Count transactions generated by any agent on the bus.
1335.It Li self
1336Count transactions generated by this processor.
1337.El
1338The default is to count operations generated by this processor.
1339.It Li p6-bus-tran-burst Op Li ,umask= Ns Ar qualifier
1340Count the number of completed burst transactions.
1341An additional qualifier may be specified and comprises one of the following
1342keywords:
1343.Bl -tag -width indent -compact
1344.It Li any
1345Count transactions generated by any agent on the bus.
1346.It Li self
1347Count transactions generated by this processor.
1348.El
1349The default is to count operations generated by this processor.
1350.It Li p6-bus-tran-def Op Li ,umask= Ns Ar qualifier
1351Count the number of completed deferred transactions.
1352An additional qualifier may be specified and comprises one of the following
1353keywords:
1354.Bl -tag -width indent -compact
1355.It Li any
1356Count transactions generated by any agent on the bus.
1357.It Li self
1358Count transactions generated by this processor.
1359.El
1360The default is to count operations generated by this processor.
1361.It Li p6-bus-tran-ifetch Op Li ,umask= Ns Ar qualifier
1362Count the number of completed instruction fetch transactions.
1363An additional qualifier may be specified and comprises one of the following
1364keywords:
1365.Bl -tag -width indent -compact
1366.It Li any
1367Count transactions generated by any agent on the bus.
1368.It Li self
1369Count transactions generated by this processor.
1370.El
1371The default is to count operations generated by this processor.
1372.It Li p6-bus-tran-inval Op Li ,umask= Ns Ar qualifier
1373Count the number of completed invalidate transactions.
1374An additional qualifier may be specified and comprises one of the following
1375keywords:
1376.Bl -tag -width indent -compact
1377.It Li any
1378Count transactions generated by any agent on the bus.
1379.It Li self
1380Count transactions generated by this processor.
1381.El
1382The default is to count operations generated by this processor.
1383.It Li p6-bus-tran-mem Op Li ,umask= Ns Ar qualifier
1384Count the number of completed memory transactions.
1385An additional qualifier may be specified and comprises one of the following
1386keywords:
1387.Bl -tag -width indent -compact
1388.It Li any
1389Count transactions generated by any agent on the bus.
1390.It Li self
1391Count transactions generated by this processor.
1392.El
1393The default is to count operations generated by this processor.
1394.It Li p6-bus-tran-pwr Op Li ,umask= Ns Ar qualifier
1395Count the number of completed partial write transactions.
1396An additional qualifier may be specified and comprises one of the following
1397keywords:
1398.Bl -tag -width indent -compact
1399.It Li any
1400Count transactions generated by any agent on the bus.
1401.It Li self
1402Count transactions generated by this processor.
1403.El
1404The default is to count operations generated by this processor.
1405.It Li p6-bus-tran-rfo Op Li ,umask= Ns Ar qualifier
1406Count the number of completed read-for-ownership transactions.
1407An additional qualifier may be specified and comprises one of the following
1408keywords:
1409.Bl -tag -width indent -compact
1410.It Li any
1411Count transactions generated by any agent on the bus.
1412.It Li self
1413Count transactions generated by this processor.
1414.El
1415The default is to count operations generated by this processor.
1416.It Li p6-bus-trans-io Op Li ,umask= Ns Ar qualifier
1417Count the number of completed I/O transactions.
1418An additional qualifier may be specified and comprises one of the following
1419keywords:
1420.Bl -tag -width indent -compact
1421.It Li any
1422Count transactions generated by any agent on the bus.
1423.It Li self
1424Count transactions generated by this processor.
1425.El
1426The default is to count operations generated by this processor.
1427.It Li p6-bus-trans-p Op Li ,umask= Ns Ar qualifier
1428Count the number of completed partial transactions.
1429An additional qualifier may be specified and comprises one of the following
1430keywords:
1431.Bl -tag -width indent -compact
1432.It Li any
1433Count transactions generated by any agent on the bus.
1434.It Li self
1435Count transactions generated by this processor.
1436.El
1437The default is to count operations generated by this processor.
1438.It Li p6-bus-trans-wb Op Li ,umask= Ns Ar qualifier
1439Count the number of completed write-back transactions.
1440An additional qualifier may be specified and comprises one of the following
1441keywords:
1442.Bl -tag -width indent -compact
1443.It Li any
1444Count transactions generated by any agent on the bus.
1445.It Li self
1446Count transactions generated by this processor.
1447.El
1448The default is to count operations generated by this processor.
1449.It Li p6-cpu-clk-unhalted
1450Count the number of cycles during which the processor was not halted.
1451.Pp
1452.Pq Tn "Pentium M"
1453Count the number of cycles during which the processor was not halted
1454and not in a thermal trip.
1455.It Li p6-cycles-div-busy
1456Count the number of cycles during which the divider is busy and cannot
1457accept new divides.
1458This event is only allocated on counter 0.
1459.It Li p6-cycles-in-pending-and-masked
1460Count the number of processor cycles for which interrupts were
1461disabled and interrupts were pending.
1462.It Li p6-cycles-int-masked
1463Count the number of processor cycles for which interrupts were
1464disabled.
1465.It Li p6-data-mem-refs
1466Count all loads and all stores using any memory type, including
1467internal retries.
1468Each part of a split store is counted separately.
1469.It Li p6-dcu-lines-in
1470Count the total lines allocated in the data cache unit.
1471.It Li p6-dcu-m-lines-in
1472Count the number of M state lines allocated in the data cache unit.
1473.It Li p6-dcu-m-lines-out
1474Count the number of M state lines evicted from the data cache unit.
1475.It Li p6-dcu-miss-outstanding
1476Count the weighted number of cycles while a data cache unit miss is
1477outstanding, incremented by the number of outstanding cache misses at
1478any time.
1479.It Li p6-div
1480Count the number of integer and floating-point divides.
1481This event is only allocated on counter 1.
1482.It Li p6-emon-esp-uops
1483.Pq Tn "Pentium M"
1484Count the total number of micro-ops.
1485.It Li p6-emon-est-trans Op Li ,umask= Ns Ar qualifier
1486.Pq Tn "Pentium M"
1487Count the number of
1488.Tn "Enhanced Intel SpeedStep"
1489transitions.
1490An additional qualifier may be specified, and can be one of the
1491following keywords:
1492.Bl -tag -width indent -compact
1493.It Li all
1494Count all transitions.
1495.It Li freq
1496Count only frequency transitions.
1497.El
1498The default is to count all transitions.
1499.It Li p6-emon-fused-uops-ret Op Li ,umask= Ns Ar qualifier
1500.Pq Tn "Pentium M"
1501Count the number of retired fused micro-ops.
1502An additional qualifier may be specified, and may be one of the
1503following keywords:
1504.Bl -tag -width indent -compact
1505.It Li all
1506Count all fused micro-ops.
1507.It Li loadop
1508Count only load and op micro-ops.
1509.It Li stdsta
1510Count only STD/STA micro-ops.
1511.El
1512The default is to count all fused micro-ops.
1513.It Li p6-emon-kni-comp-inst-ret Op Li ,umask= Ns Ar qualifier
1514.Pq Tn "Pentium III"
1515Count the number of SSE computational instructions retired.
1516An additional qualifier may be specified, and comprises one of the
1517following keywords:
1518.Bl -tag -width indent -compact
1519.It Li packed-and-scalar
1520Count packed and scalar operations.
1521.It Li scalar
1522Count scalar operations only.
1523.El
1524The default is to count packed and scalar operations.
1525.It Li p6-emon-kni-inst-retired Op Li ,umask= Ns Ar qualifier
1526.Pq Tn "Pentium III"
1527Count the number of SSE instructions retired.
1528An additional qualifier may be specified, and comprises one of the
1529following keywords:
1530.Bl -tag -width indent -compact
1531.It Li packed-and-scalar
1532Count packed and scalar operations.
1533.It Li scalar
1534Count scalar operations only.
1535.El
1536The default is to count packed and scalar operations.
1537.It Li p6-emon-kni-pref-dispatched Op Li ,umask= Ns Ar qualifier
1538.Pq Tn "Pentium III"
1539Count the number of SSE prefetch or weakly ordered instructions
1540dispatched (including speculative prefetches).
1541An additional qualifier may be specified, and comprises one of the
1542following keywords:
1543.Bl -tag -width indent -compact
1544.It Li nta
1545Count non-temporal prefetches.
1546.It Li t1
1547Count prefetches to L1.
1548.It Li t2
1549Count prefetches to L2.
1550.It Li wos
1551Count weakly ordered stores.
1552.El
1553The default is to count non-temporal prefetches.
1554.It Li p6-emon-kni-pref-miss Op Li ,umask= Ns Ar qualifier
1555.Pq Tn "Pentium III"
1556Count the number of prefetch or weakly ordered instructions that miss
1557all caches.
1558An additional qualifier may be specified, and comprises one of the
1559following keywords:
1560.Bl -tag -width indent -compact
1561.It Li nta
1562Count non-temporal prefetches.
1563.It Li t1
1564Count prefetches to L1.
1565.It Li t2
1566Count prefetches to L2.
1567.It Li wos
1568Count weakly ordered stores.
1569.El
1570The default is to count non-temporal prefetches.
1571.It Li p6-emon-pref-rqsts-dn
1572.Pq Tn "Pentium M"
1573Count the number of downward prefetches issued.
1574.It Li p6-emon-pref-rqsts-up
1575.Pq Tn "Pentium M"
1576Count the number of upward prefetches issued.
1577.It Li p6-emon-simd-instr-retired
1578.Pq Tn "Pentium M"
1579Count the number of retired
1580.Tn MMX
1581instructions.
1582.It Li p6-emon-sse-sse2-comp-inst-retired Op Li ,umask= Ns Ar qualifier
1583.Pq Tn "Pentium M"
1584Count the number of computational SSE instructions retired.
1585An additional qualifier may be specified and can be one of the
1586following keywords:
1587.Bl -tag -width indent -compact
1588.It Li sse-packed-single
1589Count SSE packed-single instructions.
1590.It Li sse-scalar-single
1591Count SSE scalar-single instructions.
1592.It Li sse2-packed-double
1593Count SSE2 packed-double instructions.
1594.It Li sse2-scalar-double
1595Count SSE2 scalar-double instructions.
1596.El
1597The default is to count SSE packed-single instructions.
1598.It Li p6-emon-sse-sse2-inst-retired Op Li ,umask= Ns Ar qualifier
1599.Pp
1600.Pq Tn "Pentium M"
1601Count the number of SSE instructions retired.
1602An additional qualifier can be specified, and can be one of the
1603following keywords:
1604.Bl -tag -width indent -compact
1605.It Li sse-packed-single
1606Count SSE packed-single instructions.
1607.It Li sse-packed-single-scalar-single
1608Count SSE packed-single and scalar-single instructions.
1609.It Li sse2-packed-double
1610Count SSE2 packed-double instructions.
1611.It Li sse2-scalar-double
1612Count SSE2 scalar-double instructions.
1613.El
1614The default is to count SSE packed-single instructions.
1615.It Li p6-emon-synch-uops
1616.Pq Tn "Pentium M"
1617Count the number of sync micro-ops.
1618.It Li p6-emon-thermal-trip
1619.Pq Tn "Pentium M"
1620Count the duration or occurrences of thermal trips.
1621Use the
1622.Ar edge
1623qualifier to count occurrences of thermal trips.
1624.It Li p6-emon-unfusion
1625.Pq Tn "Pentium M"
1626Count the number of unfusion events in the reorder buffer.
1627.It Li p6-flops
1628Count the number of computational floating point operations retired.
1629This event is only allocated on counter 0.
1630.It Li p6-fp-assist
1631Count the number of floating point exceptions handled by microcode.
1632This event is only allocated on counter 1.
1633.It Li p6-fp-comps-ops-exe
1634Count the number of computational floating point operations executed.
1635This event is only allocated on counter 0.
1636.It Li p6-fp-mmx-trans Op Li ,umask= Ns Ar qualifier
1637.Pq Tn "Pentium II" , Tn "Pentium III"
1638Count the number of transitions between MMX and floating-point
1639instructions.
1640An additional qualifier may be specified, and comprises one of the
1641following keywords:
1642.Bl -tag -width indent -compact
1643.It Li mmxtofp
1644Count transitions from MMX instructions to floating-point instructions.
1645.It Li fptommx
1646Count transitions from floating-point instructions to MMX instructions.
1647.El
1648The default is to count MMX to floating-point transitions.
1649.It Li p6-hw-int-rx
1650Count the number of hardware interrupts received.
1651.It Li p6-ifu-fetch
1652Count the number of instruction fetches, both cacheable and non-cacheable.
1653.It Li p6-ifu-fetch-miss
1654Count the number of instruction fetch misses (i.e., those that produce
1655memory accesses).
1656.It Li p6-ifu-mem-stall
1657Count the number of cycles instruction fetch is stalled for any reason.
1658.It Li p6-ild-stall
1659Count the number of cycles the instruction length decoder is stalled.
1660.It Li p6-inst-decoded
1661Count the number of instructions decoded.
1662.It Li p6-inst-retired
1663Count the number of instructions retired.
1664.It Li p6-itlb-miss
1665Count the number of instruction TLB misses.
1666.It Li p6-l2-ads
1667Count the number of L2 address strobes.
1668.It Li p6-l2-dbus-busy
1669Count the number of cycles during which the L2 cache data bus was busy.
1670.It Li p6-l2-dbus-busy-rd
1671Count the number of cycles during which the L2 cache data bus was busy
1672transferring read data from L2 to the processor.
1673.It Li p6-l2-ifetch Op Li ,umask= Ns Ar qualifier
1674Count the number of L2 instruction fetches.
1675An additional qualifier may be specified and comprises a list of the following
1676keywords separated by
1677.Li "+"
1678characters:
1679.Bl -tag -width indent -compact
1680.It Li e
1681Count operations affecting E (exclusive) state lines.
1682.It Li i
1683Count operations affecting I (invalid) state lines.
1684.It Li m
1685Count operations affecting M (modified) state lines.
1686.It Li s
1687Count operations affecting S (shared) state lines.
1688.El
1689The default is to count operations affecting all (MESI) state lines.
1690.It Li p6-l2-ld Op Li ,umask= Ns Ar qualifier
1691Count the number of L2 data loads.
1692An additional qualifier may be specified and comprises a list of the following
1693keywords separated by
1694.Li "+"
1695characters:
1696.Bl -tag -width indent -compact
1697.It Li both
1698.Pq Tn "Pentium M"
1699Count both hardware-prefetched lines and non-hardware-prefetched lines.
1700.It Li e
1701Count operations affecting E (exclusive) state lines.
1702.It Li hw
1703.Pq Tn "Pentium M"
1704Count hardware-prefetched lines only.
1705.It Li i
1706Count operations affecting I (invalid) state lines.
1707.It Li m
1708Count operations affecting M (modified) state lines.
1709.It Li nonhw
1710.Pq Tn "Pentium M"
1711Exclude hardware-prefetched lines.
1712.It Li s
1713Count operations affecting S (shared) state lines.
1714.El
1715The default on processors other than
1716.Tn "Pentium M"
1717processors is to count operations affecting all (MESI) state lines.
1718The default on
1719.Tn "Pentium M"
1720processors is to count both hardware-prefetched and
1721non-hardware-prefetch operations on all (MESI) state lines.
1722.Pq Errata
1723This event is affected by processor errata E53.
1724.It Li p6-l2-lines-in Op Li ,umask= Ns Ar qualifier
1725Count the number of L2 lines allocated.
1726An additional qualifier may be specified and comprises a list of the following
1727keywords separated by
1728.Li "+"
1729characters:
1730.Bl -tag -width indent -compact
1731.It Li both
1732.Pq Tn "Pentium M"
1733Count both hardware-prefetched lines and non-hardware-prefetched lines.
1734.It Li e
1735Count operations affecting E (exclusive) state lines.
1736.It Li hw
1737.Pq Tn "Pentium M"
1738Count hardware-prefetched lines only.
1739.It Li i
1740Count operations affecting I (invalid) state lines.
1741.It Li m
1742Count operations affecting M (modified) state lines.
1743.It Li nonhw
1744.Pq Tn "Pentium M"
1745Exclude hardware-prefetched lines.
1746.It Li s
1747Count operations affecting S (shared) state lines.
1748.El
1749The default on processors other than
1750.Tn "Pentium M"
1751processors is to count operations affecting all (MESI) state lines.
1752The default on
1753.Tn "Pentium M"
1754processors is to count both hardware-prefetched and
1755non-hardware-prefetch operations on all (MESI) state lines.
1756.Pq Errata
1757This event is affected by processor errata E45.
1758.It Li p6-l2-lines-out Op Li ,umask= Ns Ar qualifier
1759Count the number of L2 lines evicted.
1760An additional qualifier may be specified and comprises a list of the following
1761keywords separated by
1762.Li "+"
1763characters:
1764.Bl -tag -width indent -compact
1765.It Li both
1766.Pq Tn "Pentium M"
1767Count both hardware-prefetched lines and non-hardware-prefetched lines.
1768.It Li e
1769Count operations affecting E (exclusive) state lines.
1770.It Li hw
1771.Pq Tn "Pentium M"
1772Count hardware-prefetched lines only.
1773.It Li i
1774Count operations affecting I (invalid) state lines.
1775.It Li m
1776Count operations affecting M (modified) state lines.
1777.It Li nonhw
1778.Pq Tn "Pentium M" only
1779Exclude hardware-prefetched lines.
1780.It Li s
1781Count operations affecting S (shared) state lines.
1782.El
1783The default on processors other than
1784.Tn "Pentium M"
1785processors is to count operations affecting all (MESI) state lines.
1786The default on
1787.Tn "Pentium M"
1788processors is to count both hardware-prefetched and
1789non-hardware-prefetch operations on all (MESI) state lines.
1790.Pq Errata
1791This event is affected by processor errata E45.
1792.It Li p6-l2-m-lines-inm
1793Count the number of modified lines allocated in L2 cache.
1794.It Li p6-l2-m-lines-outm Op Li ,umask= Ns Ar qualifier
1795Count the number of L2 M-state lines evicted.
1796.Pp
1797.Pq Tn "Pentium M"
1798On these processors an additional qualifier may be specified and
1799comprises a list of the following keywords separated by
1800.Li "+"
1801characters:
1802.Bl -tag -width indent -compact
1803.It Li both
1804Count both hardware-prefetched lines and non-hardware-prefetched lines.
1805.It Li hw
1806Count hardware-prefetched lines only.
1807.It Li nonhw
1808Exclude hardware-prefetched lines.
1809.El
1810The default is to count both hardware-prefetched and
1811non-hardware-prefetch operations.
1812.Pq Errata
1813This event is affected by processor errata E53.
1814.It Li p6-l2-rqsts Op Li ,umask= Ns Ar qualifier
1815Count the total number of L2 requests.
1816An additional qualifier may be specified and comprises a list of the following
1817keywords separated by
1818.Li "+"
1819characters:
1820.Bl -tag -width indent -compact
1821.It Li e
1822Count operations affecting E (exclusive) state lines.
1823.It Li i
1824Count operations affecting I (invalid) state lines.
1825.It Li m
1826Count operations affecting M (modified) state lines.
1827.It Li s
1828Count operations affecting S (shared) state lines.
1829.El
1830The default is to count operations affecting all (MESI) state lines.
1831.It Li p6-l2-st Op Li ,umask= Ns Ar qualifier
1832Count the number of L2 data stores.
1833An additional qualifier may be specified and comprises a list of the following
1834keywords separated by
1835.Li "+"
1836characters:
1837.Bl -tag -width indent -compact
1838.It Li e
1839Count operations affecting E (exclusive) state lines.
1840.It Li i
1841Count operations affecting I (invalid) state lines.
1842.It Li m
1843Count operations affecting M (modified) state lines.
1844.It Li s
1845Count operations affecting S (shared) state lines.
1846.El
1847The default is to count operations affecting all (MESI) state lines.
1848.It Li p6-ld-blocks
1849Count the number of load operations delayed due to store buffer blocks.
1850.It Li p6-misalign-mem-ref
1851Count the number of misaligned data memory references (crossing a 64
1852bit boundary).
1853.It Li p6-mmx-assist
1854.Pq Tn "Pentium II" , Tn "Pentium III"
1855Count the number of MMX assists executed.
1856.It Li p6-mmx-instr-exec
1857.Pq Tn "Celeron" , Tn "Pentium II"
1858Count the number of MMX instructions executed, except MOVQ and MOVD
1859stores from register to memory.
1860.It Li p6-mmx-instr-ret
1861.Pq Tn "Pentium II"
1862Count the number of MMX instructions retired.
1863.It Li p6-mmx-instr-type-exec Op Li ,umask= Ns Ar qualifier
1864.Pq Tn "Pentium II" , Tn "Pentium III"
1865Count the number of MMX instructions executed.
1866An additional qualifier may be specified and comprises a list of
1867the following keywords separated by
1868.Li "+"
1869characters:
1870.Bl -tag -width indent -compact
1871.It Li pack
1872Count MMX pack operation instructions.
1873.It Li packed-arithmetic
1874Count MMX packed arithmetic instructions.
1875.It Li packed-logical
1876Count MMX packed logical instructions.
1877.It Li packed-multiply
1878Count MMX packed multiply instructions.
1879.It Li packed-shift
1880Count MMX packed shift instructions.
1881.It Li unpack
1882Count MMX unpack operation instructions.
1883.El
1884The default is to count all operations.
1885.It Li p6-mmx-sat-instr-exec
1886.Pq Tn "Pentium II" , Tn "Pentium III"
1887Count the number of MMX saturating instructions executed.
1888.It Li p6-mmx-uops-exec
1889.Pq Tn "Pentium II" , Tn "Pentium III"
1890Count the number of MMX micro-ops executed.
1891.It Li p6-mul
1892Count the number of floating point multiplies.
1893This event is only allocated on counter 1.
1894.It Li p6-partial-rat-stalls
1895Count the number of cycles or events for partial stalls.
1896.It Li p6-resource-stalls
1897Count the number of cycles there was a resource related stall of any kind.
1898.It Li p6-ret-seg-renames
1899.Pq Tn "Pentium II" , Tn "Pentium III"
1900Count the number of segment register rename events retired.
1901.It Li p6-sb-drains
1902Count the number of cycles the store buffer is draining.
1903.It Li p6-seg-reg-renames Op Li ,umask= Ns Ar qualifier
1904.Pq Tn "Pentium II" , Tn "Pentium III"
1905Count the number of segment register renames.
1906An additional qualifier may be specified, and comprises a list of the
1907following keywords separated by
1908.Li "+"
1909characters:
1910.Bl -tag -width indent -compact
1911.It Li ds
1912Count renames for segment register DS.
1913.It Li es
1914Count renames for segment register ES.
1915.It Li fs
1916Count renames for segment register FS.
1917.It Li gs
1918Count renames for segment register GS.
1919.El
1920The default is to count operations affecting all segment registers.
1921.It Li p6-seg-rename-stalls Op Li ,umask= Ns Ar qualifier
1922.Pq Tn "Pentium II" , Tn "Pentium III"
1923Count the number of segment register renaming stalls.
1924An additional qualifier may be specified, and comprises a list of the
1925following keywords separated by
1926.Li "+"
1927characters:
1928.Bl -tag -width indent -compact
1929.It Li ds
1930Count stalls for segment register DS.
1931.It Li es
1932Count stalls for segment register ES.
1933.It Li fs
1934Count stalls for segment register FS.
1935.It Li gs
1936Count stalls for segment register GS.
1937.El
1938The default is to count operations affecting all the segment registers.
1939.It Li p6-segment-reg-loads
1940Count the number of segment register loads.
1941.It Li p6-uops-retired
1942Count the number of micro-ops retired.
1943.El
1944.Ss Intel P4 PMCS
1945Intel P4 PMCs are present in Intel
1946.Tn "Pentium 4"
1947and
1948.Tn Xeon
1949processors.
1950These PMCs are documented in
1951.Rs
1952.%B "IA-32 Intel(R) Architecture Software Developer's Manual"
1953.%T "Volume 3: System Programming Guide"
1954.%N "Order Number 245472-012"
1955.%D 2003
1956.%Q "Intel Corporation"
1957.Re
1958Further information about using these PMCs may be found in
1959.Rs
1960.%B "IA-32 Intel(R) Architecture Optimization Guide"
1961.%D 2003
1962.%N "Order Number 248966-009"
1963.%Q "Intel Corporation"
1964.Re
1965Some of these events are affected by processor errata described in
1966.Rs
1967.%B "Intel(R) Pentium(R) 4 Processor Specification Update"
1968.%N "Document Number:  249199-059"
1969.%D "April 2005"
1970.%Q "Intel Corporation"
1971.Re
1972.Pp
1973Event specifiers for Intel P4 PMCs can have the following common
1974qualifiers:
1975.Bl -tag -width indent
1976.It Li active= Ns Ar choice
1977(On P4 HTT CPUs) Filter event counting based on which logical
1978processors are active.
1979The allowed values of
1980.Ar choice
1981are:
1982.Bl -tag -width indent -compact
1983.It Li any
1984Count when either logical processor is active.
1985.It Li both
1986Count when both logical processors are active.
1987.It Li none
1988Count only when neither logical processor is active.
1989.It Li single
1990Count only when one logical processor is active.
1991.El
1992The default is
1993.Li both .
1994.It Li cascade
1995Configure the PMC to cascade onto its partner.
1996The PMC for the partner must already have been allocated by the
1997current process.
1998See
1999.Sx "Cascading P4 PMCs"
2000below for more information.
2001.It Li edge
2002Configure the counter to count false to true transitions of the threshold
2003comparison output.
2004This qualifier only takes effect if a threshold qualifier has also been
2005specified.
2006.It Li complement
2007Configure the counter to increment only when the event count seen is
2008less than the threshold qualifier value specified.
2009.It Li mask= Ns Ar qualifier
2010Many event specifiers for Intel P4 PMCs need to be additionally
2011qualified using a mask qualifier.
2012The allowed syntax for these qualifiers is event specific and is
2013described along with the events.
2014.It Li os
2015Configure the PMC to count when the CPL of the processor is 0.
2016.It Li precise
2017Select precise event based sampling.
2018Precise sampling is supported by the hardware for a limited set of
2019events.
2020.It Li tag= Ns Ar value
2021Configure the PMC to tag the internal uop selected by the other
2022fields in this event specifier with value
2023.Ar value .
2024This feature is used when cascading PMCs.
2025.It Li threshold= Ns Ar value
2026Configure the PMC to increment only when the event counts seen are
2027greater than the specified threshold value
2028.Ar value .
2029.It Li usr
2030Configure the PMC to count when the CPL of the processor is 1, 2 or 3.
2031.El
2032If neither of the
2033.Li os
2034or
2035.Li usr
2036qualifiers is specified, the default is to enable both.
2037.Pp
2038On Intel Pentium 4 processors with HTT, events are
2039divided into two classes:
2040.Bl -tag -width "XXXXXXXXXX" -compact
2041.It "TS Events"
2042are those where hardware can differentiate between events
2043generated on one logical processor from those generated on the
2044other.
2045.It "TI Events"
2046are those where hardware cannot differentiate between events
2047generated by multiple logical processors in a package.
2048.El
2049Only TS events are allowed for use with process-mode PMCs on
2050Pentium-4/HTT CPUs.
2051.Pp
2052The event specifiers supported by Intel P4 PMCs are:
2053.Bl -tag -width indent
2054.It Li p4-128bit-mmx-uop Op Li ,mask= Ns Ar flags
2055.Pq "TI event"
2056Count integer SIMD SSE2 instructions that operate on 128 bit SIMD
2057operands.
2058Qualifier
2059.Ar flags
2060can take the following value (which is also the default):
2061.Bl -tag -width indent -compact
2062.It Li all
2063Count all uops operating on 128 bit SIMD integer operands in memory or
2064XMM register.
2065.El
2066If an instruction contains more than one 128 bit MMX uop, then each
2067uop will be counted.
2068.It Li p4-64bit-mmx-uop Op Li ,mask= Ns Ar flags
2069.Pq "TI event"
2070Count MMX instructions that operate on 64 bit SIMD operands.
2071Qualifier
2072.Ar flags
2073can take the following value (which is also the default):
2074.Bl -tag -width indent -compact
2075.It Li all
2076Count all uops operating on 64 bit SIMD integer operands in memory or
2077in MMX registers.
2078.El
2079If an instruction contains more than one 64 bit MMX uop, then each
2080uop will be counted.
2081.It Li p4-b2b-cycles
2082.Pq "TI event"
2083Count back-to-back bus cycles.
2084Further documentation for this event is unavailable.
2085.It Li p4-bnr
2086.Pq "TI event"
2087Count bus-not-ready conditions.
2088Further documentation for this event is unavailable.
2089.It Li p4-bpu-fetch-request Op Li ,mask= Ns Ar qualifier
2090.Pq "TS event"
2091Count instruction fetch requests qualified by additional
2092flags specified in
2093.Ar qualifier .
2094At this point only one flag is supported:
2095.Bl -tag -width indent -compact
2096.It Li tcmiss
2097Count trace cache lookup misses.
2098.El
2099The default qualifier is also
2100.Ar mask=tcmiss .
2101.It Li p4-branch-retired Op Li ,mask= Ns Ar flags
2102.Pq "TS event"
2103Counts retired branches.
2104Qualifier
2105.Ar flags
2106is a list of the following
2107.Li +
2108separated strings:
2109.Bl -tag -width indent -compact
2110.It Li mmnp
2111Count branches not-taken and predicted.
2112.It Li mmnm
2113Count branches not-taken and mis-predicted.
2114.It Li mmtp
2115Count branches taken and predicted.
2116.It Li mmtm
2117Count branches taken and mis-predicted.
2118.El
2119The default qualifier counts all four kinds of branches.
2120.It Li p4-bsq-active-entries Op Li ,mask= Ns Ar qualifier
2121.Pq "TS event"
2122Count the number of entries (clipped at 15) currently active in the
2123BSQ.
2124Qualifier
2125.Ar qualifier
2126is a
2127.Li +
2128separated set of the following flags:
2129.Bl -tag -width indent -compact
2130.It Li req-type0 , Li req-type1
2131Forms a 2-bit number used to select the request type encoding:
2132.Bl -tag -width indent -compact
2133.It Li 0
2134reads excluding read invalidate
2135.It Li 1
2136read invalidates
2137.It Li 2
2138writes other than writebacks
2139.It Li 3
2140writebacks
2141.El
2142Bit
2143.Li req-type1
2144is the MSB for this two bit number.
2145.It Li req-len0 , Li req-len1
2146Forms a two-bit number that specifies the request length encoding:
2147.Bl -tag -width indent -compact
2148.It Li 0
21490 chunks
2150.It Li 1
21511 chunk
2152.It Li 3
21538 chunks
2154.El
2155Bit
2156.Li req-len1
2157is the MSB for this two bit number.
2158.It Li req-io-type
2159Count requests that are input or output requests.
2160.It Li req-lock-type
2161Count requests that lock the bus.
2162.It Li req-lock-cache
2163Count requests that lock the cache.
2164.It Li req-split-type
2165Count requests for a bus 8-byte chunk that is split across an
21668-byte boundary.
2167.It Li req-dem-type
2168Count requests that are demand (not prefetches) if set.
2169Count requests that are prefetches if not set.
2170.It Li req-ord-type
2171Count requests that are ordered.
2172.It Li mem-type0 , Li mem-type1 , Li mem-type2
2173Forms a 3-bit number that specifies a memory type encoding:
2174.Bl -tag -width indent -compact
2175.It Li 0
2176UC
2177.It Li 1
2178USWC
2179.It Li 4
2180WT
2181.It Li 5
2182WP
2183.It Li 6
2184WB
2185.El
2186Bit
2187.Li mem-type2
2188is the MSB of this 3-bit number.
2189.El
2190The default qualifier has all the above bits set.
2191.Pp
2192Edge triggering using the
2193.Li edge
2194qualifier should not be used with this event when counting cycles.
2195.It Li p4-bsq-allocation Op Li ,mask= Ns Ar qualifier
2196.Pq "TS event"
2197Count allocations in the bus sequence unit according to the flags
2198specified in
2199.Ar qualifier ,
2200which is a
2201.Li +
2202separated set of the following flags:
2203.Bl -tag -width indent -compact
2204.It Li req-type0 , Li req-type1
2205Forms a 2-bit number used to select the request type encoding:
2206.Bl -tag -width indent -compact
2207.It Li 0
2208reads excluding read invalidate
2209.It Li 1
2210read invalidates
2211.It Li 2
2212writes other than writebacks
2213.It Li 3
2214writebacks
2215.El
2216Bit
2217.Li req-type1
2218is the MSB for this two bit number.
2219.It Li req-len0 , Li req-len1
2220Forms a two-bit number that specifies the request length encoding:
2221.Bl -tag -width indent -compact
2222.It Li 0
22230 chunks
2224.It Li 1
22251 chunk
2226.It Li 3
22278 chunks
2228.El
2229Bit
2230.Li req-len1
2231is the MSB for this two bit number.
2232.It Li req-io-type
2233Count requests that are input or output requests.
2234.It Li req-lock-type
2235Count requests that lock the bus.
2236.It Li req-lock-cache
2237Count requests that lock the cache.
2238.It Li req-split-type
2239Count requests for a bus 8-byte chunk that is split across an
22408-byte boundary.
2241.It Li req-dem-type
2242Count requests that are demand (not prefetches) if set.
2243Count requests that are prefetches if not set.
2244.It Li req-ord-type
2245Count requests that are ordered.
2246.It Li mem-type0 , Li mem-type1 , Li mem-type2
2247Forms a 3-bit number that specifies a memory type encoding:
2248.Bl -tag -width indent -compact
2249.It Li 0
2250UC
2251.It Li 1
2252USWC
2253.It Li 4
2254WT
2255.It Li 5
2256WP
2257.It Li 6
2258WB
2259.El
2260Bit
2261.Li mem-type2
2262is the MSB of this 3-bit number.
2263.El
2264The default qualifier has all the above bits set.
2265.Pp
2266This event is usually used along with the
2267.Li edge
2268qualifier to avoid multiple counting.
2269.It Li p4-bsq-cache-reference Op Li ,mask= Ns Ar qualifier
2270.Pq "TS event"
2271Count cache references as seen by the bus unit (2nd or 3rd level
2272cache references).
2273Qualifier
2274.Ar qualifier
2275is a
2276.Li +
2277separated list of the following keywords:
2278.Bl -tag -width indent -compact
2279.It Li rd-2ndl-hits
2280Count 2nd level cache hits in the shared state.
2281.It Li rd-2ndl-hite
2282Count 2nd level cache hits in the exclusive state.
2283.It Li rd-2ndl-hitm
2284Count 2nd level cache hits in the modified state.
2285.It Li rd-3rdl-hits
2286Count 3rd level cache hits in the shared state.
2287.It Li rd-3rdl-hite
2288Count 3rd level cache hits in the exclusive state.
2289.It Li rd-3rdl-hitm
2290Count 3rd level cache hits in the modified state.
2291.It Li rd-2ndl-miss
2292Count 2nd level cache misses.
2293.It Li rd-3rdl-miss
2294Count 3rd level cache misses.
2295.It Li wr-2ndl-miss
2296Count write-back lookups from the data access cache that miss the 2nd
2297level cache.
2298.El
2299The default is to count all the above events.
2300.It Li p4-execution-event Op Li ,mask= Ns Ar flags
2301.Pq "TS event"
2302Count the retirement of tagged uops selected through the execution
2303tagging mechanism.
2304Qualifier
2305.Ar flags
2306can contain the following strings separated by
2307.Li +
2308characters:
2309.Bl -tag -width indent -compact
2310.It Li nbogus0 , Li nbogus1 , Li nbogus2 , Li nbogus3
2311The marked uops are not bogus.
2312.It Li bogus0 , Li bogus1 , Li bogus2 , Li bogus3
2313The marked uops are bogus.
2314.El
2315This event requires additional (upstream) events to be allocated to
2316perform the desired uop tagging.
2317The default is to set all the above flags.
2318This event can be used for precise event based sampling.
2319.It Li p4-front-end-event Op Li ,mask= Ns Ar flags
2320.Pq "TS event"
2321Count the retirement of tagged uops selected through the front-end
2322tagging mechanism.
2323Qualifier
2324.Ar flags
2325can contain the following strings separated by
2326.Li +
2327characters:
2328.Bl -tag -width indent -compact
2329.It Li nbogus
2330The marked uops are not bogus.
2331.It Li bogus
2332The marked uops are bogus.
2333.El
2334This event requires additional (upstream) events to be allocated to
2335perform the desired uop tagging.
2336The default is to select both kinds of events.
2337This event can be used for precise event based sampling.
2338.It Li p4-fsb-data-activity Op Li ,mask= Ns Ar flags
2339.Pq "TI event"
2340Count each DBSY or DRDY event selected by qualifier
2341.Ar flags .
2342Qualifier
2343.Ar flags
2344is a
2345.Li +
2346separated set of the following flags:
2347.Bl -tag -width indent -compact
2348.It Li drdy-drv
2349Count when this processor is driving data onto the bus.
2350.It Li drdy-own
2351Count when this processor is reading data from the bus.
2352.It Li drdy-other
2353Count when data is on the bus but not being sampled by this processor.
2354.It Li dbsy-drv
2355Count when this processor reserves the bus for use in the next cycle
2356in order to drive data.
2357.It Li dbsy-own
2358Count when some agent reserves the bus for use in the next bus cycle
2359to drive data that this processor will sample.
2360.It Li dbsy-other
2361Count when some agent reserves the bus for use in the next bus cycle
2362to drive data that this processor will not sample.
2363.El
2364Flags
2365.Li drdy-own
2366and
2367.Li drdy-other
2368are mutually exclusive.
2369Flags
2370.Li dbsy-own
2371and
2372.Li dbsy-other
2373are mutually exclusive.
2374The default value for
2375.Ar flags
2376is
2377.Li drdy-drv+drdy-own+dbsy-drv+dbsy-own .
2378.It Li p4-global-power-events Op Li ,mask= Ns Ar flags
2379.Pq "TS event"
2380Count cycles during which the processor is not stopped.
2381Qualifier
2382.Ar flags
2383can take the following value (which is also the default):
2384.Bl -tag -width indent -compact
2385.It Li running
2386Count cycles when the processor is active.
2387.El
2388.It Li p4-instr-retired Op Li ,mask= Ns Ar flags
2389.Pq "TS event"
2390Count instructions retired during a clock cycle.
2391Qualifier
2392.Ar flags
2393comprises the following strings separated by
2394.Li +
2395characters:
2396.Bl -tag -width indent -compact
2397.It Li nbogusntag
2398Count non-bogus instructions that are not tagged.
2399.It Li nbogustag
2400Count non-bogus instructions that are tagged.
2401.It Li bogusntag
2402Count bogus instructions that are not tagged.
2403.It Li bogustag
2404Count bogus instructions that are tagged.
2405.El
2406The default qualifier counts all the above kinds of instructions.
2407.It Li p4-ioq-active-entries Xo
2408.Op Li ,mask= Ns Ar qualifier
2409.Op Li ,busreqtype= Ns Ar req-type
2410.Xc
2411.Pq "TS event"
2412Count the number of entries (clipped at 15) in the IOQ that are
2413active.
2414The event masks are specified by qualifier
2415.Ar qualifier
2416and
2417.Ar req-type .
2418.Pp
2419Qualifier
2420.Ar qualifier
2421is a
2422.Li +
2423separated set of the following flags:
2424.Bl -tag -width indent -compact
2425.It Li all-read
2426Count read entries.
2427.It Li all-write
2428Count write entries.
2429.It Li mem-uc
2430Count entries accessing uncacheable memory.
2431.It Li mem-wc
2432Count entries accessing write-combining memory.
2433.It Li mem-wt
2434Count entries accessing write-through memory.
2435.It Li mem-wp
2436Count entries accessing write-protected memory.
2437.It Li mem-wb
2438Count entries accessing write-back memory.
2439.It Li own
2440Count store requests driven by the processor (i.e., not by other
2441processors or by DMA).
2442.It Li other
2443Count store requests driven by other processors or by DMA.
2444.It Li prefetch
2445Include hardware and software prefetch requests in the count.
2446.El
2447The default value for
2448.Ar qualifier
2449is to enable all the above flags.
2450.Pp
2451The
2452.Ar req-type
2453qualifier is a 5-bit number that can be additionally used to select a
2454specific bus request type.
2455The default is 0.
2456.Pp
2457The
2458.Li edge
2459qualifier should not be used when counting cycles with this event.
2460The exact behaviour of this event depends on the processor revision.
2461.It Li p4-ioq-allocation Xo
2462.Op Li ,mask= Ns Ar qualifier
2463.Op Li ,busreqtype= Ns Ar req-type
2464.Xc
2465.Pq "TS event"
2466Count various types of transactions on the bus matching the flags set
2467in
2468.Ar qualifier
2469and
2470.Ar req-type .
2471.Pp
2472Qualifier
2473.Ar qualifier
2474is a
2475.Li +
2476separated set of the following flags:
2477.Bl -tag -width indent -compact
2478.It Li all-read
2479Count read entries.
2480.It Li all-write
2481Count write entries.
2482.It Li mem-uc
2483Count entries accessing uncacheable memory.
2484.It Li mem-wc
2485Count entries accessing write-combining memory.
2486.It Li mem-wt
2487Count entries accessing write-through memory.
2488.It Li mem-wp
2489Count entries accessing write-protected memory.
2490.It Li mem-wb
2491Count entries accessing write-back memory.
2492.It Li own
2493Count store requests driven by the processor (i.e., not by other
2494processors or by DMA).
2495.It Li other
2496Count store requests driven by other processors or by DMA.
2497.It Li prefetch
2498Include hardware and software prefetch requests in the count.
2499.El
2500The default value for
2501.Ar qualifier
2502is to enable all the above flags.
2503.Pp
2504The
2505.Ar req-type
2506qualifier is a 5-bit number that can be additionally used to select a
2507specific bus request type.
2508The default is 0.
2509.Pp
2510The
2511.Li edge
2512qualifier is normally used with this event to prevent multiple
2513counting.
2514The exact behaviour of this event depends on the processor revision.
2515.It Li p4-itlb-reference Op Li ,mask= Ns Ar qualifier
2516.Pq "TS event"
2517Count translations using the instruction translation look-aside
2518buffer.
2519The
2520.Ar qualifier
2521argument is a list of the following strings separated by
2522.Li +
2523characters:
2524.Bl -tag -width indent -compact
2525.It Li hit
2526Count ITLB hits.
2527.It Li miss
2528Count ITLB misses.
2529.It Li hit-uc
2530Count uncacheable ITLB hits.
2531.El
2532If no
2533.Ar qualifier
2534is specified the default is to count all the three kinds of ITLB
2535translations.
2536.It Li p4-load-port-replay Op Li ,mask= Ns Ar qualifier
2537.Pq "TS event"
2538Count replayed events at the load port.
2539Qualifier
2540.Ar qualifier
2541can take on one value:
2542.Bl -tag -width indent -compact
2543.It Li split-ld
2544Count split loads.
2545.El
2546The default value for
2547.Ar qualifier
2548is
2549.Li split-ld .
2550.It Li p4-mispred-branch-retired Op Li ,mask= Ns Ar flags
2551.Pq "TS event"
2552Count mispredicted IA-32 branch instructions.
2553Qualifier
2554.Ar flags
2555can take the following value (which is also the default):
2556.Bl -tag -width indent -compact
2557.It Li nbogus
2558Count non-bogus retired branch instructions.
2559.El
2560.It Li p4-machine-clear Op Li ,mask= Ns Ar flags
2561.Pq "TS event"
2562Count the number of pipeline clears seen by the processor.
2563Qualifier
2564.Ar flags
2565is a list of the following strings separated by
2566.Li +
2567characters:
2568.Bl -tag -width indent -compact
2569.It Li clear
2570Count for a portion of the many cycles when the machine is being
2571cleared for any reason.
2572.It Li moclear
2573Count machine clears due to memory ordering issues.
2574.It Li smclear
2575Count machine clears due to self-modifying code.
2576.El
2577Use qualifier
2578.Li edge
2579to get a count of occurrences of machine clears.
2580The default qualifier is
2581.Li clear .
2582.It Li p4-memory-cancel Op Li ,mask= Ns Ar event-list
2583.Pq "TS event"
2584Count the cancelling of various kinds of requests in the data cache
2585address control unit of the CPU.
2586The qualifier
2587.Ar event-list
2588is a list of the following strings separated by
2589.Li "+"
2590characters:
2591.Bl -tag -width indent -compact
2592.It Li st-rb-full
2593Requests cancelled because no store request buffer was available.
2594.It Li 64k-conf
2595Requests that conflict due to 64K aliasing.
2596.El
2597If
2598.Ar event-list
2599is not specified, then the default is to count both kinds of events.
2600.It Li p4-memory-complete Op Li ,mask= Ns Ar event-list
2601.Pq "TS event"
2602Count the completion of load split, store split, uncacheable split and
2603uncacheable load operations selected by qualifier
2604.Ar event-list .
2605The qualifier
2606.Ar event-list
2607is a
2608.Li +
2609separated list of the following flags:
2610.Bl -tag -width indent -compact
2611.It Li lsc
2612Count load splits completed, excluding loads from uncacheable or
2613write-combining areas.
2614.It Li ssc
2615Count any split stores completed.
2616.El
2617The default is to count both kinds of operations.
2618.It Li p4-mob-load-replay Op Li ,mask= Ns Ar qualifier
2619.Pq "TS event"
2620Count load replays triggered by the memory order buffer.
2621Qualifier
2622.Ar qualifier
2623can be a
2624.Li +
2625separated list of the following flags:
2626.Bl -tag -width indent -compact
2627.It Li no-sta
2628Count replays because of unknown store addresses.
2629.It Li no-std
2630Count replays because of unknown store data.
2631.It Li partial-data
2632Count replays because of partially overlapped data accesses between
2633load and store operations.
2634.It Li unalgn-addr
2635Count replays because of mismatches in the lower 4 bits of load and
2636store operations.
2637.El
2638The default qualifier is
2639.Ar no-sta+no-std+partial-data+unalgn-addr .
2640.It Li p4-packed-dp-uop Op Li ,mask= Ns Ar flags
2641.Pq "TI event"
2642Count packed double-precision uops.
2643Qualifier
2644.Ar flags
2645can take the following value (which is also the default):
2646.Bl -tag -width indent -compact
2647.It Li all
2648Count all uops operating on packed double-precision operands.
2649.El
2650.It Li p4-packed-sp-uop Op Li ,mask= Ns Ar flags
2651.Pq "TI event"
2652Count packed single-precision uops.
2653Qualifier
2654.Ar flags
2655can take the following value (which is also the default):
2656.Bl -tag -width indent -compact
2657.It Li all
2658Count all uops operating on packed single-precision operands.
2659.El
2660.It Li p4-page-walk-type Op Li ,mask= Ns Ar qualifier
2661.Pq "TI event"
2662Count page walks performed by the page miss handler.
2663Qualifier
2664.Ar qualifier
2665can be a
2666.Li +
2667separated list of the following keywords:
2668.Bl -tag -width indent -compact
2669.It Li dtmiss
2670Count page walks for data TLB misses.
2671.It Li itmiss
2672Count page walks for instruction TLB misses.
2673.El
2674The default value for
2675.Ar qualifier
2676is
2677.Li dtmiss+itmiss .
2678.It Li p4-replay-event Op Li ,mask= Ns Ar flags
2679.Pq "TS event"
2680Count the retirement of tagged uops selected through the replay
2681tagging mechanism.
2682Qualifier
2683.Ar flags
2684contains a
2685.Li +
2686separated set of the following strings:
2687.Bl -tag -width indent -compact
2688.It Li nbogus
2689The marked uops are not bogus.
2690.It Li bogus
2691The marked uops are bogus.
2692.El
2693This event requires additional (upstream) events to be allocated to
2694perform the desired uop tagging.
2695The default qualifier counts both kinds of uops.
2696This event can be used for precise event based sampling.
2697.It Li p4-resource-stall Op Li ,mask= Ns Ar flags
2698.Pq "TS event"
2699Count the occurrence or latency of stalls in the allocator.
2700Qualifier
2701.Ar flags
2702can take the following value (which is also the default):
2703.Bl -tag -width indent -compact
2704.It Li sbfull
2705A stall due to the lack of store buffers.
2706.El
2707.It Li p4-response
2708.Pq "TI event"
2709Count different types of responses.
2710Further documentation on this event is not available.
2711.It Li p4-retired-branch-type Op Li ,mask= Ns Ar flags
2712.Pq "TS event"
2713Count branches retired.
2714Qualifier
2715.Ar flags
2716contains a
2717.Li +
2718separated list of strings:
2719.Bl -tag -width indent -compact
2720.It Li conditional
2721Count conditional jumps.
2722.It Li call
2723Count direct and indirect call branches.
2724.It Li return
2725Count return branches.
2726.It Li indirect
2727Count returns, indirect calls or indirect jumps.
2728.El
2729The default qualifier counts all the above branch types.
2730.It Li p4-retired-mispred-branch-type Op Li ,mask= Ns Ar flags
2731.Pq "TS event"
2732Count mispredicted branches retired.
2733Qualifier
2734.Ar flags
2735contains a
2736.Li +
2737separated list of strings:
2738.Bl -tag -width indent -compact
2739.It Li conditional
2740Count conditional jumps.
2741.It Li call
2742Count indirect call branches.
2743.It Li return
2744Count return branches.
2745.It Li indirect
2746Count returns, indirect calls or indirect jumps.
2747.El
2748The default qualifier counts all the above branch types.
2749.It Li p4-scalar-dp-uop Op Li ,mask= Ns Ar flags
2750.Pq "TI event"
2751Count the number of scalar double-precision uops.
2752Qualifier
2753.Ar flags
2754can take the following value (which is also the default):
2755.Bl -tag -width indent -compact
2756.It Li all
2757Count all uops operating on scalar double-precision operands.
2758.El
2759.It Li p4-scalar-sp-uop Op Li ,mask= Ns Ar flags
2760.Pq "TI event"
2761Count the number of scalar single-precision uops.
2762Qualifier
2763.Ar flags
2764can take the following value (which is also the default):
2765.Bl -tag -width indent -compact
2766.It Li all
2767Count all uops operating on scalar single-precision operands.
2768.El
2769.It Li p4-snoop
2770.Pq "TI event"
2771Count snoop traffic.
2772Further documentation on this event is not available.
2773.It Li p4-sse-input-assist Op Li ,mask= Ns Ar flags
2774.Pq "TI event"
2775Count the number of times an assist is required to handle problems
2776with the operands for SSE and SSE2 operations.
2777Qualifier
2778.Ar flags
2779can take the following value (which is also the default):
2780.Bl -tag -width indent -compact
2781.It Li all
2782Count assists for all SSE and SSE2 uops.
2783.El
2784.It Li p4-store-port-replay Op Li ,mask= Ns Ar qualifier
2785.Pq "TS event"
2786Count events replayed at the store port.
2787Qualifier
2788.Ar qualifier
2789can take on one value:
2790.Bl -tag -width indent -compact
2791.It Li split-st
2792Count split stores.
2793.El
2794The default value for
2795.Ar qualifier
2796is
2797.Li split-st .
2798.It Li p4-tc-deliver-mode Op Li ,mask= Ns Ar qualifier
2799.Pq "TI event"
2800Count the duration in cycles of operating modes of the trace cache and
2801decode engine.
2802The desired operating mode is selected by
2803.Ar qualifier ,
2804which is a list of the following strings separated by
2805.Li "+"
2806characters:
2807.Bl -tag -width indent -compact
2808.It Li DD
2809Both logical processors are in deliver mode.
2810.It Li DB
2811Logical processor 0 is in deliver mode while logical processor 1 is in
2812build mode.
2813.It Li DI
2814Logical processor 0 is in deliver mode while logical processor 1 is
2815halted, or in machine clear, or transitioning to a long microcode
2816flow.
2817.It Li BD
2818Logical processor 0 is in build mode while logical processor 1 is in
2819deliver mode.
2820.It Li BB
2821Both logical processors are in build mode.
2822.It Li BI
2823Logical processor 0 is in build mode while logical processor 1 is
2824halted, or in machine clear or transitioning to a long microcode
2825flow.
2826.It Li ID
2827Logical processor 0 is halted, or in machine clear or transitioning to
2828a long microcode flow while logical processor 1 is in deliver mode.
2829.It Li IB
2830Logical processor 0 is halted, or in machine clear or transitioning to
2831a long microcode flow while logical processor 1 is in build mode.
2832.El
2833If there is only one logical processor in the processor package then
2834the qualifier for logical processor 1 is ignored.
2835If no qualifier is specified, the default qualifier is
2836.Li DD+DB+DI+BD+BB+BI+ID+IB .
2837.It Li p4-tc-ms-xfer Op Li ,mask= Ns Ar flags
2838.Pq "TI event"
2839Count the number of times uop delivery changed from the trace cache to
2840MS ROM.
2841Qualifier
2842.Ar flags
2843can take the following value (which is also the default):
2844.Bl -tag -width indent -compact
2845.It Li cisc
2846Count TC to MS transfers.
2847.El
2848.It Li p4-uop-queue-writes Op Li ,mask= Ns Ar flags
2849.Pq "TS event"
2850Count the number of valid uops written to the uop queue.
2851Qualifier
2852.Ar flags
2853is a list of the following strings, separated by
2854.Li +
2855characters:
2856.Bl -tag -width indent -compact
2857.It Li from-tc-build
2858Count uops being written from the trace cache in build mode.
2859.It Li from-tc-deliver
2860Count uops being written from the trace cache in deliver mode.
2861.It Li from-rom
2862Count uops being written from microcode ROM.
2863.El
2864The default qualifier counts all the above kinds of uops.
2865.It Li p4-uop-type Op Li ,mask= Ns Ar flags
2866.Pq "TS event"
2867This event is used in conjunction with the front-end at-retirement
2868mechanism to tag load and store uops.
2869Qualifier
2870.Ar flags
2871comprises the following strings separated by
2872.Li +
2873characters:
2874.Bl -tag -width indent -compact
2875.It Li tagloads
2876Mark uops that are load operations.
2877.It Li tagstores
2878Mark uops that are store operations.
2879.El
2880The default qualifier counts both kinds of uops.
2881.It Li p4-uops-retired Op Li ,mask= Ns Ar flags
2882.Pq "TS event"
2883Count uops retired during a clock cycle.
2884Qualifier
2885.Ar flags
2886comprises the following strings separated by
2887.Li +
2888characters:
2889.Bl -tag -width indent -compact
2890.It Li nbogus
2891Count marked uops that are not bogus.
2892.It Li bogus
2893Count marked uops that are bogus.
2894.El
2895The default qualifier counts both kinds of uops.
2896.It Li p4-wc-buffer Op Li ,mask= Ns Ar flags
2897.Pq "TI event"
2898Count write-combining buffer operations.
2899Qualifier
2900.Ar flags
2901contains the following strings separated by
2902.Li +
2903characters:
2904.Bl -tag -width indent -compact
2905.It Li wcb-evicts
2906WC buffer evictions due to any cause.
2907.It Li wcb-full-evict
2908WC buffer evictions due to no WC buffer being available.
2909.El
2910The default qualifier counts both kinds of evictions.
2911.It Li p4-x87-assist Op Li ,mask= Ns Ar flags
2912.Pq "TS event"
2913Count the retirement of x87 instructions that required special
2914handling.
2915Qualifier
2916.Ar flags
2917contains the following strings separated by
2918.Li +
2919characters:
2920.Bl -tag -width indent -compact
2921.It Li fpsu
2922Count instructions that saw an FP stack underflow.
2923.It Li fpso
2924Count instructions that saw an FP stack overflow.
2925.It Li poao
2926Count instructions that saw an x87 output overflow.
2927.It Li poau
2928Count instructions that saw an x87 output underflow.
2929.It Li prea
2930Count instructions that needed an x87 input assist.
2931.El
2932The default qualifier counts all the above types of instruction
2933retirements.
2934.It Li p4-x87-fp-uop Op Li ,mask= Ns Ar flags
2935.Pq "TI event"
2936Count x87 floating-point uops.
2937Qualifier
2938.Ar flags
2939can take the following value (which is also the default):
2940.Bl -tag -width indent -compact
2941.It Li all
2942Count all x87 floating-point uops.
2943.El
2944If an instruction contains more than one x87 floating-point uop, then
2945all x87 floating-point uops will be counted.
2946This event does not count x87 floating-point data movement operations.
2947.It Li p4-x87-simd-moves-uop Op Li ,mask= Ns Ar flags
2948.Pq "TI event"
2949Count each x87 FPU, MMX, SSE, or SSE2 uop that loads data or stores
2950data or performs register-to-register moves.
2951This event does not count integer move uops.
2952Qualifier
2953.Ar flags
2954may contain the following keywords separated by
2955.Li +
2956characters:
2957.Bl -tag -width indent -compact
2958.It Li allp0
2959Count all x87 and SIMD store and move uops.
2960.It Li allp2
2961Count all x87 and SIMD load uops.
2962.El
2963The default is to count all uops.
2964.Pq Errata
2965This event may be affected by processor errata N43.
2966.El
2967.Ss "Cascading P4 PMCs"
2968To be filled in.
2969.Ss "Precise Event Based Sampling"
2970To be filled in.
2971.Sh IMPLEMENTATION NOTES
2972On the i386 architecture,
2973.Fx
2974has historically allowed the use of the RDTSC instruction from
2975user-mode (i.e., at a processor CPL of 3) by any process.
2976This behaviour is preserved by
2977.Xr hwpmc 4 .
2978.Sh RETURN VALUES
2979The
2980.Fn pmc_name_of_capability ,
2981.Fn pmc_name_of_class ,
2982.Fn pmc_name_of_cputype ,
2983.Fn pmc_name_of_disposition ,
2984.Fn pmc_name_of_event ,
2985.Fn pmc_name_of_mode ,
2986and
2987.Fn pmc_name_of_state
2988functions return a pointer to the human readable form of their argument.
2989These pointers may point to statically allocated storage and must
2990not be passed to
2991.Fn free .
2992In case of an error, these functions return
2993.Li NULL
2994and set the global variable
2995.Va errno .
2996.Pp
2997The functions
2998.Fn pmc_ncpu
2999and
3000.Fn pmc_npmc
3001return the number of CPUs and number of PMCs configured respectively;
3002in case of an error they return the value
3003.Li -1
3004and set the global variable
3005.Va errno .
3006.Pp
3007All other functions return the value
3008.Li 0
3009if successful; otherwise the value
3010.Li -1
3011is returned and the global variable
3012.Va errno
3013is set to indicate the error.
3014.Sh ERRORS
3015A call to
3016.Fn pmc_init
3017may fail with the following errors in addition to those returned by
3018.Xr modfind 2 ,
3019.Xr modstat 2
3020and
3021.Xr hwpmc 4 :
3022.Bl -tag -width Er
3023.It Bq Er ENXIO
3024An unknown CPU type was encountered during initialization.
3025.It Bq Er EPROGMISMATCH
3026The version number of the
3027.Xr hwpmc 4
3028kernel module did not match that compiled into the
3029.Xr pmc 3
3030library.
3031.El
3032.Pp
3033A call to
3034.Fn pmc_capabilities ,
3035.Fn pmc_name_of_capability ,
3036.Fn pmc_name_of_disposition ,
3037.Fn pmc_name_of_state ,
3038.Fn pmc_name_of_event ,
3039.Fn pmc_name_of_mode ,
3040.Fn pmc_name_of_class
3041and
3042.Fn pmc_width
3043may fail with the following error:
3044.Bl -tag -width Er
3045.It Bq Er EINVAL
3046An invalid argument was passed to the function.
3047.El
3048.Pp
3049A call to
3050.Fn pmc_cpuinfo
3051or
3052.Fn pmc_ncpu
3053may fail with the following error:
3054.Bl -tag -width Er
3055.It Bq Er ENXIO
3056The
3057.Xr pmc 3
3058library has not been initialized.
3059.El
3060.Pp
3061A call to
3062.Fn pmc_npmc
3063may fail with the following errors:
3064.Bl -tag -width Er
3065.It Bq Er EINVAL
3066The argument passed in was out of range.
3067.It Bq Er ENXIO
3068The
3069.Xr pmc 3
3070library has not been initialized.
3071.El
3072.Pp
3073A call to
3074.Fn pmc_pmcinfo
3075may fail with the following errors, in addition to those returned by
3076.Xr hwpmc 4 :
3077.Bl -tag -width Er
3078.It Bq Er ENXIO
3079The
3080.Xr pmc 3
3081library is not yet initialized.
3082.El
3083.Pp
3084A call to
3085.Fn pmc_allocate
3086may fail with the following errors, in addition to those returned by
3087.Xr hwpmc 4 :
3088.Bl -tag -width Er
3089.It Bq Er EINVAL
3090The
3091.Fa mode
3092argument passed in had an illegal value, or the event specification
3093.Fa ctrspec
3094was unrecognized for this CPU type.
3095.El
3096.Pp
3097Calls to
3098.Fn pmc_attach ,
3099.Fn pmc_detach ,
3100.Fn pmc_release ,
3101.Fn pmc_start ,
3102.Fn pmc_stop ,
3103.Fn pmc_read ,
3104.Fn pmc_write ,
3105.Fn pmc_rw ,
3106.Fn pmc_set ,
3107.Fn pmc_configure_logfile ,
3108.Fn pmc_get_driver_stats ,
3109.Fn pmc_enable ,
3110.Fn pmc_disable ,
3111and
3112.Fn pmc_x86_get_msr
3113may fail with the errors described in
3114.Xr hwpmc 4 .
3115.Sh SEE ALSO
3116.Xr modfind 2 ,
3117.Xr modstat 2 ,
3118.Xr hwpmc 4 ,
3119.Xr pmccontrol 8 ,
3120.Xr pmcreport 8 ,
3121.Xr pmcstat 8
3122.Sh BUGS
3123The information returned by
3124.Fn pmc_cpuinfo ,
3125.Fn pmc_ncpu
3126and possibly
3127.Fn pmc_npmc
3128should really be available all the time, through a better designed
3129interface.
3130.Pp
3131The API for
3132.Fn pmc_cpuinfo
3133and
3134.Fn pmc_pmcinfo
3135exposes too much of the underlying
3136.Xr hwpmc 4
3137driver's internals to userland.
3138