1.\" Copyright (c) 2003-2005 Joseph Koshy. All rights reserved. 2.\" 3.\" Redistribution and use in source and binary forms, with or without 4.\" modification, are permitted provided that the following conditions 5.\" are met: 6.\" 1. Redistributions of source code must retain the above copyright 7.\" notice, this list of conditions and the following disclaimer. 8.\" 2. Redistributions in binary form must reproduce the above copyright 9.\" notice, this list of conditions and the following disclaimer in the 10.\" documentation and/or other materials provided with the distribution. 11.\" 12.\" This software is provided by Joseph Koshy ``as is'' and 13.\" any express or implied warranties, including, but not limited to, the 14.\" implied warranties of merchantability and fitness for a particular purpose 15.\" are disclaimed. in no event shall Joseph Koshy be liable 16.\" for any direct, indirect, incidental, special, exemplary, or consequential 17.\" damages (including, but not limited to, procurement of substitute goods 18.\" or services; loss of use, data, or profits; or business interruption) 19.\" however caused and on any theory of liability, whether in contract, strict 20.\" liability, or tort (including negligence or otherwise) arising in any way 21.\" out of the use of this software, even if advised of the possibility of 22.\" such damage. 
23.\" 24.\" $FreeBSD$ 25.\" 26.Dd Apr 15, 2005 27.Os 28.Dt PMC 3 29.Sh NAME 30.Nm pmc_allocate , 31.Nm pmc_attach , 32.Nm pmc_capabilities , 33.Nm pmc_configure_logfile , 34.Nm pmc_cpuinfo , 35.Nm pmc_detach , 36.Nm pmc_disable , 37.Nm pmc_enable , 38.Nm pmc_event_names_of_class , 39.Nm pmc_flush_logfile , 40.Nm pmc_get_driver_stats , 41.Nm pmc_get_msr , 42.Nm pmc_init , 43.Nm pmc_name_of_capability , 44.Nm pmc_name_of_class , 45.Nm pmc_name_of_cputype , 46.Nm pmc_name_of_event , 47.Nm pmc_name_of_mode , 48.Nm pmc_name_of_state , 49.Nm pmc_ncpu , 50.Nm pmc_npmc , 51.Nm pmc_pmcinfo , 52.Nm pmc_read , 53.Nm pmc_release , 54.Nm pmc_rw , 55.Nm pmc_set , 56.Nm pmc_start , 57.Nm pmc_stop , 58.Nm pmc_width , 59.Nm pmc_write , 60.Nm pmc_writelog 61.Nd programming API for using hardware performance monitoring counters 62.Sh LIBRARY 63.Lb libpmc 64.Sh SYNOPSIS 65.In pmc.h 66.Ft int 67.Fo pmc_allocate 68.Fa "const char *eventspecifier" 69.Fa "enum pmc_mode mode" 70.Fa "uint32_t flags" 71.Fa "uint32_t cpu" 72.Fa "pmc_id_t *pmcid" 73.Fc 74.Ft int 75.Fo pmc_attach 76.Fa "pmc_id_t pmcid" 77.Fa "pid_t pid" 78.Fc 79.Ft int 80.Fn pmc_capabilities "pmc_id_t pmc" "uint32_t *caps" 81.Ft int 82.Fn pmc_configure_logfile "int fd" 83.Ft int 84.Fn pmc_cpuinfo "const struct pmc_cpuinfo **cpu_info" 85.Ft int 86.Fo pmc_detach 87.Fa "pmc_id_t pmcid" 88.Fa "pid_t pid" 89.Fc 90.Ft int 91.Fn pmc_disable "uint32_t cpu" "int pmc" 92.Ft int 93.Fn pmc_enable "uint32_t cpu" "int pmc" 94.Ft int 95.Fo pmc_event_names_of_class 96.Fa "enum pmc_class cl" 97.Fa "const char ***eventnames" 98.Fa "int *nevents" 99.Fc 100.Ft int 101.Fn pmc_flush_logfile "void" 102.Ft int 103.Fn pmc_get_driver_stats "struct pmc_driverstats *gms" 104.Ft int 105.Fn pmc_get_msr "pmc_id_t pmc" "uint32_t *msr" 106.Ft int 107.Fn pmc_init "void" 108.Ft "const char *" 109.Fn pmc_name_of_capability "enum pmc_caps pc" 110.Ft "const char *" 111.Fn pmc_name_of_class "enum pmc_class pc" 112.Ft "const char *" 113.Fn pmc_name_of_cputype "enum 
pmc_cputype ct" 114.Ft "const char *" 115.Fn pmc_name_of_disposition "enum pmc_disp pd" 116.Ft "const char *" 117.Fn pmc_name_of_event "enum pmc_event pe" 118.Ft "const char *" 119.Fn pmc_name_of_mode "enum pmc_mode pm" 120.Ft "const char *" 121.Fn pmc_name_of_state "enum pmc_state ps" 122.Ft int 123.Fn pmc_ncpu "void" 124.Ft int 125.Fn pmc_npmc "uint32_t cpu" 126.Ft int 127.Fn pmc_pmcinfo "uint32_t cpu" "struct pmc_pmcinfo **pmc_info" 128.Ft int 129.Fn pmc_read "pmc_id_t pmc" "pmc_value_t *value" 130.Ft int 131.Fn pmc_release "pmc_id_t pmc" 132.Ft int 133.Fn pmc_rw "pmc_id_t pmc" "pmc_value_t newvalue" "pmc_value_t *oldvaluep" 134.Ft int 135.Fn pmc_set "pmc_id_t pmc" "pmc_value_t value" 136.Ft int 137.Fn pmc_start "pmc_id_t pmc" 138.Ft int 139.Fn pmc_stop "pmc_id_t pmc" 140.Ft int 141.Fn pmc_write "pmc_id_t pmc" "pmc_value_t value" 142.Ft int 143.Fn pmc_writelog "uint32_t userdata" 144.Ft int 145.Fn pmc_width "pmc_id_t pmc" "uint32_t *width" 146.Sh DESCRIPTION 147These functions implement a high-level library for using the 148system's hardware performance counters. 149.Pp 150PMCs are allocated using 151.Fn pmc_allocate , 152released using 153.Fn pmc_release 154and read using 155.Fn pmc_read . 156Allocated PMCs may be started or stopped at any time using 157.Fn pmc_start 158and 159.Fn pmc_stop 160respectively. 161An allocated PMC may be of 162.Qq global 163scope, meaning that the PMC measures system-wide events, or 164.Qq process-private 165scope, meaning that the PMC only counts hardware events when 166the allocating process (or, optionally, its children) 167are active. 168.Pp 169PMCs may further be in 170.Qq "counting mode" , 171or in 172.Qq "sampling mode" . 173Sampling mode PMCs deliver an interrupt to the CPU after 174a configured number of hardware events have been seen. 
175A process-private sampling mode PMC will cause its owner 176process to get periodic 177.Sy SIGPROF 178interrupts, while a global sampling mode PMC is used to 179do system-wide statistical sampling (see 180.Xr hwpmc 4 ) . 181The sampling rate desired of a sampling-mode PMC is set using 182.Fn pmc_set . 183Counting mode PMCs do not interrupt the CPU; their values 184can be read using 185.Fn pmc_read . 186.Pp 187System-wide statistical sampling is configured by allocating 188at least one sampling mode PMC with 189global scope, and by configuring a log file using 190.Fn pmc_configure_logfile . 191The 192.Xr hwpmc 4 193driver manages system-wide statistical sampling; for more 194information please see 195.Xr hwpmc 4 . 196.Ss APPLICATION PROGRAMMING INTERFACE 197.Fn pmc_init 198initializes the 199.Xr pmc 3 200library. 201This function must be called first, before any of the other 202functions in the library. 203.Pp 204.Fn pmc_allocate 205allocates a counter that counts the events named by 206.Fa eventspecifier , 207and writes the allocated counter id to 208.Fa *pmcid . 209Argument 210.Fa eventspecifier 211comprises a PMC event name followed by an optional comma separated 212list of keywords and qualifiers. 213The allowed syntax for 214.Fa eventspecifier 215is processor architecture specific and is listed in section 216.Sx "EVENT SPECIFIERS" 217below. 218The desired PMC mode is specified by 219.Fa mode , 220and any mode specific modifiers are specified using 221.Fa flags . 222The 223.Fa cpu 224argument is the value 225.Li PMC_CPU_ANY , 226or names the CPU the allocation is to be on. 227Requesting a specific CPU only makes sense for global PMCs; 228process-private PMC allocations should always specify 229.Li PMC_CPU_ANY . 230.Pp 231By default a PMC configured in process-virtual counting mode is set up 232to profile its owner process. 233The function 234.Fn pmc_attach 235may be used to attach the PMC to a different process. 
236.Fn pmc_attach 237needs to be called before the counter is first started 238with 239.Fn pmc_start . 240The function 241.Fn pmc_detach 242may be used to detach a PMC from a process it was attached to 243using a prior call to 244.Fn pmc_attach . 245.Pp 246.Fn pmc_release 247releases a PMC previously allocated with 248.Fn pmc_allocate . 249This function call implicitly detaches the PMC from all its target 250processes. 251.Pp 252An allocated PMC may be started and stopped using 253.Fn pmc_start 254and 255.Fn pmc_stop 256respectively. 257.Pp 258The current value of a PMC may be read with 259.Fn pmc_read 260and written using 261.Fn pmc_write , 262provided the underlying hardware supports these operations on 263the allocated PMC. 264The read and write operations may be combined using 265.Fn pmc_rw . 266.Pp 267The function 268.Fn pmc_capabilities 269sets argument 270.Fa caps 271to a bitmask of capabilities supported by the PMC denoted by 272argument 273.Fa pmc . 274The function 275.Fn pmc_width 276sets argument 277.Fa width 278to the width of the PMC denoted by argument 279.Fa pmc . 280.Pp 281The 282.Fn pmc_configure_logfile 283function causes the 284.Xr hwpmc 4 285driver to log performance data to the file corresponding 286to the process' file handle 287.Fa fd . 288If argument 289.Fa fd 290is -1, then any previously configured logging is reset 291and all data queued to be written are discarded. 292.Pp 293The 294.Fn pmc_flush_logfile 295function will send all data queued inside the 296.Xr hwpmc 4 297driver to the configured log file before returning. 298The 299.Fn pmc_writelog 300function will append a log entry containing the argument 301.Fa userdata 302to the log file. 303.Pp 304.Fn pmc_set 305configures a sampling PMC 306.Fa pmc 307to interrupt every 308.Fa value 309events. 310For counting PMCs, 311.Fn pmc_set 312sets the initial value of the PMC to 313.Fa value . 
314.Pp 315.Fn pmc_get_driver_stats 316copies a snapshot of the usage statistics maintained by 317.Xr hwpmc 4 318into the memory area pointed to by argument 319.Fa gms . 320.Ss SIGNAL HANDLING REQUIREMENTS 321Applications using PMCs are required to handle the following signals: 322.Bl -tag -width indent 323.It SIGBUS 324When the 325.Xr hwpmc 4 326module is unloaded using 327.Xr kldunload 8 , 328processes that have PMCs allocated to them will be sent a 329SIGBUS signal. 330.It SIGIO 331The 332.Xr hwpmc 4 333driver will send a PMC owning process a SIGIO signal if: 334.Bl -bullet 335.It 336Any process-mode PMC allocated by it loses all its 337target processes. 338.It 339The driver encounters an error when writing log data to a 340configured log file. 341This error may be retrieved by a subsequent call to 342.Fn pmc_flush_logfile . 343.El 344.El 345.Ss CONVENIENCE FUNCTIONS 346.Fn pmc_ncpu 347returns the number of CPUs present in the system. 348.Pp 349.Fn pmc_npmc 350returns the number of PMCs supported on CPU 351.Fa cpu . 352.Fn pmc_cpuinfo 353sets argument 354.Fa cpu_info 355to point to a structure with information about the system's CPUs. 356Function 357.Fn pmc_pmcinfo 358returns information about the current state of CPU 359.Fa cpu Ap s 360PMCs. 361This function sets argument 362.Fa *pmc_info 363to point to a memory area allocated with 364.Xr calloc 3 . 365The caller is expected to 366.Fn free 367the area when done. 368.Pp 369The functions 370.Fn pmc_name_of_capability , 371.Fn pmc_name_of_class , 372.Fn pmc_name_of_cputype , 373.Fn pmc_name_of_disposition , 374.Fn pmc_name_of_event , 375.Fn pmc_name_of_mode 376and 377.Fn pmc_name_of_state 378are useful for code wanting to print error messages. 379They return 380.Ft "const char *" 381pointers to human-readable representations of their arguments. 382These return values should not be freed using 383.Xr free 3 . 
384.Pp 385.Fn pmc_event_names_of_class 386returns a list of event names supported by a given PMC class 387.Fa cl . 388On successful return, an array of 389.Ft "const char *" 390pointers to the names of valid events supported by class 391.Fa cl 392is allocated by the library using 393.Xr malloc 3 , 394and a pointer to this array is returned in the location pointed to by 395.Fa eventnames . 396The number of pointers allocated is returned in the location pointed 397to by 398.Fa nevents . 399.Ss ADMINISTRATION 400Individual PMCs may be enabled or disabled on a given CPU using 401.Fn pmc_enable 402and 403.Fn pmc_disable 404respectively. 405For these functions, 406.Fa cpu 407is the CPU number, and 408.Fa pmc 409is the index of the PMC to be operated on. 410Only the super-user is allowed to enable and disable PMCs. 411.Ss X86 ARCHITECTURE SPECIFIC API 412The 413.Fn pmc_get_msr 414function returns the processor model specific register number 415associated with 416.Fa pmc . 417Applications may use the x86 418.Sy RDPMC 419instruction to directly read the contents of the PMC. 420.Sh EVENT SPECIFIERS 421Event specifiers are strings comprising an event name, followed by 422optional parameters modifying the semantics of the hardware event 423being probed. 424Event names are PMC architecture dependent, but the 425.Xr pmc 3 426library defines machine independent aliases for commonly used 427events. 428.Ss Event Name Aliases 429Event name aliases are CPU architecture independent names for commonly 430used events. 431The following aliases are known to this version of the 432.Xr pmc 3 433library: 434.Bl -tag -width indent 435.It Li branches 436Measure the number of branches retired. 437.It Li branch-mispredicts 438Measure the number of retired branches that were mispredicted. 439.It Li cycles 440Measure processor cycles. 441This event is implemented using the processor's Time Stamp Counter 442register. 443.It Li dc-misses 444Measure the number of data cache misses. 
445.It Li ic-misses 446Measure the number of instruction cache misses. 447.It Li instructions 448Measure the number of instructions retired. 449.It Li interrupts 450Measure the number of interrupts seen. 451.El 452.Ss Time Stamp Counter (TSC) 453The timestamp counter is a monotonically non-decreasing counter that 454counts processor cycles. 455.Pp 456In the i386 architecture this counter may 457be selected by requesting an event with eventspecifier 458.Ic tsc . 459The 460.Ic tsc 461event does not support any further qualifiers. 462It can only be allocated in system-wide counting mode, 463and is a read-only counter. 464Multiple processes are allowed to allocate the TSC. 465Once allocated, it may be read using the 466.Fn pmc_read 467function, or by using the RDTSC instruction. 468.Ss AMD (K7) PMCs 469These PMCs are present in the 470.Tn "AMD Athlon" 471series of CPUs and are documented in: 472.Rs 473.%B "AMD Athlon Processor x86 Code Optimization Guide" 474.%N "Publication No. 22007" 475.%D "February 2002" 476.%Q "Advanced Micro Devices, Inc." 477.Re 478.Pp 479Event specifiers for AMD K7 PMCs can have the following optional 480qualifiers: 481.Bl -tag -width indent 482.It Li count= Ns Ar value 483Configure the counter to increment only if the number of configured 484events measured in a cycle is greater than or equal to 485.Ar value . 486.It Li edge 487Configure the counter to only count negated-to-asserted transitions 488of the conditions expressed by the other qualifiers. 489In other words, the counter will increment only once whenever a given 490condition becomes true, irrespective of the number of clocks during 491which the condition remains true. 492.It Li inv 493Invert the sense of comparison when the 494.Li count 495qualifier is present, making the counter increment when the 496number of events per cycle is less than the value specified by 497the 498.Li count 499qualifier. 500.It Li os 501Configure the PMC to count events happening at privilege level 0. 
502.It Li unitmask= Ns Ar mask 503This qualifier is used to further qualify a select few events, 504.Li k7-dc-refills-from-l2 , 505.Li k7-dc-refills-from-system 506and 507.Li k7-dc-writebacks . 508Here 509.Ar mask 510is a string of the following characters optionally separated by 511.Li "+" 512characters: 513.Bl -tag -width indent -compact 514.It Li m 515Count operations for lines in the 516.Dq Modified 517state. 518.It Li o 519Count operations for lines in the 520.Dq Owner 521state. 522.It Li e 523Count operations for lines in the 524.Dq Exclusive 525state. 526.It Li s 527Count operations for lines in the 528.Dq Shared 529state. 530.It Li i 531Count operations for lines in the 532.Dq Invalid 533state. 534.El 535If no 536.Ar unitmask 537qualifier is specified, the default is to count events for cache 538lines in any of the above states. 539.It Li usr 540Configure the PMC to count events occurring at privilege levels 1, 2 541or 3. 542.El 543If neither of the 544.Li os 545or 546.Li usr 547qualifiers were specified, the default is to enable both. 548.Pp 549The event specifiers supported on AMD K7 PMCs are: 550.Bl -tag -width indent 551.It Li k7-dc-accesses 552Count data cache accesses. 553.It Li k7-dc-misses 554Count data cache misses. 555.It Li k7-dc-refills-from-l2 Op Li ,unitmask= Ns Ar mask 556Count data cache refills from L2 cache. 557This event may be further qualified using the 558.Li unitmask 559qualifier. 560.It Li k7-dc-refills-from-system Op Li ,unitmask= Ns Ar mask 561Count data cache refills from system memory. 562This event may be further qualified using the 563.Li unitmask 564qualifier. 565.It Li k7-dc-writebacks Op Li ,unitmask= Ns Ar mask 566Count data cache writebacks. 567This event may be further qualified using the 568.Li unitmask 569qualifier. 570.It Li k7-l1-dtlb-miss-and-l2-dtlb-hits 571Count L1 DTLB misses and L2 DTLB hits. 572.It Li k7-l1-and-l2-dtlb-misses 573Count L1 and L2 DTLB misses. 
574.It Li k7-misaligned-references 575Count misaligned data references. 576.It Li k7-ic-fetches 577Count instruction cache fetches. 578.It Li k7-ic-misses 579Count instruction cache misses. 580.It Li k7-l1-itlb-misses 581Count L1 ITLB misses that are L2 ITLB hits. 582.It Li k7-l1-l2-itlb-misses 583Count L1 (and L2) ITLB misses. 584.It Li k7-retired-instructions 585Count all retired instructions. 586.It Li k7-retired-ops 587Count retired ops. 588.It Li k7-retired-branches 589Count all retired branches (conditional, unconditional, exceptions 590and interrupts). 591.It Li k7-retired-branches-mispredicted 592Count all mispredicted retired branches. 593.It Li k7-retired-taken-branches 594Count retired taken branches. 595.It Li k7-retired-taken-branches-mispredicted 596Count mispredicted taken branches that were retired. 597.It Li k7-retired-far-control-transfers 598Count retired far control transfers. 599.It Li k7-retired-resync-branches 600Count retired resync branches (non control transfer branches). 601.It Li k7-interrupts-masked-cycles 602Count the number of cycles when the processor's 603.Li IF 604flag was zero. 605.It Li k7-interrupts-masked-while-pending-cycles 606Count the number of cycles interrupts were masked while pending due 607to the processor's 608.Li IF 609flag being zero. 610.It Li k7-hardware-interrupts 611Count the number of taken hardware interrupts. 612.El 613.Ss AMD (K8) PMCs 614These PMCs are present in the 615.Tn "AMD Athlon64" 616and 617.Tn "AMD Opteron" 618series of CPUs. 619They are documented in: 620.Rs 621.%B "BIOS and Kernel Developer's Guide for the AMD Athlon(tm) 64 and AMD Opteron Processors" 622.%N "Publication No. 26094" 623.%D "April 2004" 624.%Q "Advanced Micro Devices, Inc." 
625.Re 626.Pp 627Event specifiers for AMD K8 PMCs can have the following optional 628qualifiers: 629.Bl -tag -width indent 630.It Li count= Ns Ar value 631Configure the counter to increment only if the number of configured 632events measured in a cycle is greater than or equal to 633.Ar value . 634.It Li edge 635Configure the counter to only count negated-to-asserted transitions 636of the conditions expressed by the other fields. 637In other words, the counter will increment only once whenever a given 638condition becomes true, irrespective of the number of clocks during 639which the condition remains true. 640.It Li inv 641Invert the sense of comparison when the 642.Li count 643qualifier is present, making the counter increment when the 644number of events per cycle is less than the value specified by 645the 646.Li count 647qualifier. 648.It Li mask= Ns Ar qualifier 649Many event specifiers for AMD K8 PMCs need to be additionally 650qualified using a mask qualifier. 651These additional qualifiers are event-specific and are documented 652along with their associated event specifiers below. 653.It Li os 654Configure the PMC to count events happening at privilege level 0. 655.It Li usr 656Configure the PMC to count events occurring at privilege levels 1, 2 657or 3. 658.El 659If neither of the 660.Li os 661or 662.Li usr 663qualifiers were specified, the default is to enable both. 664.Pp 665The event specifiers supported on AMD K8 PMCs are: 666.Bl -tag -width indent 667.It Li k8-bu-cpu-clk-unhalted 668Count the number of clock cycles when the CPU is not in the HLT or 669STPCLK states. 670.It Li k8-bu-fill-request-l2-miss Op Li ,mask= Ns Ar qualifier 671Count fill requests that missed in the L2 cache. 672This event may be further qualified using 673.Ar qualifier , 674which is a 675.Li + Ns - Ns 676separated set of the following keywords: 677.Bl -tag -width "XXXXXXXXXX" -compact 678.It Li dc-fill 679Count data cache fill requests. 
680.It Li ic-fill 681Count instruction cache fill requests. 682.It Li tlb-reload 683Count TLB reloads. 684.El 685The default is to count all types of requests. 686.It Li k8-bu-internal-l2-request Op Li ,mask= Ns Ar qualifier 687Count internally generated requests to the L2 cache. 688This event may be further qualified using 689.Ar qualifier , 690which is a 691.Li "+" Ns - Ns 692separated set of the following keywords: 693.Bl -tag -width "XXXXXXXXXX" -compact 694.It Li cancelled 695Count cancelled requests. 696.It Li dc-fill 697Count data cache fill requests. 698.It Li ic-fill 699Count instruction cache fill requests. 700.It Li tag-snoop 701Count tag snoop requests. 702.It Li tlb-reload 703Count TLB reloads. 704.El 705The default is to count all types of requests. 706.It Li k8-dc-access 707Count data cache accesses including microcode scratchpad accesses. 708.It Li k8-dc-copyback Op Li ,mask= Ns Ar qualifier 709Count data cache copyback operations. 710This event may be further qualified using 711.Ar qualifier , 712which is a 713.Li "+" Ns - Ns 714separated set of the following keywords: 715.Bl -tag -width "exclusive" -compact 716.It Li exclusive 717Count operations for lines in the 718.Dq exclusive 719state. 720.It Li invalid 721Count operations for lines in the 722.Dq invalid 723state. 724.It Li modified 725Count operations for lines in the 726.Dq modified 727state. 728.It Li owner 729Count operations for lines in the 730.Dq owner 731state. 732.It Li shared 733Count operations for lines in the 734.Dq shared 735state. 736.El 737The default is to count operations for lines in all the 738above states. 739.It Li k8-dc-dcache-accesses-by-locks Op Li ,mask= Ns Ar qualifier 740Count data cache accesses by lock instructions. 741This event is only available on processors of revision C or later 742vintage. 
743This event may be further qualified using 744.Ar qualifier , 745which is a 746.Li "+" Ns - Ns 747separated set of the following keywords: 748.Bl -tag -width "exclusive" -compact 749.It Li accesses 750Count data cache accesses by lock instructions. 751.It Li misses 752Count data cache misses by lock instructions. 753.El 754The default is to count all accesses. 755.It Li k8-dc-dispatched-prefetch-instructions Op Li ,mask= Ns Ar qualifier 756Count the number of dispatched prefetch instructions. 757This event may be further qualified using 758.Ar qualifier , 759which is a 760.Li "+" Ns - Ns 761separated set of the following keywords: 762.Bl -tag -width "exclusive" -compact 763.It Li load 764Count load operations. 765.It Li nta 766Count non-temporal operations. 767.It Li store 768Count store operations. 769.El 770The default is to count all operations. 771.It Li k8-dc-l1-dtlb-miss-and-l2-dtlb-hit 772Count L1 DTLB misses that are L2 DTLB hits. 773.It Li k8-dc-l1-dtlb-miss-and-l2-dtlb-miss 774Count L1 DTLB misses that are also misses in the L2 DTLB. 775.It Li k8-dc-microarchitectural-early-cancel-of-an-access 776Count microarchitectural early cancels of data cache accesses. 777.It Li k8-dc-microarchitectural-late-cancel-of-an-access 778Count microarchitectural late cancels of data cache accesses. 779.It Li k8-dc-misaligned-data-reference 780Count misaligned data references. 781.It Li k8-dc-miss 782Count data cache misses. 783.It Li k8-dc-one-bit-ecc-error Op Li ,mask= Ns Ar qualifier 784Count one bit ECC errors found by the scrubber. 785This event may be further qualified using 786.Ar qualifier , 787which is a 788.Li "+" Ns - Ns 789separated set of the following keywords: 790.Bl -tag -width "piggyback" -compact 791.It Li scrubber 792Count scrubber detected errors. 793.It Li piggyback 794Count piggyback scrubber errors. 795.El 796The default is to count both kinds of errors. 
797.It Li k8-dc-refill-from-l2 Op Li ,mask= Ns Ar qualifier 798Count data cache refills from L2 cache. 799This event may be further qualified using 800.Ar qualifier , 801which is a 802.Li "+" Ns - Ns 803separated set of the following keywords: 804.Bl -tag -width "exclusive" -compact 805.It Li exclusive 806Count operations for lines in the 807.Dq exclusive 808state. 809.It Li invalid 810Count operations for lines in the 811.Dq invalid 812state. 813.It Li modified 814Count operations for lines in the 815.Dq modified 816state. 817.It Li owner 818Count operations for lines in the 819.Dq owner 820state. 821.It Li shared 822Count operations for lines in the 823.Dq shared 824state. 825.El 826The default is to count operations for lines in all the 827above states. 828.It Li k8-dc-refill-from-system Op Li ,mask= Ns Ar qualifier 829Count data cache refills from system memory. 830This event may be further qualified using 831.Ar qualifier , 832which is a 833.Li "+" Ns - Ns 834separated set of the following keywords: 835.Bl -tag -width "exclusive" -compact 836.It Li exclusive 837Count operations for lines in the 838.Dq exclusive 839state. 840.It Li invalid 841Count operations for lines in the 842.Dq invalid 843state. 844.It Li modified 845Count operations for lines in the 846.Dq modified 847state. 848.It Li owner 849Count operations for lines in the 850.Dq owner 851state. 852.It Li shared 853Count operations for lines in the 854.Dq shared 855state. 856.El 857The default is to count operations for lines in all the 858above states. 859.It Li k8-fp-dispatched-fpu-ops Op Li ,mask= Ns Ar qualifier 860Count the number of dispatched FPU ops. 861This event is supported in revision B and later CPUs. 862This event may be further qualified using 863.Ar qualifier , 864which is a 865.Li "+" Ns - Ns 866separated set of the following keywords: 867.Bl -tag -width "XXXXXXXXXX" -compact 868.It Li add-pipe-excluding-junk-ops 869Count add pipe ops excluding junk ops. 
870.It Li add-pipe-junk-ops 871Count junk ops in the add pipe. 872.It Li multiply-pipe-excluding-junk-ops 873Count multiply pipe ops excluding junk ops. 874.It Li multiply-pipe-junk-ops 875Count junk ops in the multiply pipe. 876.It Li store-pipe-excluding-junk-ops 877Count store pipe ops excluding junk ops. 878.It Li store-pipe-junk-ops 879Count junk ops in the store pipe. 880.El 881The default is to count all types of ops. 882.It Li k8-fp-cycles-with-no-fpu-ops-retired 883Count cycles when no FPU ops were retired. 884This event is supported in revision B and later CPUs. 885.It Li k8-fp-dispatched-fpu-fast-flag-ops 886Count dispatched FPU ops that use the fast flag interface. 887This event is supported in revision B and later CPUs. 888.It Li k8-fr-decoder-empty 889Count cycles when there was nothing to dispatch (i.e., the decoder 890was empty). 891.It Li k8-fr-dispatch-stalls 892Count all dispatch stalls. 893.It Li k8-fr-dispatch-stall-for-segment-load 894Count dispatch stalls for segment loads. 895.It Li k8-fr-dispatch-stall-for-serialization 896Count dispatch stalls for serialization. 897.It Li k8-fr-dispatch-stall-from-branch-abort-to-retire 898Count dispatch stalls from branch abort to retiral. 899.It Li k8-fr-dispatch-stall-when-fpu-is-full 900Count dispatch stalls when the FPU is full. 901.It Li k8-fr-dispatch-stall-when-ls-is-full 902Count dispatch stalls when the load/store unit is full. 903.It Li k8-fr-dispatch-stall-when-reorder-buffer-is-full 904Count dispatch stalls when the reorder buffer is full. 905.It Li k8-fr-dispatch-stall-when-reservation-stations-are-full 906Count dispatch stalls when reservation stations are full. 907.It Li k8-fr-dispatch-stall-when-waiting-for-all-to-be-quiet 908Count dispatch stalls when waiting for all to be quiet. 909.\" XXX What does "waiting for all to be quiet" mean? 
910.It Li k8-fr-dispatch-stall-when-waiting-far-xfer-or-resync-branch-pending 911Count dispatch stalls when a far control transfer or a resync branch 912is pending. 913.It Li k8-fr-fpu-exceptions Op Li ,mask= Ns Ar qualifier 914Count FPU exceptions. 915This event is supported in revision B and later CPUs. 916This event may be further qualified using 917.Ar qualifier , 918which is a 919.Li "+" Ns - Ns 920separated set of the following keywords: 921.Bl -tag -width "XXXXXXXXXX" -compact 922.It Li sse-and-x87-microtraps 923Count SSE and x87 microtraps. 924.It Li sse-reclass-microfaults 925Count SSE reclass microfaults. 926.It Li sse-retype-microfaults 927Count SSE retype microfaults. 928.It Li x87-reclass-microfaults 929Count x87 reclass microfaults. 930.El 931The default is to count all types of exceptions. 932.It Li k8-fr-interrupts-masked-cycles 933Count cycles when interrupts were masked (i.e., when CPU RFLAGS field IF was zero). 934.It Li k8-fr-interrupts-masked-while-pending-cycles 935Count cycles while interrupts were masked while pending (i.e., cycles 936when INTR was asserted while CPU RFLAGS field IF was zero). 937.It Li k8-fr-number-of-breakpoints-for-dr0 938Count the number of breakpoints for DR0. 939.It Li k8-fr-number-of-breakpoints-for-dr1 940Count the number of breakpoints for DR1. 941.It Li k8-fr-number-of-breakpoints-for-dr2 942Count the number of breakpoints for DR2. 943.It Li k8-fr-number-of-breakpoints-for-dr3 944Count the number of breakpoints for DR3. 945.It Li k8-fr-retired-branches 946Count retired branches including exceptions and interrupts. 947.It Li k8-fr-retired-branches-mispredicted 948Count mispredicted retired branches. 949.It Li k8-fr-retired-far-control-transfers 950Count retired far control transfers (which are always mispredicted). 951.It Li k8-fr-retired-fastpath-double-op-instructions Op Li ,mask= Ns Ar qualifier 952Count retired fastpath double op instructions. 953This event is supported in revision B and later CPUs. 
954This event may be further qualified using 955.Ar qualifier , 956which is a 957.Li "+" Ns - Ns 958separated set of the following keywords: 959.Bl -tag -width "XXXXXXXXXXXX" -compact 960.It Li low-op-pos-0 961Count instructions with the low op in position 0. 962.It Li low-op-pos-1 963Count instructions with the low op in position 1. 964.It Li low-op-pos-2 965Count instructions with the low op in position 2. 966.El 967The default is to count all types of instructions. 968.It Li k8-fr-retired-fpu-instructions Op Li ,mask= Ns Ar qualifier 969Count retired FPU instructions. 970This event is supported in revision B and later CPUs. 971This event may be further qualified using 972.Ar qualifier , 973which is a 974.Li "+" Ns - Ns 975separated set of the following keywords: 976.Bl -tag -width "XXXXXXXXXX" -compact 977.It Li mmx-3dnow 978Count MMX and 3DNow! instructions. 979.It Li packed-sse-sse2 980Count packed SSE and SSE2 instructions. 981.It Li scalar-sse-sse2 982Count scalar SSE and SSE2 instructions. 983.It Li x87 984Count x87 instructions. 985.El 986The default is to count all types of instructions. 987.It Li k8-fr-retired-near-returns 988Count retired near returns. 989.It Li k8-fr-retired-near-returns-mispredicted 990Count mispredicted near returns. 991.It Li k8-fr-retired-resyncs 992Count retired resyncs (non-control transfer branches). 993.It Li k8-fr-retired-taken-hardware-interrupts 994Count retired taken hardware interrupts. 995.It Li k8-fr-retired-taken-branches 996Count retired taken branches. 997.It Li k8-fr-retired-taken-branches-mispredicted 998Count retired taken branches that were mispredicted. 999.It Li k8-fr-retired-taken-branches-mispredicted-by-addr-miscompare 1000Count retired taken branches that were mispredicted only due to an 1001address miscompare. 1002.It Li k8-fr-retired-uops 1003Count retired uops. 1004.It Li k8-fr-retired-x86-instructions 1005Count retired x86 instructions including exceptions and interrupts. 
1006.It Li k8-ic-fetch 1007Count instruction cache fetches. 1008.It Li k8-ic-instruction-fetch-stall 1009Count cycles in stalls due to instruction fetch. 1010.It Li k8-ic-l1-itlb-miss-and-l2-itlb-hit 1011Count L1 ITLB misses that are L2 ITLB hits. 1012.It Li k8-ic-l1-itlb-miss-and-l2-itlb-miss 1013Count ITLB misses that miss in both L1 and L2 ITLBs. 1014.It Li k8-ic-microarchitectural-resync-by-snoop 1015Count microarchitectural resyncs caused by snoops. 1016.It Li k8-ic-miss 1017Count instruction cache misses. 1018.It Li k8-ic-refill-from-l2 1019Count instruction cache refills from L2 cache. 1020.It Li k8-ic-refill-from-system 1021Count instruction cache refills from system memory. 1022.It Li k8-ic-return-stack-hits 1023Count hits to the return stack. 1024.It Li k8-ic-return-stack-overflow 1025Count overflows of the return stack. 1026.It Li k8-ls-buffer2-full 1027Count load/store buffer2 full events. 1028.It Li k8-ls-locked-operation Op Li ,mask= Ns Ar qualifier 1029Count locked operations. 1030For revision C and later CPUs, the following qualifiers are supported: 1031.Bl -tag -width "XXXXXXXXXXXXX" -compact 1032.It Li cycles-in-request 1033Count the number of cycles in the lock request/grant stage. 1034.It Li cycles-to-complete 1035Count the number of cycles a lock takes to complete once it is 1036non-speculative and is the oldest load/store operation. 1037.It Li locked-instructions 1038Count the number of lock instructions executed. 1039.El 1040The default is to count the number of lock instructions executed. 1041.It Li k8-ls-microarchitectural-late-cancel 1042Count microarchitectural late cancels of operations in the load/store 1043unit. 1044.It Li k8-ls-microarchitectural-resync-by-self-modifying-code 1045Count microarchitectural resyncs caused by self-modifying code. 1046.It Li k8-ls-microarchitectural-resync-by-snoop 1047Count microarchitectural resyncs caused by snoops. 1048.It Li k8-ls-retired-cflush-instructions 1049Count retired CLFLUSH instructions. 
1050.It Li k8-ls-retired-cpuid-instructions 1051Count retired CPUID instructions. 1052.It Li k8-ls-segment-register-load Op Li ,mask= Ns Ar qualifier 1053Count segment register loads. 1054This event may be further qualified using 1055.Ar qualifier , 1056which is a 1057.Li "+" Ns - Ns 1058separated set of the following keywords: 1059.Bl -tag -width "XX" -compact 1060.It Li cs 1061Count CS register loads. 1062.It Li ds 1063Count DS register loads. 1064.It Li es 1065Count ES register loads. 1066.It Li fs 1067Count FS register loads. 1068.It Li gs 1069Count GS register loads. 1070.\" .It Ic hs 1071.\" Count HS register loads. 1072.\" XXX "HS" register? 1073.It Li ss 1074Count SS register loads. 1075.El 1076The default is to count all types of loads. 1077.It Li k8-nb-memory-controller-bypass-saturation Op Li ,mask= Ns Ar qualifier 1078Count memory controller bypass counter saturation events. 1079This event may be further qualified using 1080.Ar qualifier , 1081which is a 1082.Li "+" Ns - Ns 1083separated set of the following keywords: 1084.Bl -tag -width "XXXXXXXXXX" -compact 1085.It Li dram-controller-interface-bypass 1086Count DRAM controller interface bypass. 1087.It Li dram-controller-queue-bypass 1088Count DRAM controller queue bypass. 1089.It Li memory-controller-hi-pri-bypass 1090Count memory controller high priority bypasses. 1091.It Li memory-controller-lo-pri-bypass 1092Count memory controller low priority bypasses. 1093.El 1094.It Li k8-nb-memory-controller-dram-slots-missed 1095Count memory controller DRAM command slots missed (in MemClks). 1096.It Li k8-nb-memory-controller-page-access-event Op Li ,mask= Ns Ar qualifier 1097Count memory controller page access events. 1098This event may be further qualified using 1099.Ar qualifier , 1100which is a 1101.Li "+" Ns - Ns 1102separated set of the following keywords: 1103.Bl -tag -width "XXXXXXXXXX" -compact 1104.It Li page-conflict 1105Count page conflicts. 1106.It Li page-hit 1107Count page hits. 
1108.It Li page-miss 1109Count page misses. 1110.El 1111The default is to count all types of events. 1112.It Li k8-nb-memory-controller-page-table-overflow 1113Count memory controller page table overflow events. 1114.It Li k8-nb-probe-result Op Li ,mask= Ns Ar qualifier 1115Count probe events. 1116This event may be further qualified using 1117.Ar qualifier , 1118which is a 1119.Li "+" Ns - Ns 1120separated set of the following keywords: 1121.Bl -tag -width "exclusive" -compact 1122.It Li probe-hit 1123Count all probe hits. 1124.It Li probe-hit-dirty-no-memory-cancel 1125Count probe hits without memory cancels. 1126.It Li probe-hit-dirty-with-memory-cancel 1127Count probe hits with memory cancels. 1128.It Li probe-miss 1129Count probe misses. 1130.El 1131.It Li k8-nb-sized-commands Op Li ,mask= Ns Ar qualifier 1132Count sized commands issued. 1133This event may be further qualified using 1134.Ar qualifier , 1135which is a 1136.Li "+" Ns - Ns 1137separated set of the following keywords: 1138.Bl -tag -width "exclusive" -compact 1139.It Li nonpostwrszbyte 1140.It Li nonpostwrszdword 1141.It Li postwrszbyte 1142.It Li postwrszdword 1143.It Li rdszbyte 1144.It Li rdszdword 1145.It Li rdmodwr 1146.El 1147The default is to count all types of commands. 1148.It Li k8-nb-memory-controller-turnaround Op Li ,mask= Ns Ar qualifier 1149Count memory controller turnaround events. 1150This event may be further qualified using 1151.Ar qualifier , 1152which is a 1153.Li "+" Ns - Ns 1154separated set of the following keywords: 1155.Bl -tag -width "XXXXXXXXXX" -compact 1156.\" XXX doc is unclear whether these are cycle counts or event counts 1157.It Li dimm-turnaround 1158Count DIMM turnarounds. 1159.It Li read-to-write-turnaround 1160Count read to write turnarounds. 1161.It Li write-to-read-turnaround 1162Count write to read turnarounds. 1163.El 1164The default is to count all types of events. 
1165.It Li k8-nb-ht-bus0-bandwidth Op Li ,mask= Ns Ar qualifier 1166.It Li k8-nb-ht-bus1-bandwidth Op Li ,mask= Ns Ar qualifier 1167.It Li k8-nb-ht-bus2-bandwidth Op Li ,mask= Ns Ar qualifier 1168Count events on the HyperTransport(tm) buses. 1169These events may be further qualified using 1170.Ar qualifier , 1171which is a 1172.Li "+" Ns - Ns 1173separated set of the following keywords: 1174.Bl -tag -width "XXXXXXXXXX" -compact 1175.It Li buffer-release 1176Count buffer release messages sent. 1177.It Li command 1178Count command messages sent. 1179.It Li data 1180Count data messages sent. 1181.It Li nop 1182Count nop messages sent. 1183.El 1184The default is to count all types of messages. 1185.El 1186.Ss Intel P6 PMCS 1187Intel P6 PMCs are present in Intel 1188.Tn "Pentium Pro" , 1189.Tn "Pentium II" , 1190.Tn "Celeron" , 1191.Tn "Pentium III" 1192and 1193.Tn "Pentium M" 1194processors. 1195.Pp 1196These CPUs have two counters. 1197Some events may only be used on specific counters and some events are 1198defined only on specific processor models. 1199.Pp 1200These PMCs are documented in 1201.Rs 1202.%B "IA-32 Intel(R) Architecture Software Developer's Manual" 1203.%T "Volume 3: System Programming Guide" 1204.%N "Order Number 245472-012" 1205.%D 2003 1206.%Q "Intel Corporation" 1207.Re 1208.Pp 1209Some of these events are affected by processor errata described in 1210.Rs 1211.%B "Intel(R) Pentium(R) III Processor Specification Update" 1212.%N "Document Number: 244453-054" 1213.%D "April 2005" 1214.%Q "Intel Corporation" 1215.Re 1216.Pp 1217Event specifiers for Intel P6 PMCs can have the following common 1218qualifiers: 1219.Bl -tag -width indent 1220.It Li cmask= Ns Ar value 1221Configure the PMC to increment only if the number of configured 1222events measured in a cycle is greater than or equal to 1223.Ar value . 1224.It Li edge 1225Configure the PMC to count the number of deasserted to asserted 1226transitions of the conditions expressed by the other qualifiers. 
1227If specified, the counter will increment only once whenever a 1228condition becomes true, irrespective of the number of clocks during 1229which the condition remains true. 1230.It Li inv 1231Invert the sense of comparison when the 1232.Ar cmask 1233qualifier is present, making the counter increment when the number of 1234events per cycle is less than the value specified by the 1235.Ar cmask 1236qualifier. 1237.It Li os 1238Configure the PMC to count events happening at processor privilege 1239level 0. 1240.It Li umask= Ns Ar value 1241This qualifier is used to further qualify the event selected (see 1242below). 1243.It Li usr 1244Configure the PMC to count events occurring at privilege levels 1, 2 1245or 3. 1246.El 1247If neither of the 1248.Li os 1249or 1250.Li usr 1251qualifiers is specified, the default is to enable both. 1252.Pp 1253The event specifiers supported by Intel P6 PMCs are: 1254.Bl -tag -width indent 1255.It Li p6-baclears 1256Count the number of times a static branch prediction was made by the 1257branch decoder because the BTB did not have a prediction. 1258.It Li p6-br-bac-missp-exec 1259.Pq Tn "Pentium M" 1260Count the number of branch instructions executed that were 1261mispredicted at the Front End (BAC). 1262.It Li p6-br-bogus 1263Count the number of bogus branches. 1264.It Li p6-br-call-exec 1265.Pq Tn "Pentium M" 1266Count the number of call instructions executed. 1267.It Li p6-br-call-missp-exec 1268.Pq Tn "Pentium M" 1269Count the number of call instructions executed that were mispredicted. 1270.It Li p6-br-cnd-exec 1271.Pq Tn "Pentium M" 1272Count the number of conditional branch instructions executed. 1273.It Li p6-br-cnd-missp-exec 1274.Pq Tn "Pentium M" 1275Count the number of conditional branch instructions executed that were 1276mispredicted. 1277.It Li p6-br-ind-call-exec 1278.Pq Tn "Pentium M" 1279Count the number of indirect call instructions executed. 
1280.It Li p6-br-ind-exec 1281.Pq Tn "Pentium M" 1282Count the number of indirect branch instructions executed. 1283.It Li p6-br-ind-missp-exec 1284.Pq Tn "Pentium M" 1285Count the number of indirect branch instructions executed that were 1286mispredicted. 1287.It Li p6-br-inst-decoded 1288Count the number of branch instructions decoded. 1289.It Li p6-br-inst-exec 1290.Pq Tn "Pentium M" 1291Count the number of branch instructions executed but not necessarily retired. 1292.It Li p6-br-inst-retired 1293Count the number of branch instructions retired. 1294.It Li p6-br-miss-pred-retired 1295Count the number of mispredicted branch instructions retired. 1296.It Li p6-br-miss-pred-taken-ret 1297Count the number of taken mispredicted branches retired. 1298.It Li p6-br-missp-exec 1299.Pq Tn "Pentium M" 1300Count the number of branch instructions executed that were 1301mispredicted at execution. 1302.It Li p6-br-ret-bac-missp-exec 1303.Pq Tn "Pentium M" 1304Count the number of return instructions executed that were 1305mispredicted at the Front End (BAC). 1306.It Li p6-br-ret-exec 1307.Pq Tn "Pentium M" 1308Count the number of return instructions executed. 1309.It Li p6-br-ret-missp-exec 1310.Pq Tn "Pentium M" 1311Count the number of return instructions executed that were 1312mispredicted at execution. 1313.It Li p6-br-taken-retired 1314Count the number of taken branches retired. 1315.It Li p6-btb-misses 1316Count the number of branches for which the BTB did not produce a 1317prediction. 1318.It Li p6-bus-bnr-drv 1319Count the number of bus clock cycles during which this processor is 1320driving the BNR# pin. 1321.It Li p6-bus-data-rcv 1322Count the number of bus clock cycles during which this processor is 1323receiving data. 1324.It Li p6-bus-drdy-clocks Op Li ,umask= Ns Ar qualifier 1325Count the number of clocks during which DRDY# is asserted. 
1326An additional qualifier may be specified, and comprises one of the 1327following keywords: 1328.Bl -tag -width indent -compact 1329.It Li any 1330Count transactions generated by any agent on the bus. 1331.It Li self 1332Count transactions generated by this processor. 1333.El 1334The default is to count operations generated by this processor. 1335.It Li p6-bus-hit-drv 1336Count the number of bus clock cycles during which this processor is 1337driving the HIT# pin. 1338.It Li p6-bus-hitm-drv 1339Count the number of bus clock cycles during which this processor is 1340driving the HITM# pin. 1341.It Li p6-bus-lock-clocks Op Li ,umask= Ns Ar qualifier 1342Count the number of clocks during which LOCK# is asserted on the 1343external system bus. 1344An additional qualifier may be specified and comprises one of the following 1345keywords: 1346.Bl -tag -width indent -compact 1347.It Li any 1348Count transactions generated by any agent on the bus. 1349.It Li self 1350Count transactions generated by this processor. 1351.El 1352The default is to count operations generated by this processor. 1353.It Li p6-bus-req-outstanding 1354Count the number of bus requests outstanding in any given cycle. 1355.It Li p6-bus-snoop-stall 1356Count the number of clock cycles during which the bus is snoop stalled. 1357.It Li p6-bus-tran-any Op Li ,umask= Ns Ar qualifier 1358Count the number of completed bus transactions of any kind. 1359An additional qualifier may be specified and comprises one of the following 1360keywords: 1361.Bl -tag -width indent -compact 1362.It Li any 1363Count transactions generated by any agent on the bus. 1364.It Li self 1365Count transactions generated by this processor. 1366.El 1367The default is to count operations generated by this processor. 1368.It Li p6-bus-tran-brd Op Li ,umask= Ns Ar qualifier 1369Count the number of burst read transactions. 
1370An additional qualifier may be specified and comprises one of the following 1371keywords: 1372.Bl -tag -width indent -compact 1373.It Li any 1374Count transactions generated by any agent on the bus. 1375.It Li self 1376Count transactions generated by this processor. 1377.El 1378The default is to count operations generated by this processor. 1379.It Li p6-bus-tran-burst Op Li ,umask= Ns Ar qualifier 1380Count the number of completed burst transactions. 1381An additional qualifier may be specified and comprises one of the following 1382keywords: 1383.Bl -tag -width indent -compact 1384.It Li any 1385Count transactions generated by any agent on the bus. 1386.It Li self 1387Count transactions generated by this processor. 1388.El 1389The default is to count operations generated by this processor. 1390.It Li p6-bus-tran-def Op Li ,umask= Ns Ar qualifier 1391Count the number of completed deferred transactions. 1392An additional qualifier may be specified and comprises one of the following 1393keywords: 1394.Bl -tag -width indent -compact 1395.It Li any 1396Count transactions generated by any agent on the bus. 1397.It Li self 1398Count transactions generated by this processor. 1399.El 1400The default is to count operations generated by this processor. 1401.It Li p6-bus-tran-ifetch Op Li ,umask= Ns Ar qualifier 1402Count the number of completed instruction fetch transactions. 1403An additional qualifier may be specified and comprises one of the following 1404keywords: 1405.Bl -tag -width indent -compact 1406.It Li any 1407Count transactions generated by any agent on the bus. 1408.It Li self 1409Count transactions generated by this processor. 1410.El 1411The default is to count operations generated by this processor. 1412.It Li p6-bus-tran-inval Op Li ,umask= Ns Ar qualifier 1413Count the number of completed invalidate transactions. 
1414An additional qualifier may be specified and comprises one of the following 1415keywords: 1416.Bl -tag -width indent -compact 1417.It Li any 1418Count transactions generated by any agent on the bus. 1419.It Li self 1420Count transactions generated by this processor. 1421.El 1422The default is to count operations generated by this processor. 1423.It Li p6-bus-tran-mem Op Li ,umask= Ns Ar qualifier 1424Count the number of completed memory transactions. 1425An additional qualifier may be specified and comprises one of the following 1426keywords: 1427.Bl -tag -width indent -compact 1428.It Li any 1429Count transactions generated by any agent on the bus. 1430.It Li self 1431Count transactions generated by this processor. 1432.El 1433The default is to count operations generated by this processor. 1434.It Li p6-bus-tran-pwr Op Li ,umask= Ns Ar qualifier 1435Count the number of completed partial write transactions. 1436An additional qualifier may be specified and comprises one of the following 1437keywords: 1438.Bl -tag -width indent -compact 1439.It Li any 1440Count transactions generated by any agent on the bus. 1441.It Li self 1442Count transactions generated by this processor. 1443.El 1444The default is to count operations generated by this processor. 1445.It Li p6-bus-tran-rfo Op Li ,umask= Ns Ar qualifier 1446Count the number of completed read-for-ownership transactions. 1447An additional qualifier may be specified and comprises one of the following 1448keywords: 1449.Bl -tag -width indent -compact 1450.It Li any 1451Count transactions generated by any agent on the bus. 1452.It Li self 1453Count transactions generated by this processor. 1454.El 1455The default is to count operations generated by this processor. 1456.It Li p6-bus-trans-io Op Li ,umask= Ns Ar qualifier 1457Count the number of completed I/O transactions. 
1458An additional qualifier may be specified and comprises one of the following 1459keywords: 1460.Bl -tag -width indent -compact 1461.It Li any 1462Count transactions generated by any agent on the bus. 1463.It Li self 1464Count transactions generated by this processor. 1465.El 1466The default is to count operations generated by this processor. 1467.It Li p6-bus-trans-p Op Li ,umask= Ns Ar qualifier 1468Count the number of completed partial transactions. 1469An additional qualifier may be specified and comprises one of the following 1470keywords: 1471.Bl -tag -width indent -compact 1472.It Li any 1473Count transactions generated by any agent on the bus. 1474.It Li self 1475Count transactions generated by this processor. 1476.El 1477The default is to count operations generated by this processor. 1478.It Li p6-bus-trans-wb Op Li ,umask= Ns Ar qualifier 1479Count the number of completed write-back transactions. 1480An additional qualifier may be specified and comprises one of the following 1481keywords: 1482.Bl -tag -width indent -compact 1483.It Li any 1484Count transactions generated by any agent on the bus. 1485.It Li self 1486Count transactions generated by this processor. 1487.El 1488The default is to count operations generated by this processor. 1489.It Li p6-cpu-clk-unhalted 1490Count the number of cycles during which the processor was not halted. 1491.Pp 1492.Pq Tn "Pentium M" 1493Count the number of cycles during which the processor was not halted 1494and not in a thermal trip. 1495.It Li p6-cycles-div-busy 1496Count the number of cycles during which the divider is busy and cannot 1497accept new divides. 1498This event is only allocated on counter 0. 1499.It Li p6-cycles-in-pending-and-masked 1500Count the number of processor cycles for which interrupts were 1501disabled and interrupts were pending. 1502.It Li p6-cycles-int-masked 1503Count the number of processor cycles for which interrupts were 1504disabled. 
1505.It Li p6-data-mem-refs 1506Count all loads and all stores using any memory type, including 1507internal retries. 1508Each part of a split store is counted separately. 1509.It Li p6-dcu-lines-in 1510Count the total lines allocated in the data cache unit. 1511.It Li p6-dcu-m-lines-in 1512Count the number of M state lines allocated in the data cache unit. 1513.It Li p6-dcu-m-lines-out 1514Count the number of M state lines evicted from the data cache unit. 1515.It Li p6-dcu-miss-outstanding 1516Count the weighted number of cycles while a data cache unit miss is 1517outstanding, incremented by the number of outstanding cache misses at 1518any time. 1519.It Li p6-div 1520Count the number of floating point divides. 1521This event is only allocated on counter 1. 1522.It Li p6-emon-esp-uops 1523.Pq Tn "Pentium M" 1524Count the total number of micro-ops. 1525.It Li p6-emon-est-trans Op Li ,umask= Ns Ar qualifier 1526.Pq Tn "Pentium M" 1527Count the number of 1528.Tn "Enhanced Intel SpeedStep" 1529transitions. 1530An additional qualifier may be specified, and can be one of the 1531following keywords: 1532.Bl -tag -width indent -compact 1533.It Li all 1534Count all transitions. 1535.It Li freq 1536Count only frequency transitions. 1537.El 1538The default is to count all transitions. 1539.It Li p6-emon-fused-uops-ret Op Li ,umask= Ns Ar qualifier 1540.Pq Tn "Pentium M" 1541Count the number of retired fused micro-ops. 1542An additional qualifier may be specified, and may be one of the 1543following keywords: 1544.Bl -tag -width indent -compact 1545.It Li all 1546Count all fused micro-ops. 1547.It Li loadop 1548Count only load and op micro-ops. 1549.It Li stdsta 1550Count only STD/STA micro-ops. 1551.El 1552The default is to count all fused micro-ops. 1553.It Li p6-emon-kni-comp-inst-ret Op Li ,umask= Ns Ar qualifier 1554.Pq Tn "Pentium III" 1555Count the number of SSE computational instructions retired. 
1556An additional qualifier may be specified, and comprises one of the 1557following keywords: 1558.Bl -tag -width indent -compact 1559.It Li packed-and-scalar 1560Count packed and scalar operations. 1561.It Li scalar 1562Count scalar operations only. 1563.El 1564The default is to count packed and scalar operations. 1565.It Li p6-emon-kni-inst-retired Op Li ,umask= Ns Ar qualifier 1566.Pq Tn "Pentium III" 1567Count the number of SSE instructions retired. 1568An additional qualifier may be specified, and comprises one of the 1569following keywords: 1570.Bl -tag -width indent -compact 1571.It Li packed-and-scalar 1572Count packed and scalar operations. 1573.It Li scalar 1574Count scalar operations only. 1575.El 1576The default is to count packed and scalar operations. 1577.It Li p6-emon-kni-pref-dispatched Op Li ,umask= Ns Ar qualifier 1578.Pq Tn "Pentium III" 1579Count the number of SSE prefetch or weakly ordered instructions 1580dispatched (including speculative prefetches). 1581An additional qualifier may be specified, and comprises one of the 1582following keywords: 1583.Bl -tag -width indent -compact 1584.It Li nta 1585Count non-temporal prefetches. 1586.It Li t1 1587Count prefetches to L1. 1588.It Li t2 1589Count prefetches to L2. 1590.It Li wos 1591Count weakly ordered stores. 1592.El 1593The default is to count non-temporal prefetches. 1594.It Li p6-emon-kni-pref-miss Op Li ,umask= Ns Ar qualifier 1595.Pq Tn "Pentium III" 1596Count the number of prefetch or weakly ordered instructions that miss 1597all caches. 1598An additional qualifier may be specified, and comprises one of the 1599following keywords: 1600.Bl -tag -width indent -compact 1601.It Li nta 1602Count non-temporal prefetches. 1603.It Li t1 1604Count prefetches to L1. 1605.It Li t2 1606Count prefetches to L2. 1607.It Li wos 1608Count weakly ordered stores. 1609.El 1610The default is to count non-temporal prefetches. 
1611.It Li p6-emon-pref-rqsts-dn 1612.Pq Tn "Pentium M" 1613Count the number of downward prefetches issued. 1614.It Li p6-emon-pref-rqsts-up 1615.Pq Tn "Pentium M" 1616Count the number of upward prefetches issued. 1617.It Li p6-emon-simd-instr-retired 1618.Pq Tn "Pentium M" 1619Count the number of retired 1620.Tn MMX 1621instructions. 1622.It Li p6-emon-sse-sse2-comp-inst-retired Op Li ,umask= Ns Ar qualifier 1623.Pq Tn "Pentium M" 1624Count the number of computational SSE instructions retired. 1625An additional qualifier may be specified and can be one of the 1626following keywords: 1627.Bl -tag -width indent -compact 1628.It Li sse-packed-single 1629Count SSE packed-single instructions. 1630.It Li sse-scalar-single 1631Count SSE scalar-single instructions. 1632.It Li sse2-packed-double 1633Count SSE2 packed-double instructions. 1634.It Li sse2-scalar-double 1635Count SSE2 scalar-double instructions. 1636.El 1637The default is to count SSE packed-single instructions. 1638.It Li p6-emon-sse-sse2-inst-retired Op Li ,umask= Ns Ar qualifier 1639.Pp 1640.Pq Tn "Pentium M" 1641Count the number of SSE instructions retired. 1642An additional qualifier can be specified, and can be one of the 1643following keywords: 1644.Bl -tag -width indent -compact 1645.It Li sse-packed-single 1646Count SSE packed-single instructions. 1647.It Li sse-packed-single-scalar-single 1648Count SSE packed-single and scalar-single instructions. 1649.It Li sse2-packed-double 1650Count SSE2 packed-double instructions. 1651.It Li sse2-scalar-double 1652Count SSE2 scalar-double instructions. 1653.El 1654The default is to count SSE packed-single instructions. 1655.It Li p6-emon-synch-uops 1656.Pq Tn "Pentium M" 1657Count the number of sync micro-ops. 1658.It Li p6-emon-thermal-trip 1659.Pq Tn "Pentium M" 1660Count the duration or occurrences of thermal trips. 1661Use the 1662.Ar edge 1663qualifier to count occurrences of thermal trips. 
1664.It Li p6-emon-unfusion 1665.Pq Tn "Pentium M" 1666Count the number of unfusion events in the reorder buffer. 1667.It Li p6-flops 1668Count the number of computational floating point operations retired. 1669This event is only allocated on counter 0. 1670.It Li p6-fp-assist 1671Count the number of floating point exceptions handled by microcode. 1672This event is only allocated on counter 1. 1673.It Li p6-fp-comps-ops-exe 1674Count the number of computational floating point operations executed. 1675This event is only allocated on counter 0. 1676.It Li p6-fp-mmx-trans Op Li ,umask= Ns Ar qualifier 1677.Pq Tn "Pentium II" , Tn "Pentium III" 1678Count the number of transitions between MMX and floating-point 1679instructions. 1680An additional qualifier may be specified, and comprises one of the 1681following keywords: 1682.Bl -tag -width indent -compact 1683.It Li mmxtofp 1684Count transitions from MMX instructions to floating-point instructions. 1685.It Li fptommx 1686Count transitions from floating-point instructions to MMX instructions. 1687.El 1688The default is to count MMX to floating-point transitions. 1689.It Li p6-hw-int-rx 1690Count the number of hardware interrupts received. 1691.It Li p6-ifu-fetch 1692Count the number of instruction fetches, both cacheable and non-cacheable. 1693.It Li p6-ifu-fetch-miss 1694Count the number of instruction fetch misses (i.e., those that produce 1695memory accesses). 1696.It Li p6-ifu-mem-stall 1697Count the number of cycles instruction fetch is stalled for any reason. 1698.It Li p6-ild-stall 1699Count the number of cycles the instruction length decoder is stalled. 1700.It Li p6-inst-decoded 1701Count the number of instructions decoded. 1702.It Li p6-inst-retired 1703Count the number of instructions retired. 1704.It Li p6-itlb-miss 1705Count the number of instruction TLB misses. 1706.It Li p6-l2-ads 1707Count the number of L2 address strobes. 
1708.It Li p6-l2-dbus-busy 1709Count the number of cycles during which the L2 cache data bus was busy. 1710.It Li p6-l2-dbus-busy-rd 1711Count the number of cycles during which the L2 cache data bus was busy 1712transferring read data from L2 to the processor. 1713.It Li p6-l2-ifetch Op Li ,umask= Ns Ar qualifier 1714Count the number of L2 instruction fetches. 1715An additional qualifier may be specified and comprises a list of the following 1716keywords separated by 1717.Li "+" 1718characters: 1719.Bl -tag -width indent -compact 1720.It Li e 1721Count operations affecting E (exclusive) state lines. 1722.It Li i 1723Count operations affecting I (invalid) state lines. 1724.It Li m 1725Count operations affecting M (modified) state lines. 1726.It Li s 1727Count operations affecting S (shared) state lines. 1728.El 1729The default is to count operations affecting all (MESI) state lines. 1730.It Li p6-l2-ld Op Li ,umask= Ns Ar qualifier 1731Count the number of L2 data loads. 1732An additional qualifier may be specified and comprises a list of the following 1733keywords separated by 1734.Li "+" 1735characters: 1736.Bl -tag -width indent -compact 1737.It Li both 1738.Pq Tn "Pentium M" 1739Count both hardware-prefetched lines and non-hardware-prefetched lines. 1740.It Li e 1741Count operations affecting E (exclusive) state lines. 1742.It Li hw 1743.Pq Tn "Pentium M" 1744Count hardware-prefetched lines only. 1745.It Li i 1746Count operations affecting I (invalid) state lines. 1747.It Li m 1748Count operations affecting M (modified) state lines. 1749.It Li nonhw 1750.Pq Tn "Pentium M" 1751Exclude hardware-prefetched lines. 1752.It Li s 1753Count operations affecting S (shared) state lines. 1754.El 1755The default on processors other than 1756.Tn "Pentium M" 1757processors is to count operations affecting all (MESI) state lines. 
1758The default on 1759.Tn "Pentium M" 1760processors is to count both hardware-prefetched and 1761non-hardware-prefetch operations on all (MESI) state lines. 1762.Pq Errata 1763This event is affected by processor errata E53. 1764.It Li p6-l2-lines-in Op Li ,umask= Ns Ar qualifier 1765Count the number of L2 lines allocated. 1766An additional qualifier may be specified and comprises a list of the following 1767keywords separated by 1768.Li "+" 1769characters: 1770.Bl -tag -width indent -compact 1771.It Li both 1772.Pq Tn "Pentium M" 1773Count both hardware-prefetched lines and non-hardware-prefetched lines. 1774.It Li e 1775Count operations affecting E (exclusive) state lines. 1776.It Li hw 1777.Pq Tn "Pentium M" 1778Count hardware-prefetched lines only. 1779.It Li i 1780Count operations affecting I (invalid) state lines. 1781.It Li m 1782Count operations affecting M (modified) state lines. 1783.It Li nonhw 1784.Pq Tn "Pentium M" 1785Exclude hardware-prefetched lines. 1786.It Li s 1787Count operations affecting S (shared) state lines. 1788.El 1789The default on processors other than 1790.Tn "Pentium M" 1791processors is to count operations affecting all (MESI) state lines. 1792The default on 1793.Tn "Pentium M" 1794processors is to count both hardware-prefetched and 1795non-hardware-prefetch operations on all (MESI) state lines. 1796.Pq Errata 1797This event is affected by processor errata E45. 1798.It Li p6-l2-lines-out Op Li ,umask= Ns Ar qualifier 1799Count the number of L2 lines evicted. 1800An additional qualifier may be specified and comprises a list of the following 1801keywords separated by 1802.Li "+" 1803characters: 1804.Bl -tag -width indent -compact 1805.It Li both 1806.Pq Tn "Pentium M" 1807Count both hardware-prefetched lines and non-hardware-prefetched lines. 1808.It Li e 1809Count operations affecting E (exclusive) state lines. 1810.It Li hw 1811.Pq Tn "Pentium M" 1812Count hardware-prefetched lines only. 
1813.It Li i 1814Count operations affecting I (invalid) state lines. 1815.It Li m 1816Count operations affecting M (modified) state lines. 1817.It Li nonhw 1818.Pq Tn "Pentium M" only 1819Exclude hardware-prefetched lines. 1820.It Li s 1821Count operations affecting S (shared) state lines. 1822.El 1823The default on processors other than 1824.Tn "Pentium M" 1825processors is to count operations affecting all (MESI) state lines. 1826The default on 1827.Tn "Pentium M" 1828processors is to count both hardware-prefetched and 1829non-hardware-prefetch operations on all (MESI) state lines. 1830.Pq Errata 1831This event is affected by processor errata E45. 1832.It Li p6-l2-m-lines-inm 1833Count the number of modified lines allocated in L2 cache. 1834.It Li p6-l2-m-lines-outm Op Li ,umask= Ns Ar qualifier 1835Count the number of L2 M-state lines evicted. 1836.Pp 1837.Pq Tn "Pentium M" 1838On these processors an additional qualifier may be specified and 1839comprises a list of the following keywords separated by 1840.Li "+" 1841characters: 1842.Bl -tag -width indent -compact 1843.It Li both 1844Count both hardware-prefetched lines and non-hardware-prefetched lines. 1845.It Li hw 1846Count hardware-prefetched lines only. 1847.It Li nonhw 1848Exclude hardware-prefetched lines. 1849.El 1850The default is to count both hardware-prefetched and 1851non-hardware-prefetch operations. 1852.Pq Errata 1853This event is affected by processor errata E53. 1854.It Li p6-l2-rqsts Op Li ,umask= Ns Ar qualifier 1855Count the total number of L2 requests. 1856An additional qualifier may be specified and comprises a list of the following 1857keywords separated by 1858.Li "+" 1859characters: 1860.Bl -tag -width indent -compact 1861.It Li e 1862Count operations affecting E (exclusive) state lines. 1863.It Li i 1864Count operations affecting I (invalid) state lines. 1865.It Li m 1866Count operations affecting M (modified) state lines. 
1867.It Li s 1868Count operations affecting S (shared) state lines. 1869.El 1870The default is to count operations affecting all (MESI) state lines. 1871.It Li p6-l2-st Op Li ,umask= Ns Ar qualifier 1872Count the number of L2 data stores. 1873An additional qualifier may be specified and comprises a list of the following 1874keywords separated by 1875.Li "+" 1876characters: 1877.Bl -tag -width indent -compact 1878.It Li e 1879Count operations affecting E (exclusive) state lines. 1880.It Li i 1881Count operations affecting I (invalid) state lines. 1882.It Li m 1883Count operations affecting M (modified) state lines. 1884.It Li s 1885Count operations affecting S (shared) state lines. 1886.El 1887The default is to count operations affecting all (MESI) state lines. 1888.It Li p6-ld-blocks 1889Count the number of load operations delayed due to store buffer blocks. 1890.It Li p6-misalign-mem-ref 1891Count the number of misaligned data memory references (crossing a 64 1892bit boundary). 1893.It Li p6-mmx-assist 1894.Pq Tn "Pentium II" , Tn "Pentium III" 1895Count the number of MMX assists executed. 1896.It Li p6-mmx-instr-exec 1897.Pq Tn "Celeron" , Tn "Pentium II" 1898Count the number of MMX instructions executed, except MOVQ and MOVD 1899stores from register to memory. 1900.It Li p6-mmx-instr-ret 1901.Pq Tn "Pentium II" 1902Count the number of MMX instructions retired. 1903.It Li p6-mmx-instr-type-exec Op Li ,umask= Ns Ar qualifier 1904.Pq Tn "Pentium II" , Tn "Pentium III" 1905Count the number of MMX instructions executed. 1906An additional qualifier may be specified and comprises a list of 1907the following keywords separated by 1908.Li "+" 1909characters: 1910.Bl -tag -width indent -compact 1911.It Li pack 1912Count MMX pack operation instructions. 1913.It Li packed-arithmetic 1914Count MMX packed arithmetic instructions. 1915.It Li packed-logical 1916Count MMX packed logical instructions. 1917.It Li packed-multiply 1918Count MMX packed multiply instructions.
1919.It Li packed-shift 1920Count MMX packed shift instructions. 1921.It Li unpack 1922Count MMX unpack operation instructions. 1923.El 1924The default is to count all operations. 1925.It Li p6-mmx-sat-instr-exec 1926.Pq Tn "Pentium II" , Tn "Pentium III" 1927Count the number of MMX saturating instructions executed. 1928.It Li p6-mmx-uops-exec 1929.Pq Tn "Pentium II" , Tn "Pentium III" 1930Count the number of MMX micro-ops executed. 1931.It Li p6-mul 1932Count the number of floating point multiplies. 1933This event is only allocated on counter 1. 1934.It Li p6-partial-rat-stalls 1935Count the number of cycles or events for partial stalls. 1936.It Li p6-resource-stalls 1937Count the number of cycles there was a resource related stall of any kind. 1938.It Li p6-ret-seg-renames 1939.Pq Tn "Pentium II" , Tn "Pentium III" 1940Count the number of segment register rename events retired. 1941.It Li p6-sb-drains 1942Count the number of cycles the store buffer is draining. 1943.It Li p6-seg-reg-renames Op Li ,umask= Ns Ar qualifier 1944.Pq Tn "Pentium II" , Tn "Pentium III" 1945Count the number of segment register renames. 1946An additional qualifier may be specified, and comprises a list of the 1947following keywords separated by 1948.Li "+" 1949characters: 1950.Bl -tag -width indent -compact 1951.It Li ds 1952Count renames for segment register DS. 1953.It Li es 1954Count renames for segment register ES. 1955.It Li fs 1956Count renames for segment register FS. 1957.It Li gs 1958Count renames for segment register GS. 1959.El 1960The default is to count operations affecting all segment registers. 1961.It Li p6-seg-rename-stalls Op Li ,umask= Ns Ar qualifier 1962.Pq Tn "Pentium II" , Tn "Pentium III" 1963Count the number of segment register renaming stalls. 1964An additional qualifier may be specified, and comprises a list of the 1965following keywords separated by 1966.Li "+" 1967characters: 1968.Bl -tag -width indent -compact 1969.It Li ds 1970Count stalls for segment register DS.
1971.It Li es 1972Count stalls for segment register ES. 1973.It Li fs 1974Count stalls for segment register FS. 1975.It Li gs 1976Count stalls for segment register GS. 1977.El 1978The default is to count operations affecting all the segment registers. 1979.It Li p6-segment-reg-loads 1980Count the number of segment register loads. 1981.It Li p6-uops-retired 1982Count the number of micro-ops retired. 1983.El 1984.Ss Intel P4 PMCS 1985Intel P4 PMCs are present in Intel 1986.Tn "Pentium 4" 1987and 1988.Tn Xeon 1989processors. 1990These PMCs are documented in 1991.Rs 1992.%B "IA-32 Intel(R) Architecture Software Developer's Manual" 1993.%T "Volume 3: System Programming Guide" 1994.%N "Order Number 245472-012" 1995.%D 2003 1996.%Q "Intel Corporation" 1997.Re 1998Further information about using these PMCs may be found in 1999.Rs 2000.%B "IA-32 Intel(R) Architecture Optimization Guide" 2001.%D 2003 2002.%N "Order Number 248966-009" 2003.%Q "Intel Corporation" 2004.Re 2005Some of these events are affected by processor errata described in 2006.Rs 2007.%B "Intel(R) Pentium(R) 4 Processor Specification Update" 2008.%N "Document Number: 249199-059" 2009.%D "April 2005" 2010.%Q "Intel Corporation" 2011.Re 2012.Pp 2013Event specifiers for Intel P4 PMCs can have the following common 2014qualifiers: 2015.Bl -tag -width indent 2016.It Li active= Ns Ar choice 2017(On P4 HTT CPUs) Filter event counting based on which logical 2018processors are active. 2019The allowed values of 2020.Ar choice 2021are: 2022.Bl -tag -width indent -compact 2023.It Li any 2024Count when either logical processor is active. 2025.It Li both 2026Count when both logical processors are active. 2027.It Li none 2028Count only when neither logical processor is active. 2029.It Li single 2030Count only when one logical processor is active. 2031.El 2032The default is 2033.Li both . 2034.It Li cascade 2035Configure the PMC to cascade onto its partner. 
2036The PMC for the partner must already have been allocated by the 2037current process. 2038See 2039.Sx "Cascading P4 PMCs" 2040below for more information. 2041.It Li edge 2042Configure the counter to count false to true transitions of the threshold 2043comparison output. 2044This qualifier only takes effect if a threshold qualifier has also been 2045specified. 2046.It Li complement 2047Configure the counter to increment only when the event count seen is 2048less than the threshold qualifier value specified. 2049.It Li mask= Ns Ar qualifier 2050Many event specifiers for Intel P4 PMCs need to be additionally 2051qualified using a mask qualifier. 2052The allowed syntax for these qualifiers is event specific and is 2053described along with the events. 2054.It Li os 2055Configure the PMC to count when the CPL of the processor is 0. 2056.It Li precise 2057Select precise event based sampling. 2058Precise sampling is supported by the hardware for a limited set of 2059events. 2060.It Li tag= Ns Ar value 2061Configure the PMC to tag the internal uop selected by the other 2062fields in this event specifier with value 2063.Ar value . 2064This feature is used when cascading PMCs. 2065.It Li threshold= Ns Ar value 2066Configure the PMC to increment only when the event counts seen are 2067greater than the specified threshold value 2068.Ar value . 2069.It Li usr 2070Configure the PMC to count when the CPL of the processor is 1, 2 or 3. 2071.El 2072If neither of the 2073.Li os 2074or 2075.Li usr 2076qualifiers are specified, the default is to enable both. 2077.Pp 2078On Intel Pentium 4 processors with HTT, events are 2079divided into two classes: 2080.Bl -tag -width "XXXXXXXXXX" -compact 2081.It "TS Events" 2082are those where hardware can differentiate between events 2083generated on one logical processor from those generated on the 2084other.
2085.It "TI Events" 2086are those where hardware cannot differentiate between events 2087generated by multiple logical processors in a package. 2088.El 2089Only TS events are allowed for use with process-mode PMCs on 2090Pentium-4/HTT CPUs. 2091.Pp 2092The event specifiers supported by Intel P4 PMCs are: 2093.Bl -tag -width indent 2094.It Li p4-128bit-mmx-uop Op Li ,mask= Ns Ar flags 2095.Pq "TI event" 2096Count integer SIMD SSE2 instructions that operate on 128 bit SIMD 2097operands. 2098Qualifier 2099.Ar flags 2100can take the following value (which is also the default): 2101.Bl -tag -width indent -compact 2102.It Li all 2103Count all uops operating on 128 bit SIMD integer operands in memory or 2104XMM register. 2105.El 2106If an instruction contains more than one 128 bit MMX uop, then each 2107uop will be counted. 2108.It Li p4-64bit-mmx-uop Op Li ,mask= Ns Ar flags 2109.Pq "TI event" 2110Count MMX instructions that operate on 64 bit SIMD operands. 2111Qualifier 2112.Ar flags 2113can take the following value (which is also the default): 2114.Bl -tag -width indent -compact 2115.It Li all 2116Count all uops operating on 64 bit SIMD integer operands in memory or 2117in MMX registers. 2118.El 2119If an instruction contains more than one 64 bit MMX uop, then each 2120uop will be counted. 2121.It Li p4-b2b-cycles 2122.Pq "TI event" 2123Count back-to-back bus cycles. 2124Further documentation for this event is unavailable. 2125.It Li p4-bnr 2126.Pq "TI event" 2127Count bus-not-ready conditions. 2128Further documentation for this event is unavailable. 2129.It Li p4-bpu-fetch-request Op Li ,mask= Ns Ar qualifier 2130.Pq "TS event" 2131Count instruction fetch requests qualified by additional 2132flags specified in 2133.Ar qualifier . 2134At this point only one flag is supported: 2135.Bl -tag -width indent -compact 2136.It Li tcmiss 2137Count trace cache lookup misses. 2138.El 2139The default qualifier is also 2140.Ar mask=tcmiss .
2141.It Li p4-branch-retired Op Li ,mask= Ns Ar flags 2142.Pq "TS event" 2143Count retired branches. 2144Qualifier 2145.Ar flags 2146is a list of the following 2147.Li + 2148separated strings: 2149.Bl -tag -width indent -compact 2150.It Li mmnp 2151Count branches not-taken and predicted. 2152.It Li mmnm 2153Count branches not-taken and mis-predicted. 2154.It Li mmtp 2155Count branches taken and predicted. 2156.It Li mmtm 2157Count branches taken and mis-predicted. 2158.El 2159The default qualifier counts all four kinds of branches. 2160.It Li p4-bsq-active-entries Op Li ,mask= Ns Ar qualifier 2161.Pq "TS event" 2162Count the number of entries (clipped at 15) currently active in the 2163BSQ. 2164Qualifier 2165.Ar qualifier 2166is a 2167.Li + 2168separated set of the following flags: 2169.Bl -tag -width indent -compact 2170.It Li req-type0 , Li req-type1 2171Forms a 2-bit number used to select the request type encoding: 2172.Bl -tag -width indent -compact 2173.It Li 0 2174reads excluding read invalidate 2175.It Li 1 2176read invalidates 2177.It Li 2 2178writes other than writebacks 2179.It Li 3 2180writebacks 2181.El 2182Bit 2183.Li req-type1 2184is the MSB for this two bit number. 2185.It Li req-len0 , Li req-len1 2186Forms a two-bit number that specifies the request length encoding: 2187.Bl -tag -width indent -compact 2188.It Li 0 21890 chunks 2190.It Li 1 21911 chunk 2192.It Li 3 21938 chunks 2194.El 2195Bit 2196.Li req-len1 2197is the MSB for this two bit number. 2198.It Li req-io-type 2199Count requests that are input or output requests. 2200.It Li req-lock-type 2201Count requests that lock the bus. 2202.It Li req-lock-cache 2203Count requests that lock the cache. 2204.It Li req-split-type 2205Count requests that are a bus 8-byte chunk split across an 22068-byte boundary. 2207.It Li req-dem-type 2208Count requests that are demand (not prefetches) if set. 2209Count requests that are prefetches if not set.
2210.It Li req-ord-type 2211Count requests that are ordered. 2212.It Li mem-type0 , Li mem-type1 , Li mem-type2 2213Forms a 3-bit number that specifies a memory type encoding: 2214.Bl -tag -width indent -compact 2215.It Li 0 2216UC 2217.It Li 1 2218USWC 2219.It Li 4 2220WT 2221.It Li 5 2222WP 2223.It Li 6 2224WB 2225.El 2226Bit 2227.Li mem-type2 2228is the MSB of this 3-bit number. 2229.El 2230The default qualifier has all the above bits set. 2231.Pp 2232Edge triggering using the 2233.Li edge 2234qualifier should not be used with this event when counting cycles. 2235.It Li p4-bsq-allocation Op Li ,mask= Ns Ar qualifier 2236.Pq "TS event" 2237Count allocations in the bus sequence unit according to the flags 2238specified in 2239.Ar qualifier , 2240which is a 2241.Li + 2242separated set of the following flags: 2243.Bl -tag -width indent -compact 2244.It Li req-type0 , Li req-type1 2245Forms a 2-bit number used to select the request type encoding: 2246.Bl -tag -width indent -compact 2247.It Li 0 2248reads excluding read invalidate 2249.It Li 1 2250read invalidates 2251.It Li 2 2252writes other than writebacks 2253.It Li 3 2254writebacks 2255.El 2256Bit 2257.Li req-type1 2258is the MSB for this two bit number. 2259.It Li req-len0 , Li req-len1 2260Forms a two-bit number that specifies the request length encoding: 2261.Bl -tag -width indent -compact 2262.It Li 0 22630 chunks 2264.It Li 1 22651 chunk 2266.It Li 3 22678 chunks 2268.El 2269Bit 2270.Li req-len1 2271is the MSB for this two bit number. 2272.It Li req-io-type 2273Count requests that are input or output requests. 2274.It Li req-lock-type 2275Count requests that lock the bus. 2276.It Li req-lock-cache 2277Count requests that lock the cache. 2278.It Li req-split-type 2279Count requests that are a bus 8-byte chunk split across an 22808-byte boundary. 2281.It Li req-dem-type 2282Count requests that are demand (not prefetches) if set. 2283Count requests that are prefetches if not set.
2284.It Li req-ord-type 2285Count requests that are ordered. 2286.It Li mem-type0 , Li mem-type1 , Li mem-type2 2287Forms a 3-bit number that specifies a memory type encoding: 2288.Bl -tag -width indent -compact 2289.It Li 0 2290UC 2291.It Li 1 2292USWC 2293.It Li 4 2294WT 2295.It Li 5 2296WP 2297.It Li 6 2298WB 2299.El 2300Bit 2301.Li mem-type2 2302is the MSB of this 3-bit number. 2303.El 2304The default qualifier has all the above bits set. 2305.Pp 2306This event is usually used along with the 2307.Li edge 2308qualifier to avoid multiple counting. 2309.It Li p4-bsq-cache-reference Op Li ,mask= Ns Ar qualifier 2310.Pq "TS event" 2311Count cache references as seen by the bus unit (2nd or 3rd level 2312cache references). 2313Qualifier 2314.Ar qualifier 2315is a 2316.Li + 2317separated list of the following keywords: 2318.Bl -tag -width indent -compact 2319.It Li rd-2ndl-hits 2320Count 2nd level cache hits in the shared state. 2321.It Li rd-2ndl-hite 2322Count 2nd level cache hits in the exclusive state. 2323.It Li rd-2ndl-hitm 2324Count 2nd level cache hits in the modified state. 2325.It Li rd-3rdl-hits 2326Count 3rd level cache hits in the shared state. 2327.It Li rd-3rdl-hite 2328Count 3rd level cache hits in the exclusive state. 2329.It Li rd-3rdl-hitm 2330Count 3rd level cache hits in the modified state. 2331.It Li rd-2ndl-miss 2332Count 2nd level cache misses. 2333.It Li rd-3rdl-miss 2334Count 3rd level cache misses. 2335.It Li wr-2ndl-miss 2336Count write-back lookups from the data access cache that miss the 2nd 2337level cache. 2338.El 2339The default is to count all the above events. 2340.It Li p4-execution-event Op Li ,mask= Ns Ar flags 2341.Pq "TS event" 2342Count the retirement of tagged uops selected through the execution 2343tagging mechanism. 
2344Qualifier 2345.Ar flags 2346can contain the following strings separated by 2347.Li + 2348characters: 2349.Bl -tag -width indent -compact 2350.It Li nbogus0 , Li nbogus1 , Li nbogus2 , Li nbogus3 2351The marked uops are not bogus. 2352.It Li bogus0 , Li bogus1 , Li bogus2 , Li bogus3 2353The marked uops are bogus. 2354.El 2355This event requires additional (upstream) events to be allocated to 2356perform the desired uop tagging. 2357The default is to set all the above flags. 2358This event can be used for precise event based sampling. 2359.It Li p4-front-end-event Op Li ,mask= Ns Ar flags 2360.Pq "TS event" 2361Count the retirement of tagged uops selected through the front-end 2362tagging mechanism. 2363Qualifier 2364.Ar flags 2365can contain the following strings separated by 2366.Li + 2367characters: 2368.Bl -tag -width indent -compact 2369.It Li nbogus 2370The marked uops are not bogus. 2371.It Li bogus 2372The marked uops are bogus. 2373.El 2374This event requires additional (upstream) events to be allocated to 2375perform the desired uop tagging. 2376The default is to select both kinds of events. 2377This event can be used for precise event based sampling. 2378.It Li p4-fsb-data-activity Op Li ,mask= Ns Ar flags 2379.Pq "TI event" 2380Count each DBSY or DRDY event selected by qualifier 2381.Ar flags . 2382Qualifier 2383.Ar flags 2384is a 2385.Li + 2386separated set of the following flags: 2387.Bl -tag -width indent -compact 2388.It Li drdy-drv 2389Count when this processor is driving data onto the bus. 2390.It Li drdy-own 2391Count when this processor is reading data from the bus. 2392.It Li drdy-other 2393Count when data is on the bus but not being sampled by this processor. 2394.It Li dbsy-drv 2395Count when this processor reserves the bus for use in the next cycle 2396in order to drive data. 2397.It Li dbsy-own 2398Count when some agent reserves the bus for use in the next bus cycle 2399to drive data that this processor will sample. 
2400.It Li dbsy-other 2401Count when some agent reserves the bus for use in the next bus cycle 2402to drive data that this processor will not sample. 2403.El 2404Flags 2405.Li drdy-own 2406and 2407.Li drdy-other 2408are mutually exclusive. 2409Flags 2410.Li dbsy-own 2411and 2412.Li dbsy-other 2413are mutually exclusive. 2414The default value for 2415.Ar flags 2416is 2417.Li drdy-drv+drdy-own+dbsy-drv+dbsy-own . 2418.It Li p4-global-power-events Op Li ,mask= Ns Ar flags 2419.Pq "TS event" 2420Count cycles during which the processor is not stopped. 2421Qualifier 2422.Ar flags 2423can take the following value (which is also the default): 2424.Bl -tag -width indent -compact 2425.It Li running 2426Count cycles when the processor is active. 2427.El 2428.It Li p4-instr-retired Op Li ,mask= Ns Ar flags 2429.Pq "TS event" 2430Count instructions retired during a clock cycle. 2431Qualifier 2432.Ar flags 2433comprises the following strings separated by 2434.Li + 2435characters: 2436.Bl -tag -width indent -compact 2437.It Li nbogusntag 2438Count non-bogus instructions that are not tagged. 2439.It Li nbogustag 2440Count non-bogus instructions that are tagged. 2441.It Li bogusntag 2442Count bogus instructions that are not tagged. 2443.It Li bogustag 2444Count bogus instructions that are tagged. 2445.El 2446The default qualifier counts all the above kinds of instructions. 2447.It Li p4-ioq-active-entries Xo 2448.Op Li ,mask= Ns Ar qualifier 2449.Op Li ,busreqtype= Ns Ar req-type 2450.Xc 2451.Pq "TS event" 2452Count the number of entries (clipped at 15) in the IOQ that are 2453active. 2454The event masks are specified by qualifier 2455.Ar qualifier 2456and 2457.Ar req-type . 2458.Pp 2459Qualifier 2460.Ar qualifier 2461is a 2462.Li + 2463separated set of the following flags: 2464.Bl -tag -width indent -compact 2465.It Li all-read 2466Count read entries. 2467.It Li all-write 2468Count write entries. 2469.It Li mem-uc 2470Count entries accessing uncacheable memory.
2471.It Li mem-wc 2472Count entries accessing write-combining memory. 2473.It Li mem-wt 2474Count entries accessing write-through memory. 2475.It Li mem-wp 2476Count entries accessing write-protected memory. 2477.It Li mem-wb 2478Count entries accessing write-back memory. 2479.It Li own 2480Count store requests driven by the processor (i.e., not by other 2481processors or by DMA). 2482.It Li other 2483Count store requests driven by other processors or by DMA. 2484.It Li prefetch 2485Include hardware and software prefetch requests in the count. 2486.El 2487The default value for 2488.Ar qualifier 2489is to enable all the above flags. 2490.Pp 2491The 2492.Ar req-type 2493qualifier is a 5-bit number that can be additionally used to select a 2494specific bus request type. 2495The default is 0. 2496.Pp 2497The 2498.Li edge 2499qualifier should not be used when counting cycles with this event. 2500The exact behaviour of this event depends on the processor revision. 2501.It Li p4-ioq-allocation Xo 2502.Op Li ,mask= Ns Ar qualifier 2503.Op Li ,busreqtype= Ns Ar req-type 2504.Xc 2505.Pq "TS event" 2506Count various types of transactions on the bus matching the flags set 2507in 2508.Ar qualifier 2509and 2510.Ar req-type . 2511.Pp 2512Qualifier 2513.Ar qualifier 2514is a 2515.Li + 2516separated set of the following flags: 2517.Bl -tag -width indent -compact 2518.It Li all-read 2519Count read entries. 2520.It Li all-write 2521Count write entries. 2522.It Li mem-uc 2523Count entries accessing uncacheable memory. 2524.It Li mem-wc 2525Count entries accessing write-combining memory. 2526.It Li mem-wt 2527Count entries accessing write-through memory. 2528.It Li mem-wp 2529Count entries accessing write-protected memory. 2530.It Li mem-wb 2531Count entries accessing write-back memory. 2532.It Li own 2533Count store requests driven by the processor (i.e., not by other 2534processors or by DMA). 2535.It Li other 2536Count store requests driven by other processors or by DMA.
2537.It Li prefetch 2538Include hardware and software prefetch requests in the count. 2539.El 2540The default value for 2541.Ar qualifier 2542is to enable all the above flags. 2543.Pp 2544The 2545.Ar req-type 2546qualifier is a 5-bit number that can be additionally used to select a 2547specific bus request type. 2548The default is 0. 2549.Pp 2550The 2551.Li edge 2552qualifier is normally used with this event to prevent multiple 2553counting. 2554The exact behaviour of this event depends on the processor revision. 2555.It Li p4-itlb-reference Op Li ,mask= Ns Ar qualifier 2556.Pq "TS event" 2557Count translations using the instruction translation look-aside 2558buffer. 2559The 2560.Ar qualifier 2561argument is a list of the following strings separated by 2562.Li + 2563characters: 2564.Bl -tag -width indent -compact 2565.It Li hit 2566Count ITLB hits. 2567.It Li miss 2568Count ITLB misses. 2569.It Li hit-uc 2570Count uncacheable ITLB hits. 2571.El 2572If no 2573.Ar qualifier 2574is specified the default is to count all the three kinds of ITLB 2575translations. 2576.It Li p4-load-port-replay Op Li ,mask= Ns Ar qualifier 2577.Pq "TS event" 2578Count replayed events at the load port. 2579Qualifier 2580.Ar qualifier 2581can take on one value: 2582.Bl -tag -width indent -compact 2583.It Li split-ld 2584Count split loads. 2585.El 2586The default value for 2587.Ar qualifier 2588is 2589.Li split-ld . 2590.It Li p4-mispred-branch-retired Op Li ,mask= Ns Ar flags 2591.Pq "TS event" 2592Count mispredicted IA-32 branch instructions. 2593Qualifier 2594.Ar flags 2595can take the following value (which is also the default): 2596.Bl -tag -width indent -compact 2597.It Li nbogus 2598Count non-bogus retired branch instructions. 2599.El 2600.It Li p4-machine-clear Op Li ,mask= Ns Ar flags 2601.Pq "TS event" 2602Count the number of pipeline clears seen by the processor.
2603Qualifier 2604.Ar flags 2605is a list of the following strings separated by 2606.Li + 2607characters: 2608.Bl -tag -width indent -compact 2609.It Li clear 2610Count for a portion of the many cycles when the machine is being 2611cleared for any reason. 2612.It Li moclear 2613Count machine clears due to memory ordering issues. 2614.It Li smclear 2615Count machine clears due to self-modifying code. 2616.El 2617Use qualifier 2618.Li edge 2619to get a count of occurrences of machine clears. 2620The default qualifier is 2621.Li clear . 2622.It Li p4-memory-cancel Op Li ,mask= Ns Ar event-list 2623.Pq "TS event" 2624Count the cancelling of various kinds of requests in the data cache 2625address control unit of the CPU. 2626The qualifier 2627.Ar event-list 2628is a list of the following strings separated by 2629.Li "+" 2630characters: 2631.Bl -tag -width indent -compact 2632.It Li st-rb-full 2633Requests cancelled because no store request buffer was available. 2634.It Li 64k-conf 2635Requests that conflict due to 64K aliasing. 2636.El 2637If 2638.Ar event-list 2639is not specified, then the default is to count both kinds of events. 2640.It Li p4-memory-complete Op Li ,mask= Ns Ar event-list 2641.Pq "TS event" 2642Count the completion of load split, store split, uncacheable split and 2643uncacheable load operations selected by qualifier 2644.Ar event-list . 2645The qualifier 2646.Ar event-list 2647is a 2648.Li + 2649separated list of the following flags: 2650.Bl -tag -width indent -compact 2651.It Li lsc 2652Count load splits completed, excluding loads from uncacheable or 2653write-combining areas. 2654.It Li ssc 2655Count any split stores completed. 2656.El 2657The default is to count both kinds of operations. 2658.It Li p4-mob-load-replay Op Li ,mask= Ns Ar qualifier 2659.Pq "TS event" 2660Count load replays triggered by the memory order buffer.
2661Qualifier 2662.Ar qualifier 2663can be a 2664.Li + 2665separated list of the following flags: 2666.Bl -tag -width indent -compact 2667.It Li no-sta 2668Count replays because of unknown store addresses. 2669.It Li no-std 2670Count replays because of unknown store data. 2671.It Li partial-data 2672Count replays because of partially overlapped data accesses between 2673load and store operations. 2674.It Li unalgn-addr 2675Count replays because of mismatches in the lower 4 bits of load and 2676store operations. 2677.El 2678The default qualifier is 2679.Ar no-sta+no-std+partial-data+unalgn-addr . 2680.It Li p4-packed-dp-uop Op Li ,mask= Ns Ar flags 2681.Pq "TI event" 2682Count packed double-precision uops. 2683Qualifier 2684.Ar flags 2685can take the following value (which is also the default): 2686.Bl -tag -width indent -compact 2687.It Li all 2688Count all uops operating on packed double-precision operands. 2689.El 2690.It Li p4-packed-sp-uop Op Li ,mask= Ns Ar flags 2691.Pq "TI event" 2692Count packed single-precision uops. 2693Qualifier 2694.Ar flags 2695can take the following value (which is also the default): 2696.Bl -tag -width indent -compact 2697.It Li all 2698Count all uops operating on packed single-precision operands. 2699.El 2700.It Li p4-page-walk-type Op Li ,mask= Ns Ar qualifier 2701.Pq "TI event" 2702Count page walks performed by the page miss handler. 2703Qualifier 2704.Ar qualifier 2705can be a 2706.Li + 2707separated list of the following keywords: 2708.Bl -tag -width indent -compact 2709.It Li dtmiss 2710Count page walks for data TLB misses. 2711.It Li itmiss 2712Count page walks for instruction TLB misses. 2713.El 2714The default value for 2715.Ar qualifier 2716is 2717.Li dtmiss+itmiss . 2718.It Li p4-replay-event Op Li ,mask= Ns Ar flags 2719.Pq "TS event" 2720Count the retirement of tagged uops selected through the replay 2721tagging mechanism. 
2722Qualifier 2723.Ar flags 2724contains a 2725.Li + 2726separated set of the following strings: 2727.Bl -tag -width indent -compact 2728.It Li nbogus 2729The marked uops are not bogus. 2730.It Li bogus 2731The marked uops are bogus. 2732.El 2733This event requires additional (upstream) events to be allocated to 2734perform the desired uop tagging. 2735The default qualifier counts both kinds of uops. 2736This event can be used for precise event based sampling. 2737.It Li p4-resource-stall Op Li ,mask= Ns Ar flags 2738.Pq "TS event" 2739Count the occurrence or latency of stalls in the allocator. 2740Qualifier 2741.Ar flags 2742can take the following value (which is also the default): 2743.Bl -tag -width indent -compact 2744.It Li sbfull 2745A stall due to the lack of store buffers. 2746.El 2747.It Li p4-response 2748.Pq "TI event" 2749Count different types of responses. 2750Further documentation on this event is not available. 2751.It Li p4-retired-branch-type Op Li ,mask= Ns Ar flags 2752.Pq "TS event" 2753Count branches retired. 2754Qualifier 2755.Ar flags 2756contains a 2757.Li + 2758separated list of strings: 2759.Bl -tag -width indent -compact 2760.It Li conditional 2761Count conditional jumps. 2762.It Li call 2763Count direct and indirect call branches. 2764.It Li return 2765Count return branches. 2766.It Li indirect 2767Count returns, indirect calls or indirect jumps. 2768.El 2769The default qualifier counts all the above branch types. 2770.It Li p4-retired-mispred-branch-type Op Li ,mask= Ns Ar flags 2771.Pq "TS event" 2772Count mispredicted branches retired. 2773Qualifier 2774.Ar flags 2775contains a 2776.Li + 2777separated list of strings: 2778.Bl -tag -width indent -compact 2779.It Li conditional 2780Count conditional jumps. 2781.It Li call 2782Count indirect call branches. 2783.It Li return 2784Count return branches. 2785.It Li indirect 2786Count returns, indirect calls or indirect jumps. 
2787.El 2788The default qualifier counts all the above branch types. 2789.It Li p4-scalar-dp-uop Op Li ,mask= Ns Ar flags 2790.Pq "TI event" 2791Count the number of scalar double-precision uops. 2792Qualifier 2793.Ar flags 2794can take the following value (which is also the default): 2795.Bl -tag -width indent -compact 2796.It Li all 2797Count the number of scalar double-precision uops. 2798.El 2799.It Li p4-scalar-sp-uop Op Li ,mask= Ns Ar flags 2800.Pq "TI event" 2801Count the number of scalar single-precision uops. 2802Qualifier 2803.Ar flags 2804can take the following value (which is also the default): 2805.Bl -tag -width indent -compact 2806.It Li all 2807Count all uops operating on scalar single-precision operands. 2808.El 2809.It Li p4-snoop 2810.Pq "TI event" 2811Count snoop traffic. 2812Further documentation on this event is not available. 2813.It Li p4-sse-input-assist Op Li ,mask= Ns Ar flags 2814.Pq "TI event" 2815Count the number of times an assist is required to handle problems 2816with the operands for SSE and SSE2 operations. 2817Qualifier 2818.Ar flags 2819can take the following value (which is also the default): 2820.Bl -tag -width indent -compact 2821.It Li all 2822Count assists for all SSE and SSE2 uops. 2823.El 2824.It Li p4-store-port-replay Op Li ,mask= Ns Ar qualifier 2825.Pq "TS event" 2826Count events replayed at the store port. 2827Qualifier 2828.Ar qualifier 2829can take on one value: 2830.Bl -tag -width indent -compact 2831.It Li split-st 2832Count split stores. 2833.El 2834The default value for 2835.Ar qualifier 2836is 2837.Li split-st . 2838.It Li p4-tc-deliver-mode Op Li ,mask= Ns Ar qualifier 2839.Pq "TI event" 2840Count the duration in cycles of operating modes of the trace cache and 2841decode engine. 
2842The desired operating mode is selected by 2843.Ar qualifier , 2844which is a list of the following strings separated by 2845.Li "+" 2846characters: 2847.Bl -tag -width indent -compact 2848.It Li DD 2849Both logical processors are in deliver mode. 2850.It Li DB 2851Logical processor 0 is in deliver mode while logical processor 1 is in 2852build mode. 2853.It Li DI 2854Logical processor 0 is in deliver mode while logical processor 1 is 2855halted, or in machine clear, or transitioning to a long microcode 2856flow. 2857.It Li BD 2858Logical processor 0 is in build mode while logical processor 1 is in 2859deliver mode. 2860.It Li BB 2861Both logical processors are in build mode. 2862.It Li BI 2863Logical processor 0 is in build mode while logical processor 1 is 2864halted, or in machine clear or transitioning to a long microcode 2865flow. 2866.It Li ID 2867Logical processor 0 is halted, or in machine clear or transitioning to 2868a long microcode flow while logical processor 1 is in deliver mode. 2869.It Li IB 2870Logical processor 0 is halted, or in machine clear or transitioning to 2871a long microcode flow while logical processor 1 is in build mode. 2872.El 2873If there is only one logical processor in the processor package then 2874the qualifier for logical processor 1 is ignored. 2875If no qualifier is specified, the default qualifier is 2876.Li DD+DB+DI+BD+BB+BI+ID+IB . 2877.It Li p4-tc-ms-xfer Op Li ,mask= Ns Ar flags 2878.Pq "TI event" 2879Count the number of times uop delivery changed from the trace cache to 2880MS ROM. 2881Qualifier 2882.Ar flags 2883can take the following value (which is also the default): 2884.Bl -tag -width indent -compact 2885.It Li cisc 2886Count TC to MS transfers. 2887.El 2888.It Li p4-uop-queue-writes Op Li ,mask= Ns Ar flags 2889.Pq "TS event" 2890Count the number of valid uops written to the uop queue. 
2891Qualifier 2892.Ar flags 2893is a list of the following strings, separated by 2894.Li + 2895characters: 2896.Bl -tag -width indent -compact 2897.It Li from-tc-build 2898Count uops being written from the trace cache in build mode. 2899.It Li from-tc-deliver 2900Count uops being written from the trace cache in deliver mode. 2901.It Li from-rom 2902Count uops being written from microcode ROM. 2903.El 2904The default qualifier counts all the above kinds of uops. 2905.It Li p4-uop-type Op Li ,mask= Ns Ar flags 2906.Pq "TS event" 2907This event is used in conjunction with the front-end at-retirement 2908mechanism to tag load and store uops. 2909Qualifier 2910.Ar flags 2911comprises the following strings separated by 2912.Li + 2913characters: 2914.Bl -tag -width indent -compact 2915.It Li tagloads 2916Mark uops that are load operations. 2917.It Li tagstores 2918Mark uops that are store operations. 2919.El 2920The default qualifier counts both kinds of uops. 2921.It Li p4-uops-retired Op Li ,mask= Ns Ar flags 2922.Pq "TS event" 2923Count uops retired during a clock cycle. 2924Qualifier 2925.Ar flags 2926comprises the following strings separated by 2927.Li + 2928characters: 2929.Bl -tag -width indent -compact 2930.It Li nbogus 2931Count marked uops that are not bogus. 2932.It Li bogus 2933Count marked uops that are bogus. 2934.El 2935The default qualifier counts both kinds of uops. 2936.It Li p4-wc-buffer Op Li ,mask= Ns Ar flags 2937.Pq "TI event" 2938Count write-combining buffer operations. 2939Qualifier 2940.Ar flags 2941contains the following strings separated by 2942.Li + 2943characters: 2944.Bl -tag -width indent -compact 2945.It Li wcb-evicts 2946WC buffer evictions due to any cause. 2947.It Li wcb-full-evict 2948WC buffer evictions due to no WC buffer being available. 2949.El 2950The default qualifier counts both kinds of evictions.
2951.It Li p4-x87-assist Op Li ,mask= Ns Ar flags 2952.Pq "TS event" 2953Count the retirement of x87 instructions that required special 2954handling. 2955Qualifier 2956.Ar flags 2957contains the following strings separated by 2958.Li + 2959characters: 2960.Bl -tag -width indent -compact 2961.It Li fpsu 2962Count instructions that saw an FP stack underflow. 2963.It Li fpso 2964Count instructions that saw an FP stack overflow. 2965.It Li poao 2966Count instructions that saw an x87 output overflow. 2967.It Li poau 2968Count instructions that saw an x87 output underflow. 2969.It Li prea 2970Count instructions that needed an x87 input assist. 2971.El 2972The default qualifier counts all the above types of instruction 2973retirements. 2974.It Li p4-x87-fp-uop Op Li ,mask= Ns Ar flags 2975.Pq "TI event" 2976Count x87 floating-point uops. 2977Qualifier 2978.Ar flags 2979can take the following value (which is also the default): 2980.Bl -tag -width indent -compact 2981.It Li all 2982Count all x87 floating-point uops. 2983.El 2984If an instruction contains more than one x87 floating-point uop, then 2985all x87 floating-point uops will be counted. 2986This event does not count x87 floating-point data movement operations. 2987.It Li p4-x87-simd-moves-uop Op Li ,mask= Ns Ar flags 2988.Pq "TI event" 2989Count each x87 FPU, MMX, SSE, or SSE2 uop that loads data or stores 2990data or performs register-to-register moves. 2991This event does not count integer move uops. 2992Qualifier 2993.Ar flags 2994may contain the following keywords separated by 2995.Li + 2996characters: 2997.Bl -tag -width indent -compact 2998.It Li allp0 2999Count all x87 and SIMD store and move uops. 3000.It Li allp2 3001Count all x87 and SIMD load uops. 3002.El 3003The default is to count all uops. 3004.Pq Errata 3005This event may be affected by processor errata N43. 3006.El 3007.Ss "Cascading P4 PMCs" 3008To be filled in. 3009.Ss "Precise Event Based Sampling" 3010To be filled in.
3011.Sh IMPLEMENTATION NOTES 3012On the i386 architecture, 3013.Fx 3014has historically allowed the use of the RDTSC instruction from 3015user-mode (i.e., at a processor CPL of 3) by any process. 3016This behaviour is preserved by 3017.Xr hwpmc 4 . 3018.Sh RETURN VALUES 3019The 3020.Fn pmc_name_of_capability , 3021.Fn pmc_name_of_class , 3022.Fn pmc_name_of_cputype , 3023.Fn pmc_name_of_disposition , 3024.Fn pmc_name_of_event , 3025.Fn pmc_name_of_mode , 3026and 3027.Fn pmc_name_of_state 3028functions return a pointer to the human readable form of their argument. 3029These pointers may point to statically allocated storage and must 3030not be passed to 3031.Fn free . 3032In case of an error, these functions return 3033.Li NULL 3034and set the global variable 3035.Va errno . 3036.Pp 3037The functions 3038.Fn pmc_ncpu 3039and 3040.Fn pmc_npmc 3041return the number of CPUs and number of PMCs configured respectively; 3042in case of an error they return the value 3043.Li -1 3044and set the global variable 3045.Va errno . 3046.Pp 3047All other functions return the value 3048.Li 0 3049if successful; otherwise the value 3050.Li -1 3051is returned and the global variable 3052.Va errno 3053is set to indicate the error. 3054.Sh ERRORS 3055A call to 3056.Fn pmc_init 3057may fail with the following errors in addition to those returned by 3058.Xr modfind 2 , 3059.Xr modstat 2 3060and 3061.Xr hwpmc 4 : 3062.Bl -tag -width Er 3063.It Bq Er ENXIO 3064An unknown CPU type was encountered during initialization. 3065.It Bq Er EPROGMISMATCH 3066The version number of the 3067.Xr hwpmc 4 3068kernel module did not match that compiled into the 3069.Xr pmc 3 3070library. 
3071.El 3072.Pp 3073A call to 3074.Fn pmc_capabilities , 3075.Fn pmc_name_of_capability , 3076.Fn pmc_name_of_disposition , 3077.Fn pmc_name_of_state , 3078.Fn pmc_name_of_event , 3079.Fn pmc_name_of_mode , 3080.Fn pmc_name_of_class , 3081and 3082.Fn pmc_width 3083may fail with the following error: 3084.Bl -tag -width Er 3085.It Bq Er EINVAL 3086An invalid argument was passed to the function. 3087.El 3088.Pp 3089A call to 3090.Fn pmc_cpuinfo 3091or 3092.Fn pmc_ncpu 3093may fail with the following error: 3094.Bl -tag -width Er 3095.It Bq Er ENXIO 3096The 3097.Xr pmc 3 3098library has not been initialized. 3099.El 3100.Pp 3101A call to 3102.Fn pmc_npmc 3103may fail with the following errors: 3104.Bl -tag -width Er 3105.It Bq Er EINVAL 3106The argument passed in was out of range. 3107.It Bq Er ENXIO 3108The 3109.Xr pmc 3 3110library has not been initialized. 3111.El 3112.Pp 3113A call to 3114.Fn pmc_pmcinfo 3115may fail with the following errors, in addition to those returned by 3116.Xr hwpmc 4 : 3117.Bl -tag -width Er 3118.It Bq Er ENXIO 3119The 3120.Xr pmc 3 3121library is not yet initialized. 3122.El 3123.Pp 3124A call to 3125.Fn pmc_allocate 3126may fail with the following errors, in addition to those returned by 3127.Xr hwpmc 4 : 3128.Bl -tag -width Er 3129.It Bq Er EINVAL 3130The 3131.Fa mode 3132argument passed in had an illegal value, or the event specification 3133.Fa eventspecifier 3134was unrecognized for this CPU type. 3135.El 3136.Pp 3137Calls to 3138.Fn pmc_attach , 3139.Fn pmc_configure_logfile , 3140.Fn pmc_detach , 3141.Fn pmc_disable , 3142.Fn pmc_enable , 3143.Fn pmc_get_driver_stats , 3144.Fn pmc_get_msr , 3145.Fn pmc_read , 3146.Fn pmc_release , 3147.Fn pmc_rw , 3148.Fn pmc_set , 3149.Fn pmc_start , 3150.Fn pmc_stop , 3151.Fn pmc_write , 3152and 3153.Fn pmc_writelog 3154may fail with the errors described in 3155.Xr hwpmc 4 .
3156.Pp 3157If a log file was configured using 3158.Fn pmc_configure_logfile 3159and the 3160.Xr hwpmc 4 3161driver encountered an error while logging data to it, then 3162logging will be stopped and a subsequent call to 3163.Fn pmc_flush_logfile 3164will fail with the error code seen by the 3165.Xr hwpmc 4 3166driver. 3167.Sh SEE ALSO 3168.Xr modfind 2 , 3169.Xr modstat 2 , 3170.Xr calloc 3 , 3171.Xr pmclog 3 , 3172.Xr hwpmc 4 , 3173.Xr pmccontrol 8 , 3174.Xr pmcreport 8 , 3175.Xr pmcstat 8 3176.Sh BUGS 3177The information returned by 3178.Fn pmc_cpuinfo , 3179.Fn pmc_ncpu 3180and possibly 3181.Fn pmc_npmc 3182should really be available all the time, through a better designed 3183interface and not just when 3184.Xr hwpmc 4 3185is present in the kernel. 3186