1.\" Copyright (c) 2003-2005 Joseph Koshy. All rights reserved. 2.\" 3.\" Redistribution and use in source and binary forms, with or without 4.\" modification, are permitted provided that the following conditions 5.\" are met: 6.\" 1. Redistributions of source code must retain the above copyright 7.\" notice, this list of conditions and the following disclaimer. 8.\" 2. Redistributions in binary form must reproduce the above copyright 9.\" notice, this list of conditions and the following disclaimer in the 10.\" documentation and/or other materials provided with the distribution. 11.\" 12.\" This software is provided by Joseph Koshy ``as is'' and 13.\" any express or implied warranties, including, but not limited to, the 14.\" implied warranties of merchantability and fitness for a particular purpose 15.\" are disclaimed. in no event shall Joseph Koshy be liable 16.\" for any direct, indirect, incidental, special, exemplary, or consequential 17.\" damages (including, but not limited to, procurement of substitute goods 18.\" or services; loss of use, data, or profits; or business interruption) 19.\" however caused and on any theory of liability, whether in contract, strict 20.\" liability, or tort (including negligence or otherwise) arising in any way 21.\" out of the use of this software, even if advised of the possibility of 22.\" such damage. 
23.\" 24.\" $FreeBSD$ 25.\" 26.Dd June 24, 2005 27.Os 28.Dt PMC 3 29.Sh NAME 30.Nm pmc_allocate , 31.Nm pmc_attach , 32.Nm pmc_capabilities , 33.Nm pmc_configure_logfile , 34.Nm pmc_cpuinfo , 35.Nm pmc_detach , 36.Nm pmc_disable , 37.Nm pmc_enable , 38.Nm pmc_event_names_of_class , 39.Nm pmc_flush_logfile , 40.Nm pmc_get_driver_stats , 41.Nm pmc_get_msr , 42.Nm pmc_init , 43.Nm pmc_name_of_capability , 44.Nm pmc_name_of_class , 45.Nm pmc_name_of_cputype , 46.Nm pmc_name_of_event , 47.Nm pmc_name_of_mode , 48.Nm pmc_name_of_state , 49.Nm pmc_ncpu , 50.Nm pmc_npmc , 51.Nm pmc_pmcinfo , 52.Nm pmc_read , 53.Nm pmc_release , 54.Nm pmc_rw , 55.Nm pmc_set , 56.Nm pmc_start , 57.Nm pmc_stop , 58.Nm pmc_width , 59.Nm pmc_write , 60.Nm pmc_writelog 61.Nd programming API for using hardware performance monitoring counters 62.Sh LIBRARY 63.Lb libpmc 64.Sh SYNOPSIS 65.In pmc.h 66.Ft int 67.Fo pmc_allocate 68.Fa "const char *eventspecifier" 69.Fa "enum pmc_mode mode" 70.Fa "uint32_t flags" 71.Fa "uint32_t cpu" 72.Fa "pmc_id_t *pmcid" 73.Fc 74.Ft int 75.Fn pmc_attach "pmc_id_t pmcid" "pid_t pid" 76.Ft int 77.Fn pmc_capabilities "pmc_id_t pmc" "uint32_t *caps" 78.Ft int 79.Fn pmc_configure_logfile "int fd" 80.Ft int 81.Fn pmc_cpuinfo "const struct pmc_cpuinfo **cpu_info" 82.Ft int 83.Fn pmc_detach "pmc_id_t pmcid" "pid_t pid" 84.Ft int 85.Fn pmc_disable "uint32_t cpu" "int pmc" 86.Ft int 87.Fn pmc_enable "uint32_t cpu" "int pmc" 88.Ft int 89.Fo pmc_event_names_of_class 90.Fa "enum pmc_class cl" 91.Fa "const char ***eventnames" 92.Fa "int *nevents" 93.Fc 94.Ft int 95.Fn pmc_flush_logfile void 96.Ft int 97.Fn pmc_get_driver_stats "struct pmc_driverstats *gms" 98.Ft int 99.Fn pmc_get_msr "pmc_id_t pmc" "uint32_t *msr" 100.Ft int 101.Fn pmc_init void 102.Ft "const char *" 103.Fn pmc_name_of_capability "enum pmc_caps pc" 104.Ft "const char *" 105.Fn pmc_name_of_class "enum pmc_class pc" 106.Ft "const char *" 107.Fn pmc_name_of_cputype "enum pmc_cputype ct" 108.Ft "const char *" 109.Fn 
pmc_name_of_disposition "enum pmc_disp pd" 110.Ft "const char *" 111.Fn pmc_name_of_event "enum pmc_event pe" 112.Ft "const char *" 113.Fn pmc_name_of_mode "enum pmc_mode pm" 114.Ft "const char *" 115.Fn pmc_name_of_state "enum pmc_state ps" 116.Ft int 117.Fn pmc_ncpu void 118.Ft int 119.Fn pmc_npmc "uint32_t cpu" 120.Ft int 121.Fn pmc_pmcinfo "uint32_t cpu" "struct pmc_pmcinfo **pmc_info" 122.Ft int 123.Fn pmc_read "pmc_id_t pmc" "pmc_value_t *value" 124.Ft int 125.Fn pmc_release "pmc_id_t pmc" 126.Ft int 127.Fn pmc_rw "pmc_id_t pmc" "pmc_value_t newvalue" "pmc_value_t *oldvaluep" 128.Ft int 129.Fn pmc_set "pmc_id_t pmc" "pmc_value_t value" 130.Ft int 131.Fn pmc_start "pmc_id_t pmc" 132.Ft int 133.Fn pmc_stop "pmc_id_t pmc" 134.Ft int 135.Fn pmc_write "pmc_id_t pmc" "pmc_value_t value" 136.Ft int 137.Fn pmc_writelog "uint32_t userdata" 138.Ft int 139.Fn pmc_width "pmc_id_t pmc" "uint32_t *width" 140.Sh DESCRIPTION 141These functions implement a high-level library for using the 142system's hardware performance counters. 143.Pp 144PMCs are allocated using 145.Fn pmc_allocate , 146released using 147.Fn pmc_release 148and read using 149.Fn pmc_read . 150Allocated PMCs may be started or stopped at any time using 151.Fn pmc_start 152and 153.Fn pmc_stop 154respectively. 155An allocated PMC may be of 156.Dq global 157scope, meaning that the PMC measures system-wide events, or 158.Dq process-private 159scope, meaning that the PMC only counts hardware events when 160the allocating process (or, optionally, its children) 161are active. 162.Pp 163PMCs may further be in 164.Dq "counting mode" , 165or in 166.Dq "sampling mode" . 167Sampling mode PMCs deliver an interrupt to the CPU after 168a configured number of hardware events have been seen. 169A process-private sampling mode PMC will cause its owner 170process to get periodic 171.Dv SIGPROF 172interrupts, while a global sampling mode PMC is used to 173do system-wide statistical sampling (see 174.Xr hwpmc 4 ) . 
175The sampling rate desired of a sampling-mode PMC is set using 176.Fn pmc_set . 177Counting mode PMCs do not interrupt the CPU; their values 178can be read using 179.Fn pmc_read . 180.Pp 181System-wide statistical sampling is configured by allocating 182at least one sampling mode PMC with 183global scope, and by configuring a log file using 184.Fn pmc_configure_logfile . 185The 186.Xr hwpmc 4 187driver manages system-wide statistical sampling; for more 188information please see 189.Xr hwpmc 4 . 190.Ss Application Programming Interface 191The function 192.Fn pmc_init 193initializes the 194.Nm pmc 195library. 196This function must be called first, before any of the other 197functions in the library. 198.Pp 199The function 200.Fn pmc_allocate 201allocates a counter that counts the events named by 202.Fa eventspecifier , 203and writes the allocated counter ID to 204.Fa *pmcid . 205Argument 206.Fa eventspecifier 207comprises a PMC event name followed by an optional comma separated 208list of keywords and qualifiers. 209The allowed syntax for 210.Fa eventspecifier 211is processor architecture specific and is listed in section 212.Sx "EVENT SPECIFIERS" 213below. 214The desired PMC mode is specified by 215.Fa mode , 216and any mode specific modifiers are specified using 217.Fa flags . 218The 219.Fa cpu 220argument is the value 221.Dv PMC_CPU_ANY , 222or names the CPU the allocation is to be on. 223Requesting a specific CPU only makes sense for global PMCs; 224process-private PMC allocations should always specify 225.Dv PMC_CPU_ANY . 226.Pp 227By default, a PMC configured in process-virtual counting mode is set up 228to profile its owner process. 229The function 230.Fn pmc_attach 231may be used to attach the PMC to a different process. 232It 233needs to be called before the counter is first started 234with 235.Fn pmc_start . 236The function 237.Fn pmc_detach 238may be used to detach a PMC from a process it was attached to 239using a prior call to 240.Fn pmc_attach . 
241.Pp 242The function 243.Fn pmc_release 244releases a PMC previously allocated with 245.Fn pmc_allocate . 246This function call implicitly detaches the PMC from all its target 247processes. 248.Pp 249An allocated PMC may be started and stopped using 250.Fn pmc_start 251and 252.Fn pmc_stop 253respectively. 254.Pp 255The current value of a PMC may be read with 256.Fn pmc_read 257and written using 258.Fn pmc_write , 259provided the underlying hardware supports these operations on 260the allocated PMC. 261The read and write operations may be combined using 262.Fn pmc_rw . 263.Pp 264The function 265.Fn pmc_capabilities 266sets argument 267.Fa caps 268to a bitmask of capabilities supported by the PMC denoted by 269argument 270.Fa pmc . 271The function 272.Fn pmc_width 273sets argument 274.Fa width 275to the width of the PMC denoted by argument 276.Fa pmc . 277.Pp 278The 279.Fn pmc_configure_logfile 280function causes the 281.Xr hwpmc 4 282driver to log performance data to the file corresponding 283to the process' file handle 284.Fa fd . 285If argument 286.Fa fd 287is \-1, then any previously configured logging is reset 288and all data queued to be written are discarded. 289.Pp 290The 291.Fn pmc_flush_logfile 292function will send all data queued inside the 293.Xr hwpmc 4 294driver to the configured log file before returning. 295The 296.Fn pmc_writelog 297function will append a log entry containing the argument 298.Fa userdata 299to the log file. 300.Pp 301The function 302.Fn pmc_set 303configures a sampling PMC 304.Fa pmc 305to interrupt every 306.Fa value 307events. 308For counting PMCs, 309.Fn pmc_set 310sets the initial value of the PMC to 311.Fa value . 312.Pp 313The function 314.Fn pmc_get_driver_stats 315copies a snapshot of the usage statistics maintained by 316.Xr hwpmc 4 317into the memory area pointed to by argument 318.Fa gms . 
319.Ss Signal Handling Requirements 320Applications using PMCs are required to handle the following signals: 321.Bl -tag -width indent 322.It Dv SIGBUS 323When the 324.Xr hwpmc 4 325module is unloaded using 326.Xr kldunload 8 , 327processes that have PMCs allocated to them will be sent a 328.Dv SIGBUS 329signal. 330.It Dv SIGIO 331The 332.Xr hwpmc 4 333driver will send a PMC owning process a 334.Dv SIGIO 335signal if: 336.Bl -bullet 337.It 338If any process-mode PMC allocated by it loses all its 339target processes. 340.It 341If the driver encounters an error when writing log data to a 342configured log file. 343This error may be retrieved by a subsequent call to 344.Fn pmc_flush_logfile . 345.El 346.El 347.Ss Convenience Functions 348The function 349.Fn pmc_ncpu 350returns the number of CPUs present in the system. 351.Pp 352The function 353.Fn pmc_npmc 354returns the number of PMCs supported on CPU 355.Fa cpu . 356The function 357.Fn pmc_cpuinfo 358sets argument 359.Fa cpu_info 360to point to a structure with information about the system's CPUs. 361Function 362.Fn pmc_pmcinfo 363returns information about the current state of CPU 364.Fa cpu Ns 's 365PMCs. 366This function sets argument 367.Fa *pmc_info 368to point to a memory area allocated with 369.Xr calloc 3 . 370The caller is expected to 371.Fn free 372the area when done. 373.Pp 374The functions 375.Fn pmc_name_of_capability , 376.Fn pmc_name_of_class , 377.Fn pmc_name_of_cputype , 378.Fn pmc_name_of_disposition , 379.Fn pmc_name_of_event , 380.Fn pmc_name_of_mode 381and 382.Fn pmc_name_of_state 383are useful for code wanting to print error messages. 384They return 385.Vt "const char *" 386pointers to human-readable representations of their arguments. 387These return values should not be freed using 388.Xr free 3 . 389.Pp 390The function 391.Fn pmc_event_names_of_class 392returns a list of event names supported by a given PMC class 393.Fa cl . 
394On successful return, an array of 395.Vt "const char *" 396pointers to the names of valid events supported by class 397.Fa cl 398is allocated by the library using 399.Xr malloc 3 , 400and a pointer to this array is returned in the location pointed to by 401.Fa eventnames . 402The number of pointers allocated is returned in the location pointed 403to by 404.Fa nevents . 405.Ss Administration 406Individual PMCs may be enabled or disabled on a given CPU using 407.Fn pmc_enable 408and 409.Fn pmc_disable 410respectively. 411For these functions, 412.Fa cpu 413is the CPU number, and 414.Fa pmc 415is the index of the PMC to be operated on. 416Only the super-user is allowed to enable and disable PMCs. 417.Ss x86 Architecture Specific API 418The 419.Fn pmc_get_msr 420function returns the processor model specific register number 421associated with 422.Fa pmc . 423Applications may use the x86 424.Ic RDPMC 425instruction to directly read the contents of the PMC. 426.Sh EVENT SPECIFIERS 427Event specifiers are strings comprising an event name, followed by 428optional parameters modifying the semantics of the hardware event 429being probed. 430Event names are PMC architecture dependent, but the 431.Xr hwpmc 4 432library defines machine independent aliases for commonly used 433events. 434.Ss Event Name Aliases 435Event name aliases are CPU architecture independent names for commonly 436used events. 437The following aliases are known to this version of the 438.Nm pmc 439library: 440.Bl -tag -width indent 441.It Li branches 442Measure the number of branches retired. 443.It Li branch-mispredicts 444Measure the number of retired branches that were mispredicted. 445.It Li cycles 446Measure processor cycles. 447This event is implemented using the processor's Time Stamp Counter 448register. 449.It Li dc-misses 450Measure the number of data cache misses. 451.It Li ic-misses 452Measure the number of instruction cache misses. 
453.It Li instructions 454Measure the number of instructions retired. 455.It Li interrupts 456Measure the number of interrupts seen. 457.El 458.Ss Time Stamp Counter (TSC) 459The timestamp counter is a monotonically non-decreasing counter that 460counts processor cycles. 461.Pp 462In the i386 architecture, this counter may 463be selected by requesting an event with event specifier 464.Dq Li tsc . 465The 466.Dq Li tsc 467event does not support any further qualifiers. 468It can only be allocated in system-wide counting mode, 469and is a read-only counter. 470Multiple processes are allowed to allocate the TSC. 471Once allocated, it may be read using the 472.Fn pmc_read 473function, or by using the RDTSC instruction. 474.Ss AMD (K7) PMCs 475These PMCs are present in the 476.Tn "AMD Athlon" 477series of CPUs and are documented in: 478.Rs 479.%B "AMD Athlon Processor x86 Code Optimization Guide" 480.%N "Publication No. 22007" 481.%D "February 2002" 482.%Q "Advanced Micro Devices, Inc." 483.Re 484.Pp 485Event specifiers for AMD K7 PMCs can have the following optional 486qualifiers: 487.Bl -tag -width indent 488.It Li count= Ns Ar value 489Configure the counter to increment only if the number of configured 490events measured in a cycle is greater than or equal to 491.Ar value . 492.It Li edge 493Configure the counter to only count negated-to-asserted transitions 494of the conditions expressed by the other qualifiers. 495In other words, the counter will increment only once whenever a given 496condition becomes true, irrespective of the number of clocks during 497which the condition remains true. 498.It Li inv 499Invert the sense of comparison when the 500.Dq Li count 501qualifier is present, making the counter increment when the 502number of events per cycle is less than the value specified by 503the 504.Dq Li count 505qualifier. 506.It Li os 507Configure the PMC to count events happening at privilege level 0. 
508.It Li unitmask= Ns Ar mask 509This qualifier is used to further qualify a select few events, 510.Dq Li k7-dc-refills-from-l2 , 511.Dq Li k7-dc-refills-from-system 512and 513.Dq Li k7-dc-writebacks . 514Here 515.Ar mask 516is a string of the following characters optionally separated by 517.Ql + 518characters: 519.Pp 520.Bl -tag -width indent -compact 521.It Li m 522Count operations for lines in the 523.Dq Modified 524state. 525.It Li o 526Count operations for lines in the 527.Dq Owner 528state. 529.It Li e 530Count operations for lines in the 531.Dq Exclusive 532state. 533.It Li s 534Count operations for lines in the 535.Dq Shared 536state. 537.It Li i 538Count operations for lines in the 539.Dq Invalid 540state. 541.El 542.Pp 543If no 544.Dq Li unitmask 545qualifier is specified, the default is to count events for cache 546lines in any of the above states. 547.It Li usr 548Configure the PMC to count events occurring at privilege levels 1, 2 549or 3. 550.El 551.Pp 552If neither of the 553.Dq Li os 554or 555.Dq Li usr 556qualifiers were specified, the default is to enable both. 557.Pp 558The event specifiers supported on AMD K7 PMCs are: 559.Bl -tag -width indent 560.It Li k7-dc-accesses 561Count data cache accesses. 562.It Li k7-dc-misses 563Count data cache misses. 564.It Li k7-dc-refills-from-l2 Op Li ,unitmask= Ns Ar mask 565Count data cache refills from L2 cache. 566This event may be further qualified using the 567.Dq Li unitmask 568qualifier. 569.It Li k7-dc-refills-from-system Op Li ,unitmask= Ns Ar mask 570Count data cache refills from system memory. 571This event may be further qualified using the 572.Dq Li unitmask 573qualifier. 574.It Li k7-dc-writebacks Op Li ,unitmask= Ns Ar mask 575Count data cache writebacks. 576This event may be further qualified using the 577.Dq Li unitmask 578qualifier. 579.It Li k7-l1-dtlb-miss-and-l2-dtlb-hits 580Count L1 DTLB misses and L2 DTLB hits. 581.It Li k7-l1-and-l2-dtlb-misses 582Count L1 and L2 DTLB misses. 
583.It Li k7-misaligned-references 584Count misaligned data references. 585.It Li k7-ic-fetches 586Count instruction cache fetches. 587.It Li k7-ic-misses 588Count instruction cache misses. 589.It Li k7-l1-itlb-misses 590Count L1 ITLB misses that are L2 ITLB hits. 591.It Li k7-l1-l2-itlb-misses 592Count L1 (and L2) ITLB misses. 593.It Li k7-retired-instructions 594Count all retired instructions. 595.It Li k7-retired-ops 596Count retired ops. 597.It Li k7-retired-branches 598Count all retired branches (conditional, unconditional, exceptions 599and interrupts). 600.It Li k7-retired-branches-mispredicted 601Count all mispredicted retired branches. 602.It Li k7-retired-taken-branches 603Count retired taken branches. 604.It Li k7-retired-taken-branches-mispredicted 605Count mispredicted taken branches that were retired. 606.It Li k7-retired-far-control-transfers 607Count retired far control transfers. 608.It Li k7-retired-resync-branches 609Count retired resync branches (non control transfer branches). 610.It Li k7-interrupts-masked-cycles 611Count the number of cycles when the processor's 612.Va IF 613flag was zero. 614.It Li k7-interrupts-masked-while-pending-cycles 615Count the number of cycles interrupts were masked while pending due 616to the processor's 617.Va IF 618flag being zero. 619.It Li k7-hardware-interrupts 620Count the number of taken hardware interrupts. 621.El 622.Ss AMD (K8) PMCs 623These PMCs are present in the 624.Tn "AMD Athlon64" 625and 626.Tn "AMD Opteron" 627series of CPUs. 628They are documented in: 629.Rs 630.%B "BIOS and Kernel Developer's Guide for the AMD Athlon(tm) 64 and AMD Opteron Processors" 631.%N "Publication No. 26094" 632.%D "April 2004" 633.%Q "Advanced Micro Devices, Inc." 
634.Re 635.Pp 636Event specifiers for AMD K8 PMCs can have the following optional 637qualifiers: 638.Bl -tag -width indent 639.It Li count= Ns Ar value 640Configure the counter to increment only if the number of configured 641events measured in a cycle is greater than or equal to 642.Ar value . 643.It Li edge 644Configure the counter to only count negated-to-asserted transitions 645of the conditions expressed by the other fields. 646In other words, the counter will increment only once whenever a given 647condition becomes true, irrespective of the number of clocks during 648which the condition remains true. 649.It Li inv 650Invert the sense of comparison when the 651.Dq Li count 652qualifier is present, making the counter increment when the 653number of events per cycle is less than the value specified by 654the 655.Dq Li count 656qualifier. 657.It Li mask= Ns Ar qualifier 658Many event specifiers for AMD K8 PMCs need to be additionally 659qualified using a mask qualifier. 660These additional qualifiers are event-specific and are documented 661along with their associated event specifiers below. 662.It Li os 663Configure the PMC to count events happening at privilege level 0. 664.It Li usr 665Configure the PMC to count events occurring at privilege levels 1, 2 666or 3. 667.El 668.Pp 669If neither of the 670.Dq Li os 671or 672.Dq Li usr 673qualifiers were specified, the default is to enable both. 674.Pp 675The event specifiers supported on AMD K8 PMCs are: 676.Bl -tag -width indent 677.It Li k8-bu-cpu-clk-unhalted 678Count the number of clock cycles when the CPU is not in the HLT or 679STPCLK states. 680.It Li k8-bu-fill-request-l2-miss Op Li ,mask= Ns Ar qualifier 681Count fill requests that missed in the L2 cache. 682This event may be further qualified using 683.Ar qualifier , 684which is a 685.Ql + 686separated set of the following keywords: 687.Pp 688.Bl -tag -width indent -compact 689.It Li dc-fill 690Count data cache fill requests. 
691.It Li ic-fill 692Count instruction cache fill requests. 693.It Li tlb-reload 694Count TLB reloads. 695.El 696.Pp 697The default is to count all types of requests. 698.It Li k8-bu-internal-l2-request Op Li ,mask= Ns Ar qualifier 699Count internally generated requests to the L2 cache. 700This event may be further qualified using 701.Ar qualifier , 702which is a 703.Ql + 704separated set of the following keywords: 705.Pp 706.Bl -tag -width indent -compact 707.It Li cancelled 708Count cancelled requests. 709.It Li dc-fill 710Count data cache fill requests. 711.It Li ic-fill 712Count instruction cache fill requests. 713.It Li tag-snoop 714Count tag snoop requests. 715.It Li tlb-reload 716Count TLB reloads. 717.El 718.Pp 719The default is to count all types of requests. 720.It Li k8-dc-access 721Count data cache accesses including microcode scratchpad accesses. 722.It Li k8-dc-copyback Op Li ,mask= Ns Ar qualifier 723Count data cache copyback operations. 724This event may be further qualified using 725.Ar qualifier , 726which is a 727.Ql + 728separated set of the following keywords: 729.Pp 730.Bl -tag -width indent -compact 731.It Li exclusive 732Count operations for lines in the 733.Dq exclusive 734state. 735.It Li invalid 736Count operations for lines in the 737.Dq invalid 738state. 739.It Li modified 740Count operations for lines in the 741.Dq modified 742state. 743.It Li owner 744Count operations for lines in the 745.Dq owner 746state. 747.It Li shared 748Count operations for lines in the 749.Dq shared 750state. 751.El 752.Pp 753The default is to count operations for lines in all the 754above states. 755.It Li k8-dc-dcache-accesses-by-locks Op Li ,mask= Ns Ar qualifier 756Count data cache accesses by lock instructions. 757This event is only available on processors of revision C or later 758vintage. 
759This event may be further qualified using 760.Ar qualifier , 761which is a 762.Ql + 763separated set of the following keywords: 764.Pp 765.Bl -tag -width indent -compact 766.It Li accesses 767Count data cache accesses by lock instructions. 768.It Li misses 769Count data cache misses by lock instructions. 770.El 771.Pp 772The default is to count all accesses. 773.It Li k8-dc-dispatched-prefetch-instructions Op Li ,mask= Ns Ar qualifier 774Count the number of dispatched prefetch instructions. 775This event may be further qualified using 776.Ar qualifier , 777which is a 778.Ql + 779separated set of the following keywords: 780.Pp 781.Bl -tag -width indent -compact 782.It Li load 783Count load operations. 784.It Li nta 785Count non-temporal operations. 786.It Li store 787Count store operations. 788.El 789.Pp 790The default is to count all operations. 791.It Li k8-dc-l1-dtlb-miss-and-l2-dtlb-hit 792Count L1 DTLB misses that are L2 DTLB hits. 793.It Li k8-dc-l1-dtlb-miss-and-l2-dtlb-miss 794Count L1 DTLB misses that are also misses in the L2 DTLB. 795.It Li k8-dc-microarchitectural-early-cancel-of-an-access 796Count microarchitectural early cancels of data cache accesses. 797.It Li k8-dc-microarchitectural-late-cancel-of-an-access 798Count microarchitectural late cancels of data cache accesses. 799.It Li k8-dc-misaligned-data-reference 800Count misaligned data references. 801.It Li k8-dc-miss 802Count data cache misses. 803.It Li k8-dc-one-bit-ecc-error Op Li ,mask= Ns Ar qualifier 804Count one bit ECC errors found by the scrubber. 805This event may be further qualified using 806.Ar qualifier , 807which is a 808.Ql + 809separated set of the following keywords: 810.Pp 811.Bl -tag -width indent -compact 812.It Li scrubber 813Count scrubber detected errors. 814.It Li piggyback 815Count piggyback scrubber errors. 816.El 817.Pp 818The default is to count both kinds of errors. 
819.It Li k8-dc-refill-from-l2 Op Li ,mask= Ns Ar qualifier 820Count data cache refills from L2 cache. 821This event may be further qualified using 822.Ar qualifier , 823which is a 824.Ql + 825separated set of the following keywords: 826.Pp 827.Bl -tag -width indent -compact 828.It Li exclusive 829Count operations for lines in the 830.Dq exclusive 831state. 832.It Li invalid 833Count operations for lines in the 834.Dq invalid 835state. 836.It Li modified 837Count operations for lines in the 838.Dq modified 839state. 840.It Li owner 841Count operations for lines in the 842.Dq owner 843state. 844.It Li shared 845Count operations for lines in the 846.Dq shared 847state. 848.El 849.Pp 850The default is to count operations for lines in all the 851above states. 852.It Li k8-dc-refill-from-system Op Li ,mask= Ns Ar qualifier 853Count data cache refills from system memory. 854This event may be further qualified using 855.Ar qualifier , 856which is a 857.Ql + 858separated set of the following keywords: 859.Pp 860.Bl -tag -width indent -compact 861.It Li exclusive 862Count operations for lines in the 863.Dq exclusive 864state. 865.It Li invalid 866Count operations for lines in the 867.Dq invalid 868state. 869.It Li modified 870Count operations for lines in the 871.Dq modified 872state. 873.It Li owner 874Count operations for lines in the 875.Dq owner 876state. 877.It Li shared 878Count operations for lines in the 879.Dq shared 880state. 881.El 882.Pp 883The default is to count operations for lines in all the 884above states. 885.It Li k8-fp-dispatched-fpu-ops Op Li ,mask= Ns Ar qualifier 886Count the number of dispatched FPU ops. 887This event is supported in revision B and later CPUs. 888This event may be further qualified using 889.Ar qualifier , 890which is a 891.Ql + 892separated set of the following keywords: 893.Pp 894.Bl -tag -width indent -compact 895.It Li add-pipe-excluding-junk-ops 896Count add pipe ops excluding junk ops. 
897.It Li add-pipe-junk-ops 898Count junk ops in the add pipe. 899.It Li multiply-pipe-excluding-junk-ops 900Count multiply pipe ops excluding junk ops. 901.It Li multiply-pipe-junk-ops 902Count junk ops in the multiply pipe. 903.It Li store-pipe-excluding-junk-ops 904Count store pipe ops excluding junk ops. 905.It Li store-pipe-junk-ops 906Count junk ops in the store pipe. 907.El 908.Pp 909The default is to count all types of ops. 910.It Li k8-fp-cycles-with-no-fpu-ops-retired 911Count cycles when no FPU ops were retired. 912This event is supported in revision B and later CPUs. 913.It Li k8-fp-dispatched-fpu-fast-flag-ops 914Count dispatched FPU ops that use the fast flag interface. 915This event is supported in revision B and later CPUs. 916.It Li k8-fr-decoder-empty 917Count cycles when there was nothing to dispatch (i.e., the decoder 918was empty). 919.It Li k8-fr-dispatch-stalls 920Count all dispatch stalls. 921.It Li k8-fr-dispatch-stall-for-segment-load 922Count dispatch stalls for segment loads. 923.It Li k8-fr-dispatch-stall-for-serialization 924Count dispatch stalls for serialization. 925.It Li k8-fr-dispatch-stall-from-branch-abort-to-retire 926Count dispatch stalls from branch abort to retiral. 927.It Li k8-fr-dispatch-stall-when-fpu-is-full 928Count dispatch stalls when the FPU is full. 929.It Li k8-fr-dispatch-stall-when-ls-is-full 930Count dispatch stalls when the load/store unit is full. 931.It Li k8-fr-dispatch-stall-when-reorder-buffer-is-full 932Count dispatch stalls when the reorder buffer is full. 933.It Li k8-fr-dispatch-stall-when-reservation-stations-are-full 934Count dispatch stalls when reservation stations are full. 935.It Li k8-fr-dispatch-stall-when-waiting-for-all-to-be-quiet 936Count dispatch stalls when waiting for all to be quiet. 937.\" XXX What does "waiting for all to be quiet" mean? 
938.It Li k8-fr-dispatch-stall-when-waiting-far-xfer-or-resync-branch-pending 939Count dispatch stalls when a far control transfer or a resync branch 940is pending. 941.It Li k8-fr-fpu-exceptions Op Li ,mask= Ns Ar qualifier 942Count FPU exceptions. 943This event is supported in revision B and later CPUs. 944This event may be further qualified using 945.Ar qualifier , 946which is a 947.Ql + 948separated set of the following keywords: 949.Pp 950.Bl -tag -width indent -compact 951.It Li sse-and-x87-microtraps 952Count SSE and x87 microtraps. 953.It Li sse-reclass-microfaults 954Count SSE reclass microfaults. 955.It Li sse-retype-microfaults 956Count SSE retype microfaults. 957.It Li x87-reclass-microfaults 958Count x87 reclass microfaults. 959.El 960.Pp 961The default is to count all types of exceptions. 962.It Li k8-fr-interrupts-masked-cycles 963Count cycles when interrupts were masked (i.e., when CPU RFLAGS field IF was zero). 964.It Li k8-fr-interrupts-masked-while-pending-cycles 965Count cycles while interrupts were masked while pending (i.e., cycles 966when INTR was asserted while CPU RFLAGS field IF was zero). 967.It Li k8-fr-number-of-breakpoints-for-dr0 968Count the number of breakpoints for DR0. 969.It Li k8-fr-number-of-breakpoints-for-dr1 970Count the number of breakpoints for DR1. 971.It Li k8-fr-number-of-breakpoints-for-dr2 972Count the number of breakpoints for DR2. 973.It Li k8-fr-number-of-breakpoints-for-dr3 974Count the number of breakpoints for DR3. 975.It Li k8-fr-retired-branches 976Count retired branches including exceptions and interrupts. 977.It Li k8-fr-retired-branches-mispredicted 978Count mispredicted retired branches. 979.It Li k8-fr-retired-far-control-transfers 980Count retired far control transfers (which are always mispredicted). 981.It Li k8-fr-retired-fastpath-double-op-instructions Op Li ,mask= Ns Ar qualifier 982Count retired fastpath double op instructions. 983This event is supported in revision B and later CPUs. 
984This event may be further qualified using 985.Ar qualifier , 986which is a 987.Ql + 988separated set of the following keywords: 989.Pp 990.Bl -tag -width indent -compact 991.It Li low-op-pos-0 992Count instructions with the low op in position 0. 993.It Li low-op-pos-1 994Count instructions with the low op in position 1. 995.It Li low-op-pos-2 996Count instructions with the low op in position 2. 997.El 998.Pp 999The default is to count all types of instructions. 1000.It Li k8-fr-retired-fpu-instructions Op Li ,mask= Ns Ar qualifier 1001Count retired FPU instructions. 1002This event is supported in revision B and later CPUs. 1003This event may be further qualified using 1004.Ar qualifier , 1005which is a 1006.Ql + 1007separated set of the following keywords: 1008.Pp 1009.Bl -tag -width indent -compact 1010.It Li mmx-3dnow 1011Count MMX and 3DNow!\& instructions. 1012.It Li packed-sse-sse2 1013Count packed SSE and SSE2 instructions. 1014.It Li scalar-sse-sse2 1015Count scalar SSE and SSE2 instructions. 1016.It Li x87 1017Count x87 instructions. 1018.El 1019.Pp 1020The default is to count all types of instructions. 1021.It Li k8-fr-retired-near-returns 1022Count retired near returns. 1023.It Li k8-fr-retired-near-returns-mispredicted 1024Count mispredicted near returns. 1025.It Li k8-fr-retired-resyncs 1026Count retired resyncs (non-control transfer branches). 1027.It Li k8-fr-retired-taken-hardware-interrupts 1028Count retired taken hardware interrupts. 1029.It Li k8-fr-retired-taken-branches 1030Count retired taken branches. 1031.It Li k8-fr-retired-taken-branches-mispredicted 1032Count retired taken branches that were mispredicted. 1033.It Li k8-fr-retired-taken-branches-mispredicted-by-addr-miscompare 1034Count retired taken branches that were mispredicted only due to an 1035address miscompare. 1036.It Li k8-fr-retired-uops 1037Count retired uops. 1038.It Li k8-fr-retired-x86-instructions 1039Count retired x86 instructions including exceptions and interrupts. 
1040.It Li k8-ic-fetch 1041Count instruction cache fetches. 1042.It Li k8-ic-instruction-fetch-stall 1043Count cycles in stalls due to instruction fetch. 1044.It Li k8-ic-l1-itlb-miss-and-l2-itlb-hit 1045Count L1 ITLB misses that are L2 ITLB hits. 1046.It Li k8-ic-l1-itlb-miss-and-l2-itlb-miss 1047Count ITLB misses that miss in both L1 and L2 ITLBs. 1048.It Li k8-ic-microarchitectural-resync-by-snoop 1049Count microarchitectural resyncs caused by snoops. 1050.It Li k8-ic-miss 1051Count instruction cache misses. 1052.It Li k8-ic-refill-from-l2 1053Count instruction cache refills from L2 cache. 1054.It Li k8-ic-refill-from-system 1055Count instruction cache refills from system memory. 1056.It Li k8-ic-return-stack-hits 1057Count hits to the return stack. 1058.It Li k8-ic-return-stack-overflow 1059Count overflows of the return stack. 1060.It Li k8-ls-buffer2-full 1061Count load/store buffer2 full events. 1062.It Li k8-ls-locked-operation Op Li ,mask= Ns Ar qualifier 1063Count locked operations. 1064For revision C and later CPUs, the following qualifiers are supported: 1065.Pp 1066.Bl -tag -width indent -compact 1067.It Li cycles-in-request 1068Count the number of cycles in the lock request/grant stage. 1069.It Li cycles-to-complete 1070Count the number of cycles a lock takes to complete once it is 1071non-speculative and is the older load/store operation. 1072.It Li locked-instructions 1073Count the number of lock instructions executed. 1074.El 1075.Pp 1076The default is to count the number of lock instructions executed. 1077.It Li k8-ls-microarchitectural-late-cancel 1078Count microarchitectural late cancels of operations in the load/store 1079unit. 1080.It Li k8-ls-microarchitectural-resync-by-self-modifying-code 1081Count microarchitectural resyncs caused by self-modifying code. 1082.It Li k8-ls-microarchitectural-resync-by-snoop 1083Count microarchitectural resyncs caused by snoops. 
1084.It Li k8-ls-retired-cflush-instructions 1085Count retired CLFLUSH instructions. 1086.It Li k8-ls-retired-cpuid-instructions 1087Count retired CPUID instructions. 1088.It Li k8-ls-segment-register-load Op Li ,mask= Ns Ar qualifier 1089Count segment register loads. 1090This event may be further qualified using 1091.Ar qualifier , 1092which is a 1093.Ql + 1094separated set of the following keywords: 1095.Bl -tag -width indent -compact 1096.It Li cs 1097Count CS register loads. 1098.It Li ds 1099Count DS register loads. 1100.It Li es 1101Count ES register loads. 1102.It Li fs 1103Count FS register loads. 1104.It Li gs 1105Count GS register loads. 1106.\" .It Li hs 1107.\" Count HS register loads. 1108.\" XXX "HS" register? 1109.It Li ss 1110Count SS register loads. 1111.El 1112.Pp 1113The default is to count all types of loads. 1114.It Li k8-nb-memory-controller-bypass-saturation Op Li ,mask= Ns Ar qualifier 1115Count memory controller bypass counter saturation events. 1116This event may be further qualified using 1117.Ar qualifier , 1118which is a 1119.Ql + 1120separated set of the following keywords: 1121.Pp 1122.Bl -tag -width indent -compact 1123.It Li dram-controller-interface-bypass 1124Count DRAM controller interface bypass. 1125.It Li dram-controller-queue-bypass 1126Count DRAM controller queue bypass. 1127.It Li memory-controller-hi-pri-bypass 1128Count memory controller high priority bypasses. 1129.It Li memory-controller-lo-pri-bypass 1130Count memory controller low priority bypasses. 1131.El 1132.Pp 1133.It Li k8-nb-memory-controller-dram-slots-missed 1134Count memory controller DRAM command slots missed (in MemClks). 1135.It Li k8-nb-memory-controller-page-access-event Op Li ,mask= Ns Ar qualifier 1136Count memory controller page access events. 
1137This event may be further qualified using 1138.Ar qualifier , 1139which is a 1140.Ql + 1141separated set of the following keywords: 1142.Pp 1143.Bl -tag -width indent -compact 1144.It Li page-conflict 1145Count page conflicts. 1146.It Li page-hit 1147Count page hits. 1148.It Li page-miss 1149Count page misses. 1150.El 1151.Pp 1152The default is to count all types of events. 1153.It Li k8-nb-memory-controller-page-table-overflow 1154Count memory controller page table overflow events. 1155.It Li k8-nb-probe-result Op Li ,mask= Ns Ar qualifier 1156Count probe events. 1157This event may be further qualified using 1158.Ar qualifier , 1159which is a 1160.Ql + 1161separated set of the following keywords: 1162.Pp 1163.Bl -tag -width indent -compact 1164.It Li probe-hit 1165Count all probe hits. 1166.It Li probe-hit-dirty-no-memory-cancel 1167Count probe hits without memory cancels. 1168.It Li probe-hit-dirty-with-memory-cancel 1169Count probe hits with memory cancels. 1170.It Li probe-miss 1171Count probe misses. 1172.El 1173.It Li k8-nb-sized-commands Op Li ,mask= Ns Ar qualifier 1174Count sized commands issued. 1175This event may be further qualified using 1176.Ar qualifier , 1177which is a 1178.Ql + 1179separated set of the following keywords: 1180.Pp 1181.Bl -tag -width indent -compact 1182.It Li nonpostwrszbyte 1183.It Li nonpostwrszdword 1184.It Li postwrszbyte 1185.It Li postwrszdword 1186.It Li rdszbyte 1187.It Li rdszdword 1188.It Li rdmodwr 1189.El 1190.Pp 1191The default is to count all types of commands. 1192.It Li k8-nb-memory-controller-turnaround Op Li ,mask= Ns Ar qualifier 1193Count memory controller turnaround events. 1194This event may be further qualified using 1195.Ar qualifier , 1196which is a 1197.Ql + 1198separated set of the following keywords: 1199.Pp 1200.Bl -tag -width indent -compact 1201.\" XXX doc is unclear whether these are cycle counts or event counts 1202.It Li dimm-turnaround 1203Count DIMM turnarounds. 
1204.It Li read-to-write-turnaround 1205Count read to write turnarounds. 1206.It Li write-to-read-turnaround 1207Count write to read turnarounds. 1208.El 1209.Pp 1210The default is to count all types of events. 1211.It Li k8-nb-ht-bus0-bandwidth Op Li ,mask= Ns Ar qualifier 1212.It Li k8-nb-ht-bus1-bandwidth Op Li ,mask= Ns Ar qualifier 1213.It Li k8-nb-ht-bus2-bandwidth Op Li ,mask= Ns Ar qualifier 1214Count events on the HyperTransport(tm) buses. 1215These events may be further qualified using 1216.Ar qualifier , 1217which is a 1218.Ql + 1219separated set of the following keywords: 1220.Pp 1221.Bl -tag -width indent -compact 1222.It Li buffer-release 1223Count buffer release messages sent. 1224.It Li command 1225Count command messages sent. 1226.It Li data 1227Count data messages sent. 1228.It Li nop 1229Count nop messages sent. 1230.El 1231.Pp 1232The default is to count all types of messages. 1233.El 1234.Ss Intel P6 PMCS 1235Intel P6 PMCs are present in Intel 1236.Tn "Pentium Pro" , 1237.Tn "Pentium II" , 1238.Tn Celeron , 1239.Tn "Pentium III" 1240and 1241.Tn "Pentium M" 1242processors. 1243.Pp 1244These CPUs have two counters. 1245Some events may only be used on specific counters and some events are 1246defined only on specific processor models. 
1247.Pp 1248These PMCs are documented in 1249.Rs 1250.%B "IA-32 Intel(R) Architecture Software Developer's Manual" 1251.%T "Volume 3: System Programming Guide" 1252.%N "Order Number 245472-012" 1253.%D 2003 1254.%Q "Intel Corporation" 1255.Re 1256.Pp 1257Some of these events are affected by processor errata described in 1258.Rs 1259.%B "Intel(R) Pentium(R) III Processor Specification Update" 1260.%N "Document Number: 244453-054" 1261.%D "April 2005" 1262.%Q "Intel Corporation" 1263.Re 1264.Pp 1265Event specifiers for Intel P6 PMCs can have the following common 1266qualifiers: 1267.Bl -tag -width indent 1268.It Li cmask= Ns Ar value 1269Configure the PMC to increment only if the number of configured 1270events measured in a cycle is greater than or equal to 1271.Ar value . 1272.It Li edge 1273Configure the PMC to count the number of deasserted to asserted 1274transitions of the conditions expressed by the other qualifiers. 1275If specified, the counter will increment only once whenever a 1276condition becomes true, irrespective of the number of clocks during 1277which the condition remains true. 1278.It Li inv 1279Invert the sense of comparison when the 1280.Dq Li cmask 1281qualifier is present, making the counter increment when the number of 1282events per cycle is less than the value specified by the 1283.Dq Li cmask 1284qualifier. 1285.It Li os 1286Configure the PMC to count events happening at processor privilege 1287level 0. 1288.It Li umask= Ns Ar value 1289This qualifier is used to further qualify the event selected (see 1290below). 1291.It Li usr 1292Configure the PMC to count events occurring at privilege levels 1, 2 1293or 3. 1294.El 1295.Pp 1296If neither of the 1297.Dq Li os 1298or 1299.Dq Li usr 1300qualifiers are specified, the default is to enable both. 
1301.Pp 1302The event specifiers supported by Intel P6 PMCs are: 1303.Bl -tag -width indent 1304.It Li p6-baclears 1305Count the number of times a static branch prediction was made by the 1306branch decoder because the BTB did not have a prediction. 1307.It Li p6-br-bac-missp-exec 1308.Pq Tn "Pentium M" 1309Count the number of branch instructions executed that were 1310mispredicted at the Front End (BAC). 1311.It Li p6-br-bogus 1312Count the number of bogus branches. 1313.It Li p6-br-call-exec 1314.Pq Tn "Pentium M" 1315Count the number of call instructions executed. 1316.It Li p6-br-call-missp-exec 1317.Pq Tn "Pentium M" 1318Count the number of call instructions executed that were mispredicted. 1319.It Li p6-br-cnd-exec 1320.Pq Tn "Pentium M" 1321Count the number of conditional branch instructions executed. 1322.It Li p6-br-cnd-missp-exec 1323.Pq Tn "Pentium M" 1324Count the number of conditional branch instructions executed that were 1325mispredicted. 1326.It Li p6-br-ind-call-exec 1327.Pq Tn "Pentium M" 1328Count the number of indirect call instructions executed. 1329.It Li p6-br-ind-exec 1330.Pq Tn "Pentium M" 1331Count the number of indirect branch instructions executed. 1332.It Li p6-br-ind-missp-exec 1333.Pq Tn "Pentium M" 1334Count the number of indirect branch instructions executed that were 1335mispredicted. 1336.It Li p6-br-inst-decoded 1337Count the number of branch instructions decoded. 1338.It Li p6-br-inst-exec 1339.Pq Tn "Pentium M" 1340Count the number of branch instructions executed but not necessarily retired. 1341.It Li p6-br-inst-retired 1342Count the number of branch instructions retired. 1343.It Li p6-br-miss-pred-retired 1344Count the number of mispredicted branch instructions retired. 1345.It Li p6-br-miss-pred-taken-ret 1346Count the number of taken mispredicted branches retired. 1347.It Li p6-br-missp-exec 1348.Pq Tn "Pentium M" 1349Count the number of branch instructions executed that were 1350mispredicted at execution. 
1351.It Li p6-br-ret-bac-missp-exec 1352.Pq Tn "Pentium M" 1353Count the number of return instructions executed that were 1354mispredicted at the Front End (BAC). 1355.It Li p6-br-ret-exec 1356.Pq Tn "Pentium M" 1357Count the number of return instructions executed. 1358.It Li p6-br-ret-missp-exec 1359.Pq Tn "Pentium M" 1360Count the number of return instructions executed that were 1361mispredicted at execution. 1362.It Li p6-br-taken-retired 1363Count the number of taken branches retired. 1364.It Li p6-btb-misses 1365Count the number of branches for which the BTB did not produce a 1366prediction. 1367.It Li p6-bus-bnr-drv 1368Count the number of bus clock cycles during which this processor is 1369driving the BNR# pin. 1370.It Li p6-bus-data-rcv 1371Count the number of bus clock cycles during which this processor is 1372receiving data. 1373.It Li p6-bus-drdy-clocks Op Li ,umask= Ns Ar qualifier 1374Count the number of clocks during which DRDY# is asserted. 1375An additional qualifier may be specified, and comprises one of the 1376following keywords: 1377.Pp 1378.Bl -tag -width indent -compact 1379.It Li any 1380Count transactions generated by any agent on the bus. 1381.It Li self 1382Count transactions generated by this processor. 1383.El 1384.Pp 1385The default is to count operations generated by this processor. 1386.It Li p6-bus-hit-drv 1387Count the number of bus clock cycles during which this processor is 1388driving the HIT# pin. 1389.It Li p6-bus-hitm-drv 1390Count the number of bus clock cycles during which this processor is 1391driving the HITM# pin. 1392.It Li p6-bus-lock-clocks Op Li ,umask= Ns Ar qualifier 1393Count the number of clocks during which LOCK# is asserted on the 1394external system bus. 1395An additional qualifier may be specified and comprises one of the following 1396keywords: 1397.Pp 1398.Bl -tag -width indent -compact 1399.It Li any 1400Count transactions generated by any agent on the bus. 
1401.It Li self 1402Count transactions generated by this processor. 1403.El 1404.Pp 1405The default is to count operations generated by this processor. 1406.It Li p6-bus-req-outstanding 1407Count the number of bus requests outstanding in any given cycle. 1408.It Li p6-bus-snoop-stall 1409Count the number of clock cycles during which the bus is snoop stalled. 1410.It Li p6-bus-tran-any Op Li ,umask= Ns Ar qualifier 1411Count the number of completed bus transactions of any kind. 1412An additional qualifier may be specified and comprises one of the following 1413keywords: 1414.Pp 1415.Bl -tag -width indent -compact 1416.It Li any 1417Count transactions generated by any agent on the bus. 1418.It Li self 1419Count transactions generated by this processor. 1420.El 1421.Pp 1422The default is to count operations generated by this processor. 1423.It Li p6-bus-tran-brd Op Li ,umask= Ns Ar qualifier 1424Count the number of burst read transactions. 1425An additional qualifier may be specified and comprises one of the following 1426keywords: 1427.Pp 1428.Bl -tag -width indent -compact 1429.It Li any 1430Count transactions generated by any agent on the bus. 1431.It Li self 1432Count transactions generated by this processor. 1433.El 1434.Pp 1435The default is to count operations generated by this processor. 1436.It Li p6-bus-tran-burst Op Li ,umask= Ns Ar qualifier 1437Count the number of completed burst transactions. 1438An additional qualifier may be specified and comprises one of the following 1439keywords: 1440.Pp 1441.Bl -tag -width indent -compact 1442.It Li any 1443Count transactions generated by any agent on the bus. 1444.It Li self 1445Count transactions generated by this processor. 1446.El 1447.Pp 1448The default is to count operations generated by this processor. 1449.It Li p6-bus-tran-def Op Li ,umask= Ns Ar qualifier 1450Count the number of completed deferred transactions. 
1451An additional qualifier may be specified and comprises one of the following 1452keywords: 1453.Pp 1454.Bl -tag -width indent -compact 1455.It Li any 1456Count transactions generated by any agent on the bus. 1457.It Li self 1458Count transactions generated by this processor. 1459.El 1460.Pp 1461The default is to count operations generated by this processor. 1462.It Li p6-bus-tran-ifetch Op Li ,umask= Ns Ar qualifier 1463Count the number of completed instruction fetch transactions. 1464An additional qualifier may be specified and comprises one of the following 1465keywords: 1466.Pp 1467.Bl -tag -width indent -compact 1468.It Li any 1469Count transactions generated by any agent on the bus. 1470.It Li self 1471Count transactions generated by this processor. 1472.El 1473.Pp 1474The default is to count operations generated by this processor. 1475.It Li p6-bus-tran-inval Op Li ,umask= Ns Ar qualifier 1476Count the number of completed invalidate transactions. 1477An additional qualifier may be specified and comprises one of the following 1478keywords: 1479.Pp 1480.Bl -tag -width indent -compact 1481.It Li any 1482Count transactions generated by any agent on the bus. 1483.It Li self 1484Count transactions generated by this processor. 1485.El 1486.Pp 1487The default is to count operations generated by this processor. 1488.It Li p6-bus-tran-mem Op Li ,umask= Ns Ar qualifier 1489Count the number of completed memory transactions. 1490An additional qualifier may be specified and comprises one of the following 1491keywords: 1492.Pp 1493.Bl -tag -width indent -compact 1494.It Li any 1495Count transactions generated by any agent on the bus. 1496.It Li self 1497Count transactions generated by this processor. 1498.El 1499.Pp 1500The default is to count operations generated by this processor. 1501.It Li p6-bus-tran-pwr Op Li ,umask= Ns Ar qualifier 1502Count the number of completed partial write transactions. 
1503An additional qualifier may be specified and comprises one of the following 1504keywords: 1505.Pp 1506.Bl -tag -width indent -compact 1507.It Li any 1508Count transactions generated by any agent on the bus. 1509.It Li self 1510Count transactions generated by this processor. 1511.El 1512.Pp 1513The default is to count operations generated by this processor. 1514.It Li p6-bus-tran-rfo Op Li ,umask= Ns Ar qualifier 1515Count the number of completed read-for-ownership transactions. 1516An additional qualifier may be specified and comprises one of the following 1517keywords: 1518.Pp 1519.Bl -tag -width indent -compact 1520.It Li any 1521Count transactions generated by any agent on the bus. 1522.It Li self 1523Count transactions generated by this processor. 1524.El 1525.Pp 1526The default is to count operations generated by this processor. 1527.It Li p6-bus-trans-io Op Li ,umask= Ns Ar qualifier 1528Count the number of completed I/O transactions. 1529An additional qualifier may be specified and comprises one of the following 1530keywords: 1531.Pp 1532.Bl -tag -width indent -compact 1533.It Li any 1534Count transactions generated by any agent on the bus. 1535.It Li self 1536Count transactions generated by this processor. 1537.El 1538.Pp 1539The default is to count operations generated by this processor. 1540.It Li p6-bus-trans-p Op Li ,umask= Ns Ar qualifier 1541Count the number of completed partial transactions. 1542An additional qualifier may be specified and comprises one of the following 1543keywords: 1544.Pp 1545.Bl -tag -width indent -compact 1546.It Li any 1547Count transactions generated by any agent on the bus. 1548.It Li self 1549Count transactions generated by this processor. 1550.El 1551.Pp 1552The default is to count operations generated by this processor. 1553.It Li p6-bus-trans-wb Op Li ,umask= Ns Ar qualifier 1554Count the number of completed write-back transactions. 
1555An additional qualifier may be specified and comprises one of the following 1556keywords: 1557.Pp 1558.Bl -tag -width indent -compact 1559.It Li any 1560Count transactions generated by any agent on the bus. 1561.It Li self 1562Count transactions generated by this processor. 1563.El 1564.Pp 1565The default is to count operations generated by this processor. 1566.It Li p6-cpu-clk-unhalted 1567Count the number of cycles during which the processor was not halted. 1568.Pp 1569.Pq Tn "Pentium M" 1570Count the number of cycles during which the processor was not halted 1571and not in a thermal trip. 1572.It Li p6-cycles-div-busy 1573Count the number of cycles during which the divider is busy and cannot 1574accept new divides. 1575This event is only allocated on counter 0. 1576.It Li p6-cycles-in-pending-and-masked 1577Count the number of processor cycles for which interrupts were 1578disabled and interrupts were pending. 1579.It Li p6-cycles-int-masked 1580Count the number of processor cycles for which interrupts were 1581disabled. 1582.It Li p6-data-mem-refs 1583Count all loads and all stores using any memory type, including 1584internal retries. 1585Each part of a split store is counted separately. 1586.It Li p6-dcu-lines-in 1587Count the total lines allocated in the data cache unit. 1588.It Li p6-dcu-m-lines-in 1589Count the number of M state lines allocated in the data cache unit. 1590.It Li p6-dcu-m-lines-out 1591Count the number of M state lines evicted from the data cache unit. 1592.It Li p6-dcu-miss-outstanding 1593Count the weighted number of cycles while a data cache unit miss is 1594outstanding, incremented by the number of outstanding cache misses at 1595any time. 1596.It Li p6-div 1597Count the number of floating point divides. 1598This event is only allocated on counter 1. 1599.It Li p6-emon-esp-uops 1600.Pq Tn "Pentium M" 1601Count the total number of micro-ops. 
1602.It Li p6-emon-est-trans Op Li ,umask= Ns Ar qualifier 1603.Pq Tn "Pentium M" 1604Count the number of 1605.Tn "Enhanced Intel SpeedStep" 1606transitions. 1607An additional qualifier may be specified, and can be one of the 1608following keywords: 1609.Pp 1610.Bl -tag -width indent -compact 1611.It Li all 1612Count all transitions. 1613.It Li freq 1614Count only frequency transitions. 1615.El 1616.Pp 1617The default is to count all transitions. 1618.It Li p6-emon-fused-uops-ret Op Li ,umask= Ns Ar qualifier 1619.Pq Tn "Pentium M" 1620Count the number of retired fused micro-ops. 1621An additional qualifier may be specified, and may be one of the 1622following keywords: 1623.Pp 1624.Bl -tag -width indent -compact 1625.It Li all 1626Count all fused micro-ops. 1627.It Li loadop 1628Count only load and op micro-ops. 1629.It Li stdsta 1630Count only STD/STA micro-ops. 1631.El 1632.Pp 1633The default is to count all fused micro-ops. 1634.It Li p6-emon-kni-comp-inst-ret 1635.Pq Tn "Pentium III" 1636Count the number of SSE computational instructions retired. 1637An additional qualifier may be specified, and comprises one of the 1638following keywords: 1639.Pp 1640.Bl -tag -width indent -compact 1641.It Li packed-and-scalar 1642Count packed and scalar operations. 1643.It Li scalar 1644Count scalar operations only. 1645.El 1646.Pp 1647The default is to count packed and scalar operations. 1648.It Li p6-emon-kni-inst-retired Op Li ,umask= Ns Ar qualifier 1649.Pq Tn "Pentium III" 1650Count the number of SSE instructions retired. 1651An additional qualifier may be specified, and comprises one of the 1652following keywords: 1653.Pp 1654.Bl -tag -width indent -compact 1655.It Li packed-and-scalar 1656Count packed and scalar operations. 1657.It Li scalar 1658Count scalar operations only. 1659.El 1660.Pp 1661The default is to count packed and scalar operations. 
1662.It Li p6-emon-kni-pref-dispatched Op Li ,umask= Ns Ar qualifier 1663.Pq Tn "Pentium III" 1664Count the number of SSE prefetch or weakly ordered instructions 1665dispatched (including speculative prefetches). 1666An additional qualifier may be specified, and comprises one of the 1667following keywords: 1668.Pp 1669.Bl -tag -width indent -compact 1670.It Li nta 1671Count non-temporal prefetches. 1672.It Li t1 1673Count prefetches to L1. 1674.It Li t2 1675Count prefetches to L2. 1676.It Li wos 1677Count weakly ordered stores. 1678.El 1679.Pp 1680The default is to count non-temporal prefetches. 1681.It Li p6-emon-kni-pref-miss Op Li ,umask= Ns Ar qualifier 1682.Pq Tn "Pentium III" 1683Count the number of prefetch or weakly ordered instructions that miss 1684all caches. 1685An additional qualifier may be specified, and comprises one of the 1686following keywords: 1687.Pp 1688.Bl -tag -width indent -compact 1689.It Li nta 1690Count non-temporal prefetches. 1691.It Li t1 1692Count prefetches to L1. 1693.It Li t2 1694Count prefetches to L2. 1695.It Li wos 1696Count weakly ordered stores. 1697.El 1698.Pp 1699The default is to count non-temporal prefetches. 1700.It Li p6-emon-pref-rqsts-dn 1701.Pq Tn "Pentium M" 1702Count the number of downward prefetches issued. 1703.It Li p6-emon-pref-rqsts-up 1704.Pq Tn "Pentium M" 1705Count the number of upward prefetches issued. 1706.It Li p6-emon-simd-instr-retired 1707.Pq Tn "Pentium M" 1708Count the number of retired 1709.Tn MMX 1710instructions. 1711.It Li p6-emon-sse-sse2-comp-inst-retired Op Li ,umask= Ns Ar qualifier 1712.Pq Tn "Pentium M" 1713Count the number of computational SSE instructions retired. 1714An additional qualifier may be specified and can be one of the 1715following keywords: 1716.Pp 1717.Bl -tag -width indent -compact 1718.It Li sse-packed-single 1719Count SSE packed-single instructions. 1720.It Li sse-scalar-single 1721Count SSE scalar-single instructions. 
1722.It Li sse2-packed-double 1723Count SSE2 packed-double instructions. 1724.It Li sse2-scalar-double 1725Count SSE2 scalar-double instructions. 1726.El 1727.Pp 1728The default is to count SSE packed-single instructions. 1729.It Li p6-emon-sse-sse2-inst-retired Op Li ,umask= Ns Ar qualifier 1730.Pp 1731.Pq Tn "Pentium M" 1732Count the number of SSE instructions retired. 1733An additional qualifier can be specified, and can be one of the 1734following keywords: 1735.Pp 1736.Bl -tag -width indent -compact 1737.It Li sse-packed-single 1738Count SSE packed-single instructions. 1739.It Li sse-packed-single-scalar-single 1740Count SSE packed-single and scalar-single instructions. 1741.It Li sse2-packed-double 1742Count SSE2 packed-double instructions. 1743.It Li sse2-scalar-double 1744Count SSE2 scalar-double instructions. 1745.El 1746.Pp 1747The default is to count SSE packed-single instructions. 1748.It Li p6-emon-synch-uops 1749.Pq Tn "Pentium M" 1750Count the number of sync micro-ops. 1751.It Li p6-emon-thermal-trip 1752.Pq Tn "Pentium M" 1753Count the duration or occurrences of thermal trips. 1754Use the 1755.Dq Li edge 1756qualifier to count occurrences of thermal trips. 1757.It Li p6-emon-unfusion 1758.Pq Tn "Pentium M" 1759Count the number of unfusion events in the reorder buffer. 1760.It Li p6-flops 1761Count the number of computational floating point operations retired. 1762This event is only allocated on counter 0. 1763.It Li p6-fp-assist 1764Count the number of floating point exceptions handled by microcode. 1765This event is only allocated on counter 1. 1766.It Li p6-fp-comps-ops-exe 1767Count the number of computation floating point operations executed. 1768This event is only allocated on counter 0. 1769.It Li p6-fp-mmx-trans Op Li ,umask= Ns Ar qualifier 1770.Pq Tn "Pentium II" , Tn "Pentium III" 1771Count the number of transitions between MMX and floating-point 1772instructions. 
1773An additional qualifier may be specified, and comprises one of the 1774following keywords: 1775.Pp 1776.Bl -tag -width indent -compact 1777.It Li mmxtofp 1778Count transitions from MMX instructions to floating-point instructions. 1779.It Li fptommx 1780Count transitions from floating-point instructions to MMX instructions. 1781.El 1782.Pp 1783The default is to count MMX to floating-point transitions. 1784.It Li p6-hw-int-rx 1785Count the number of hardware interrupts received. 1786.It Li p6-ifu-fetch 1787Count the number of instruction fetches, both cacheable and non-cacheable. 1788.It Li p6-ifu-fetch-miss 1789Count the number of instruction fetch misses (i.e., those that produce 1790memory accesses). 1791.It Li p6-ifu-mem-stall 1792Count the number of cycles instruction fetch is stalled for any reason. 1793.It Li p6-ild-stall 1794Count the number of cycles the instruction length decoder is stalled. 1795.It Li p6-inst-decoded 1796Count the number of instructions decoded. 1797.It Li p6-inst-retired 1798Count the number of instructions retired. 1799.It Li p6-itlb-miss 1800Count the number of instruction TLB misses. 1801.It Li p6-l2-ads 1802Count the number of L2 address strobes. 1803.It Li p6-l2-dbus-busy 1804Count the number of cycles during which the L2 cache data bus was busy. 1805.It Li p6-l2-dbus-busy-rd 1806Count the number of cycles during which the L2 cache data bus was busy 1807transferring read data from L2 to the processor. 1808.It Li p6-l2-ifetch Op Li ,umask= Ns Ar qualifier 1809Count the number of L2 instruction fetches. 1810An additional qualifier may be specified and comprises a list of the following 1811keywords separated by 1812.Ql + 1813characters: 1814.Pp 1815.Bl -tag -width indent -compact 1816.It Li e 1817Count operations affecting E (exclusive) state lines. 1818.It Li i 1819Count operations affecting I (invalid) state lines. 1820.It Li m 1821Count operations affecting M (modified) state lines. 
1822.It Li s 1823Count operations affecting S (shared) state lines. 1824.El 1825.Pp 1826The default is to count operations affecting all (MESI) state lines. 1827.It Li p6-l2-ld Op Li ,umask= Ns Ar qualifier 1828Count the number of L2 data loads. 1829An additional qualifier may be specified and comprises a list of the following 1830keywords separated by 1831.Ql + 1832characters: 1833.Pp 1834.Bl -tag -width indent -compact 1835.It Li both 1836.Pq Tn "Pentium M" 1837Count both hardware-prefetched lines and non-hardware-prefetched lines. 1838.It Li e 1839Count operations affecting E (exclusive) state lines. 1840.It Li hw 1841.Pq Tn "Pentium M" 1842Count hardware-prefetched lines only. 1843.It Li i 1844Count operations affecting I (invalid) state lines. 1845.It Li m 1846Count operations affecting M (modified) state lines. 1847.It Li nonhw 1848.Pq Tn "Pentium M" 1849Exclude hardware-prefetched lines. 1850.It Li s 1851Count operations affecting S (shared) state lines. 1852.El 1853.Pp 1854The default on processors other than 1855.Tn "Pentium M" 1856processors is to count operations affecting all (MESI) state lines. 1857The default on 1858.Tn "Pentium M" 1859processors is to count both hardware-prefetched and 1860non-hardware-prefetch operations on all (MESI) state lines. 1861.Pq Errata 1862This event is affected by processor errata E53. 1863.It Li p6-l2-lines-in Op Li ,umask= Ns Ar qualifier 1864Count the number of L2 lines allocated. 1865An additional qualifier may be specified and comprises a list of the following 1866keywords separated by 1867.Ql + 1868characters: 1869.Pp 1870.Bl -tag -width indent -compact 1871.It Li both 1872.Pq Tn "Pentium M" 1873Count both hardware-prefetched lines and non-hardware-prefetched lines. 1874.It Li e 1875Count operations affecting E (exclusive) state lines. 1876.It Li hw 1877.Pq Tn "Pentium M" 1878Count hardware-prefetched lines only. 1879.It Li i 1880Count operations affecting I (invalid) state lines. 
1881.It Li m 1882Count operations affecting M (modified) state lines. 1883.It Li nonhw 1884.Pq Tn "Pentium M" 1885Exclude hardware-prefetched lines. 1886.It Li s 1887Count operations affecting S (shared) state lines. 1888.El 1889.Pp 1890The default on processors other than 1891.Tn "Pentium M" 1892processors is to count operations affecting all (MESI) state lines. 1893The default on 1894.Tn "Pentium M" 1895processors is to count both hardware-prefetched and 1896non-hardware-prefetch operations on all (MESI) state lines. 1897.Pq Errata 1898This event is affected by processor errata E45. 1899.It Li p6-l2-lines-out Op Li ,umask= Ns Ar qualifier 1900Count the number of L2 lines evicted. 1901An additional qualifier may be specified and comprises a list of the following 1902keywords separated by 1903.Ql + 1904characters: 1905.Pp 1906.Bl -tag -width indent -compact 1907.It Li both 1908.Pq Tn "Pentium M" 1909Count both hardware-prefetched lines and non-hardware-prefetched lines. 1910.It Li e 1911Count operations affecting E (exclusive) state lines. 1912.It Li hw 1913.Pq Tn "Pentium M" 1914Count hardware-prefetched lines only. 1915.It Li i 1916Count operations affecting I (invalid) state lines. 1917.It Li m 1918Count operations affecting M (modified) state lines. 1919.It Li nonhw 1920.Pq Tn "Pentium M" only 1921Exclude hardware-prefetched lines. 1922.It Li s 1923Count operations affecting S (shared) state lines. 1924.El 1925.Pp 1926The default on processors other than 1927.Tn "Pentium M" 1928processors is to count operations affecting all (MESI) state lines. 1929The default on 1930.Tn "Pentium M" 1931processors is to count both hardware-prefetched and 1932non-hardware-prefetch operations on all (MESI) state lines. 1933.Pq Errata 1934This event is affected by processor errata E45. 1935.It Li p6-l2-m-lines-inm 1936Count the number of modified lines allocated in L2 cache. 
1937.It Li p6-l2-m-lines-outm Op Li ,umask= Ns Ar qualifier 1938Count the number of L2 M-state lines evicted. 1939.Pp 1940.Pq Tn "Pentium M" 1941On these processors an additional qualifier may be specified and 1942comprises a list of the following keywords separated by 1943.Ql + 1944characters: 1945.Pp 1946.Bl -tag -width indent -compact 1947.It Li both 1948Count both hardware-prefetched lines and non-hardware-prefetched lines. 1949.It Li hw 1950Count hardware-prefetched lines only. 1951.It Li nonhw 1952Exclude hardware-prefetched lines. 1953.El 1954.Pp 1955The default is to count both hardware-prefetched and 1956non-hardware-prefetch operations. 1957.Pq Errata 1958This event is affected by processor errata E53. 1959.It Li p6-l2-rqsts Op Li ,umask= Ns Ar qualifier 1960Count the total number of L2 requests. 1961An additional qualifier may be specified and comprises a list of the following 1962keywords separated by 1963.Ql + 1964characters: 1965.Pp 1966.Bl -tag -width indent -compact 1967.It Li e 1968Count operations affecting E (exclusive) state lines. 1969.It Li i 1970Count operations affecting I (invalid) state lines. 1971.It Li m 1972Count operations affecting M (modified) state lines. 1973.It Li s 1974Count operations affecting S (shared) state lines. 1975.El 1976.Pp 1977The default is to count operations affecting all (MESI) state lines. 1978.It Li p6-l2-st 1979Count the number of L2 data stores. 1980An additional qualifier may be specified and comprises a list of the following 1981keywords separated by 1982.Ql + 1983characters: 1984.Pp 1985.Bl -tag -width indent -compact 1986.It Li e 1987Count operations affecting E (exclusive) state lines. 1988.It Li i 1989Count operations affecting I (invalid) state lines. 1990.It Li m 1991Count operations affecting M (modified) state lines. 1992.It Li s 1993Count operations affecting S (shared) state lines. 1994.El 1995.Pp 1996The default is to count operations affecting all (MESI) state lines. 
1997.It Li p6-ld-blocks 1998Count the number of load operations delayed due to store buffer blocks. 1999.It Li p6-misalign-mem-ref 2000Count the number of misaligned data memory references (crossing a 64 2001bit boundary). 2002.It Li p6-mmx-assist 2003.Pq Tn "Pentium II" , Tn "Pentium III" 2004Count the number of MMX assists executed. 2005.It Li p6-mmx-instr-exec 2006.Pq Tn Celeron , Tn "Pentium II" 2007Count the number of MMX instructions executed, except MOVQ and MOVD 2008stores from register to memory. 2009.It Li p6-mmx-instr-ret 2010.Pq Tn "Pentium II" 2011Count the number of MMX instructions retired. 2012.It Li p6-mmx-instr-type-exec Op Li ,umask= Ns Ar qualifier 2013.Pq Tn "Pentium II" , Tn "Pentium III" 2014Count the number of MMX instructions executed. 2015An additional qualifier may be specified and comprises a list of 2016the following keywords separated by 2017.Ql + 2018characters: 2019.Pp 2020.Bl -tag -width indent -compact 2021.It Li pack 2022Count MMX pack operation instructions. 2023.It Li packed-arithmetic 2024Count MMX packed arithmetic instructions. 2025.It Li packed-logical 2026Count MMX packed logical instructions. 2027.It Li packed-multiply 2028Count MMX packed multiply instructions. 2029.It Li packed-shift 2030Count MMX packed shift instructions. 2031.It Li unpack 2032Count MMX unpack operation instructions. 2033.El 2034.Pp 2035The default is to count all operations. 2036.It Li p6-mmx-sat-instr-exec 2037.Pq Tn "Pentium II" , Tn "Pentium III" 2038Count the number of MMX saturating instructions executed. 2039.It Li p6-mmx-uops-exec 2040.Pq Tn "Pentium II" , Tn "Pentium III" 2041Count the number of MMX micro-ops executed. 2042.It Li p6-mul 2043Count the number of floating point multiplies. 2044This event is only allocated on counter 1. 2045.It Li p6-partial-rat-stalls 2046Count the number of cycles or events for partial stalls. 2047.It Li p6-resource-stalls 2048Count the number of cycles there was a resource related stall of any kind. 
2049.It Li p6-ret-seg-renames 2050.Pq Tn "Pentium II" , Tn "Pentium III" 2051Count the number of segment register rename events retired. 2052.It Li p6-sb-drains 2053Count the number of cycles the store buffer is draining. 2054.It Li p6-seg-reg-renames Op Li ,umask= Ns Ar qualifier 2055.Pq Tn "Pentium II" , Tn "Pentium III" 2056Count the number of segment register renames. 2057An additional qualifier may be specified, and comprises a list of the 2058following keywords separated by 2059.Ql + 2060characters: 2061.Pp 2062.Bl -tag -width indent -compact 2063.It Li ds 2064Count renames for segment register DS. 2065.It Li es 2066Count renames for segment register ES. 2067.It Li fs 2068Count renames for segment register FS. 2069.It Li gs 2070Count renames for segment register GS. 2071.El 2072.Pp 2073The default is to count operations affecting all segment registers. 2074.It Li p6-seg-rename-stalls 2075.Pq Tn "Pentium II" , Tn "Pentium III" 2076Count the number of segment register renaming stalls. 2077An additional qualifier may be specified, and comprises a list of the 2078following keywords separated by 2079.Ql + 2080characters: 2081.Pp 2082.Bl -tag -width indent -compact 2083.It Li ds 2084Count stalls for segment register DS. 2085.It Li es 2086Count stalls for segment register ES. 2087.It Li fs 2088Count stalls for segment register FS. 2089.It Li gs 2090Count stalls for segment register GS. 2091.El 2092.Pp 2093The default is to count operations affecting all the segment registers. 2094.It Li p6-segment-reg-loads 2095Count the number of segment register loads. 2096.It Li p6-uops-retired 2097Count the number of micro-ops retired. 2098.El 2099.Ss Intel P4 PMCS 2100Intel P4 PMCs are present in Intel 2101.Tn "Pentium 4" 2102and 2103.Tn Xeon 2104processors. 
2105These PMCs are documented in 2106.Rs 2107.%B "IA-32 Intel(R) Architecture Software Developer's Manual" 2108.%T "Volume 3: System Programming Guide" 2109.%N "Order Number 245472-012" 2110.%D 2003 2111.%Q "Intel Corporation" 2112.Re 2113Further information about using these PMCs may be found in 2114.Rs 2115.%B "IA-32 Intel(R) Architecture Optimization Guide" 2116.%D 2003 2117.%N "Order Number 248966-009" 2118.%Q "Intel Corporation" 2119.Re 2120Some of these events are affected by processor errata described in 2121.Rs 2122.%B "Intel(R) Pentium(R) 4 Processor Specification Update" 2123.%N "Document Number: 249199-059" 2124.%D "April 2005" 2125.%Q "Intel Corporation" 2126.Re 2127.Pp 2128Event specifiers for Intel P4 PMCs can have the following common 2129qualifiers: 2130.Bl -tag -width indent 2131.It Li active= Ns Ar choice 2132(On P4 HTT CPUs) Filter event counting based on which logical 2133processors are active. 2134The allowed values of 2135.Ar choice 2136are: 2137.Pp 2138.Bl -tag -width indent -compact 2139.It Li any 2140Count when either logical processor is active. 2141.It Li both 2142Count when both logical processors are active. 2143.It Li none 2144Count only when neither logical processor is active. 2145.It Li single 2146Count only when one logical processor is active. 2147.El 2148.Pp 2149The default is 2150.Dq Li both . 2151.It Li cascade 2152Configure the PMC to cascade onto its partner. 2153See 2154.Sx "Cascading P4 PMCs" 2155below for more information. 2156.It Li edge 2157Configure the counter to count false to true transitions of the threshold 2158comparison output. 2159This qualifier only takes effect if a threshold qualifier has also been 2160specified. 2161.It Li complement 2162Configure the counter to increment only when the event count seen is 2163less than the threshold qualifier value specified. 2164.It Li mask= Ns Ar qualifier 2165Many event specifiers for Intel P4 PMCs need to be additionally 2166qualified using a mask qualifier.
2167The allowed syntax for these qualifiers is event specific and is 2168described along with the events. 2169.It Li os 2170Configure the PMC to count when the CPL of the processor is 0. 2171.It Li precise 2172Select precise event based sampling. 2173Precise sampling is supported by the hardware for a limited set of 2174events. 2175.It Li tag= Ns Ar value 2176Configure the PMC to tag the internal uop selected by the other 2177fields in this event specifier with value 2178.Ar value . 2179This feature is used when cascading PMCs. 2180.It Li threshold= Ns Ar value 2181Configure the PMC to increment only when the event counts seen are 2182greater than the specified threshold value 2183.Ar value . 2184.It Li usr 2185Configure the PMC to count when the CPL of the processor is 1, 2 or 3. 2186.El 2187.Pp 2188If neither of the 2189.Dq Li os 2190or 2191.Dq Li usr 2192qualifiers are specified, the default is to enable both. 2193.Pp 2194On Intel Pentium 4 processors with HTT, events are 2195divided into two classes: 2196.Pp 2197.Bl -tag -width indent -compact 2198.It "TS Events" 2199are those where hardware can differentiate between events 2200generated on one logical processor from those generated on the 2201other. 2202.It "TI Events" 2203are those where hardware cannot differentiate between events 2204generated by multiple logical processors in a package. 2205.El 2206.Pp 2207Only TS events are allowed for use with process-mode PMCs on 2208Pentium-4/HTT CPUs. 2209.Pp 2210The event specifiers supported by Intel P4 PMCs are: 2211.Pp 2212.Bl -tag -width indent 2213.It Li p4-128bit-mmx-uop Op Li ,mask= Ns Ar flags 2214.Pq "TI event" 2215Count integer SIMD SSE2 instructions that operate on 128 bit SIMD 2216operands. 2217Qualifier 2218.Ar flags 2219can take the following value (which is also the default): 2220.Pp 2221.Bl -tag -width indent -compact 2222.It Li all 2223Count all uops operating on 128 bit SIMD integer operands in memory or 2224XMM register. 
2225.El 2226.Pp 2227If an instruction contains more than one 128 bit MMX uop, then each 2228uop will be counted. 2229.It Li p4-64bit-mmx-uop Op Li ,mask= Ns Ar flags 2230.Pq "TI event" 2231Count MMX instructions that operate on 64 bit SIMD operands. 2232Qualifier 2233.Ar flags 2234can take the following value (which is also the default): 2235.Pp 2236.Bl -tag -width indent -compact 2237.It Li all 2238Count all uops operating on 64 bit SIMD integer operands in memory or 2239in MMX registers. 2240.El 2241.Pp 2242If an instruction contains more than one 64 bit MMX uop, then each 2243uop will be counted. 2244.It Li p4-b2b-cycles 2245.Pq "TI event" 2246Count back-to-back bus cycles. 2247Further documentation for this event is unavailable. 2248.It Li p4-bnr 2249.Pq "TI event" 2250Count bus-not-ready conditions. 2251Further documentation for this event is unavailable. 2252.It Li p4-bpu-fetch-request Op Li ,mask= Ns Ar qualifier 2253.Pq "TS event" 2254Count instruction fetch requests qualified by additional 2255flags specified in 2256.Ar qualifier . 2257At this point only one flag is supported: 2258.Pp 2259.Bl -tag -width indent -compact 2260.It Li tcmiss 2261Count trace cache lookup misses. 2262.El 2263.Pp 2264The default qualifier is also 2265.Dq Li mask=tcmiss . 2266.It Li p4-branch-retired Op Li ,mask= Ns Ar flags 2267.Pq "TS event" 2268Count retired branches. 2269Qualifier 2270.Ar flags 2271is a list of the following 2272.Ql + 2273separated strings: 2274.Pp 2275.Bl -tag -width indent -compact 2276.It Li mmnp 2277Count branches not-taken and predicted. 2278.It Li mmnm 2279Count branches not-taken and mis-predicted. 2280.It Li mmtp 2281Count branches taken and predicted. 2282.It Li mmtm 2283Count branches taken and mis-predicted. 2284.El 2285.Pp 2286The default qualifier counts all four kinds of branches. 2287.It Li p4-bsq-active-entries Op Li ,mask= Ns Ar qualifier 2288.Pq "TS event" 2289Count the number of entries (clipped at 15) currently active in the 2290BSQ.
2291Qualifier 2292.Ar qualifier 2293is a 2294.Ql + 2295separated set of the following flags: 2296.Pp 2297.Bl -tag -width indent -compact 2298.It Li req-type0 , Li req-type1 2299Forms a 2-bit number used to select the request type encoding: 2300.Pp 2301.Bl -tag -width indent -compact 2302.It Li 0 2303reads excluding read invalidate 2304.It Li 1 2305read invalidates 2306.It Li 2 2307writes other than writebacks 2308.It Li 3 2309writebacks 2310.El 2311.Pp 2312Bit 2313.Dq Li req-type1 2314is the MSB for this two bit number. 2315.It Li req-len0 , Li req-len1 2316Forms a two-bit number that specifies the request length encoding: 2317.Pp 2318.Bl -tag -width indent -compact 2319.It Li 0 23200 chunks 2321.It Li 1 23221 chunk 2323.It Li 3 23248 chunks 2325.El 2326.Pp 2327Bit 2328.Dq Li req-len1 2329is the MSB for this two bit number. 2330.It Li req-io-type 2331Count requests that are input or output requests. 2332.It Li req-lock-type 2333Count requests that lock the bus. 2334.It Li req-lock-cache 2335Count requests that lock the cache. 2336.It Li req-split-type 2337Count requests that is a bus 8-byte chunk that is split across an 23388-byte boundary. 2339.It Li req-dem-type 2340Count requests that are demand (not prefetches) if set. 2341Count requests that are prefetches if not set. 2342.It Li req-ord-type 2343Count requests that are ordered. 2344.It Li mem-type0 , Li mem-type1 , Li mem-type2 2345Forms a 3-bit number that specifies a memory type encoding: 2346.Pp 2347.Bl -tag -width indent -compact 2348.It Li 0 2349UC 2350.It Li 1 2351USWC 2352.It Li 4 2353WT 2354.It Li 5 2355WP 2356.It Li 6 2357WB 2358.El 2359.Pp 2360Bit 2361.Dq Li mem-type2 2362is the MSB of this 3-bit number. 2363.El 2364.Pp 2365The default qualifier has all the above bits set. 2366.Pp 2367Edge triggering using the 2368.Dq Li edge 2369qualifier should not be used with this event when counting cycles. 
2370.It Li p4-bsq-allocation Op Li ,mask= Ns Ar qualifier 2371.Pq "TS event" 2372Count allocations in the bus sequence unit according to the flags 2373specified in 2374.Ar qualifier , 2375which is a 2376.Ql + 2377separated set of the following flags: 2378.Pp 2379.Bl -tag -width indent -compact 2380.It Li req-type0 , Li req-type1 2381Forms a 2-bit number used to select the request type encoding: 2382.Pp 2383.Bl -tag -width indent -compact 2384.It Li 0 2385reads excluding read invalidate 2386.It Li 1 2387read invalidates 2388.It Li 2 2389writes other than writebacks 2390.It Li 3 2391writebacks 2392.El 2393.Pp 2394Bit 2395.Dq Li req-type1 2396is the MSB for this two bit number. 2397.It Li req-len0 , Li req-len1 2398Forms a two-bit number that specifies the request length encoding: 2399.Pp 2400.Bl -tag -width indent -compact 2401.It Li 0 24020 chunks 2403.It Li 1 24041 chunk 2405.It Li 3 24068 chunks 2407.El 2408.Pp 2409Bit 2410.Dq Li req-len1 2411is the MSB for this two bit number. 2412.It Li req-io-type 2413Count requests that are input or output requests. 2414.It Li req-lock-type 2415Count requests that lock the bus. 2416.It Li req-lock-cache 2417Count requests that lock the cache. 2418.It Li req-split-type 2419Count requests that is a bus 8-byte chunk that is split across an 24208-byte boundary. 2421.It Li req-dem-type 2422Count requests that are demand (not prefetches) if set. 2423Count requests that are prefetches if not set. 2424.It Li req-ord-type 2425Count requests that are ordered. 2426.It Li mem-type0 , Li mem-type1 , Li mem-type2 2427Forms a 3-bit number that specifies a memory type encoding: 2428.Pp 2429.Bl -tag -width indent -compact 2430.It Li 0 2431UC 2432.It Li 1 2433USWC 2434.It Li 4 2435WT 2436.It Li 5 2437WP 2438.It Li 6 2439WB 2440.El 2441.Pp 2442Bit 2443.Dq Li mem-type2 2444is the MSB of this 3-bit number. 2445.El 2446.Pp 2447The default qualifier has all the above bits set. 
2448.Pp 2449This event is usually used along with the 2450.Dq Li edge 2451qualifier to avoid multiple counting. 2452.It Li p4-bsq-cache-reference Op Li ,mask= Ns Ar qualifier 2453.Pq "TS event" 2454Count cache references as seen by the bus unit (2nd or 3rd level 2455cache references). 2456Qualifier 2457.Ar qualifier 2458is a 2459.Ql + 2460separated list of the following keywords: 2461.Pp 2462.Bl -tag -width indent -compact 2463.It Li rd-2ndl-hits 2464Count 2nd level cache hits in the shared state. 2465.It Li rd-2ndl-hite 2466Count 2nd level cache hits in the exclusive state. 2467.It Li rd-2ndl-hitm 2468Count 2nd level cache hits in the modified state. 2469.It Li rd-3rdl-hits 2470Count 3rd level cache hits in the shared state. 2471.It Li rd-3rdl-hite 2472Count 3rd level cache hits in the exclusive state. 2473.It Li rd-3rdl-hitm 2474Count 3rd level cache hits in the modified state. 2475.It Li rd-2ndl-miss 2476Count 2nd level cache misses. 2477.It Li rd-3rdl-miss 2478Count 3rd level cache misses. 2479.It Li wr-2ndl-miss 2480Count write-back lookups from the data access cache that miss the 2nd 2481level cache. 2482.El 2483.Pp 2484The default is to count all the above events. 2485.It Li p4-execution-event Op Li ,mask= Ns Ar flags 2486.Pq "TS event" 2487Count the retirement of tagged uops selected through the execution 2488tagging mechanism. 2489Qualifier 2490.Ar flags 2491can contain the following strings separated by 2492.Ql + 2493characters: 2494.Pp 2495.Bl -tag -width indent -compact 2496.It Li nbogus0 , Li nbogus1 , Li nbogus2 , Li nbogus3 2497The marked uops are not bogus. 2498.It Li bogus0 , Li bogus1 , Li bogus2 , Li bogus3 2499The marked uops are bogus. 2500.El 2501.Pp 2502This event requires additional (upstream) events to be allocated to 2503perform the desired uop tagging. 2504The default is to set all the above flags. 2505This event can be used for precise event based sampling. 
2506.It Li p4-front-end-event Op Li ,mask= Ns Ar flags 2507.Pq "TS event" 2508Count the retirement of tagged uops selected through the front-end 2509tagging mechanism. 2510Qualifier 2511.Ar flags 2512can contain the following strings separated by 2513.Ql + 2514characters: 2515.Pp 2516.Bl -tag -width indent -compact 2517.It Li nbogus 2518The marked uops are not bogus. 2519.It Li bogus 2520The marked uops are bogus. 2521.El 2522.Pp 2523This event requires additional (upstream) events to be allocated to 2524perform the desired uop tagging. 2525The default is to select both kinds of events. 2526This event can be used for precise event based sampling. 2527.It Li p4-fsb-data-activity Op Li ,mask= Ns Ar flags 2528.Pq "TI event" 2529Count each DBSY or DRDY event selected by qualifier 2530.Ar flags . 2531Qualifier 2532.Ar flags 2533is a 2534.Ql + 2535separated set of the following flags: 2536.Pp 2537.Bl -tag -width indent -compact 2538.It Li drdy-drv 2539Count when this processor is driving data onto the bus. 2540.It Li drdy-own 2541Count when this processor is reading data from the bus. 2542.It Li drdy-other 2543Count when data is on the bus but not being sampled by this processor. 2544.It Li dbsy-drv 2545Count when this processor reserves the bus for use in the next cycle 2546in order to drive data. 2547.It Li dbsy-own 2548Count when some agent reserves the bus for use in the next bus cycle 2549to drive data that this processor will sample. 2550.It Li dbsy-other 2551Count when some agent reserves the bus for use in the next bus cycle 2552to drive data that this processor will not sample. 2553.El 2554.Pp 2555Flags 2556.Dq Li drdy-own 2557and 2558.Dq Li drdy-other 2559are mutually exclusive. 2560Flags 2561.Dq Li dbsy-own 2562and 2563.Dq Li dbsy-other 2564are mutually exclusive. 2565The default value for 2566.Ar qualifier 2567is 2568.Dq Li drdy-drv+drdy-own+dbsy-drv+dbsy-own . 
2569.It Li p4-global-power-events Op Li ,mask= Ns Ar flags 2570.Pq "TS event" 2571Count cycles during which the processor is not stopped. 2572Qualifier 2573.Ar flags 2574can take the following value (which is also the default): 2575.Pp 2576.Bl -tag -width indent -compact 2577.It Li running 2578Count cycles when the processor is active. 2579.El 2580.Pp 2581.It Li p4-instr-retired Op Li ,mask= Ns Ar flags 2582.Pq "TS event" 2583Count instructions retired during a clock cycle. 2584Qualifier 2585.Ar flags 2586comprises the following strings separated by 2587.Ql + 2588characters: 2589.Pp 2590.Bl -tag -width indent -compact 2591.It Li nbogusntag 2592Count non-bogus instructions that are not tagged. 2593.It Li nbogustag 2594Count non-bogus instructions that are tagged. 2595.It Li bogusntag 2596Count bogus instructions that are not tagged. 2597.It Li bogustag 2598Count bogus instructions that are tagged. 2599.El 2600.Pp 2601The default qualifier counts all the above kinds of instructions. 2602.It Li p4-ioq-active-entries Xo 2603.Op Li ,mask= Ns Ar qualifier 2604.Op Li ,busreqtype= Ns Ar req-type 2605.Xc 2606.Pq "TS event" 2607Count the number of entries (clipped at 15) in the IOQ that are 2608active. 2609The event masks are specified by qualifier 2610.Ar qualifier 2611and 2612.Ar req-type . 2613.Pp 2614Qualifier 2615.Ar qualifier 2616is a 2617.Ql + 2618separated set of the following flags: 2619.Pp 2620.Bl -tag -width indent -compact 2621.It Li all-read 2622Count read entries. 2623.It Li all-write 2624Count write entries. 2625.It Li mem-uc 2626Count entries accessing uncacheable memory. 2627.It Li mem-wc 2628Count entries accessing write-combining memory. 2629.It Li mem-wt 2630Count entries accessing write-through memory. 2631.It Li mem-wp 2632Count entries accessing write-protected memory. 2633.It Li mem-wb 2634Count entries accessing write-back memory. 2635.It Li own 2636Count store requests driven by the processor (i.e., not by other 2637processors or by DMA).
2638.It Li other 2639Count store requests driven by other processors or by DMA. 2640.It Li prefetch 2641Include hardware and software prefetch requests in the count. 2642.El 2643.Pp 2644The default value for 2645.Ar qualifier 2646is to enable all the above flags. 2647.Pp 2648The 2649.Ar req-type 2650qualifier is a 5-bit number that can be additionally used to select a 2651specific bus request type. 2652The default is 0. 2653.Pp 2654The 2655.Dq Li edge 2656qualifier should not be used when counting cycles with this event. 2657The exact behaviour of this event depends on the processor revision. 2658.It Li p4-ioq-allocation Xo 2659.Op Li ,mask= Ns Ar qualifier 2660.Op Li ,busreqtype= Ns Ar req-type 2661.Xc 2662.Pq "TS event" 2663Count various types of transactions on the bus matching the flags set 2664in 2665.Ar qualifier 2666and 2667.Ar req-type . 2668.Pp 2669Qualifier 2670.Ar qualifier 2671is a 2672.Ql + 2673separated set of the following flags: 2674.Pp 2675.Bl -tag -width indent -compact 2676.It Li all-read 2677Count read entries. 2678.It Li all-write 2679Count write entries. 2680.It Li mem-uc 2681Count entries accessing uncacheable memory. 2682.It Li mem-wc 2683Count entries accessing write-combining memory. 2684.It Li mem-wt 2685Count entries accessing write-through memory. 2686.It Li mem-wp 2687Count entries accessing write-protected memory. 2688.It Li mem-wb 2689Count entries accessing write-back memory. 2690.It Li own 2691Count store requests driven by the processor (i.e., not by other 2692processors or by DMA). 2693.It Li other 2694Count store requests driven by other processors or by DMA. 2695.It Li prefetch 2696Include hardware and software prefetch requests in the count. 2697.El 2698.Pp 2699The default value for 2700.Ar qualifier 2701is to enable all the above flags. 2702.Pp 2703The 2704.Ar req-type 2705qualifier is a 5-bit number that can be additionally used to select a 2706specific bus request type. 2707The default is 0.
2708.Pp 2709The 2710.Dq Li edge 2711qualifier is normally used with this event to prevent multiple 2712counting. 2713The exact behaviour of this event depends on the processor revision. 2714.It Li p4-itlb-reference Op Li ,mask= Ns Ar qualifier 2715.Pq "TS event" 2716Count translations using the instruction translation look-aside 2717buffer. 2718The 2719.Ar qualifier 2720argument is a list of the following strings separated by 2721.Ql + 2722characters: 2723.Pp 2724.Bl -tag -width indent -compact 2725.It Li hit 2726Count ITLB hits. 2727.It Li miss 2728Count ITLB misses. 2729.It Li hit-uc 2730Count uncacheable ITLB hits. 2731.El 2732.Pp 2733If no 2734.Ar qualifier 2735is specified the default is to count all the three kinds of ITLB 2736translations. 2737.It Li p4-load-port-replay Op Li ,mask= Ns Ar qualifier 2738.Pq "TS event" 2739Count replayed events at the load port. 2740Qualifier 2741.Ar qualifier 2742can take on one value: 2743.Pp 2744.Bl -tag -width indent -compact 2745.It Li split-ld 2746Count split loads. 2747.El 2748.Pp 2749The default value for 2750.Ar qualifier 2751is 2752.Dq Li split-ld . 2753.It Li p4-mispred-branch-retired Op Li ,mask= Ns Ar flags 2754.Pq "TS event" 2755Count mispredicted IA-32 branch instructions. 2756Qualifier 2757.Ar flags 2758can take the following value (which is also the default): 2759.Pp 2760.Bl -tag -width indent -compact 2761.It Li nbogus 2762Count non-bogus retired branch instructions. 2763.El 2764.It Li p4-machine-clear Op Li ,mask= Ns Ar flags 2765.Pq "TS event" 2766Count the number of pipeline clears seen by the processor. 2767Qualifier 2768.Ar flags 2769is a list of the following strings separated by 2770.Ql + 2771characters: 2772.Pp 2773.Bl -tag -width indent -compact 2774.It Li clear 2775Count for a portion of the many cycles when the machine is being 2776cleared for any reason. 2777.It Li moclear 2778Count machine clears due to memory ordering issues. 2779.It Li smclear 2780Count machine clears due to self-modifying code.
2781.El 2782.Pp 2783Use qualifier 2784.Dq Li edge 2785to get a count of occurrences of machine clears. 2786The default qualifier is 2787.Dq Li clear . 2788.It Li p4-memory-cancel Op Li ,mask= Ns Ar event-list 2789.Pq "TS event" 2790Count the cancelling of various kinds of requests in the data cache 2791address control unit of the CPU. 2792The qualifier 2793.Ar event-list 2794is a list of the following strings separated by 2795.Ql + 2796characters: 2797.Pp 2798.Bl -tag -width indent -compact 2799.It Li st-rb-full 2800Requests cancelled because no store request buffer was available. 2801.It Li 64k-conf 2802Requests that conflict due to 64K aliasing. 2803.El 2804.Pp 2805If 2806.Ar event-list 2807is not specified, then the default is to count both kinds of events. 2808.It Li p4-memory-complete Op Li ,mask= Ns Ar event-list 2809.Pq "TS event" 2810Count the completion of load split, store split, uncacheable split and 2811uncacheable load operations selected by qualifier 2812.Ar event-list . 2813The qualifier 2814.Ar event-list 2815is a 2816.Ql + 2817separated list of the following flags: 2818.Pp 2819.Bl -tag -width indent -compact 2820.It Li lsc 2821Count load splits completed, excluding loads from uncacheable or 2822write-combining areas. 2823.It Li ssc 2824Count any split stores completed. 2825.El 2826.Pp 2827The default is to count both kinds of operations. 2828.It Li p4-mob-load-replay Op Li ,mask= Ns Ar qualifier 2829.Pq "TS event" 2830Count load replays triggered by the memory order buffer. 2831Qualifier 2832.Ar qualifier 2833can be a 2834.Ql + 2835separated list of the following flags: 2836.Pp 2837.Bl -tag -width indent -compact 2838.It Li no-sta 2839Count replays because of unknown store addresses. 2840.It Li no-std 2841Count replays because of unknown store data. 2842.It Li partial-data 2843Count replays because of partially overlapped data accesses between 2844load and store operations. 
2845.It Li unalgn-addr 2846Count replays because of mismatches in the lower 4 bits of load and 2847store operations. 2848.El 2849.Pp 2850The default qualifier is 2851.Ar no-sta+no-std+partial-data+unalgn-addr . 2852.It Li p4-packed-dp-uop Op Li ,mask= Ns Ar flags 2853.Pq "TI event" 2854Count packed double-precision uops. 2855Qualifier 2856.Ar flags 2857can take the following value (which is also the default): 2858.Pp 2859.Bl -tag -width indent -compact 2860.It Li all 2861Count all uops operating on packed double-precision operands. 2862.El 2863.It Li p4-packed-sp-uop Op Li ,mask= Ns Ar flags 2864.Pq "TI event" 2865Count packed single-precision uops. 2866Qualifier 2867.Ar flags 2868can take the following value (which is also the default): 2869.Pp 2870.Bl -tag -width indent -compact 2871.It Li all 2872Count all uops operating on packed single-precision operands. 2873.El 2874.It Li p4-page-walk-type Op Li ,mask= Ns Ar qualifier 2875.Pq "TI event" 2876Count page walks performed by the page miss handler. 2877Qualifier 2878.Ar qualifier 2879can be a 2880.Ql + 2881separated list of the following keywords: 2882.Pp 2883.Bl -tag -width indent -compact 2884.It Li dtmiss 2885Count page walks for data TLB misses. 2886.It Li itmiss 2887Count page walks for instruction TLB misses. 2888.El 2889.Pp 2890The default value for 2891.Ar qualifier 2892is 2893.Dq Li dtmiss+itmiss . 2894.It Li p4-replay-event Op Li ,mask= Ns Ar flags 2895.Pq "TS event" 2896Count the retirement of tagged uops selected through the replay 2897tagging mechanism. 2898Qualifier 2899.Ar flags 2900contains a 2901.Ql + 2902separated set of the following strings: 2903.Pp 2904.Bl -tag -width indent -compact 2905.It Li nbogus 2906The marked uops are not bogus. 2907.It Li bogus 2908The marked uops are bogus. 2909.El 2910.Pp 2911This event requires additional (upstream) events to be allocated to 2912perform the desired uop tagging. 2913The default qualifier counts both kinds of uops. 
2914This event can be used for precise event based sampling. 2915.It Li p4-resource-stall Op Li ,mask= Ns Ar flags 2916.Pq "TS event" 2917Count the occurrence or latency of stalls in the allocator. 2918Qualifier 2919.Ar flags 2920can take the following value (which is also the default): 2921.Pp 2922.Bl -tag -width indent -compact 2923.It Li sbfull 2924A stall due to the lack of store buffers. 2925.El 2926.It Li p4-response 2927.Pq "TI event" 2928Count different types of responses. 2929Further documentation on this event is not available. 2930.It Li p4-retired-branch-type Op Li ,mask= Ns Ar flags 2931.Pq "TS event" 2932Count branches retired. 2933Qualifier 2934.Ar flags 2935contains a 2936.Ql + 2937separated list of strings: 2938.Pp 2939.Bl -tag -width indent -compact 2940.It Li conditional 2941Count conditional jumps. 2942.It Li call 2943Count direct and indirect call branches. 2944.It Li return 2945Count return branches. 2946.It Li indirect 2947Count returns, indirect calls or indirect jumps. 2948.El 2949.Pp 2950The default qualifier counts all the above branch types. 2951.It Li p4-retired-mispred-branch-type Op Li ,mask= Ns Ar flags 2952.Pq "TS event" 2953Count mispredicted branches retired. 2954Qualifier 2955.Ar flags 2956contains a 2957.Ql + 2958separated list of strings: 2959.Pp 2960.Bl -tag -width indent -compact 2961.It Li conditional 2962Count conditional jumps. 2963.It Li call 2964Count indirect call branches. 2965.It Li return 2966Count return branches. 2967.It Li indirect 2968Count returns, indirect calls or indirect jumps. 2969.El 2970.Pp 2971The default qualifier counts all the above branch types. 2972.It Li p4-scalar-dp-uop Op Li ,mask= Ns Ar flags 2973.Pq "TI event" 2974Count the number of scalar double-precision uops. 2975Qualifier 2976.Ar flags 2977can take the following value (which is also the default): 2978.Pp 2979.Bl -tag -width indent -compact 2980.It Li all 2981Count the number of scalar double-precision uops. 
2982.El 2983.It Li p4-scalar-sp-uop Op Li ,mask= Ns Ar flags 2984.Pq "TI event" 2985Count the number of scalar single-precision uops. 2986Qualifier 2987.Ar flags 2988can take the following value (which is also the default): 2989.Pp 2990.Bl -tag -width indent -compact 2991.It Li all 2992Count all uops operating on scalar single-precision operands. 2993.El 2994.It Li p4-snoop 2995.Pq "TI event" 2996Count snoop traffic. 2997Further documentation on this event is not available. 2998.It Li p4-sse-input-assist Op Li ,mask= Ns Ar flags 2999.Pq "TI event" 3000Count the number of times an assist is required to handle problems 3001with the operands for SSE and SSE2 operations. 3002Qualifier 3003.Ar flags 3004can take the following value (which is also the default): 3005.Pp 3006.Bl -tag -width indent -compact 3007.It Li all 3008Count assists for all SSE and SSE2 uops. 3009.El 3010.It Li p4-store-port-replay Op Li ,mask= Ns Ar qualifier 3011.Pq "TS event" 3012Count events replayed at the store port. 3013Qualifier 3014.Ar qualifier 3015can take on one value: 3016.Pp 3017.Bl -tag -width indent -compact 3018.It Li split-st 3019Count split stores. 3020.El 3021.Pp 3022The default value for 3023.Ar qualifier 3024is 3025.Dq Li split-st . 3026.It Li p4-tc-deliver-mode Op Li ,mask= Ns Ar qualifier 3027.Pq "TI event" 3028Count the duration in cycles of operating modes of the trace cache and 3029decode engine. 3030The desired operating mode is selected by 3031.Ar qualifier , 3032which is a list of the following strings separated by 3033.Ql + 3034characters: 3035.Pp 3036.Bl -tag -width indent -compact 3037.It Li DD 3038Both logical processors are in deliver mode. 3039.It Li DB 3040Logical processor 0 is in deliver mode while logical processor 1 is in 3041build mode. 3042.It Li DI 3043Logical processor 0 is in deliver mode while logical processor 1 is 3044halted, or in machine clear, or transitioning to a long microcode 3045flow. 
3046.It Li BD 3047Logical processor 0 is in build mode while logical processor 1 is in 3048deliver mode. 3049.It Li BB 3050Both logical processors are in build mode. 3051.It Li BI 3052Logical processor 0 is in build mode while logical processor 1 is 3053halted, or in machine clear or transitioning to a long microcode 3054flow. 3055.It Li ID 3056Logical processor 0 is halted, or in machine clear or transitioning to 3057a long microcode flow while logical processor 1 is in deliver mode. 3058.It Li IB 3059Logical processor 0 is halted, or in machine clear or transitioning to 3060a long microcode flow while logical processor 1 is in build mode. 3061.El 3062.Pp 3063If there is only one logical processor in the processor package then 3064the qualifier for logical processor 1 is ignored. 3065If no qualifier is specified, the default qualifier is 3066.Dq Li DD+DB+DI+BD+BB+BI+ID+IB . 3067.It Li p4-tc-ms-xfer Op Li ,mask= Ns Ar flags 3068.Pq "TI event" 3069Count the number of times uop delivery changed from the trace cache to 3070MS ROM. 3071Qualifier 3072.Ar flags 3073can take the following value (which is also the default): 3074.Pp 3075.Bl -tag -width indent -compact 3076.It Li cisc 3077Count TC to MS transfers. 3078.El 3079.It Li p4-uop-queue-writes Op Li ,mask= Ns Ar flags 3080.Pq "TS event" 3081Count the number of valid uops written to the uop queue. 3082Qualifier 3083.Ar flags 3084is a list of the following strings, separated by 3085.Ql + 3086characters: 3087.Pp 3088.Bl -tag -width indent -compact 3089.It Li from-tc-build 3090Count uops being written from the trace cache in build mode. 3091.It Li from-tc-deliver 3092Count uops being written from the trace cache in deliver mode. 3093.It Li from-rom 3094Count uops being written from microcode ROM. 3095.El 3096.Pp 3097The default qualifier counts all the above kinds of uops. 
3098.It Li p4-uop-type Op Li ,mask= Ns Ar flags 3099.Pq "TS event" 3100This event is used in conjunction with the front-end at-retirement 3101mechanism to tag load and store uops. 3102Qualifier 3103.Ar flags 3104comprises the following strings separated by 3105.Ql + 3106characters: 3107.Pp 3108.Bl -tag -width indent -compact 3109.It Li tagloads 3110Mark uops that are load operations. 3111.It Li tagstores 3112Mark uops that are store operations. 3113.El 3114.Pp 3115The default qualifier counts both kinds of uops. 3116.It Li p4-uops-retired Op Li ,mask= Ns Ar flags 3117.Pq "TS event" 3118Count uops retired during a clock cycle. 3119Qualifier 3120.Ar flags 3121comprises the following strings separated by 3122.Ql + 3123characters: 3124.Pp 3125.Bl -tag -width indent -compact 3126.It Li nbogus 3127Count marked uops that are not bogus. 3128.It Li bogus 3129Count marked uops that are bogus. 3130.El 3131.Pp 3132The default qualifier counts both kinds of uops. 3133.It Li p4-wc-buffer Op Li ,mask= Ns Ar flags 3134.Pq "TI event" 3135Count write-combining buffer operations. 3136Qualifier 3137.Ar flags 3138contains the following strings separated by 3139.Ql + 3140characters: 3141.Pp 3142.Bl -tag -width indent -compact 3143.It Li wcb-evicts 3144WC buffer evictions due to any cause. 3145.It Li wcb-full-evict 3146WC buffer evictions due to no WC buffer being available. 3147.El 3148.Pp 3149The default qualifier counts both kinds of evictions. 3150.It Li p4-x87-assist Op Li ,mask= Ns Ar flags 3151.Pq "TS event" 3152Count the retirement of x87 instructions that required special 3153handling. 3154Qualifier 3155.Ar flags 3156contains the following strings separated by 3157.Ql + 3158characters: 3159.Pp 3160.Bl -tag -width indent -compact 3161.It Li fpsu 3162Count instructions that saw an FP stack underflow. 3163.It Li fpso 3164Count instructions that saw an FP stack overflow. 3165.It Li poao 3166Count instructions that saw an x87 output overflow.
3167.It Li poau 3168Count instructions that saw an x87 output underflow. 3169.It Li prea 3170Count instructions that needed an x87 input assist. 3171.El 3172.Pp 3173The default qualifier counts all the above types of instruction 3174retirements. 3175.It Li p4-x87-fp-uop Op Li ,mask= Ns Ar flags 3176.Pq "TI event" 3177Count x87 floating-point uops. 3178Qualifier 3179.Ar flags 3180can take the following value (which is also the default): 3181.Pp 3182.Bl -tag -width indent -compact 3183.It Li all 3184Count all x87 floating-point uops. 3185.El 3186.Pp 3187If an instruction contains more than one x87 floating-point uop, then 3188all x87 floating-point uops will be counted. 3189This event does not count x87 floating-point data movement operations. 3190.It Li p4-x87-simd-moves-uop Op Li ,mask= Ns Ar flags 3191.Pq "TI event" 3192Count each x87 FPU, MMX, SSE, or SSE2 uop that loads data or stores 3193data or performs register-to-register moves. 3194This event does not count integer move uops. 3195Qualifier 3196.Ar flags 3197may contain the following keywords separated by 3198.Ql + 3199characters: 3200.Pp 3201.Bl -tag -width indent -compact 3202.It Li allp0 3203Count all x87 and SIMD store and move uops. 3204.It Li allp2 3205Count all x87 and SIMD load uops. 3206.El 3207.Pp 3208The default is to count all uops. 3209.Pq Errata 3210This event may be affected by processor errata N43. 3211.El 3212.Ss "Cascading P4 PMCs" 3213PMC cascading support is currently poorly implemented. 3214While individual event counters may be allocated with a 3215.Dq Li cascade 3216qualifier, the current API does not offer the ability 3217to name and allocate all the resources needed for a 3218cascaded event counter pair in a single operation. 3219.Ss "Precise Event Based Sampling" 3220Support for precise event based sampling is currently 3221unimplemented in 3222.Xr hwpmc 4 .
3223.Sh IMPLEMENTATION NOTES 3224On the i386 architecture, 3225.Fx 3226has historically allowed the use of the RDTSC instruction from 3227user-mode (i.e., at a processor CPL of 3) by any process. 3228This behaviour is preserved by 3229.Xr hwpmc 4 . 3230.Sh RETURN VALUES 3231The 3232.Fn pmc_name_of_capability , 3233.Fn pmc_name_of_class , 3234.Fn pmc_name_of_cputype , 3235.Fn pmc_name_of_disposition , 3236.Fn pmc_name_of_event , 3237.Fn pmc_name_of_mode , 3238and 3239.Fn pmc_name_of_state 3240functions return a pointer to the human readable form of their argument. 3241These pointers may point to statically allocated storage and must 3242not be passed to 3243.Fn free . 3244In case of an error, these functions return 3245.Dv NULL 3246and set the global variable 3247.Va errno . 3248.Pp 3249The functions 3250.Fn pmc_ncpu 3251and 3252.Fn pmc_npmc 3253return the number of CPUs and number of PMCs configured respectively; 3254in case of an error they return the value 3255\-1 3256and set the global variable 3257.Va errno . 3258.Pp 3259All other functions return the value 32600 3261if successful; otherwise the value 3262\-1 3263is returned and the global variable 3264.Va errno 3265is set to indicate the error. 3266.Sh COMPATIBILITY 3267The interface between the 3268.Nm pmc 3269library and the 3270.Xr hwpmc 4 3271driver is intended to be private to the implementation and may 3272change. 3273In order to ease forward compatibility with future versions of the 3274.Xr hwpmc 4 3275driver, applications are urged to dynamically link with the 3276.Nm pmc 3277library. 3278.Pp 3279The 3280.Nm pmc 3281API is 3282.Ud 3283.Sh ERRORS 3284A call to 3285.Fn pmc_init 3286may fail with the following errors in addition to those returned by 3287.Xr modfind 2 , 3288.Xr modstat 2 3289and 3290.Xr hwpmc 4 : 3291.Bl -tag -width Er 3292.It Bq Er ENXIO 3293An unknown CPU type was encountered during initialization. 
3294.It Bq Er EPROGMISMATCH 3295The version number of the 3296.Xr hwpmc 4 3297kernel module did not match that compiled into the 3298.Nm pmc 3299library. 3300.El 3301.Pp 3302A call to 3303.Fn pmc_capabilities , 3304.Fn pmc_name_of_capability , 3305.Fn pmc_name_of_disposition , 3306.Fn pmc_name_of_state , 3307.Fn pmc_name_of_event , 3308.Fn pmc_name_of_mode , 3309.Fn pmc_name_of_class 3310and 3311.Fn pmc_width 3312may fail with the following error: 3313.Bl -tag -width Er 3314.It Bq Er EINVAL 3315An invalid argument was passed to the function. 3316.El 3317.Pp 3318A call to 3319.Fn pmc_cpuinfo 3320or 3321.Fn pmc_ncpu 3322may fail with the following error: 3323.Bl -tag -width Er 3324.It Bq Er ENXIO 3325The 3326.Nm pmc 3327library has not been initialized. 3328.El 3329.Pp 3330A call to 3331.Fn pmc_npmc 3332may fail with the following errors: 3333.Bl -tag -width Er 3334.It Bq Er EINVAL 3335The argument passed in was out of range. 3336.It Bq Er ENXIO 3337The 3338.Nm pmc 3339library has not been initialized. 3340.El 3341.Pp 3342A call to 3343.Fn pmc_pmcinfo 3344may fail with the following errors, in addition to those returned by 3345.Xr hwpmc 4 : 3346.Bl -tag -width Er 3347.It Bq Er ENXIO 3348The 3349.Nm pmc 3350library is not yet initialized. 3351.El 3352.Pp 3353A call to 3354.Fn pmc_allocate 3355may fail with the following errors, in addition to those returned by 3356.Xr hwpmc 4 : 3357.Bl -tag -width Er 3358.It Bq Er EINVAL 3359The 3360.Fa mode 3361argument passed in had an illegal value, or the event specification 3362.Fa eventspecifier 3363was unrecognized for this CPU type.
3364.El 3365.Pp 3366Calls to 3367.Fn pmc_attach , 3368.Fn pmc_configure_logfile , 3369.Fn pmc_detach , 3370.Fn pmc_disable , 3371.Fn pmc_enable , 3372.Fn pmc_get_driver_stats , 3373.Fn pmc_get_msr , 3374.Fn pmc_read , 3375.Fn pmc_release , 3376.Fn pmc_rw , 3377.Fn pmc_set , 3378.Fn pmc_start , 3379.Fn pmc_stop , 3380.Fn pmc_write , 3381and 3382.Fn pmc_writelog 3383may fail with the errors described in 3384.Xr hwpmc 4 . 3385.Pp 3386If a log file was configured using 3387.Fn pmc_configure_logfile 3388and the 3389.Xr hwpmc 4 3390driver encountered an error while logging data to it, then 3391logging will be stopped and a subsequent call to 3392.Fn pmc_flush_logfile 3393will fail with the error code seen by the 3394.Xr hwpmc 4 3395driver. 3396.Sh SEE ALSO 3397.Xr modfind 2 , 3398.Xr modstat 2 , 3399.Xr calloc 3 , 3400.Xr pmclog 3 , 3401.Xr hwpmc 4 , 3402.Xr pmccontrol 8 , 3403.Xr pmcstat 8 3404.Sh HISTORY 3405The 3406.Nm pmc 3407library first appeared in 3408.Fx 6.0 . 3409.Sh BUGS 3410The information returned by 3411.Fn pmc_cpuinfo , 3412.Fn pmc_ncpu 3413and possibly 3414.Fn pmc_npmc 3415should really be available all the time, through a better designed 3416interface and not just when 3417.Xr hwpmc 4 3418is present in the kernel. 3419