1de6cc651SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later 21da177e4SLinus Torvalds /* 31da177e4SLinus Torvalds * Parisc performance counters 41da177e4SLinus Torvalds * Copyright (C) 2001 Randolph Chung <tausq@debian.org> 51da177e4SLinus Torvalds * 61da177e4SLinus Torvalds * This code is derived, with permission, from HP/UX sources. 71da177e4SLinus Torvalds */ 81da177e4SLinus Torvalds 91da177e4SLinus Torvalds /* 101da177e4SLinus Torvalds * Edited comment from original sources: 111da177e4SLinus Torvalds * 121da177e4SLinus Torvalds * This driver programs the PCX-U/PCX-W performance counters 131da177e4SLinus Torvalds * on the PA-RISC 2.0 chips. The driver keeps all images now 1406fe9fb4SDirk Hohndel * internally to the kernel to hopefully eliminate the possibility 151da177e4SLinus Torvalds * of a bad image halting the CPU. Also, there are different 161da177e4SLinus Torvalds * images for the PCX-W and later chips vs the PCX-U chips. 171da177e4SLinus Torvalds * 181da177e4SLinus Torvalds * Only 1 process is allowed to access the driver at any time, 191da177e4SLinus Torvalds * so the only protection that is needed is at open and close. 201da177e4SLinus Torvalds * A variable "perf_enabled" is used to hold the state of the 211da177e4SLinus Torvalds * driver. The spinlock "perf_lock" is used to protect the 221da177e4SLinus Torvalds * modification of the state during open/close operations so 231da177e4SLinus Torvalds * multiple processes don't get into the driver simultaneously. 241da177e4SLinus Torvalds * 251da177e4SLinus Torvalds * This driver accesses the processor directly vs going through 261da177e4SLinus Torvalds * the PDC INTRIGUE calls. This is done to eliminate bugs introduced 271da177e4SLinus Torvalds * in various PDC revisions. The code is much more maintainable 281da177e4SLinus Torvalds * and reliable this way vs having to debug on every version of PDC 291da177e4SLinus Torvalds * on every box. 301da177e4SLinus Torvalds */ 311da177e4SLinus Torvalds 32a9415644SRandy Dunlap #include <linux/capability.h> 331da177e4SLinus Torvalds #include <linux/init.h> 341da177e4SLinus Torvalds #include <linux/proc_fs.h> 351da177e4SLinus Torvalds #include <linux/miscdevice.h> 361da177e4SLinus Torvalds #include <linux/spinlock.h> 371da177e4SLinus Torvalds 387c0f6ba6SLinus Torvalds #include <linux/uaccess.h> 391da177e4SLinus Torvalds #include <asm/perf.h> 401da177e4SLinus Torvalds #include <asm/parisc-device.h> 411da177e4SLinus Torvalds #include <asm/processor.h> 421da177e4SLinus Torvalds #include <asm/runway.h> 431da177e4SLinus Torvalds #include <asm/io.h> /* for __raw_read() */ 441da177e4SLinus Torvalds 451da177e4SLinus Torvalds #include "perf_images.h" 461da177e4SLinus Torvalds 471da177e4SLinus Torvalds #define MAX_RDR_WORDS 24 481da177e4SLinus Torvalds #define PERF_VERSION 2 /* derived from hpux's PI v2 interface */ 491da177e4SLinus Torvalds 501da177e4SLinus Torvalds /* definition of RDR regs */ 511da177e4SLinus Torvalds struct rdr_tbl_ent { 521da177e4SLinus Torvalds uint16_t width; 531da177e4SLinus Torvalds uint8_t num_words; 541da177e4SLinus Torvalds uint8_t write_control; 551da177e4SLinus Torvalds }; 561da177e4SLinus Torvalds 578039de10SHelge Deller static int perf_processor_interface __read_mostly = UNKNOWN_INTF; 58cb6fc18eSHelge Deller static int perf_enabled __read_mostly; 5976cffeb6SHelge Deller static DEFINE_SPINLOCK(perf_lock); 60*d863066eSHelge Deller static struct parisc_device *cpu_device __read_mostly; 611da177e4SLinus Torvalds 621da177e4SLinus Torvalds /* RDRs to write for PCX-W */ 63cb6fc18eSHelge Deller static const int perf_rdrs_W[] = 641da177e4SLinus Torvalds { 0, 1, 4, 5, 6, 15, 16, 17, 18, 20, 21, 22, 23, 24, 25, -1 }; 651da177e4SLinus Torvalds 661da177e4SLinus Torvalds /* RDRs to write for PCX-U */ 67cb6fc18eSHelge Deller static const int perf_rdrs_U[] = 681da177e4SLinus Torvalds { 0, 1, 4, 5, 6, 7, 16, 17, 18, 20, 21, 22, 23, 24, 25, -1 }; 691da177e4SLinus Torvalds 701da177e4SLinus Torvalds /* RDR register descriptions for PCX-W */ 71cb6fc18eSHelge Deller static const struct rdr_tbl_ent perf_rdr_tbl_W[] = { 721da177e4SLinus Torvalds { 19, 1, 8 }, /* RDR 0 */ 731da177e4SLinus Torvalds { 16, 1, 16 }, /* RDR 1 */ 741da177e4SLinus Torvalds { 72, 2, 0 }, /* RDR 2 */ 751da177e4SLinus Torvalds { 81, 2, 0 }, /* RDR 3 */ 761da177e4SLinus Torvalds { 328, 6, 0 }, /* RDR 4 */ 771da177e4SLinus Torvalds { 160, 3, 0 }, /* RDR 5 */ 781da177e4SLinus Torvalds { 336, 6, 0 }, /* RDR 6 */ 791da177e4SLinus Torvalds { 164, 3, 0 }, /* RDR 7 */ 801da177e4SLinus Torvalds { 0, 0, 0 }, /* RDR 8 */ 811da177e4SLinus Torvalds { 35, 1, 0 }, /* RDR 9 */ 821da177e4SLinus Torvalds { 6, 1, 0 }, /* RDR 10 */ 831da177e4SLinus Torvalds { 18, 1, 0 }, /* RDR 11 */ 841da177e4SLinus Torvalds { 13, 1, 0 }, /* RDR 12 */ 851da177e4SLinus Torvalds { 8, 1, 0 }, /* RDR 13 */ 861da177e4SLinus Torvalds { 8, 1, 0 }, /* RDR 14 */ 871da177e4SLinus Torvalds { 8, 1, 0 }, /* RDR 15 */ 881da177e4SLinus Torvalds { 1530, 24, 0 }, /* RDR 16 */ 891da177e4SLinus Torvalds { 16, 1, 0 }, /* RDR 17 */ 901da177e4SLinus Torvalds { 4, 1, 0 }, /* RDR 18 */ 911da177e4SLinus Torvalds { 0, 0, 0 }, /* RDR 19 */ 921da177e4SLinus Torvalds { 152, 3, 24 }, /* RDR 20 */ 931da177e4SLinus Torvalds { 152, 3, 24 }, /* RDR 21 */ 941da177e4SLinus Torvalds { 233, 4, 48 }, /* RDR 22 */ 951da177e4SLinus Torvalds { 233, 4, 48 }, /* RDR 23 */ 961da177e4SLinus Torvalds { 71, 2, 0 }, /* RDR 24 */ 971da177e4SLinus Torvalds { 71, 2, 0 }, /* RDR 25 */ 981da177e4SLinus Torvalds { 11, 1, 0 }, /* RDR 26 */ 991da177e4SLinus Torvalds { 18, 1, 0 }, /* RDR 27 */ 1001da177e4SLinus Torvalds { 128, 2, 0 }, /* RDR 28 */ 1011da177e4SLinus Torvalds { 0, 0, 0 }, /* RDR 29 */ 1021da177e4SLinus Torvalds { 16, 1, 0 }, /* RDR 30 */ 1031da177e4SLinus Torvalds { 16, 1, 0 }, /* RDR 31 */ 1041da177e4SLinus Torvalds }; 1051da177e4SLinus Torvalds 1061da177e4SLinus Torvalds /* RDR register descriptions for PCX-U */ 107cb6fc18eSHelge Deller static const struct rdr_tbl_ent perf_rdr_tbl_U[] = { 1081da177e4SLinus Torvalds { 19, 1, 8 }, /* RDR 0 */ 1091da177e4SLinus Torvalds { 32, 1, 16 }, /* RDR 1 */ 1101da177e4SLinus Torvalds { 20, 1, 0 }, /* RDR 2 */ 1111da177e4SLinus Torvalds { 0, 0, 0 }, /* RDR 3 */ 1121da177e4SLinus Torvalds { 344, 6, 0 }, /* RDR 4 */ 1131da177e4SLinus Torvalds { 176, 3, 0 }, /* RDR 5 */ 1141da177e4SLinus Torvalds { 336, 6, 0 }, /* RDR 6 */ 1151da177e4SLinus Torvalds { 0, 0, 0 }, /* RDR 7 */ 1161da177e4SLinus Torvalds { 0, 0, 0 }, /* RDR 8 */ 1171da177e4SLinus Torvalds { 0, 0, 0 }, /* RDR 9 */ 1181da177e4SLinus Torvalds { 28, 1, 0 }, /* RDR 10 */ 1191da177e4SLinus Torvalds { 33, 1, 0 }, /* RDR 11 */ 1201da177e4SLinus Torvalds { 0, 0, 0 }, /* RDR 12 */ 1211da177e4SLinus Torvalds { 230, 4, 0 }, /* RDR 13 */ 1221da177e4SLinus Torvalds { 32, 1, 0 }, /* RDR 14 */ 1231da177e4SLinus Torvalds { 128, 2, 0 }, /* RDR 15 */ 1241da177e4SLinus Torvalds { 1494, 24, 0 }, /* RDR 16 */ 1251da177e4SLinus Torvalds { 18, 1, 0 }, /* RDR 17 */ 1261da177e4SLinus Torvalds { 4, 1, 0 }, /* RDR 18 */ 1271da177e4SLinus Torvalds { 0, 0, 0 }, /* RDR 19 */ 1281da177e4SLinus Torvalds { 158, 3, 24 }, /* RDR 20 */ 1291da177e4SLinus Torvalds { 158, 3, 24 }, /* RDR 21 */ 1301da177e4SLinus Torvalds { 194, 4, 48 }, /* RDR 22 */ 1311da177e4SLinus Torvalds { 194, 4, 48 }, /* RDR 23 */ 1321da177e4SLinus Torvalds { 71, 2, 0 }, /* RDR 24 */ 1331da177e4SLinus Torvalds { 71, 2, 0 }, /* RDR 25 */ 1341da177e4SLinus Torvalds { 28, 1, 0 }, /* RDR 26 */ 1351da177e4SLinus Torvalds { 33, 1, 0 }, /* RDR 27 */ 1361da177e4SLinus Torvalds { 88, 2, 0 }, /* RDR 28 */ 1371da177e4SLinus Torvalds { 32, 1, 0 }, /* RDR 29 */ 1381da177e4SLinus Torvalds { 24, 1, 0 }, /* RDR 30 */ 1391da177e4SLinus Torvalds { 16, 1, 0 }, /* RDR 31 */ 1401da177e4SLinus Torvalds }; 1411da177e4SLinus Torvalds 1421da177e4SLinus Torvalds /* 1431da177e4SLinus Torvalds * A non-zero write_control in the above tables is a byte offset into 1441da177e4SLinus Torvalds * this array. 1451da177e4SLinus Torvalds */ 146cb6fc18eSHelge Deller static const uint64_t perf_bitmasks[] = { 1471da177e4SLinus Torvalds 0x0000000000000000ul, /* first dbl word must be zero */ 1481da177e4SLinus Torvalds 0xfdffe00000000000ul, /* RDR0 bitmask */ 1491da177e4SLinus Torvalds 0x003f000000000000ul, /* RDR1 bitmask */ 1501da177e4SLinus Torvalds 0x00fffffffffffffful, /* RDR20-RDR21 bitmask (152 bits) */ 1511da177e4SLinus Torvalds 0xfffffffffffffffful, 1521da177e4SLinus Torvalds 0xfffffffc00000000ul, 1531da177e4SLinus Torvalds 0xfffffffffffffffful, /* RDR22-RDR23 bitmask (233 bits) */ 1541da177e4SLinus Torvalds 0xfffffffffffffffful, 1551da177e4SLinus Torvalds 0xfffffffffffffffcul, 1561da177e4SLinus Torvalds 0xff00000000000000ul 1571da177e4SLinus Torvalds }; 1581da177e4SLinus Torvalds 1591da177e4SLinus Torvalds /* 1601da177e4SLinus Torvalds * Write control bitmasks for Pa-8700 processor given 1611da177e4SLinus Torvalds * some things have changed slightly. 1621da177e4SLinus Torvalds */ 163cb6fc18eSHelge Deller static const uint64_t perf_bitmasks_piranha[] = { 1641da177e4SLinus Torvalds 0x0000000000000000ul, /* first dbl word must be zero */ 1651da177e4SLinus Torvalds 0xfdffe00000000000ul, /* RDR0 bitmask */ 1661da177e4SLinus Torvalds 0x003f000000000000ul, /* RDR1 bitmask */ 1671da177e4SLinus Torvalds 0x00fffffffffffffful, /* RDR20-RDR21 bitmask (158 bits) */ 1681da177e4SLinus Torvalds 0xfffffffffffffffful, 1691da177e4SLinus Torvalds 0xfffffffc00000000ul, 1701da177e4SLinus Torvalds 0xfffffffffffffffful, /* RDR22-RDR23 bitmask (210 bits) */ 1711da177e4SLinus Torvalds 0xfffffffffffffffful, 1721da177e4SLinus Torvalds 0xfffffffffffffffful, 1731da177e4SLinus Torvalds 0xfffc000000000000ul 1741da177e4SLinus Torvalds }; 1751da177e4SLinus Torvalds 176cb6fc18eSHelge Deller static const uint64_t *bitmask_array; /* array of bitmasks to use */ 1771da177e4SLinus Torvalds 1781da177e4SLinus Torvalds /****************************************************************************** 1791da177e4SLinus Torvalds * Function Prototypes 1801da177e4SLinus Torvalds *****************************************************************************/ 1811da177e4SLinus Torvalds static int perf_config(uint32_t *image_ptr); 1821da177e4SLinus Torvalds static int perf_release(struct inode *inode, struct file *file); 1831da177e4SLinus Torvalds static int perf_open(struct inode *inode, struct file *file); 1841da177e4SLinus Torvalds static ssize_t perf_read(struct file *file, char __user *buf, size_t cnt, loff_t *ppos); 18574e3f6e6SArvind Yadav static ssize_t perf_write(struct file *file, const char __user *buf, 18674e3f6e6SArvind Yadav size_t count, loff_t *ppos); 187ad7dd338SChristoph Hellwig static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg); 1881da177e4SLinus Torvalds static void perf_start_counters(void); 1891da177e4SLinus Torvalds static int perf_stop_counters(uint32_t *raddr); 190cb6fc18eSHelge Deller static const struct rdr_tbl_ent * perf_rdr_get_entry(uint32_t rdr_num); 1911da177e4SLinus Torvalds static int perf_rdr_read_ubuf(uint32_t rdr_num, uint64_t *buffer); 1921da177e4SLinus Torvalds static int perf_rdr_clear(uint32_t rdr_num); 1931da177e4SLinus Torvalds static int perf_write_image(uint64_t *memaddr); 1941da177e4SLinus Torvalds static void perf_rdr_write(uint32_t rdr_num, uint64_t *buffer); 1951da177e4SLinus Torvalds 1961da177e4SLinus Torvalds /* External Assembly Routines */ 1971da177e4SLinus Torvalds extern uint64_t perf_rdr_shift_in_W (uint32_t rdr_num, uint16_t width); 1981da177e4SLinus Torvalds extern uint64_t perf_rdr_shift_in_U (uint32_t rdr_num, uint16_t width); 1991da177e4SLinus Torvalds extern void perf_rdr_shift_out_W (uint32_t rdr_num, uint64_t buffer); 2001da177e4SLinus Torvalds extern void perf_rdr_shift_out_U (uint32_t rdr_num, uint64_t buffer); 2011da177e4SLinus Torvalds extern void perf_intrigue_enable_perf_counters (void); 2021da177e4SLinus Torvalds extern void perf_intrigue_disable_perf_counters (void); 2031da177e4SLinus Torvalds 2041da177e4SLinus Torvalds /****************************************************************************** 2051da177e4SLinus Torvalds * Function Definitions 2061da177e4SLinus Torvalds *****************************************************************************/ 2071da177e4SLinus Torvalds 2081da177e4SLinus Torvalds 2091da177e4SLinus Torvalds /* 2101da177e4SLinus Torvalds * configure: 2111da177e4SLinus Torvalds * 2121da177e4SLinus Torvalds * Configure the cpu with a given data image. First turn off the counters, 2131da177e4SLinus Torvalds * then download the image, then turn the counters back on. 2141da177e4SLinus Torvalds */ 2151da177e4SLinus Torvalds static int perf_config(uint32_t *image_ptr) 2161da177e4SLinus Torvalds { 2171da177e4SLinus Torvalds long error; 2181da177e4SLinus Torvalds uint32_t raddr[4]; 2191da177e4SLinus Torvalds 2201da177e4SLinus Torvalds /* Stop the counters*/ 2211da177e4SLinus Torvalds error = perf_stop_counters(raddr); 2221da177e4SLinus Torvalds if (error != 0) { 2231da177e4SLinus Torvalds printk("perf_config: perf_stop_counters = %ld\n", error); 2241da177e4SLinus Torvalds return -EINVAL; 2251da177e4SLinus Torvalds } 2261da177e4SLinus Torvalds 2271da177e4SLinus Torvalds printk("Preparing to write image\n"); 2281da177e4SLinus Torvalds /* Write the image to the chip */ 2291da177e4SLinus Torvalds error = perf_write_image((uint64_t *)image_ptr); 2301da177e4SLinus Torvalds if (error != 0) { 2311da177e4SLinus Torvalds printk("perf_config: DOWNLOAD = %ld\n", error); 2321da177e4SLinus Torvalds return -EINVAL; 2331da177e4SLinus Torvalds } 2341da177e4SLinus Torvalds 2351da177e4SLinus Torvalds printk("Preparing to start counters\n"); 2361da177e4SLinus Torvalds 2371da177e4SLinus Torvalds /* Start the counters */ 2381da177e4SLinus Torvalds perf_start_counters(); 2391da177e4SLinus Torvalds 2401da177e4SLinus Torvalds return sizeof(uint32_t); 2411da177e4SLinus Torvalds } 2421da177e4SLinus Torvalds 2431da177e4SLinus Torvalds /* 2441da177e4SLinus Torvalds * Open the device and initialize all of its memory. The device is only 2451da177e4SLinus Torvalds * opened once, but can be "queried" by multiple processes that know its 2461da177e4SLinus Torvalds * file descriptor. 2471da177e4SLinus Torvalds */ 2481da177e4SLinus Torvalds static int perf_open(struct inode *inode, struct file *file) 2491da177e4SLinus Torvalds { 2501da177e4SLinus Torvalds spin_lock(&perf_lock); 2511da177e4SLinus Torvalds if (perf_enabled) { 2521da177e4SLinus Torvalds spin_unlock(&perf_lock); 2531da177e4SLinus Torvalds return -EBUSY; 2541da177e4SLinus Torvalds } 2551da177e4SLinus Torvalds perf_enabled = 1; 2561da177e4SLinus Torvalds spin_unlock(&perf_lock); 2571da177e4SLinus Torvalds 2581da177e4SLinus Torvalds return 0; 2591da177e4SLinus Torvalds } 2601da177e4SLinus Torvalds 2611da177e4SLinus Torvalds /* 2621da177e4SLinus Torvalds * Close the device. 2631da177e4SLinus Torvalds */ 2641da177e4SLinus Torvalds static int perf_release(struct inode *inode, struct file *file) 2651da177e4SLinus Torvalds { 2661da177e4SLinus Torvalds spin_lock(&perf_lock); 2671da177e4SLinus Torvalds perf_enabled = 0; 2681da177e4SLinus Torvalds spin_unlock(&perf_lock); 2691da177e4SLinus Torvalds 2701da177e4SLinus Torvalds return 0; 2711da177e4SLinus Torvalds } 2721da177e4SLinus Torvalds 2731da177e4SLinus Torvalds /* 2741da177e4SLinus Torvalds * Read does nothing for this driver 2751da177e4SLinus Torvalds */ 2761da177e4SLinus Torvalds static ssize_t perf_read(struct file *file, char __user *buf, size_t cnt, loff_t *ppos) 2771da177e4SLinus Torvalds { 2781da177e4SLinus Torvalds return 0; 2791da177e4SLinus Torvalds } 2801da177e4SLinus Torvalds 2811da177e4SLinus Torvalds /* 2821da177e4SLinus Torvalds * write: 2831da177e4SLinus Torvalds * 2841da177e4SLinus Torvalds * This routine downloads the image to the chip. It must be 2851da177e4SLinus Torvalds * called on the processor that the download should happen 2861da177e4SLinus Torvalds * on. 2871da177e4SLinus Torvalds */ 28874e3f6e6SArvind Yadav static ssize_t perf_write(struct file *file, const char __user *buf, 28974e3f6e6SArvind Yadav size_t count, loff_t *ppos) 2901da177e4SLinus Torvalds { 2919e142b72SHelge Deller size_t image_size __maybe_unused; 2921da177e4SLinus Torvalds uint32_t image_type; 2931da177e4SLinus Torvalds uint32_t interface_type; 2941da177e4SLinus Torvalds uint32_t test; 2951da177e4SLinus Torvalds 2961da177e4SLinus Torvalds if (perf_processor_interface == ONYX_INTF) 2971da177e4SLinus Torvalds image_size = PCXU_IMAGE_SIZE; 2981da177e4SLinus Torvalds else if (perf_processor_interface == CUDA_INTF) 2991da177e4SLinus Torvalds image_size = PCXW_IMAGE_SIZE; 3001da177e4SLinus Torvalds else 3011da177e4SLinus Torvalds return -EFAULT; 3021da177e4SLinus Torvalds 303cf91baf3SAlexey Budankov if (!perfmon_capable()) 3041da177e4SLinus Torvalds return -EACCES; 3051da177e4SLinus Torvalds 3061da177e4SLinus Torvalds if (count != sizeof(uint32_t)) 3071da177e4SLinus Torvalds return -EIO; 3081da177e4SLinus Torvalds 30982cbd568SDan Carpenter if (copy_from_user(&image_type, buf, sizeof(uint32_t))) 31082cbd568SDan Carpenter return -EFAULT; 3111da177e4SLinus Torvalds 3121da177e4SLinus Torvalds /* Get the interface type and test type */ 3131da177e4SLinus Torvalds interface_type = (image_type >> 16) & 0xffff; 3141da177e4SLinus Torvalds test = (image_type & 0xffff); 3151da177e4SLinus Torvalds 3161da177e4SLinus Torvalds /* Make sure everything makes sense */ 3171da177e4SLinus Torvalds 3181da177e4SLinus Torvalds /* First check the machine type is correct for 3191da177e4SLinus Torvalds the requested image */ 3201da177e4SLinus Torvalds if (((perf_processor_interface == CUDA_INTF) && 3211da177e4SLinus Torvalds (interface_type != CUDA_INTF)) || 3221da177e4SLinus Torvalds ((perf_processor_interface == ONYX_INTF) && 3231da177e4SLinus Torvalds (interface_type != ONYX_INTF))) 3241da177e4SLinus Torvalds return -EINVAL; 3251da177e4SLinus Torvalds 3261da177e4SLinus Torvalds /* Next check to make sure the requested image 3271da177e4SLinus Torvalds is valid */ 3281da177e4SLinus Torvalds if (((interface_type == CUDA_INTF) && 3291da177e4SLinus Torvalds (test >= MAX_CUDA_IMAGES)) || 3301da177e4SLinus Torvalds ((interface_type == ONYX_INTF) && 3311da177e4SLinus Torvalds (test >= MAX_ONYX_IMAGES))) 3321da177e4SLinus Torvalds return -EINVAL; 3331da177e4SLinus Torvalds 3341da177e4SLinus Torvalds /* Copy the image into the processor */ 3351da177e4SLinus Torvalds if (interface_type == CUDA_INTF) 3361da177e4SLinus Torvalds return perf_config(cuda_images[test]); 3371da177e4SLinus Torvalds else 3381da177e4SLinus Torvalds return perf_config(onyx_images[test]); 3391da177e4SLinus Torvalds 3401da177e4SLinus Torvalds return count; 3411da177e4SLinus Torvalds } 3421da177e4SLinus Torvalds 3431da177e4SLinus Torvalds /* 3441da177e4SLinus Torvalds * Patch the images that need to know the IVA addresses. 3451da177e4SLinus Torvalds */ 3461da177e4SLinus Torvalds static void perf_patch_images(void) 3471da177e4SLinus Torvalds { 3481da177e4SLinus Torvalds #if 0 /* FIXME!! */ 3491da177e4SLinus Torvalds /* 3501da177e4SLinus Torvalds * NOTE: this routine is VERY specific to the current TLB image. 3511da177e4SLinus Torvalds * If the image is changed, this routine might also need to be changed. 3521da177e4SLinus Torvalds */ 3531da177e4SLinus Torvalds extern void $i_itlb_miss_2_0(); 3541da177e4SLinus Torvalds extern void $i_dtlb_miss_2_0(); 3551da177e4SLinus Torvalds extern void PA2_0_iva(); 3561da177e4SLinus Torvalds 3571da177e4SLinus Torvalds /* 3581da177e4SLinus Torvalds * We can only use the lower 32-bits, the upper 32-bits should be 0 3591da177e4SLinus Torvalds * anyway given this is in the kernel 3601da177e4SLinus Torvalds */ 3611da177e4SLinus Torvalds uint32_t itlb_addr = (uint32_t)&($i_itlb_miss_2_0); 3621da177e4SLinus Torvalds uint32_t dtlb_addr = (uint32_t)&($i_dtlb_miss_2_0); 3631da177e4SLinus Torvalds uint32_t IVAaddress = (uint32_t)&PA2_0_iva; 3641da177e4SLinus Torvalds 3651da177e4SLinus Torvalds if (perf_processor_interface == ONYX_INTF) { 3661da177e4SLinus Torvalds /* clear last 2 bytes */ 3671da177e4SLinus Torvalds onyx_images[TLBMISS][15] &= 0xffffff00; 3681da177e4SLinus Torvalds /* set 2 bytes */ 3691da177e4SLinus Torvalds onyx_images[TLBMISS][15] |= (0x000000ff&((dtlb_addr) >> 24)); 3701da177e4SLinus Torvalds onyx_images[TLBMISS][16] = (dtlb_addr << 8)&0xffffff00; 3711da177e4SLinus Torvalds onyx_images[TLBMISS][17] = itlb_addr; 3721da177e4SLinus Torvalds 3731da177e4SLinus Torvalds /* clear last 2 bytes */ 3741da177e4SLinus Torvalds onyx_images[TLBHANDMISS][15] &= 0xffffff00; 3751da177e4SLinus Torvalds /* set 2 bytes */ 3761da177e4SLinus Torvalds onyx_images[TLBHANDMISS][15] |= (0x000000ff&((dtlb_addr) >> 24)); 3771da177e4SLinus Torvalds onyx_images[TLBHANDMISS][16] = (dtlb_addr << 8)&0xffffff00; 3781da177e4SLinus Torvalds onyx_images[TLBHANDMISS][17] = itlb_addr; 3791da177e4SLinus Torvalds 3801da177e4SLinus Torvalds /* clear last 2 bytes */ 3811da177e4SLinus Torvalds onyx_images[BIG_CPI][15] &= 0xffffff00; 3821da177e4SLinus Torvalds /* set 2 bytes */ 3831da177e4SLinus Torvalds onyx_images[BIG_CPI][15] |= (0x000000ff&((dtlb_addr) >> 24)); 3841da177e4SLinus Torvalds onyx_images[BIG_CPI][16] = (dtlb_addr << 8)&0xffffff00; 3851da177e4SLinus Torvalds onyx_images[BIG_CPI][17] = itlb_addr; 3861da177e4SLinus Torvalds 3871da177e4SLinus Torvalds onyx_images[PANIC][15] &= 0xffffff00; /* clear last 2 bytes */ 3881da177e4SLinus Torvalds onyx_images[PANIC][15] |= (0x000000ff&((IVAaddress) >> 24)); /* set 2 bytes */ 3891da177e4SLinus Torvalds onyx_images[PANIC][16] = (IVAaddress << 8)&0xffffff00; 3901da177e4SLinus Torvalds 3911da177e4SLinus Torvalds 3921da177e4SLinus Torvalds } else if (perf_processor_interface == CUDA_INTF) { 3931da177e4SLinus Torvalds /* Cuda interface */ 3941da177e4SLinus Torvalds cuda_images[TLBMISS][16] = 3951da177e4SLinus Torvalds (cuda_images[TLBMISS][16]&0xffff0000) | 3961da177e4SLinus Torvalds ((dtlb_addr >> 8)&0x0000ffff); 3971da177e4SLinus Torvalds cuda_images[TLBMISS][17] = 3981da177e4SLinus Torvalds ((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff); 3991da177e4SLinus Torvalds cuda_images[TLBMISS][18] = (itlb_addr << 16)&0xffff0000; 4001da177e4SLinus Torvalds 4011da177e4SLinus Torvalds cuda_images[TLBHANDMISS][16] = 4021da177e4SLinus Torvalds (cuda_images[TLBHANDMISS][16]&0xffff0000) | 4031da177e4SLinus Torvalds ((dtlb_addr >> 8)&0x0000ffff); 4041da177e4SLinus Torvalds cuda_images[TLBHANDMISS][17] = 4051da177e4SLinus Torvalds ((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff); 4061da177e4SLinus Torvalds cuda_images[TLBHANDMISS][18] = (itlb_addr << 16)&0xffff0000; 4071da177e4SLinus Torvalds 4081da177e4SLinus Torvalds cuda_images[BIG_CPI][16] = 4091da177e4SLinus Torvalds (cuda_images[BIG_CPI][16]&0xffff0000) | 4101da177e4SLinus Torvalds ((dtlb_addr >> 8)&0x0000ffff); 4111da177e4SLinus Torvalds cuda_images[BIG_CPI][17] = 4121da177e4SLinus Torvalds ((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff); 4131da177e4SLinus Torvalds cuda_images[BIG_CPI][18] = (itlb_addr << 16)&0xffff0000; 4141da177e4SLinus Torvalds } else { 4151da177e4SLinus Torvalds /* Unknown type */ 4161da177e4SLinus Torvalds } 4171da177e4SLinus Torvalds #endif 4181da177e4SLinus Torvalds } 4191da177e4SLinus Torvalds 4201da177e4SLinus Torvalds 4211da177e4SLinus Torvalds /* 4221da177e4SLinus Torvalds * ioctl routine 4231da177e4SLinus Torvalds * All routines effect the processor that they are executed on. Thus you 4241da177e4SLinus Torvalds * must be running on the processor that you wish to change. 4251da177e4SLinus Torvalds */ 4261da177e4SLinus Torvalds 427ad7dd338SChristoph Hellwig static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 4281da177e4SLinus Torvalds { 4291da177e4SLinus Torvalds long error_start; 4301da177e4SLinus Torvalds uint32_t raddr[4]; 431ad7dd338SChristoph Hellwig int error = 0; 4321da177e4SLinus Torvalds 4331da177e4SLinus Torvalds switch (cmd) { 4341da177e4SLinus Torvalds 4351da177e4SLinus Torvalds case PA_PERF_ON: 4361da177e4SLinus Torvalds /* Start the counters */ 4371da177e4SLinus Torvalds perf_start_counters(); 438ad7dd338SChristoph Hellwig break; 4391da177e4SLinus Torvalds 4401da177e4SLinus Torvalds case PA_PERF_OFF: 4411da177e4SLinus Torvalds error_start = perf_stop_counters(raddr); 4421da177e4SLinus Torvalds if (error_start != 0) { 4431da177e4SLinus Torvalds printk(KERN_ERR "perf_off: perf_stop_counters = %ld\n", error_start); 444ad7dd338SChristoph Hellwig error = -EFAULT; 445ad7dd338SChristoph Hellwig break; 4461da177e4SLinus Torvalds } 4471da177e4SLinus Torvalds 4481da177e4SLinus Torvalds /* copy out the Counters */ 4491da177e4SLinus Torvalds if (copy_to_user((void __user *)arg, raddr, 4501da177e4SLinus Torvalds sizeof (raddr)) != 0) { 451ad7dd338SChristoph Hellwig error = -EFAULT; 452ad7dd338SChristoph Hellwig break; 4531da177e4SLinus Torvalds } 454ad7dd338SChristoph Hellwig break; 4551da177e4SLinus Torvalds 4561da177e4SLinus Torvalds case PA_PERF_VERSION: 4571da177e4SLinus Torvalds /* Return the version # */ 458ad7dd338SChristoph Hellwig error = put_user(PERF_VERSION, (int *)arg); 459ad7dd338SChristoph Hellwig break; 4601da177e4SLinus Torvalds 4611da177e4SLinus Torvalds default: 462ad7dd338SChristoph Hellwig error = -ENOTTY; 4631da177e4SLinus Torvalds } 464ad7dd338SChristoph Hellwig 465ad7dd338SChristoph Hellwig return error; 4661da177e4SLinus Torvalds } 4671da177e4SLinus Torvalds 4685dfe4c96SArjan van de Ven static const struct file_operations perf_fops = { 4691da177e4SLinus Torvalds .read = perf_read, 4701da177e4SLinus Torvalds .write = perf_write, 471ad7dd338SChristoph Hellwig .unlocked_ioctl = perf_ioctl, 472ad7dd338SChristoph Hellwig .compat_ioctl = perf_ioctl, 4731da177e4SLinus Torvalds .open = perf_open, 4741da177e4SLinus Torvalds .release = perf_release 4751da177e4SLinus Torvalds }; 4761da177e4SLinus Torvalds 4771da177e4SLinus Torvalds static struct miscdevice perf_dev = { 4781da177e4SLinus Torvalds MISC_DYNAMIC_MINOR, 4791da177e4SLinus Torvalds PA_PERF_DEV, 4801da177e4SLinus Torvalds &perf_fops 4811da177e4SLinus Torvalds }; 4821da177e4SLinus Torvalds 4831da177e4SLinus Torvalds /* 4841da177e4SLinus Torvalds * Initialize the module 4851da177e4SLinus Torvalds */ 4861da177e4SLinus Torvalds static int __init perf_init(void) 4871da177e4SLinus Torvalds { 4881da177e4SLinus Torvalds int ret; 4891da177e4SLinus Torvalds 4901da177e4SLinus Torvalds /* Determine correct processor interface to use */ 4911da177e4SLinus Torvalds bitmask_array = perf_bitmasks; 4921da177e4SLinus Torvalds 4931da177e4SLinus Torvalds if (boot_cpu_data.cpu_type == pcxu || 4941da177e4SLinus Torvalds boot_cpu_data.cpu_type == pcxu_) { 4951da177e4SLinus Torvalds perf_processor_interface = ONYX_INTF; 4961da177e4SLinus Torvalds } else if (boot_cpu_data.cpu_type == pcxw || 4971da177e4SLinus Torvalds boot_cpu_data.cpu_type == pcxw_ || 4981da177e4SLinus Torvalds boot_cpu_data.cpu_type == pcxw2 || 4992cbd42dbSKyle McMartin boot_cpu_data.cpu_type == mako || 5002cbd42dbSKyle McMartin boot_cpu_data.cpu_type == mako2) { 5011da177e4SLinus Torvalds perf_processor_interface = CUDA_INTF; 5021da177e4SLinus Torvalds if (boot_cpu_data.cpu_type == pcxw2 || 5032cbd42dbSKyle McMartin boot_cpu_data.cpu_type == mako || 5042cbd42dbSKyle McMartin boot_cpu_data.cpu_type == mako2) 5051da177e4SLinus Torvalds bitmask_array = perf_bitmasks_piranha; 5061da177e4SLinus Torvalds } else { 5071da177e4SLinus Torvalds perf_processor_interface = UNKNOWN_INTF; 5081da177e4SLinus Torvalds printk("Performance monitoring counters not supported on this processor\n"); 5091da177e4SLinus Torvalds return -ENODEV; 5101da177e4SLinus Torvalds } 5111da177e4SLinus Torvalds 5121da177e4SLinus Torvalds ret = misc_register(&perf_dev); 5131da177e4SLinus Torvalds if (ret) { 5141da177e4SLinus Torvalds printk(KERN_ERR "Performance monitoring counters: " 5151da177e4SLinus Torvalds "cannot register misc device.\n"); 5161da177e4SLinus Torvalds return ret; 5171da177e4SLinus Torvalds } 5181da177e4SLinus Torvalds 5191da177e4SLinus Torvalds /* Patch the images to match the system */ 5201da177e4SLinus Torvalds perf_patch_images(); 5211da177e4SLinus Torvalds 5221da177e4SLinus Torvalds /* TODO: this only lets us access the first cpu.. what to do for SMP? */ 523ef017bebSHelge Deller cpu_device = per_cpu(cpu_data, 0).dev; 5241da177e4SLinus Torvalds printk("Performance monitoring counters enabled for %s\n", 525ef017bebSHelge Deller per_cpu(cpu_data, 0).dev->name); 5261da177e4SLinus Torvalds 5271da177e4SLinus Torvalds return 0; 5281da177e4SLinus Torvalds } 52915becabdSPaul Gortmaker device_initcall(perf_init); 5301da177e4SLinus Torvalds 5311da177e4SLinus Torvalds /* 5321da177e4SLinus Torvalds * perf_start_counters(void) 5331da177e4SLinus Torvalds * 5341da177e4SLinus Torvalds * Start the counters. 5351da177e4SLinus Torvalds */ 5361da177e4SLinus Torvalds static void perf_start_counters(void) 5371da177e4SLinus Torvalds { 5381da177e4SLinus Torvalds /* Enable performance monitor counters */ 5391da177e4SLinus Torvalds perf_intrigue_enable_perf_counters(); 5401da177e4SLinus Torvalds } 5411da177e4SLinus Torvalds 5421da177e4SLinus Torvalds /* 5431da177e4SLinus Torvalds * perf_stop_counters 5441da177e4SLinus Torvalds * 5451da177e4SLinus Torvalds * Stop the performance counters and save counts 5461da177e4SLinus Torvalds * in a per_processor array. 5471da177e4SLinus Torvalds */ 5481da177e4SLinus Torvalds static int perf_stop_counters(uint32_t *raddr) 5491da177e4SLinus Torvalds { 5501da177e4SLinus Torvalds uint64_t userbuf[MAX_RDR_WORDS]; 5511da177e4SLinus Torvalds 5521da177e4SLinus Torvalds /* Disable performance counters */ 5531da177e4SLinus Torvalds perf_intrigue_disable_perf_counters(); 5541da177e4SLinus Torvalds 5551da177e4SLinus Torvalds if (perf_processor_interface == ONYX_INTF) { 5561da177e4SLinus Torvalds uint64_t tmp64; 5571da177e4SLinus Torvalds /* 5581da177e4SLinus Torvalds * Read the counters 5591da177e4SLinus Torvalds */ 5601da177e4SLinus Torvalds if (!perf_rdr_read_ubuf(16, userbuf)) 5611da177e4SLinus Torvalds return -13; 5621da177e4SLinus Torvalds 5637022672eSSimon Arlott /* Counter0 is bits 1398 to 1429 */ 5641da177e4SLinus Torvalds tmp64 = (userbuf[21] << 22) & 0x00000000ffc00000; 5651da177e4SLinus Torvalds tmp64 |= (userbuf[22] >> 42) & 0x00000000003fffff; 5661da177e4SLinus Torvalds /* OR sticky0 (bit 1430) to counter0 bit 32 */ 5671da177e4SLinus Torvalds tmp64 |= (userbuf[22] >> 10) & 0x0000000080000000; 5681da177e4SLinus Torvalds raddr[0] = (uint32_t)tmp64; 5691da177e4SLinus Torvalds 5707022672eSSimon Arlott /* Counter1 is bits 1431 to 1462 */ 5711da177e4SLinus Torvalds tmp64 = (userbuf[22] >> 9) & 0x00000000ffffffff; 5721da177e4SLinus Torvalds /* OR sticky1 (bit 1463) to counter1 bit 32 */ 5731da177e4SLinus Torvalds tmp64 |= (userbuf[22] << 23) & 0x0000000080000000; 5741da177e4SLinus Torvalds raddr[1] = (uint32_t)tmp64; 5751da177e4SLinus Torvalds 5767022672eSSimon Arlott /* Counter2 is bits 1464 to 1495 */ 5771da177e4SLinus Torvalds tmp64 = (userbuf[22] << 24) & 0x00000000ff000000; 5781da177e4SLinus Torvalds tmp64 |= (userbuf[23] >> 40) & 0x0000000000ffffff; 5791da177e4SLinus Torvalds /* OR sticky2 (bit 1496) to counter2 bit 32 */ 5801da177e4SLinus Torvalds tmp64 |= (userbuf[23] >> 8) & 0x0000000080000000; 5811da177e4SLinus Torvalds raddr[2] = (uint32_t)tmp64; 5821da177e4SLinus Torvalds 5837022672eSSimon Arlott /* Counter3 is bits 1497 to 1528 */ 5841da177e4SLinus Torvalds tmp64 = (userbuf[23] >> 7) & 0x00000000ffffffff; 5851da177e4SLinus Torvalds /* OR sticky3 (bit 1529) to counter3 bit 32 */ 5861da177e4SLinus Torvalds tmp64 |= (userbuf[23] << 25) & 0x0000000080000000; 5871da177e4SLinus Torvalds raddr[3] = (uint32_t)tmp64; 5881da177e4SLinus Torvalds 5891da177e4SLinus Torvalds /* 5901da177e4SLinus Torvalds * Zero out the counters 5911da177e4SLinus Torvalds */ 5921da177e4SLinus Torvalds 5931da177e4SLinus Torvalds /* 5941da177e4SLinus Torvalds * The counters and sticky-bits comprise the last 132 bits 5951da177e4SLinus Torvalds * (1398 - 1529) of RDR16 on a U chip. We'll zero these 5961da177e4SLinus Torvalds * out the easy way: zero out last 10 bits of dword 21, 5971da177e4SLinus Torvalds * all of dword 22 and 58 bits (plus 6 don't care bits) of 5981da177e4SLinus Torvalds * dword 23. 5991da177e4SLinus Torvalds */ 6001da177e4SLinus Torvalds userbuf[21] &= 0xfffffffffffffc00ul; /* 0 to last 10 bits */ 6011da177e4SLinus Torvalds userbuf[22] = 0; 6021da177e4SLinus Torvalds userbuf[23] = 0; 6031da177e4SLinus Torvalds 6041da177e4SLinus Torvalds /* 6057022672eSSimon Arlott * Write back the zeroed bytes + the image given 6061da177e4SLinus Torvalds * the read was destructive. 6071da177e4SLinus Torvalds */ 6081da177e4SLinus Torvalds perf_rdr_write(16, userbuf); 6091da177e4SLinus Torvalds } else { 6101da177e4SLinus Torvalds 6111da177e4SLinus Torvalds /* 6121da177e4SLinus Torvalds * Read RDR-15 which contains the counters and sticky bits 6131da177e4SLinus Torvalds */ 6141da177e4SLinus Torvalds if (!perf_rdr_read_ubuf(15, userbuf)) { 6151da177e4SLinus Torvalds return -13; 6161da177e4SLinus Torvalds } 6171da177e4SLinus Torvalds 6181da177e4SLinus Torvalds /* 6191da177e4SLinus Torvalds * Clear out the counters 6201da177e4SLinus Torvalds */ 6211da177e4SLinus Torvalds perf_rdr_clear(15); 6221da177e4SLinus Torvalds 6231da177e4SLinus Torvalds /* 6241da177e4SLinus Torvalds * Copy the counters 6251da177e4SLinus Torvalds */ 6261da177e4SLinus Torvalds raddr[0] = (uint32_t)((userbuf[0] >> 32) & 0x00000000ffffffffUL); 6271da177e4SLinus Torvalds raddr[1] = (uint32_t)(userbuf[0] & 0x00000000ffffffffUL); 6281da177e4SLinus Torvalds raddr[2] = (uint32_t)((userbuf[1] >> 32) & 0x00000000ffffffffUL); 6291da177e4SLinus Torvalds raddr[3] = (uint32_t)(userbuf[1] & 0x00000000ffffffffUL); 6301da177e4SLinus Torvalds } 6311da177e4SLinus Torvalds 6321da177e4SLinus Torvalds return 0; 6331da177e4SLinus Torvalds } 6341da177e4SLinus Torvalds 6351da177e4SLinus Torvalds /* 6361da177e4SLinus Torvalds * perf_rdr_get_entry 6371da177e4SLinus Torvalds * 6381da177e4SLinus Torvalds * Retrieve a pointer to the description of what this 6391da177e4SLinus Torvalds * RDR contains. 6401da177e4SLinus Torvalds */ 641cb6fc18eSHelge Deller static const struct rdr_tbl_ent * perf_rdr_get_entry(uint32_t rdr_num) 6421da177e4SLinus Torvalds { 6431da177e4SLinus Torvalds if (perf_processor_interface == ONYX_INTF) { 6441da177e4SLinus Torvalds return &perf_rdr_tbl_U[rdr_num]; 6451da177e4SLinus Torvalds } else { 6461da177e4SLinus Torvalds return &perf_rdr_tbl_W[rdr_num]; 6471da177e4SLinus Torvalds } 6481da177e4SLinus Torvalds } 6491da177e4SLinus Torvalds 6501da177e4SLinus Torvalds /* 6511da177e4SLinus Torvalds * perf_rdr_read_ubuf 6521da177e4SLinus Torvalds * 6531da177e4SLinus Torvalds * Read the RDR value into the buffer specified. 6541da177e4SLinus Torvalds */ 6551da177e4SLinus Torvalds static int perf_rdr_read_ubuf(uint32_t rdr_num, uint64_t *buffer) 6561da177e4SLinus Torvalds { 6571da177e4SLinus Torvalds uint64_t data, data_mask = 0; 6581da177e4SLinus Torvalds uint32_t width, xbits, i; 659cb6fc18eSHelge Deller const struct rdr_tbl_ent *tentry; 6601da177e4SLinus Torvalds 6611da177e4SLinus Torvalds tentry = perf_rdr_get_entry(rdr_num); 6621da177e4SLinus Torvalds if ((width = tentry->width) == 0) 6631da177e4SLinus Torvalds return 0; 6641da177e4SLinus Torvalds 6651da177e4SLinus Torvalds /* Clear out buffer */ 6661da177e4SLinus Torvalds i = tentry->num_words; 6671da177e4SLinus Torvalds while (i--) { 6681da177e4SLinus Torvalds buffer[i] = 0; 6691da177e4SLinus Torvalds } 6701da177e4SLinus Torvalds 6711da177e4SLinus Torvalds /* Check for bits an even number of 64 */ 6721da177e4SLinus Torvalds if ((xbits = width & 0x03f) != 0) { 6731da177e4SLinus Torvalds data_mask = 1; 6741da177e4SLinus Torvalds data_mask <<= (64 - xbits); 6751da177e4SLinus Torvalds data_mask--; 6761da177e4SLinus Torvalds } 6771da177e4SLinus Torvalds 6781da177e4SLinus Torvalds /* Grab all of the data */ 6791da177e4SLinus Torvalds i = tentry->num_words; 6801da177e4SLinus Torvalds while (i--) { 6811da177e4SLinus Torvalds 6821da177e4SLinus Torvalds if (perf_processor_interface == ONYX_INTF) { 6831da177e4SLinus Torvalds data = perf_rdr_shift_in_U(rdr_num, width); 6841da177e4SLinus Torvalds } else { 6851da177e4SLinus Torvalds data = perf_rdr_shift_in_W(rdr_num, width); 6861da177e4SLinus Torvalds } 6871da177e4SLinus Torvalds if (xbits) { 6881da177e4SLinus Torvalds buffer[i] |= (data << (64 - xbits)); 6891da177e4SLinus Torvalds if (i) { 6901da177e4SLinus Torvalds buffer[i-1] |= ((data >> xbits) & data_mask); 6911da177e4SLinus Torvalds } 6921da177e4SLinus Torvalds } else { 6931da177e4SLinus Torvalds buffer[i] = data; 6941da177e4SLinus Torvalds } 6951da177e4SLinus Torvalds } 6961da177e4SLinus Torvalds 6971da177e4SLinus Torvalds return 1; 6981da177e4SLinus Torvalds } 6991da177e4SLinus Torvalds 7001da177e4SLinus Torvalds /* 7011da177e4SLinus Torvalds * perf_rdr_clear 7021da177e4SLinus Torvalds * 7031da177e4SLinus Torvalds * Zero out the given RDR register 7041da177e4SLinus Torvalds */ 7051da177e4SLinus Torvalds static int perf_rdr_clear(uint32_t rdr_num) 7061da177e4SLinus Torvalds { 707cb6fc18eSHelge Deller const struct rdr_tbl_ent *tentry; 7081da177e4SLinus Torvalds int32_t i; 7091da177e4SLinus Torvalds 7101da177e4SLinus Torvalds tentry = perf_rdr_get_entry(rdr_num); 7111da177e4SLinus Torvalds 7121da177e4SLinus Torvalds if (tentry->width == 0) { 7131da177e4SLinus Torvalds return -1; 7141da177e4SLinus Torvalds } 7151da177e4SLinus Torvalds 7161da177e4SLinus Torvalds i = tentry->num_words; 7171da177e4SLinus Torvalds while (i--) { 7181da177e4SLinus Torvalds if (perf_processor_interface == ONYX_INTF) { 7191da177e4SLinus Torvalds perf_rdr_shift_out_U(rdr_num, 0UL); 7201da177e4SLinus Torvalds } else { 7211da177e4SLinus Torvalds perf_rdr_shift_out_W(rdr_num, 0UL); 7221da177e4SLinus Torvalds } 7231da177e4SLinus Torvalds } 7241da177e4SLinus Torvalds 7251da177e4SLinus Torvalds return 0; 7261da177e4SLinus Torvalds } 7271da177e4SLinus Torvalds 7281da177e4SLinus Torvalds 7291da177e4SLinus Torvalds /* 7301da177e4SLinus Torvalds * perf_write_image 7311da177e4SLinus Torvalds * 7321da177e4SLinus Torvalds * Write the given image out to the processor 7331da177e4SLinus Torvalds */ 7341da177e4SLinus Torvalds static int perf_write_image(uint64_t *memaddr) 7351da177e4SLinus Torvalds { 7361da177e4SLinus Torvalds uint64_t buffer[MAX_RDR_WORDS]; 7371da177e4SLinus Torvalds uint64_t *bptr; 7381da177e4SLinus Torvalds uint32_t dwords; 739cb6fc18eSHelge Deller const uint32_t *intrigue_rdr; 740cb6fc18eSHelge Deller const uint64_t *intrigue_bitmask; 741cb6fc18eSHelge Deller uint64_t tmp64; 74253f01bbaSMatthew Wilcox void __iomem *runway; 743cb6fc18eSHelge Deller const struct rdr_tbl_ent *tentry; 7441da177e4SLinus Torvalds int i; 7451da177e4SLinus Torvalds 7461da177e4SLinus Torvalds /* Clear out counters */ 7471da177e4SLinus Torvalds if (perf_processor_interface == ONYX_INTF) { 7481da177e4SLinus Torvalds 7491da177e4SLinus Torvalds perf_rdr_clear(16); 7501da177e4SLinus Torvalds 7511da177e4SLinus Torvalds /* Toggle performance monitor */ 7521da177e4SLinus Torvalds perf_intrigue_enable_perf_counters(); 7531da177e4SLinus Torvalds perf_intrigue_disable_perf_counters(); 7541da177e4SLinus Torvalds 7551da177e4SLinus Torvalds intrigue_rdr = perf_rdrs_U; 7561da177e4SLinus Torvalds } else { 7571da177e4SLinus Torvalds perf_rdr_clear(15); 7581da177e4SLinus Torvalds intrigue_rdr = perf_rdrs_W; 7591da177e4SLinus Torvalds } 7601da177e4SLinus Torvalds 7611da177e4SLinus Torvalds /* Write all RDRs */ 7621da177e4SLinus Torvalds while (*intrigue_rdr != -1) { 7631da177e4SLinus Torvalds tentry = perf_rdr_get_entry(*intrigue_rdr); 7641da177e4SLinus Torvalds perf_rdr_read_ubuf(*intrigue_rdr, buffer); 7651da177e4SLinus Torvalds bptr = &buffer[0]; 7661da177e4SLinus Torvalds dwords = tentry->num_words; 7671da177e4SLinus Torvalds if (tentry->write_control) { 7681da177e4SLinus Torvalds intrigue_bitmask = &bitmask_array[tentry->write_control >> 3]; 7691da177e4SLinus Torvalds while (dwords--) { 7701da177e4SLinus Torvalds tmp64 = *intrigue_bitmask & *memaddr++; 7711da177e4SLinus Torvalds tmp64 |= (~(*intrigue_bitmask++)) & *bptr; 7721da177e4SLinus Torvalds *bptr++ = tmp64; 7731da177e4SLinus Torvalds } 7741da177e4SLinus Torvalds } else { 7751da177e4SLinus Torvalds while (dwords--) { 7761da177e4SLinus Torvalds *bptr++ = *memaddr++; 7771da177e4SLinus Torvalds } 7781da177e4SLinus Torvalds } 7791da177e4SLinus Torvalds 7801da177e4SLinus Torvalds perf_rdr_write(*intrigue_rdr, buffer); 7811da177e4SLinus Torvalds intrigue_rdr++; 7821da177e4SLinus Torvalds } 7831da177e4SLinus Torvalds 7841da177e4SLinus Torvalds /* 7851da177e4SLinus Torvalds * Now copy out the Runway stuff which is not in RDRs 7861da177e4SLinus Torvalds */ 7871da177e4SLinus Torvalds 7881da177e4SLinus Torvalds if (cpu_device == NULL) 7891da177e4SLinus Torvalds { 7901da177e4SLinus Torvalds printk(KERN_ERR "write_image: cpu_device not yet initialized!\n"); 7911da177e4SLinus Torvalds return -1; 7921da177e4SLinus Torvalds } 7931da177e4SLinus Torvalds 7944bdc0d67SChristoph Hellwig runway = ioremap(cpu_device->hpa.start, 4096); 79574e3f6e6SArvind Yadav if (!runway) { 79674e3f6e6SArvind Yadav pr_err("perf_write_image: ioremap failed!\n"); 79774e3f6e6SArvind Yadav return -ENOMEM; 79874e3f6e6SArvind Yadav } 7991da177e4SLinus Torvalds 8001da177e4SLinus Torvalds /* Merge intrigue bits into Runway STATUS 0 */ 80153f01bbaSMatthew Wilcox tmp64 = __raw_readq(runway + RUNWAY_STATUS) & 0xffecfffffffffffful; 80253f01bbaSMatthew Wilcox __raw_writeq(tmp64 | (*memaddr++ & 0x0013000000000000ul), 80353f01bbaSMatthew Wilcox runway + RUNWAY_STATUS); 8041da177e4SLinus Torvalds 8051da177e4SLinus Torvalds /* Write RUNWAY DEBUG registers */ 8061da177e4SLinus Torvalds for (i = 0; i < 8; i++) { 80753f01bbaSMatthew Wilcox __raw_writeq(*memaddr++, runway + RUNWAY_DEBUG); 8081da177e4SLinus Torvalds } 8091da177e4SLinus Torvalds 8101da177e4SLinus Torvalds return 0; 8111da177e4SLinus Torvalds } 8121da177e4SLinus Torvalds 8131da177e4SLinus Torvalds /* 8141da177e4SLinus Torvalds * perf_rdr_write 8151da177e4SLinus Torvalds * 8161da177e4SLinus Torvalds * Write the given RDR register with the contents 8171da177e4SLinus Torvalds * of the given buffer. 8181da177e4SLinus Torvalds */ 8191da177e4SLinus Torvalds static void perf_rdr_write(uint32_t rdr_num, uint64_t *buffer) 8201da177e4SLinus Torvalds { 821cb6fc18eSHelge Deller const struct rdr_tbl_ent *tentry; 8221da177e4SLinus Torvalds int32_t i; 8231da177e4SLinus Torvalds 8241da177e4SLinus Torvalds printk("perf_rdr_write\n"); 8251da177e4SLinus Torvalds tentry = perf_rdr_get_entry(rdr_num); 8261da177e4SLinus Torvalds if (tentry->width == 0) { return; } 8271da177e4SLinus Torvalds 8281da177e4SLinus Torvalds i = tentry->num_words; 8291da177e4SLinus Torvalds while (i--) { 8301da177e4SLinus Torvalds if (perf_processor_interface == ONYX_INTF) { 8311da177e4SLinus Torvalds perf_rdr_shift_out_U(rdr_num, buffer[i]); 8321da177e4SLinus Torvalds } else { 8331da177e4SLinus Torvalds perf_rdr_shift_out_W(rdr_num, buffer[i]); 8341da177e4SLinus Torvalds } 8351da177e4SLinus Torvalds } 8361da177e4SLinus Torvalds printk("perf_rdr_write done\n"); 8371da177e4SLinus Torvalds } 838