xref: /linux/arch/parisc/kernel/perf.c (revision 52c996d3f40b40f87ef9dc80596903309682acc3)
1de6cc651SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later
21da177e4SLinus Torvalds /*
31da177e4SLinus Torvalds  *  Parisc performance counters
41da177e4SLinus Torvalds  *  Copyright (C) 2001 Randolph Chung <tausq@debian.org>
51da177e4SLinus Torvalds  *
61da177e4SLinus Torvalds  *  This code is derived, with permission, from HP/UX sources.
71da177e4SLinus Torvalds  */
81da177e4SLinus Torvalds 
91da177e4SLinus Torvalds /*
101da177e4SLinus Torvalds  *  Edited comment from original sources:
111da177e4SLinus Torvalds  *
121da177e4SLinus Torvalds  *  This driver programs the PCX-U/PCX-W performance counters
131da177e4SLinus Torvalds  *  on the PA-RISC 2.0 chips.  The driver keeps all images now
1406fe9fb4SDirk Hohndel  *  internally to the kernel to hopefully eliminate the possibility
151da177e4SLinus Torvalds  *  of a bad image halting the CPU.  Also, there are different
161da177e4SLinus Torvalds  *  images for the PCX-W and later chips vs the PCX-U chips.
171da177e4SLinus Torvalds  *
181da177e4SLinus Torvalds  *  Only 1 process is allowed to access the driver at any time,
191da177e4SLinus Torvalds  *  so the only protection that is needed is at open and close.
201da177e4SLinus Torvalds  *  A variable "perf_enabled" is used to hold the state of the
211da177e4SLinus Torvalds  *  driver.  The spinlock "perf_lock" is used to protect the
221da177e4SLinus Torvalds  *  modification of the state during open/close operations so
231da177e4SLinus Torvalds  *  multiple processes don't get into the driver simultaneously.
241da177e4SLinus Torvalds  *
251da177e4SLinus Torvalds  *  This driver accesses the processor directly vs going through
261da177e4SLinus Torvalds  *  the PDC INTRIGUE calls.  This is done to eliminate bugs introduced
271da177e4SLinus Torvalds  *  in various PDC revisions.  The code is much more maintainable
281da177e4SLinus Torvalds  *  and reliable this way vs having to debug on every version of PDC
291da177e4SLinus Torvalds  *  on every box.
301da177e4SLinus Torvalds  */
311da177e4SLinus Torvalds 
32a9415644SRandy Dunlap #include <linux/capability.h>
331da177e4SLinus Torvalds #include <linux/init.h>
341da177e4SLinus Torvalds #include <linux/proc_fs.h>
351da177e4SLinus Torvalds #include <linux/miscdevice.h>
361da177e4SLinus Torvalds #include <linux/spinlock.h>
371da177e4SLinus Torvalds 
387c0f6ba6SLinus Torvalds #include <linux/uaccess.h>
391da177e4SLinus Torvalds #include <asm/perf.h>
401da177e4SLinus Torvalds #include <asm/parisc-device.h>
411da177e4SLinus Torvalds #include <asm/processor.h>
421da177e4SLinus Torvalds #include <asm/runway.h>
431da177e4SLinus Torvalds #include <asm/io.h>		/* for __raw_read() */
441da177e4SLinus Torvalds 
451da177e4SLinus Torvalds #include "perf_images.h"
461da177e4SLinus Torvalds 
471da177e4SLinus Torvalds #define MAX_RDR_WORDS	24
481da177e4SLinus Torvalds #define PERF_VERSION	2	/* derived from hpux's PI v2 interface */
491da177e4SLinus Torvalds 
/*
 * Description of one RDR register.  One table of these exists per
 * supported processor family (see perf_rdr_tbl_W / perf_rdr_tbl_U).
 */
struct rdr_tbl_ent {
	uint16_t	width;		/* register width in bits */
	uint8_t		num_words;	/* presumably the number of 64-bit words spanned - TODO confirm */
	uint8_t		write_control;	/* non-zero: byte offset into the bitmask array */
};
561da177e4SLinus Torvalds 
578039de10SHelge Deller static int perf_processor_interface __read_mostly = UNKNOWN_INTF;
58cb6fc18eSHelge Deller static int perf_enabled __read_mostly;
5976cffeb6SHelge Deller static DEFINE_SPINLOCK(perf_lock);
60*d863066eSHelge Deller static struct parisc_device *cpu_device __read_mostly;
611da177e4SLinus Torvalds 
/* PCX-W: numbers of the RDRs to write; -1 is the final entry */
static const int perf_rdrs_W[] = {
	0, 1, 4, 5, 6,
	15, 16, 17, 18,
	20, 21, 22, 23, 24, 25,
	-1
};
651da177e4SLinus Torvalds 
/* PCX-U: numbers of the RDRs to write; -1 is the final entry */
static const int perf_rdrs_U[] = {
	0, 1, 4, 5, 6, 7,
	16, 17, 18,
	20, 21, 22, 23, 24, 25,
	-1
};
691da177e4SLinus Torvalds 
701da177e4SLinus Torvalds /* RDR register descriptions for PCX-W */
71cb6fc18eSHelge Deller static const struct rdr_tbl_ent perf_rdr_tbl_W[] = {
721da177e4SLinus Torvalds 	{ 19,	1,	8 },   /* RDR 0 */
731da177e4SLinus Torvalds 	{ 16,	1,	16 },  /* RDR 1 */
741da177e4SLinus Torvalds 	{ 72,	2,	0 },   /* RDR 2 */
751da177e4SLinus Torvalds 	{ 81,	2,	0 },   /* RDR 3 */
761da177e4SLinus Torvalds 	{ 328,	6,	0 },   /* RDR 4 */
771da177e4SLinus Torvalds 	{ 160,	3,	0 },   /* RDR 5 */
781da177e4SLinus Torvalds 	{ 336,	6,	0 },   /* RDR 6 */
791da177e4SLinus Torvalds 	{ 164,	3,	0 },   /* RDR 7 */
801da177e4SLinus Torvalds 	{ 0,	0,	0 },   /* RDR 8 */
811da177e4SLinus Torvalds 	{ 35,	1,	0 },   /* RDR 9 */
821da177e4SLinus Torvalds 	{ 6,	1,	0 },   /* RDR 10 */
831da177e4SLinus Torvalds 	{ 18,	1,	0 },   /* RDR 11 */
841da177e4SLinus Torvalds 	{ 13,	1,	0 },   /* RDR 12 */
851da177e4SLinus Torvalds 	{ 8,	1,	0 },   /* RDR 13 */
861da177e4SLinus Torvalds 	{ 8,	1,	0 },   /* RDR 14 */
871da177e4SLinus Torvalds 	{ 8,	1,	0 },   /* RDR 15 */
881da177e4SLinus Torvalds 	{ 1530,	24,	0 },   /* RDR 16 */
891da177e4SLinus Torvalds 	{ 16,	1,	0 },   /* RDR 17 */
901da177e4SLinus Torvalds 	{ 4,	1,	0 },   /* RDR 18 */
911da177e4SLinus Torvalds 	{ 0,	0,	0 },   /* RDR 19 */
921da177e4SLinus Torvalds 	{ 152,	3,	24 },  /* RDR 20 */
931da177e4SLinus Torvalds 	{ 152,	3,	24 },  /* RDR 21 */
941da177e4SLinus Torvalds 	{ 233,	4,	48 },  /* RDR 22 */
951da177e4SLinus Torvalds 	{ 233,	4,	48 },  /* RDR 23 */
961da177e4SLinus Torvalds 	{ 71,	2,	0 },   /* RDR 24 */
971da177e4SLinus Torvalds 	{ 71,	2,	0 },   /* RDR 25 */
981da177e4SLinus Torvalds 	{ 11,	1,	0 },   /* RDR 26 */
991da177e4SLinus Torvalds 	{ 18,	1,	0 },   /* RDR 27 */
1001da177e4SLinus Torvalds 	{ 128,	2,	0 },   /* RDR 28 */
1011da177e4SLinus Torvalds 	{ 0,	0,	0 },   /* RDR 29 */
1021da177e4SLinus Torvalds 	{ 16,	1,	0 },   /* RDR 30 */
1031da177e4SLinus Torvalds 	{ 16,	1,	0 },   /* RDR 31 */
1041da177e4SLinus Torvalds };
1051da177e4SLinus Torvalds 
1061da177e4SLinus Torvalds /* RDR register descriptions for PCX-U */
107cb6fc18eSHelge Deller static const struct rdr_tbl_ent perf_rdr_tbl_U[] = {
1081da177e4SLinus Torvalds 	{ 19,	1,	8 },              /* RDR 0 */
1091da177e4SLinus Torvalds 	{ 32,	1,	16 },             /* RDR 1 */
1101da177e4SLinus Torvalds 	{ 20,	1,	0 },              /* RDR 2 */
1111da177e4SLinus Torvalds 	{ 0,	0,	0 },              /* RDR 3 */
1121da177e4SLinus Torvalds 	{ 344,	6,	0 },              /* RDR 4 */
1131da177e4SLinus Torvalds 	{ 176,	3,	0 },              /* RDR 5 */
1141da177e4SLinus Torvalds 	{ 336,	6,	0 },              /* RDR 6 */
1151da177e4SLinus Torvalds 	{ 0,	0,	0 },              /* RDR 7 */
1161da177e4SLinus Torvalds 	{ 0,	0,	0 },              /* RDR 8 */
1171da177e4SLinus Torvalds 	{ 0,	0,	0 },              /* RDR 9 */
1181da177e4SLinus Torvalds 	{ 28,	1,	0 },              /* RDR 10 */
1191da177e4SLinus Torvalds 	{ 33,	1,	0 },              /* RDR 11 */
1201da177e4SLinus Torvalds 	{ 0,	0,	0 },              /* RDR 12 */
1211da177e4SLinus Torvalds 	{ 230,	4,	0 },              /* RDR 13 */
1221da177e4SLinus Torvalds 	{ 32,	1,	0 },              /* RDR 14 */
1231da177e4SLinus Torvalds 	{ 128,	2,	0 },              /* RDR 15 */
1241da177e4SLinus Torvalds 	{ 1494,	24,	0 },              /* RDR 16 */
1251da177e4SLinus Torvalds 	{ 18,	1,	0 },              /* RDR 17 */
1261da177e4SLinus Torvalds 	{ 4,	1,	0 },              /* RDR 18 */
1271da177e4SLinus Torvalds 	{ 0,	0,	0 },              /* RDR 19 */
1281da177e4SLinus Torvalds 	{ 158,	3,	24 },             /* RDR 20 */
1291da177e4SLinus Torvalds 	{ 158,	3,	24 },             /* RDR 21 */
1301da177e4SLinus Torvalds 	{ 194,	4,	48 },             /* RDR 22 */
1311da177e4SLinus Torvalds 	{ 194,	4,	48 },             /* RDR 23 */
1321da177e4SLinus Torvalds 	{ 71,	2,	0 },              /* RDR 24 */
1331da177e4SLinus Torvalds 	{ 71,	2,	0 },              /* RDR 25 */
1341da177e4SLinus Torvalds 	{ 28,	1,	0 },              /* RDR 26 */
1351da177e4SLinus Torvalds 	{ 33,	1,	0 },              /* RDR 27 */
1361da177e4SLinus Torvalds 	{ 88,	2,	0 },              /* RDR 28 */
1371da177e4SLinus Torvalds 	{ 32,	1,	0 },              /* RDR 29 */
1381da177e4SLinus Torvalds 	{ 24,	1,	0 },              /* RDR 30 */
1391da177e4SLinus Torvalds 	{ 16,	1,	0 },              /* RDR 31 */
1401da177e4SLinus Torvalds };
1411da177e4SLinus Torvalds 
/*
 * Write-control bitmasks.  A non-zero write_control in the RDR tables
 * is a byte offset into this array; the offset of each group is noted
 * next to its first double word.
 */
static const uint64_t perf_bitmasks[] = {
	/* offset 0: first dbl word must be zero */
	0x0000000000000000ULL,
	/* offset 8: RDR0 bitmask */
	0xfdffe00000000000ULL,
	/* offset 16: RDR1 bitmask */
	0x003f000000000000ULL,
	/* offset 24: RDR20-RDR21 bitmask (152 bits) */
	0x00ffffffffffffffULL,
	0xffffffffffffffffULL,
	0xfffffffc00000000ULL,
	/* offset 48: RDR22-RDR23 bitmask (233 bits) */
	0xffffffffffffffffULL,
	0xffffffffffffffffULL,
	0xfffffffffffffffcULL,
	0xff00000000000000ULL
};
1581da177e4SLinus Torvalds 
/*
 * Write-control bitmasks for the Pa-8700 processor, where some things
 * have changed slightly.  Same layout as perf_bitmasks: a non-zero
 * write_control is a byte offset into this array.
 */
static const uint64_t perf_bitmasks_piranha[] = {
	/* offset 0: first dbl word must be zero */
	0x0000000000000000ULL,
	/* offset 8: RDR0 bitmask */
	0xfdffe00000000000ULL,
	/* offset 16: RDR1 bitmask */
	0x003f000000000000ULL,
	/* offset 24: RDR20-RDR21 bitmask (158 bits) */
	0x00ffffffffffffffULL,
	0xffffffffffffffffULL,
	0xfffffffc00000000ULL,
	/* offset 48: RDR22-RDR23 bitmask (210 bits) */
	0xffffffffffffffffULL,
	0xffffffffffffffffULL,
	0xffffffffffffffffULL,
	0xfffc000000000000ULL
};
1751da177e4SLinus Torvalds 
/* Bitmask table in use; perf_init() points it at one of the arrays above */
static const uint64_t *bitmask_array;
1771da177e4SLinus Torvalds 
1781da177e4SLinus Torvalds /******************************************************************************
1791da177e4SLinus Torvalds  * Function Prototypes
1801da177e4SLinus Torvalds  *****************************************************************************/
1811da177e4SLinus Torvalds static int perf_config(uint32_t *image_ptr);
1821da177e4SLinus Torvalds static int perf_release(struct inode *inode, struct file *file);
1831da177e4SLinus Torvalds static int perf_open(struct inode *inode, struct file *file);
1841da177e4SLinus Torvalds static ssize_t perf_read(struct file *file, char __user *buf, size_t cnt, loff_t *ppos);
18574e3f6e6SArvind Yadav static ssize_t perf_write(struct file *file, const char __user *buf,
18674e3f6e6SArvind Yadav 	size_t count, loff_t *ppos);
187ad7dd338SChristoph Hellwig static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
1881da177e4SLinus Torvalds static void perf_start_counters(void);
1891da177e4SLinus Torvalds static int perf_stop_counters(uint32_t *raddr);
190cb6fc18eSHelge Deller static const struct rdr_tbl_ent * perf_rdr_get_entry(uint32_t rdr_num);
1911da177e4SLinus Torvalds static int perf_rdr_read_ubuf(uint32_t	rdr_num, uint64_t *buffer);
1921da177e4SLinus Torvalds static int perf_rdr_clear(uint32_t rdr_num);
1931da177e4SLinus Torvalds static int perf_write_image(uint64_t *memaddr);
1941da177e4SLinus Torvalds static void perf_rdr_write(uint32_t rdr_num, uint64_t *buffer);
1951da177e4SLinus Torvalds 
/*
 * External Assembly Routines
 *
 * The _W / _U suffixes follow the naming of the PCX-W and PCX-U tables
 * in this file.
 */
extern uint64_t perf_rdr_shift_in_W(uint32_t rdr_num, uint16_t width);
extern uint64_t perf_rdr_shift_in_U(uint32_t rdr_num, uint16_t width);
extern void perf_rdr_shift_out_W(uint32_t rdr_num, uint64_t buffer);
extern void perf_rdr_shift_out_U(uint32_t rdr_num, uint64_t buffer);
extern void perf_intrigue_enable_perf_counters(void);
extern void perf_intrigue_disable_perf_counters(void);
2031da177e4SLinus Torvalds 
2041da177e4SLinus Torvalds /******************************************************************************
2051da177e4SLinus Torvalds  * Function Definitions
2061da177e4SLinus Torvalds  *****************************************************************************/
2071da177e4SLinus Torvalds 
2081da177e4SLinus Torvalds 
2091da177e4SLinus Torvalds /*
2101da177e4SLinus Torvalds  * configure:
2111da177e4SLinus Torvalds  *
2121da177e4SLinus Torvalds  * Configure the cpu with a given data image.  First turn off the counters,
2131da177e4SLinus Torvalds  * then download the image, then turn the counters back on.
2141da177e4SLinus Torvalds  */
2151da177e4SLinus Torvalds static int perf_config(uint32_t *image_ptr)
2161da177e4SLinus Torvalds {
2171da177e4SLinus Torvalds 	long error;
2181da177e4SLinus Torvalds 	uint32_t raddr[4];
2191da177e4SLinus Torvalds 
2201da177e4SLinus Torvalds 	/* Stop the counters*/
2211da177e4SLinus Torvalds 	error = perf_stop_counters(raddr);
2221da177e4SLinus Torvalds 	if (error != 0) {
2231da177e4SLinus Torvalds 		printk("perf_config: perf_stop_counters = %ld\n", error);
2241da177e4SLinus Torvalds 		return -EINVAL;
2251da177e4SLinus Torvalds 	}
2261da177e4SLinus Torvalds 
2271da177e4SLinus Torvalds printk("Preparing to write image\n");
2281da177e4SLinus Torvalds 	/* Write the image to the chip */
2291da177e4SLinus Torvalds 	error = perf_write_image((uint64_t *)image_ptr);
2301da177e4SLinus Torvalds 	if (error != 0) {
2311da177e4SLinus Torvalds 		printk("perf_config: DOWNLOAD = %ld\n", error);
2321da177e4SLinus Torvalds 		return -EINVAL;
2331da177e4SLinus Torvalds 	}
2341da177e4SLinus Torvalds 
2351da177e4SLinus Torvalds printk("Preparing to start counters\n");
2361da177e4SLinus Torvalds 
2371da177e4SLinus Torvalds 	/* Start the counters */
2381da177e4SLinus Torvalds 	perf_start_counters();
2391da177e4SLinus Torvalds 
2401da177e4SLinus Torvalds 	return sizeof(uint32_t);
2411da177e4SLinus Torvalds }
2421da177e4SLinus Torvalds 
2431da177e4SLinus Torvalds /*
2441da177e4SLinus Torvalds  * Open the device and initialize all of its memory.  The device is only
2451da177e4SLinus Torvalds  * opened once, but can be "queried" by multiple processes that know its
2461da177e4SLinus Torvalds  * file descriptor.
2471da177e4SLinus Torvalds  */
2481da177e4SLinus Torvalds static int perf_open(struct inode *inode, struct file *file)
2491da177e4SLinus Torvalds {
2501da177e4SLinus Torvalds 	spin_lock(&perf_lock);
2511da177e4SLinus Torvalds 	if (perf_enabled) {
2521da177e4SLinus Torvalds 		spin_unlock(&perf_lock);
2531da177e4SLinus Torvalds 		return -EBUSY;
2541da177e4SLinus Torvalds 	}
2551da177e4SLinus Torvalds 	perf_enabled = 1;
2561da177e4SLinus Torvalds  	spin_unlock(&perf_lock);
2571da177e4SLinus Torvalds 
2581da177e4SLinus Torvalds 	return 0;
2591da177e4SLinus Torvalds }
2601da177e4SLinus Torvalds 
2611da177e4SLinus Torvalds /*
2621da177e4SLinus Torvalds  * Close the device.
2631da177e4SLinus Torvalds  */
2641da177e4SLinus Torvalds static int perf_release(struct inode *inode, struct file *file)
2651da177e4SLinus Torvalds {
2661da177e4SLinus Torvalds 	spin_lock(&perf_lock);
2671da177e4SLinus Torvalds 	perf_enabled = 0;
2681da177e4SLinus Torvalds 	spin_unlock(&perf_lock);
2691da177e4SLinus Torvalds 
2701da177e4SLinus Torvalds 	return 0;
2711da177e4SLinus Torvalds }
2721da177e4SLinus Torvalds 
2731da177e4SLinus Torvalds /*
2741da177e4SLinus Torvalds  * Read does nothing for this driver
2751da177e4SLinus Torvalds  */
2761da177e4SLinus Torvalds static ssize_t perf_read(struct file *file, char __user *buf, size_t cnt, loff_t *ppos)
2771da177e4SLinus Torvalds {
2781da177e4SLinus Torvalds 	return 0;
2791da177e4SLinus Torvalds }
2801da177e4SLinus Torvalds 
2811da177e4SLinus Torvalds /*
2821da177e4SLinus Torvalds  * write:
2831da177e4SLinus Torvalds  *
2841da177e4SLinus Torvalds  * This routine downloads the image to the chip.  It must be
2851da177e4SLinus Torvalds  * called on the processor that the download should happen
2861da177e4SLinus Torvalds  * on.
2871da177e4SLinus Torvalds  */
28874e3f6e6SArvind Yadav static ssize_t perf_write(struct file *file, const char __user *buf,
28974e3f6e6SArvind Yadav 	size_t count, loff_t *ppos)
2901da177e4SLinus Torvalds {
2919e142b72SHelge Deller 	size_t image_size __maybe_unused;
2921da177e4SLinus Torvalds 	uint32_t image_type;
2931da177e4SLinus Torvalds 	uint32_t interface_type;
2941da177e4SLinus Torvalds 	uint32_t test;
2951da177e4SLinus Torvalds 
2961da177e4SLinus Torvalds 	if (perf_processor_interface == ONYX_INTF)
2971da177e4SLinus Torvalds 		image_size = PCXU_IMAGE_SIZE;
2981da177e4SLinus Torvalds 	else if (perf_processor_interface == CUDA_INTF)
2991da177e4SLinus Torvalds 		image_size = PCXW_IMAGE_SIZE;
3001da177e4SLinus Torvalds 	else
3011da177e4SLinus Torvalds 		return -EFAULT;
3021da177e4SLinus Torvalds 
303cf91baf3SAlexey Budankov 	if (!perfmon_capable())
3041da177e4SLinus Torvalds 		return -EACCES;
3051da177e4SLinus Torvalds 
3061da177e4SLinus Torvalds 	if (count != sizeof(uint32_t))
3071da177e4SLinus Torvalds 		return -EIO;
3081da177e4SLinus Torvalds 
30982cbd568SDan Carpenter 	if (copy_from_user(&image_type, buf, sizeof(uint32_t)))
31082cbd568SDan Carpenter 		return -EFAULT;
3111da177e4SLinus Torvalds 
3121da177e4SLinus Torvalds 	/* Get the interface type and test type */
3131da177e4SLinus Torvalds    	interface_type = (image_type >> 16) & 0xffff;
3141da177e4SLinus Torvalds 	test           = (image_type & 0xffff);
3151da177e4SLinus Torvalds 
3161da177e4SLinus Torvalds 	/* Make sure everything makes sense */
3171da177e4SLinus Torvalds 
3181da177e4SLinus Torvalds 	/* First check the machine type is correct for
3191da177e4SLinus Torvalds 	   the requested image */
3201da177e4SLinus Torvalds 	if (((perf_processor_interface == CUDA_INTF) &&
3211da177e4SLinus Torvalds 			(interface_type != CUDA_INTF)) ||
3221da177e4SLinus Torvalds 		((perf_processor_interface == ONYX_INTF) &&
3231da177e4SLinus Torvalds 			(interface_type != ONYX_INTF)))
3241da177e4SLinus Torvalds 		return -EINVAL;
3251da177e4SLinus Torvalds 
3261da177e4SLinus Torvalds 	/* Next check to make sure the requested image
3271da177e4SLinus Torvalds 	   is valid */
3281da177e4SLinus Torvalds 	if (((interface_type == CUDA_INTF) &&
3291da177e4SLinus Torvalds 		       (test >= MAX_CUDA_IMAGES)) ||
3301da177e4SLinus Torvalds 	    ((interface_type == ONYX_INTF) &&
3311da177e4SLinus Torvalds 		       (test >= MAX_ONYX_IMAGES)))
3321da177e4SLinus Torvalds 		return -EINVAL;
3331da177e4SLinus Torvalds 
3341da177e4SLinus Torvalds 	/* Copy the image into the processor */
3351da177e4SLinus Torvalds 	if (interface_type == CUDA_INTF)
3361da177e4SLinus Torvalds 		return perf_config(cuda_images[test]);
3371da177e4SLinus Torvalds 	else
3381da177e4SLinus Torvalds 		return perf_config(onyx_images[test]);
3391da177e4SLinus Torvalds 
3401da177e4SLinus Torvalds 	return count;
3411da177e4SLinus Torvalds }
3421da177e4SLinus Torvalds 
/*
 * Patch the images that need to know the IVA addresses.
 *
 * The whole body is compiled out (see the FIXME below), so calling this
 * currently does nothing.
 */
static void perf_patch_images(void)
{
#if 0 /* FIXME!! */
	/*
	 * NOTE:  this routine is VERY specific to the current TLB image.
	 * If the image is changed, this routine might also need to be changed.
	 */
	extern void $i_itlb_miss_2_0();
	extern void $i_dtlb_miss_2_0();
	extern void PA2_0_iva();

	/* Images that receive the same dtlb/itlb address patch */
	static const int tlb_images[] = { TLBMISS, TLBHANDMISS, BIG_CPI };
	const unsigned int nr_tlb_images =
		sizeof(tlb_images) / sizeof(tlb_images[0]);

	/*
	 * We can only use the lower 32-bits, the upper 32-bits should be 0
	 * anyway given this is in the kernel
	 */
	uint32_t itlb_addr  = (uint32_t)&($i_itlb_miss_2_0);
	uint32_t dtlb_addr  = (uint32_t)&($i_dtlb_miss_2_0);
	uint32_t IVAaddress = (uint32_t)&PA2_0_iva;
	unsigned int i;

	if (perf_processor_interface == ONYX_INTF) {
		for (i = 0; i < nr_tlb_images; i++) {
			uint32_t *img = onyx_images[tlb_images[i]];

			/* clear the low 8 bits of word 15 ... */
			img[15] &= 0xffffff00;
			/* ... and put the top 8 bits of dtlb_addr there */
			img[15] |= (0x000000ff&((dtlb_addr) >> 24));
			/* remaining 24 bits of dtlb_addr go into word 16 */
			img[16] = (dtlb_addr << 8)&0xffffff00;
			img[17] = itlb_addr;
		}

		/* PANIC image: same split, using the IVA address */
		onyx_images[PANIC][15] &= 0xffffff00;
		onyx_images[PANIC][15] |= (0x000000ff&((IVAaddress) >> 24));
		onyx_images[PANIC][16] = (IVAaddress << 8)&0xffffff00;
	} else if (perf_processor_interface == CUDA_INTF) {
		/* Cuda interface */
		for (i = 0; i < nr_tlb_images; i++) {
			uint32_t *img = cuda_images[tlb_images[i]];

			img[16] = (img[16]&0xffff0000) |
				((dtlb_addr >> 8)&0x0000ffff);
			img[17] = ((dtlb_addr << 24)&0xff000000) |
				((itlb_addr >> 16)&0x000000ff);
			img[18] = (itlb_addr << 16)&0xffff0000;
		}
	}
	/* Unknown type: nothing to patch */
#endif
}
4191da177e4SLinus Torvalds 
4201da177e4SLinus Torvalds 
4211da177e4SLinus Torvalds /*
4221da177e4SLinus Torvalds  * ioctl routine
4231da177e4SLinus Torvalds  * All routines effect the processor that they are executed on.  Thus you
4241da177e4SLinus Torvalds  * must be running on the processor that you wish to change.
4251da177e4SLinus Torvalds  */
4261da177e4SLinus Torvalds 
427ad7dd338SChristoph Hellwig static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
4281da177e4SLinus Torvalds {
4291da177e4SLinus Torvalds 	long error_start;
4301da177e4SLinus Torvalds 	uint32_t raddr[4];
431ad7dd338SChristoph Hellwig 	int error = 0;
4321da177e4SLinus Torvalds 
4331da177e4SLinus Torvalds 	switch (cmd) {
4341da177e4SLinus Torvalds 
4351da177e4SLinus Torvalds 	    case PA_PERF_ON:
4361da177e4SLinus Torvalds 			/* Start the counters */
4371da177e4SLinus Torvalds 			perf_start_counters();
438ad7dd338SChristoph Hellwig 			break;
4391da177e4SLinus Torvalds 
4401da177e4SLinus Torvalds 	    case PA_PERF_OFF:
4411da177e4SLinus Torvalds 			error_start = perf_stop_counters(raddr);
4421da177e4SLinus Torvalds 			if (error_start != 0) {
4431da177e4SLinus Torvalds 				printk(KERN_ERR "perf_off: perf_stop_counters = %ld\n", error_start);
444ad7dd338SChristoph Hellwig 				error = -EFAULT;
445ad7dd338SChristoph Hellwig 				break;
4461da177e4SLinus Torvalds 			}
4471da177e4SLinus Torvalds 
4481da177e4SLinus Torvalds 			/* copy out the Counters */
4491da177e4SLinus Torvalds 			if (copy_to_user((void __user *)arg, raddr,
4501da177e4SLinus Torvalds 					sizeof (raddr)) != 0) {
451ad7dd338SChristoph Hellwig 				error =  -EFAULT;
452ad7dd338SChristoph Hellwig 				break;
4531da177e4SLinus Torvalds 			}
454ad7dd338SChristoph Hellwig 			break;
4551da177e4SLinus Torvalds 
4561da177e4SLinus Torvalds 	    case PA_PERF_VERSION:
4571da177e4SLinus Torvalds   	  		/* Return the version # */
458ad7dd338SChristoph Hellwig 			error = put_user(PERF_VERSION, (int *)arg);
459ad7dd338SChristoph Hellwig 			break;
4601da177e4SLinus Torvalds 
4611da177e4SLinus Torvalds 	    default:
462ad7dd338SChristoph Hellwig   	 		error = -ENOTTY;
4631da177e4SLinus Torvalds 	}
464ad7dd338SChristoph Hellwig 
465ad7dd338SChristoph Hellwig 	return error;
4661da177e4SLinus Torvalds }
4671da177e4SLinus Torvalds 
4685dfe4c96SArjan van de Ven static const struct file_operations perf_fops = {
4691da177e4SLinus Torvalds 	.read = perf_read,
4701da177e4SLinus Torvalds 	.write = perf_write,
471ad7dd338SChristoph Hellwig 	.unlocked_ioctl = perf_ioctl,
472ad7dd338SChristoph Hellwig 	.compat_ioctl = perf_ioctl,
4731da177e4SLinus Torvalds 	.open = perf_open,
4741da177e4SLinus Torvalds 	.release = perf_release
4751da177e4SLinus Torvalds };
4761da177e4SLinus Torvalds 
4771da177e4SLinus Torvalds static struct miscdevice perf_dev = {
4781da177e4SLinus Torvalds 	MISC_DYNAMIC_MINOR,
4791da177e4SLinus Torvalds 	PA_PERF_DEV,
4801da177e4SLinus Torvalds 	&perf_fops
4811da177e4SLinus Torvalds };
4821da177e4SLinus Torvalds 
4831da177e4SLinus Torvalds /*
4841da177e4SLinus Torvalds  * Initialize the module
4851da177e4SLinus Torvalds  */
4861da177e4SLinus Torvalds static int __init perf_init(void)
4871da177e4SLinus Torvalds {
4881da177e4SLinus Torvalds 	int ret;
4891da177e4SLinus Torvalds 
4901da177e4SLinus Torvalds 	/* Determine correct processor interface to use */
4911da177e4SLinus Torvalds 	bitmask_array = perf_bitmasks;
4921da177e4SLinus Torvalds 
4931da177e4SLinus Torvalds 	if (boot_cpu_data.cpu_type == pcxu ||
4941da177e4SLinus Torvalds 	    boot_cpu_data.cpu_type == pcxu_) {
4951da177e4SLinus Torvalds 		perf_processor_interface = ONYX_INTF;
4961da177e4SLinus Torvalds 	} else if (boot_cpu_data.cpu_type == pcxw ||
4971da177e4SLinus Torvalds 		 boot_cpu_data.cpu_type == pcxw_ ||
4981da177e4SLinus Torvalds 		 boot_cpu_data.cpu_type == pcxw2 ||
4992cbd42dbSKyle McMartin 		 boot_cpu_data.cpu_type == mako ||
5002cbd42dbSKyle McMartin 		 boot_cpu_data.cpu_type == mako2) {
5011da177e4SLinus Torvalds 		perf_processor_interface = CUDA_INTF;
5021da177e4SLinus Torvalds 		if (boot_cpu_data.cpu_type == pcxw2 ||
5032cbd42dbSKyle McMartin 		    boot_cpu_data.cpu_type == mako ||
5042cbd42dbSKyle McMartin 		    boot_cpu_data.cpu_type == mako2)
5051da177e4SLinus Torvalds 			bitmask_array = perf_bitmasks_piranha;
5061da177e4SLinus Torvalds 	} else {
5071da177e4SLinus Torvalds 		perf_processor_interface = UNKNOWN_INTF;
5081da177e4SLinus Torvalds 		printk("Performance monitoring counters not supported on this processor\n");
5091da177e4SLinus Torvalds 		return -ENODEV;
5101da177e4SLinus Torvalds 	}
5111da177e4SLinus Torvalds 
5121da177e4SLinus Torvalds 	ret = misc_register(&perf_dev);
5131da177e4SLinus Torvalds 	if (ret) {
5141da177e4SLinus Torvalds 		printk(KERN_ERR "Performance monitoring counters: "
5151da177e4SLinus Torvalds 			"cannot register misc device.\n");
5161da177e4SLinus Torvalds 		return ret;
5171da177e4SLinus Torvalds 	}
5181da177e4SLinus Torvalds 
5191da177e4SLinus Torvalds 	/* Patch the images to match the system */
5201da177e4SLinus Torvalds     	perf_patch_images();
5211da177e4SLinus Torvalds 
5221da177e4SLinus Torvalds 	/* TODO: this only lets us access the first cpu.. what to do for SMP? */
523ef017bebSHelge Deller 	cpu_device = per_cpu(cpu_data, 0).dev;
5241da177e4SLinus Torvalds 	printk("Performance monitoring counters enabled for %s\n",
525ef017bebSHelge Deller 		per_cpu(cpu_data, 0).dev->name);
5261da177e4SLinus Torvalds 
5271da177e4SLinus Torvalds 	return 0;
5281da177e4SLinus Torvalds }
52915becabdSPaul Gortmaker device_initcall(perf_init);
5301da177e4SLinus Torvalds 
/*
 * perf_start_counters(void)
 *
 * Start the counters by calling the external assembly routine
 * perf_intrigue_enable_perf_counters().
 */
static void perf_start_counters(void)
{
	perf_intrigue_enable_perf_counters();
}
5411da177e4SLinus Torvalds 
5421da177e4SLinus Torvalds /*
5431da177e4SLinus Torvalds  * perf_stop_counters
5441da177e4SLinus Torvalds  *
5451da177e4SLinus Torvalds  * Stop the performance counters and save counts
5461da177e4SLinus Torvalds  * in a per_processor array.
5471da177e4SLinus Torvalds  */
5481da177e4SLinus Torvalds static int perf_stop_counters(uint32_t *raddr)
5491da177e4SLinus Torvalds {
5501da177e4SLinus Torvalds 	uint64_t userbuf[MAX_RDR_WORDS];
5511da177e4SLinus Torvalds 
5521da177e4SLinus Torvalds 	/* Disable performance counters */
5531da177e4SLinus Torvalds 	perf_intrigue_disable_perf_counters();
5541da177e4SLinus Torvalds 
5551da177e4SLinus Torvalds 	if (perf_processor_interface == ONYX_INTF) {
5561da177e4SLinus Torvalds 		uint64_t tmp64;
5571da177e4SLinus Torvalds 		/*
5581da177e4SLinus Torvalds 		 * Read the counters
5591da177e4SLinus Torvalds 		 */
5601da177e4SLinus Torvalds 		if (!perf_rdr_read_ubuf(16, userbuf))
5611da177e4SLinus Torvalds 			return -13;
5621da177e4SLinus Torvalds 
5637022672eSSimon Arlott 		/* Counter0 is bits 1398 to 1429 */
5641da177e4SLinus Torvalds 		tmp64 =  (userbuf[21] << 22) & 0x00000000ffc00000;
5651da177e4SLinus Torvalds 		tmp64 |= (userbuf[22] >> 42) & 0x00000000003fffff;
5661da177e4SLinus Torvalds 		/* OR sticky0 (bit 1430) to counter0 bit 32 */
5671da177e4SLinus Torvalds 		tmp64 |= (userbuf[22] >> 10) & 0x0000000080000000;
5681da177e4SLinus Torvalds 		raddr[0] = (uint32_t)tmp64;
5691da177e4SLinus Torvalds 
5707022672eSSimon Arlott 		/* Counter1 is bits 1431 to 1462 */
5711da177e4SLinus Torvalds 		tmp64 =  (userbuf[22] >> 9) & 0x00000000ffffffff;
5721da177e4SLinus Torvalds 		/* OR sticky1 (bit 1463) to counter1 bit 32 */
5731da177e4SLinus Torvalds 		tmp64 |= (userbuf[22] << 23) & 0x0000000080000000;
5741da177e4SLinus Torvalds 		raddr[1] = (uint32_t)tmp64;
5751da177e4SLinus Torvalds 
5767022672eSSimon Arlott 		/* Counter2 is bits 1464 to 1495 */
5771da177e4SLinus Torvalds 		tmp64 =  (userbuf[22] << 24) & 0x00000000ff000000;
5781da177e4SLinus Torvalds 		tmp64 |= (userbuf[23] >> 40) & 0x0000000000ffffff;
5791da177e4SLinus Torvalds 		/* OR sticky2 (bit 1496) to counter2 bit 32 */
5801da177e4SLinus Torvalds 		tmp64 |= (userbuf[23] >> 8) & 0x0000000080000000;
5811da177e4SLinus Torvalds 		raddr[2] = (uint32_t)tmp64;
5821da177e4SLinus Torvalds 
5837022672eSSimon Arlott 		/* Counter3 is bits 1497 to 1528 */
5841da177e4SLinus Torvalds 		tmp64 =  (userbuf[23] >> 7) & 0x00000000ffffffff;
5851da177e4SLinus Torvalds 		/* OR sticky3 (bit 1529) to counter3 bit 32 */
5861da177e4SLinus Torvalds 		tmp64 |= (userbuf[23] << 25) & 0x0000000080000000;
5871da177e4SLinus Torvalds 		raddr[3] = (uint32_t)tmp64;
5881da177e4SLinus Torvalds 
5891da177e4SLinus Torvalds 		/*
5901da177e4SLinus Torvalds 		 * Zero out the counters
5911da177e4SLinus Torvalds 		 */
5921da177e4SLinus Torvalds 
5931da177e4SLinus Torvalds 		/*
5941da177e4SLinus Torvalds 		 * The counters and sticky-bits comprise the last 132 bits
5951da177e4SLinus Torvalds 		 * (1398 - 1529) of RDR16 on a U chip.  We'll zero these
5961da177e4SLinus Torvalds 		 * out the easy way: zero out last 10 bits of dword 21,
5971da177e4SLinus Torvalds 		 * all of dword 22 and 58 bits (plus 6 don't care bits) of
5981da177e4SLinus Torvalds 		 * dword 23.
5991da177e4SLinus Torvalds 		 */
6001da177e4SLinus Torvalds 		userbuf[21] &= 0xfffffffffffffc00ul;	/* 0 to last 10 bits */
6011da177e4SLinus Torvalds 		userbuf[22] = 0;
6021da177e4SLinus Torvalds 		userbuf[23] = 0;
6031da177e4SLinus Torvalds 
6041da177e4SLinus Torvalds 		/*
6057022672eSSimon Arlott 		 * Write back the zeroed bytes + the image given
6061da177e4SLinus Torvalds 		 * the read was destructive.
6071da177e4SLinus Torvalds 		 */
6081da177e4SLinus Torvalds 		perf_rdr_write(16, userbuf);
6091da177e4SLinus Torvalds 	} else {
6101da177e4SLinus Torvalds 
6111da177e4SLinus Torvalds 		/*
6121da177e4SLinus Torvalds 		 * Read RDR-15 which contains the counters and sticky bits
6131da177e4SLinus Torvalds 		 */
6141da177e4SLinus Torvalds 		if (!perf_rdr_read_ubuf(15, userbuf)) {
6151da177e4SLinus Torvalds 			return -13;
6161da177e4SLinus Torvalds 		}
6171da177e4SLinus Torvalds 
6181da177e4SLinus Torvalds 		/*
6191da177e4SLinus Torvalds 		 * Clear out the counters
6201da177e4SLinus Torvalds 		 */
6211da177e4SLinus Torvalds 		perf_rdr_clear(15);
6221da177e4SLinus Torvalds 
6231da177e4SLinus Torvalds 		/*
6241da177e4SLinus Torvalds 		 * Copy the counters
6251da177e4SLinus Torvalds 		 */
6261da177e4SLinus Torvalds 		raddr[0] = (uint32_t)((userbuf[0] >> 32) & 0x00000000ffffffffUL);
6271da177e4SLinus Torvalds 		raddr[1] = (uint32_t)(userbuf[0] & 0x00000000ffffffffUL);
6281da177e4SLinus Torvalds 		raddr[2] = (uint32_t)((userbuf[1] >> 32) & 0x00000000ffffffffUL);
6291da177e4SLinus Torvalds 		raddr[3] = (uint32_t)(userbuf[1] & 0x00000000ffffffffUL);
6301da177e4SLinus Torvalds 	}
6311da177e4SLinus Torvalds 
6321da177e4SLinus Torvalds 	return 0;
6331da177e4SLinus Torvalds }
6341da177e4SLinus Torvalds 
6351da177e4SLinus Torvalds /*
6361da177e4SLinus Torvalds  * perf_rdr_get_entry
6371da177e4SLinus Torvalds  *
6381da177e4SLinus Torvalds  * Retrieve a pointer to the description of what this
6391da177e4SLinus Torvalds  * RDR contains.
6401da177e4SLinus Torvalds  */
641cb6fc18eSHelge Deller static const struct rdr_tbl_ent * perf_rdr_get_entry(uint32_t rdr_num)
6421da177e4SLinus Torvalds {
6431da177e4SLinus Torvalds 	if (perf_processor_interface == ONYX_INTF) {
6441da177e4SLinus Torvalds 		return &perf_rdr_tbl_U[rdr_num];
6451da177e4SLinus Torvalds 	} else {
6461da177e4SLinus Torvalds 		return &perf_rdr_tbl_W[rdr_num];
6471da177e4SLinus Torvalds 	}
6481da177e4SLinus Torvalds }
6491da177e4SLinus Torvalds 
6501da177e4SLinus Torvalds /*
6511da177e4SLinus Torvalds  * perf_rdr_read_ubuf
6521da177e4SLinus Torvalds  *
6531da177e4SLinus Torvalds  * Read the RDR value into the buffer specified.
6541da177e4SLinus Torvalds  */
6551da177e4SLinus Torvalds static int perf_rdr_read_ubuf(uint32_t	rdr_num, uint64_t *buffer)
6561da177e4SLinus Torvalds {
6571da177e4SLinus Torvalds 	uint64_t	data, data_mask = 0;
6581da177e4SLinus Torvalds 	uint32_t	width, xbits, i;
659cb6fc18eSHelge Deller 	const struct rdr_tbl_ent *tentry;
6601da177e4SLinus Torvalds 
6611da177e4SLinus Torvalds 	tentry = perf_rdr_get_entry(rdr_num);
6621da177e4SLinus Torvalds 	if ((width = tentry->width) == 0)
6631da177e4SLinus Torvalds 		return 0;
6641da177e4SLinus Torvalds 
6651da177e4SLinus Torvalds 	/* Clear out buffer */
6661da177e4SLinus Torvalds 	i = tentry->num_words;
6671da177e4SLinus Torvalds 	while (i--) {
6681da177e4SLinus Torvalds 		buffer[i] = 0;
6691da177e4SLinus Torvalds 	}
6701da177e4SLinus Torvalds 
6711da177e4SLinus Torvalds 	/* Check for bits an even number of 64 */
6721da177e4SLinus Torvalds 	if ((xbits = width & 0x03f) != 0) {
6731da177e4SLinus Torvalds 		data_mask = 1;
6741da177e4SLinus Torvalds 		data_mask <<= (64 - xbits);
6751da177e4SLinus Torvalds 		data_mask--;
6761da177e4SLinus Torvalds 	}
6771da177e4SLinus Torvalds 
6781da177e4SLinus Torvalds 	/* Grab all of the data */
6791da177e4SLinus Torvalds 	i = tentry->num_words;
6801da177e4SLinus Torvalds 	while (i--) {
6811da177e4SLinus Torvalds 
6821da177e4SLinus Torvalds 		if (perf_processor_interface == ONYX_INTF) {
6831da177e4SLinus Torvalds 			data = perf_rdr_shift_in_U(rdr_num, width);
6841da177e4SLinus Torvalds 		} else {
6851da177e4SLinus Torvalds 			data = perf_rdr_shift_in_W(rdr_num, width);
6861da177e4SLinus Torvalds 		}
6871da177e4SLinus Torvalds 		if (xbits) {
6881da177e4SLinus Torvalds 			buffer[i] |= (data << (64 - xbits));
6891da177e4SLinus Torvalds 			if (i) {
6901da177e4SLinus Torvalds 				buffer[i-1] |= ((data >> xbits) & data_mask);
6911da177e4SLinus Torvalds 			}
6921da177e4SLinus Torvalds 		} else {
6931da177e4SLinus Torvalds 			buffer[i] = data;
6941da177e4SLinus Torvalds 		}
6951da177e4SLinus Torvalds 	}
6961da177e4SLinus Torvalds 
6971da177e4SLinus Torvalds 	return 1;
6981da177e4SLinus Torvalds }
6991da177e4SLinus Torvalds 
7001da177e4SLinus Torvalds /*
7011da177e4SLinus Torvalds  * perf_rdr_clear
7021da177e4SLinus Torvalds  *
7031da177e4SLinus Torvalds  * Zero out the given RDR register
7041da177e4SLinus Torvalds  */
7051da177e4SLinus Torvalds static int perf_rdr_clear(uint32_t	rdr_num)
7061da177e4SLinus Torvalds {
707cb6fc18eSHelge Deller 	const struct rdr_tbl_ent *tentry;
7081da177e4SLinus Torvalds 	int32_t		i;
7091da177e4SLinus Torvalds 
7101da177e4SLinus Torvalds 	tentry = perf_rdr_get_entry(rdr_num);
7111da177e4SLinus Torvalds 
7121da177e4SLinus Torvalds 	if (tentry->width == 0) {
7131da177e4SLinus Torvalds 		return -1;
7141da177e4SLinus Torvalds 	}
7151da177e4SLinus Torvalds 
7161da177e4SLinus Torvalds 	i = tentry->num_words;
7171da177e4SLinus Torvalds 	while (i--) {
7181da177e4SLinus Torvalds 		if (perf_processor_interface == ONYX_INTF) {
7191da177e4SLinus Torvalds 			perf_rdr_shift_out_U(rdr_num, 0UL);
7201da177e4SLinus Torvalds 		} else {
7211da177e4SLinus Torvalds 			perf_rdr_shift_out_W(rdr_num, 0UL);
7221da177e4SLinus Torvalds 		}
7231da177e4SLinus Torvalds 	}
7241da177e4SLinus Torvalds 
7251da177e4SLinus Torvalds 	return 0;
7261da177e4SLinus Torvalds }
7271da177e4SLinus Torvalds 
7281da177e4SLinus Torvalds 
7291da177e4SLinus Torvalds /*
7301da177e4SLinus Torvalds  * perf_write_image
7311da177e4SLinus Torvalds  *
7321da177e4SLinus Torvalds  * Write the given image out to the processor
7331da177e4SLinus Torvalds  */
7341da177e4SLinus Torvalds static int perf_write_image(uint64_t *memaddr)
7351da177e4SLinus Torvalds {
7361da177e4SLinus Torvalds 	uint64_t buffer[MAX_RDR_WORDS];
7371da177e4SLinus Torvalds 	uint64_t *bptr;
7381da177e4SLinus Torvalds 	uint32_t dwords;
739cb6fc18eSHelge Deller 	const uint32_t *intrigue_rdr;
740cb6fc18eSHelge Deller 	const uint64_t *intrigue_bitmask;
741cb6fc18eSHelge Deller 	uint64_t tmp64;
74253f01bbaSMatthew Wilcox 	void __iomem *runway;
743cb6fc18eSHelge Deller 	const struct rdr_tbl_ent *tentry;
7441da177e4SLinus Torvalds 	int i;
7451da177e4SLinus Torvalds 
7461da177e4SLinus Torvalds 	/* Clear out counters */
7471da177e4SLinus Torvalds 	if (perf_processor_interface == ONYX_INTF) {
7481da177e4SLinus Torvalds 
7491da177e4SLinus Torvalds 		perf_rdr_clear(16);
7501da177e4SLinus Torvalds 
7511da177e4SLinus Torvalds 		/* Toggle performance monitor */
7521da177e4SLinus Torvalds 		perf_intrigue_enable_perf_counters();
7531da177e4SLinus Torvalds 		perf_intrigue_disable_perf_counters();
7541da177e4SLinus Torvalds 
7551da177e4SLinus Torvalds 		intrigue_rdr = perf_rdrs_U;
7561da177e4SLinus Torvalds 	} else {
7571da177e4SLinus Torvalds 		perf_rdr_clear(15);
7581da177e4SLinus Torvalds 		intrigue_rdr = perf_rdrs_W;
7591da177e4SLinus Torvalds 	}
7601da177e4SLinus Torvalds 
7611da177e4SLinus Torvalds 	/* Write all RDRs */
7621da177e4SLinus Torvalds 	while (*intrigue_rdr != -1) {
7631da177e4SLinus Torvalds 		tentry = perf_rdr_get_entry(*intrigue_rdr);
7641da177e4SLinus Torvalds 		perf_rdr_read_ubuf(*intrigue_rdr, buffer);
7651da177e4SLinus Torvalds 		bptr   = &buffer[0];
7661da177e4SLinus Torvalds 		dwords = tentry->num_words;
7671da177e4SLinus Torvalds 		if (tentry->write_control) {
7681da177e4SLinus Torvalds 			intrigue_bitmask = &bitmask_array[tentry->write_control >> 3];
7691da177e4SLinus Torvalds 			while (dwords--) {
7701da177e4SLinus Torvalds 				tmp64 = *intrigue_bitmask & *memaddr++;
7711da177e4SLinus Torvalds 				tmp64 |= (~(*intrigue_bitmask++)) & *bptr;
7721da177e4SLinus Torvalds 				*bptr++ = tmp64;
7731da177e4SLinus Torvalds 			}
7741da177e4SLinus Torvalds 		} else {
7751da177e4SLinus Torvalds 			while (dwords--) {
7761da177e4SLinus Torvalds 				*bptr++ = *memaddr++;
7771da177e4SLinus Torvalds 			}
7781da177e4SLinus Torvalds 		}
7791da177e4SLinus Torvalds 
7801da177e4SLinus Torvalds 		perf_rdr_write(*intrigue_rdr, buffer);
7811da177e4SLinus Torvalds 		intrigue_rdr++;
7821da177e4SLinus Torvalds 	}
7831da177e4SLinus Torvalds 
7841da177e4SLinus Torvalds 	/*
7851da177e4SLinus Torvalds 	 * Now copy out the Runway stuff which is not in RDRs
7861da177e4SLinus Torvalds 	 */
7871da177e4SLinus Torvalds 
7881da177e4SLinus Torvalds 	if (cpu_device == NULL)
7891da177e4SLinus Torvalds 	{
7901da177e4SLinus Torvalds 		printk(KERN_ERR "write_image: cpu_device not yet initialized!\n");
7911da177e4SLinus Torvalds 		return -1;
7921da177e4SLinus Torvalds 	}
7931da177e4SLinus Torvalds 
7944bdc0d67SChristoph Hellwig 	runway = ioremap(cpu_device->hpa.start, 4096);
79574e3f6e6SArvind Yadav 	if (!runway) {
79674e3f6e6SArvind Yadav 		pr_err("perf_write_image: ioremap failed!\n");
79774e3f6e6SArvind Yadav 		return -ENOMEM;
79874e3f6e6SArvind Yadav 	}
7991da177e4SLinus Torvalds 
8001da177e4SLinus Torvalds 	/* Merge intrigue bits into Runway STATUS 0 */
80153f01bbaSMatthew Wilcox 	tmp64 = __raw_readq(runway + RUNWAY_STATUS) & 0xffecfffffffffffful;
80253f01bbaSMatthew Wilcox 	__raw_writeq(tmp64 | (*memaddr++ & 0x0013000000000000ul),
80353f01bbaSMatthew Wilcox 		     runway + RUNWAY_STATUS);
8041da177e4SLinus Torvalds 
8051da177e4SLinus Torvalds 	/* Write RUNWAY DEBUG registers */
8061da177e4SLinus Torvalds 	for (i = 0; i < 8; i++) {
80753f01bbaSMatthew Wilcox 		__raw_writeq(*memaddr++, runway + RUNWAY_DEBUG);
8081da177e4SLinus Torvalds 	}
8091da177e4SLinus Torvalds 
8101da177e4SLinus Torvalds 	return 0;
8111da177e4SLinus Torvalds }
8121da177e4SLinus Torvalds 
8131da177e4SLinus Torvalds /*
8141da177e4SLinus Torvalds  * perf_rdr_write
8151da177e4SLinus Torvalds  *
8161da177e4SLinus Torvalds  * Write the given RDR register with the contents
8171da177e4SLinus Torvalds  * of the given buffer.
8181da177e4SLinus Torvalds  */
8191da177e4SLinus Torvalds static void perf_rdr_write(uint32_t rdr_num, uint64_t *buffer)
8201da177e4SLinus Torvalds {
821cb6fc18eSHelge Deller 	const struct rdr_tbl_ent *tentry;
8221da177e4SLinus Torvalds 	int32_t		i;
8231da177e4SLinus Torvalds 
8241da177e4SLinus Torvalds printk("perf_rdr_write\n");
8251da177e4SLinus Torvalds 	tentry = perf_rdr_get_entry(rdr_num);
8261da177e4SLinus Torvalds 	if (tentry->width == 0) { return; }
8271da177e4SLinus Torvalds 
8281da177e4SLinus Torvalds 	i = tentry->num_words;
8291da177e4SLinus Torvalds 	while (i--) {
8301da177e4SLinus Torvalds 		if (perf_processor_interface == ONYX_INTF) {
8311da177e4SLinus Torvalds 			perf_rdr_shift_out_U(rdr_num, buffer[i]);
8321da177e4SLinus Torvalds 		} else {
8331da177e4SLinus Torvalds 			perf_rdr_shift_out_W(rdr_num, buffer[i]);
8341da177e4SLinus Torvalds 		}
8351da177e4SLinus Torvalds 	}
8361da177e4SLinus Torvalds printk("perf_rdr_write done\n");
8371da177e4SLinus Torvalds }
838