1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Parisc performance counters 4 * Copyright (C) 2001 Randolph Chung <tausq@debian.org> 5 * 6 * This code is derived, with permission, from HP/UX sources. 7 */ 8 9 /* 10 * Edited comment from original sources: 11 * 12 * This driver programs the PCX-U/PCX-W performance counters 13 * on the PA-RISC 2.0 chips. The driver keeps all images now 14 * internally to the kernel to hopefully eliminate the possibility 15 * of a bad image halting the CPU. Also, there are different 16 * images for the PCX-W and later chips vs the PCX-U chips. 17 * 18 * Only 1 process is allowed to access the driver at any time, 19 * so the only protection that is needed is at open and close. 20 * A variable "perf_enabled" is used to hold the state of the 21 * driver. The spinlock "perf_lock" is used to protect the 22 * modification of the state during open/close operations so 23 * multiple processes don't get into the driver simultaneously. 24 * 25 * This driver accesses the processor directly vs going through 26 * the PDC INTRIGUE calls. This is done to eliminate bugs introduced 27 * in various PDC revisions. The code is much more maintainable 28 * and reliable this way vs having to debug on every version of PDC 29 * on every box. 
*/

#include <linux/capability.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/miscdevice.h>
#include <linux/spinlock.h>

#include <linux/uaccess.h>
#include <asm/perf.h>
#include <asm/parisc-device.h>
#include <asm/processor.h>
#include <asm/runway.h>
#include <asm/io.h>		/* for __raw_read() */

#include "perf_images.h"

/* Size, in 64-bit words, of the scratch buffers used to hold one RDR */
#define MAX_RDR_WORDS	24
#define PERF_VERSION	2	/* derived from hpux's PI v2 interface */

/*
 * definition of RDR regs
 *
 * One entry describes one RDR:
 *   width         - size of the register in bits (0 means the RDR is
 *                   treated as absent by the read/clear/write helpers)
 *   num_words     - number of 64-bit words used to hold the register
 *   write_control - 0, or a byte offset into the active bitmask array
 *                   selecting the per-word write mask for this RDR
 */
struct rdr_tbl_ent {
	uint16_t	width;
	uint8_t		num_words;
	uint8_t		write_control;
};

/* Interface flavour selected at init time (ONYX_INTF, CUDA_INTF or unknown) */
static int perf_processor_interface __read_mostly = UNKNOWN_INTF;
/* Non-zero while the device is held open; guarded by perf_lock */
static int perf_enabled __read_mostly;
static DEFINE_SPINLOCK(perf_lock);
/* CPU device whose HPA is mapped when writing the Runway registers */
static struct parisc_device *cpu_device __read_mostly;

/* RDRs to write for PCX-W (list is terminated by -1) */
static const int perf_rdrs_W[] =
	{ 0, 1, 4, 5, 6, 15, 16, 17, 18, 20, 21, 22, 23, 24, 25, -1 };

/* RDRs to write for PCX-U (list is terminated by -1) */
static const int perf_rdrs_U[] =
	{ 0, 1, 4, 5, 6, 7, 16, 17, 18, 20, 21, 22, 23, 24, 25, -1 };

/*
 * RDR register descriptions for PCX-W
 * Columns: width (bits), num_words, write_control.
 */
static const struct rdr_tbl_ent perf_rdr_tbl_W[] = {
	{ 19,	1,	8 },   /* RDR 0 */
	{ 16,	1,	16 },  /* RDR 1 */
	{ 72,	2,	0 },   /* RDR 2 */
	{ 81,	2,	0 },   /* RDR 3 */
	{ 328,	6,	0 },   /* RDR 4 */
	{ 160,	3,	0 },   /* RDR 5 */
	{ 336,	6,	0 },   /* RDR 6 */
	{ 164,	3,	0 },   /* RDR 7 */
	{ 0,	0,	0 },   /* RDR 8 */
	{ 35,	1,	0 },   /* RDR 9 */
	{ 6,	1,	0 },   /* RDR 10 */
	{ 18,	1,	0 },   /* RDR 11 */
	{ 13,	1,	0 },   /* RDR 12 */
	{ 8,	1,	0 },   /* RDR 13 */
	{ 8,	1,	0 },   /* RDR 14 */
	/*
	 * NOTE(review): perf_stop_counters() consumes two 64-bit words
	 * from RDR 15 on this interface, yet the entry below declares a
	 * single word -- confirm against the hardware documentation.
	 */
	{ 8,	1,	0 },   /* RDR 15 */
	{ 1530,	24,	0 },   /* RDR 16 */
	{ 16,	1,	0 },   /* RDR 17 */
	{ 4,	1,	0 },   /* RDR 18 */
	{ 0,	0,	0 },   /* RDR 19 */
	{ 152,	3,	24 },  /* RDR 20 */
	{ 152,	3,	24 },  /* RDR 21 */
	{ 233,	4,	48 },  /* RDR 22 */
	{ 233,	4,	48 },  /* RDR 23 */
	{ 71,	2,	0 },   /* RDR 24 */
	{ 71,	2,	0 },   /* RDR 25 */
	{ 11,	1,	0 },   /* RDR 26 */
	{ 18,	1,	0 },   /* RDR 27 */
	{ 128,	2,	0 },   /* RDR 28 */
	{ 0,	0,	0 },   /* RDR 29 */
	{ 16,	1,	0 },   /* RDR 30 */
	{ 16,	1,	0 },   /* RDR 31 */
};

/*
 * RDR register descriptions for PCX-U
 * Columns: width (bits), num_words, write_control.
 */
static const struct rdr_tbl_ent perf_rdr_tbl_U[] = {
	{ 19,	1,	8 },   /* RDR 0 */
	{ 32,	1,	16 },  /* RDR 1 */
	{ 20,	1,	0 },   /* RDR 2 */
	{ 0,	0,	0 },   /* RDR 3 */
	{ 344,	6,	0 },   /* RDR 4 */
	{ 176,	3,	0 },   /* RDR 5 */
	{ 336,	6,	0 },   /* RDR 6 */
	{ 0,	0,	0 },   /* RDR 7 */
	{ 0,	0,	0 },   /* RDR 8 */
	{ 0,	0,	0 },   /* RDR 9 */
	{ 28,	1,	0 },   /* RDR 10 */
	{ 33,	1,	0 },   /* RDR 11 */
	{ 0,	0,	0 },   /* RDR 12 */
	{ 230,	4,	0 },   /* RDR 13 */
	{ 32,	1,	0 },   /* RDR 14 */
	{ 128,	2,	0 },   /* RDR 15 */
	{ 1494,	24,	0 },   /* RDR 16 */
	{ 18,	1,	0 },   /* RDR 17 */
	{ 4,	1,	0 },   /* RDR 18 */
	{ 0,	0,	0 },   /* RDR 19 */
	{ 158,	3,	24 },  /* RDR 20 */
	{ 158,	3,	24 },  /* RDR 21 */
	{ 194,	4,	48 },  /* RDR 22 */
	{ 194,	4,	48 },  /* RDR 23 */
	{ 71,	2,	0 },   /* RDR 24 */
	{ 71,	2,	0 },   /* RDR 25 */
	{ 28,	1,	0 },   /* RDR 26 */
	{ 33,	1,	0 },   /* RDR 27 */
	{ 88,	2,	0 },   /* RDR 28 */
	{ 32,	1,	0 },   /* RDR 29 */
	{ 24,	1,	0 },   /* RDR 30 */
	{ 16,	1,	0 },   /* RDR 31 */
};

/*
 * A non-zero write_control in the above tables is a byte offset into
 * this array.
 *
 * With 8-byte entries the offsets used by the tables (8, 16, 24, 48)
 * select array indices 1, 2, 3 and 6 respectively. A set mask bit takes
 * the new image bit; a clear mask bit keeps the value read back from
 * the RDR.
 */
static const uint64_t perf_bitmasks[] = {
	0x0000000000000000ul,     /* first dbl word must be zero */
	0xfdffe00000000000ul,     /* RDR0 bitmask */
	0x003f000000000000ul,     /* RDR1 bitmask */
	0x00fffffffffffffful,     /* RDR20-RDR21 bitmask (152 bits) */
	0xfffffffffffffffful,
	0xfffffffc00000000ul,
	0xfffffffffffffffful,     /* RDR22-RDR23 bitmask (233 bits) */
	0xfffffffffffffffful,
	0xfffffffffffffffcul,
	0xff00000000000000ul
};

/*
 * Write control bitmasks for Pa-8700 processor given
 * some things have changed slightly.
162 */ 163 static const uint64_t perf_bitmasks_piranha[] = { 164 0x0000000000000000ul, /* first dbl word must be zero */ 165 0xfdffe00000000000ul, /* RDR0 bitmask */ 166 0x003f000000000000ul, /* RDR1 bitmask */ 167 0x00fffffffffffffful, /* RDR20-RDR21 bitmask (158 bits) */ 168 0xfffffffffffffffful, 169 0xfffffffc00000000ul, 170 0xfffffffffffffffful, /* RDR22-RDR23 bitmask (210 bits) */ 171 0xfffffffffffffffful, 172 0xfffffffffffffffful, 173 0xfffc000000000000ul 174 }; 175 176 static const uint64_t *bitmask_array; /* array of bitmasks to use */ 177 178 /****************************************************************************** 179 * Function Prototypes 180 *****************************************************************************/ 181 static int perf_config(uint32_t *image_ptr); 182 static int perf_release(struct inode *inode, struct file *file); 183 static int perf_open(struct inode *inode, struct file *file); 184 static ssize_t perf_read(struct file *file, char __user *buf, size_t cnt, loff_t *ppos); 185 static ssize_t perf_write(struct file *file, const char __user *buf, 186 size_t count, loff_t *ppos); 187 static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg); 188 static void perf_start_counters(void); 189 static int perf_stop_counters(uint32_t *raddr); 190 static const struct rdr_tbl_ent * perf_rdr_get_entry(uint32_t rdr_num); 191 static int perf_rdr_read_ubuf(uint32_t rdr_num, uint64_t *buffer); 192 static int perf_rdr_clear(uint32_t rdr_num); 193 static int perf_write_image(uint64_t *memaddr); 194 static void perf_rdr_write(uint32_t rdr_num, uint64_t *buffer); 195 196 /* External Assembly Routines */ 197 extern uint64_t perf_rdr_shift_in_W (uint32_t rdr_num, uint16_t width); 198 extern uint64_t perf_rdr_shift_in_U (uint32_t rdr_num, uint16_t width); 199 extern void perf_rdr_shift_out_W (uint32_t rdr_num, uint64_t buffer); 200 extern void perf_rdr_shift_out_U (uint32_t rdr_num, uint64_t buffer); 201 extern void 
perf_intrigue_enable_perf_counters (void); 202 extern void perf_intrigue_disable_perf_counters (void); 203 204 /****************************************************************************** 205 * Function Definitions 206 *****************************************************************************/ 207 208 209 /* 210 * configure: 211 * 212 * Configure the cpu with a given data image. First turn off the counters, 213 * then download the image, then turn the counters back on. 214 */ 215 static int perf_config(uint32_t *image_ptr) 216 { 217 long error; 218 uint32_t raddr[4]; 219 220 /* Stop the counters*/ 221 error = perf_stop_counters(raddr); 222 if (error != 0) { 223 printk("perf_config: perf_stop_counters = %ld\n", error); 224 return -EINVAL; 225 } 226 227 printk("Preparing to write image\n"); 228 /* Write the image to the chip */ 229 error = perf_write_image((uint64_t *)image_ptr); 230 if (error != 0) { 231 printk("perf_config: DOWNLOAD = %ld\n", error); 232 return -EINVAL; 233 } 234 235 printk("Preparing to start counters\n"); 236 237 /* Start the counters */ 238 perf_start_counters(); 239 240 return sizeof(uint32_t); 241 } 242 243 /* 244 * Open the device and initialize all of its memory. The device is only 245 * opened once, but can be "queried" by multiple processes that know its 246 * file descriptor. 247 */ 248 static int perf_open(struct inode *inode, struct file *file) 249 { 250 spin_lock(&perf_lock); 251 if (perf_enabled) { 252 spin_unlock(&perf_lock); 253 return -EBUSY; 254 } 255 perf_enabled = 1; 256 spin_unlock(&perf_lock); 257 258 return 0; 259 } 260 261 /* 262 * Close the device. 
263 */ 264 static int perf_release(struct inode *inode, struct file *file) 265 { 266 spin_lock(&perf_lock); 267 perf_enabled = 0; 268 spin_unlock(&perf_lock); 269 270 return 0; 271 } 272 273 /* 274 * Read does nothing for this driver 275 */ 276 static ssize_t perf_read(struct file *file, char __user *buf, size_t cnt, loff_t *ppos) 277 { 278 return 0; 279 } 280 281 /* 282 * write: 283 * 284 * This routine downloads the image to the chip. It must be 285 * called on the processor that the download should happen 286 * on. 287 */ 288 static ssize_t perf_write(struct file *file, const char __user *buf, 289 size_t count, loff_t *ppos) 290 { 291 size_t image_size __maybe_unused; 292 uint32_t image_type; 293 uint32_t interface_type; 294 uint32_t test; 295 296 if (perf_processor_interface == ONYX_INTF) 297 image_size = PCXU_IMAGE_SIZE; 298 else if (perf_processor_interface == CUDA_INTF) 299 image_size = PCXW_IMAGE_SIZE; 300 else 301 return -EFAULT; 302 303 if (!perfmon_capable()) 304 return -EACCES; 305 306 if (count != sizeof(uint32_t)) 307 return -EIO; 308 309 if (copy_from_user(&image_type, buf, sizeof(uint32_t))) 310 return -EFAULT; 311 312 /* Get the interface type and test type */ 313 interface_type = (image_type >> 16) & 0xffff; 314 test = (image_type & 0xffff); 315 316 /* Make sure everything makes sense */ 317 318 /* First check the machine type is correct for 319 the requested image */ 320 if (((perf_processor_interface == CUDA_INTF) && 321 (interface_type != CUDA_INTF)) || 322 ((perf_processor_interface == ONYX_INTF) && 323 (interface_type != ONYX_INTF))) 324 return -EINVAL; 325 326 /* Next check to make sure the requested image 327 is valid */ 328 if (((interface_type == CUDA_INTF) && 329 (test >= MAX_CUDA_IMAGES)) || 330 ((interface_type == ONYX_INTF) && 331 (test >= MAX_ONYX_IMAGES))) 332 return -EINVAL; 333 334 /* Copy the image into the processor */ 335 if (interface_type == CUDA_INTF) 336 return perf_config(cuda_images[test]); 337 else 338 return 
perf_config(onyx_images[test]); 339 340 return count; 341 } 342 343 /* 344 * Patch the images that need to know the IVA addresses. 345 */ 346 static void perf_patch_images(void) 347 { 348 #if 0 /* FIXME!! */ 349 /* 350 * NOTE: this routine is VERY specific to the current TLB image. 351 * If the image is changed, this routine might also need to be changed. 352 */ 353 extern void $i_itlb_miss_2_0(); 354 extern void $i_dtlb_miss_2_0(); 355 extern void PA2_0_iva(); 356 357 /* 358 * We can only use the lower 32-bits, the upper 32-bits should be 0 359 * anyway given this is in the kernel 360 */ 361 uint32_t itlb_addr = (uint32_t)&($i_itlb_miss_2_0); 362 uint32_t dtlb_addr = (uint32_t)&($i_dtlb_miss_2_0); 363 uint32_t IVAaddress = (uint32_t)&PA2_0_iva; 364 365 if (perf_processor_interface == ONYX_INTF) { 366 /* clear last 2 bytes */ 367 onyx_images[TLBMISS][15] &= 0xffffff00; 368 /* set 2 bytes */ 369 onyx_images[TLBMISS][15] |= (0x000000ff&((dtlb_addr) >> 24)); 370 onyx_images[TLBMISS][16] = (dtlb_addr << 8)&0xffffff00; 371 onyx_images[TLBMISS][17] = itlb_addr; 372 373 /* clear last 2 bytes */ 374 onyx_images[TLBHANDMISS][15] &= 0xffffff00; 375 /* set 2 bytes */ 376 onyx_images[TLBHANDMISS][15] |= (0x000000ff&((dtlb_addr) >> 24)); 377 onyx_images[TLBHANDMISS][16] = (dtlb_addr << 8)&0xffffff00; 378 onyx_images[TLBHANDMISS][17] = itlb_addr; 379 380 /* clear last 2 bytes */ 381 onyx_images[BIG_CPI][15] &= 0xffffff00; 382 /* set 2 bytes */ 383 onyx_images[BIG_CPI][15] |= (0x000000ff&((dtlb_addr) >> 24)); 384 onyx_images[BIG_CPI][16] = (dtlb_addr << 8)&0xffffff00; 385 onyx_images[BIG_CPI][17] = itlb_addr; 386 387 onyx_images[PANIC][15] &= 0xffffff00; /* clear last 2 bytes */ 388 onyx_images[PANIC][15] |= (0x000000ff&((IVAaddress) >> 24)); /* set 2 bytes */ 389 onyx_images[PANIC][16] = (IVAaddress << 8)&0xffffff00; 390 391 392 } else if (perf_processor_interface == CUDA_INTF) { 393 /* Cuda interface */ 394 cuda_images[TLBMISS][16] = 395 (cuda_images[TLBMISS][16]&0xffff0000) | 
396 ((dtlb_addr >> 8)&0x0000ffff); 397 cuda_images[TLBMISS][17] = 398 ((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff); 399 cuda_images[TLBMISS][18] = (itlb_addr << 16)&0xffff0000; 400 401 cuda_images[TLBHANDMISS][16] = 402 (cuda_images[TLBHANDMISS][16]&0xffff0000) | 403 ((dtlb_addr >> 8)&0x0000ffff); 404 cuda_images[TLBHANDMISS][17] = 405 ((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff); 406 cuda_images[TLBHANDMISS][18] = (itlb_addr << 16)&0xffff0000; 407 408 cuda_images[BIG_CPI][16] = 409 (cuda_images[BIG_CPI][16]&0xffff0000) | 410 ((dtlb_addr >> 8)&0x0000ffff); 411 cuda_images[BIG_CPI][17] = 412 ((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff); 413 cuda_images[BIG_CPI][18] = (itlb_addr << 16)&0xffff0000; 414 } else { 415 /* Unknown type */ 416 } 417 #endif 418 } 419 420 421 /* 422 * ioctl routine 423 * All routines effect the processor that they are executed on. Thus you 424 * must be running on the processor that you wish to change. 425 */ 426 427 static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 428 { 429 long error_start; 430 uint32_t raddr[4]; 431 int error = 0; 432 433 switch (cmd) { 434 435 case PA_PERF_ON: 436 /* Start the counters */ 437 perf_start_counters(); 438 break; 439 440 case PA_PERF_OFF: 441 error_start = perf_stop_counters(raddr); 442 if (error_start != 0) { 443 printk(KERN_ERR "perf_off: perf_stop_counters = %ld\n", error_start); 444 error = -EFAULT; 445 break; 446 } 447 448 /* copy out the Counters */ 449 if (copy_to_user((void __user *)arg, raddr, 450 sizeof (raddr)) != 0) { 451 error = -EFAULT; 452 break; 453 } 454 break; 455 456 case PA_PERF_VERSION: 457 /* Return the version # */ 458 error = put_user(PERF_VERSION, (int *)arg); 459 break; 460 461 default: 462 error = -ENOTTY; 463 } 464 465 return error; 466 } 467 468 static const struct file_operations perf_fops = { 469 .read = perf_read, 470 .write = perf_write, 471 .unlocked_ioctl = perf_ioctl, 472 .compat_ioctl = 
perf_ioctl, 473 .open = perf_open, 474 .release = perf_release 475 }; 476 477 static struct miscdevice perf_dev = { 478 MISC_DYNAMIC_MINOR, 479 PA_PERF_DEV, 480 &perf_fops 481 }; 482 483 /* 484 * Initialize the module 485 */ 486 static int __init perf_init(void) 487 { 488 int ret; 489 490 /* Determine correct processor interface to use */ 491 bitmask_array = perf_bitmasks; 492 493 if (boot_cpu_data.cpu_type == pcxu || 494 boot_cpu_data.cpu_type == pcxu_) { 495 perf_processor_interface = ONYX_INTF; 496 } else if (boot_cpu_data.cpu_type == pcxw || 497 boot_cpu_data.cpu_type == pcxw_ || 498 boot_cpu_data.cpu_type == pcxw2 || 499 boot_cpu_data.cpu_type == mako || 500 boot_cpu_data.cpu_type == mako2) { 501 perf_processor_interface = CUDA_INTF; 502 if (boot_cpu_data.cpu_type == pcxw2 || 503 boot_cpu_data.cpu_type == mako || 504 boot_cpu_data.cpu_type == mako2) 505 bitmask_array = perf_bitmasks_piranha; 506 } else { 507 perf_processor_interface = UNKNOWN_INTF; 508 printk("Performance monitoring counters not supported on this processor\n"); 509 return -ENODEV; 510 } 511 512 ret = misc_register(&perf_dev); 513 if (ret) { 514 printk(KERN_ERR "Performance monitoring counters: " 515 "cannot register misc device.\n"); 516 return ret; 517 } 518 519 /* Patch the images to match the system */ 520 perf_patch_images(); 521 522 /* TODO: this only lets us access the first cpu.. what to do for SMP? */ 523 cpu_device = per_cpu(cpu_data, 0).dev; 524 printk("Performance monitoring counters enabled for %s\n", 525 per_cpu(cpu_data, 0).dev->name); 526 527 return 0; 528 } 529 device_initcall(perf_init); 530 531 /* 532 * perf_start_counters(void) 533 * 534 * Start the counters. 535 */ 536 static void perf_start_counters(void) 537 { 538 /* Enable performance monitor counters */ 539 perf_intrigue_enable_perf_counters(); 540 } 541 542 /* 543 * perf_stop_counters 544 * 545 * Stop the performance counters and save counts 546 * in a per_processor array. 
547 */ 548 static int perf_stop_counters(uint32_t *raddr) 549 { 550 uint64_t userbuf[MAX_RDR_WORDS]; 551 552 /* Disable performance counters */ 553 perf_intrigue_disable_perf_counters(); 554 555 if (perf_processor_interface == ONYX_INTF) { 556 uint64_t tmp64; 557 /* 558 * Read the counters 559 */ 560 if (!perf_rdr_read_ubuf(16, userbuf)) 561 return -13; 562 563 /* Counter0 is bits 1398 to 1429 */ 564 tmp64 = (userbuf[21] << 22) & 0x00000000ffc00000; 565 tmp64 |= (userbuf[22] >> 42) & 0x00000000003fffff; 566 /* OR sticky0 (bit 1430) to counter0 bit 32 */ 567 tmp64 |= (userbuf[22] >> 10) & 0x0000000080000000; 568 raddr[0] = (uint32_t)tmp64; 569 570 /* Counter1 is bits 1431 to 1462 */ 571 tmp64 = (userbuf[22] >> 9) & 0x00000000ffffffff; 572 /* OR sticky1 (bit 1463) to counter1 bit 32 */ 573 tmp64 |= (userbuf[22] << 23) & 0x0000000080000000; 574 raddr[1] = (uint32_t)tmp64; 575 576 /* Counter2 is bits 1464 to 1495 */ 577 tmp64 = (userbuf[22] << 24) & 0x00000000ff000000; 578 tmp64 |= (userbuf[23] >> 40) & 0x0000000000ffffff; 579 /* OR sticky2 (bit 1496) to counter2 bit 32 */ 580 tmp64 |= (userbuf[23] >> 8) & 0x0000000080000000; 581 raddr[2] = (uint32_t)tmp64; 582 583 /* Counter3 is bits 1497 to 1528 */ 584 tmp64 = (userbuf[23] >> 7) & 0x00000000ffffffff; 585 /* OR sticky3 (bit 1529) to counter3 bit 32 */ 586 tmp64 |= (userbuf[23] << 25) & 0x0000000080000000; 587 raddr[3] = (uint32_t)tmp64; 588 589 /* 590 * Zero out the counters 591 */ 592 593 /* 594 * The counters and sticky-bits comprise the last 132 bits 595 * (1398 - 1529) of RDR16 on a U chip. We'll zero these 596 * out the easy way: zero out last 10 bits of dword 21, 597 * all of dword 22 and 58 bits (plus 6 don't care bits) of 598 * dword 23. 599 */ 600 userbuf[21] &= 0xfffffffffffffc00ul; /* 0 to last 10 bits */ 601 userbuf[22] = 0; 602 userbuf[23] = 0; 603 604 /* 605 * Write back the zeroed bytes + the image given 606 * the read was destructive. 
607 */ 608 perf_rdr_write(16, userbuf); 609 } else { 610 611 /* 612 * Read RDR-15 which contains the counters and sticky bits 613 */ 614 if (!perf_rdr_read_ubuf(15, userbuf)) { 615 return -13; 616 } 617 618 /* 619 * Clear out the counters 620 */ 621 perf_rdr_clear(15); 622 623 /* 624 * Copy the counters 625 */ 626 raddr[0] = (uint32_t)((userbuf[0] >> 32) & 0x00000000ffffffffUL); 627 raddr[1] = (uint32_t)(userbuf[0] & 0x00000000ffffffffUL); 628 raddr[2] = (uint32_t)((userbuf[1] >> 32) & 0x00000000ffffffffUL); 629 raddr[3] = (uint32_t)(userbuf[1] & 0x00000000ffffffffUL); 630 } 631 632 return 0; 633 } 634 635 /* 636 * perf_rdr_get_entry 637 * 638 * Retrieve a pointer to the description of what this 639 * RDR contains. 640 */ 641 static const struct rdr_tbl_ent * perf_rdr_get_entry(uint32_t rdr_num) 642 { 643 if (perf_processor_interface == ONYX_INTF) { 644 return &perf_rdr_tbl_U[rdr_num]; 645 } else { 646 return &perf_rdr_tbl_W[rdr_num]; 647 } 648 } 649 650 /* 651 * perf_rdr_read_ubuf 652 * 653 * Read the RDR value into the buffer specified. 
654 */ 655 static int perf_rdr_read_ubuf(uint32_t rdr_num, uint64_t *buffer) 656 { 657 uint64_t data, data_mask = 0; 658 uint32_t width, xbits, i; 659 const struct rdr_tbl_ent *tentry; 660 661 tentry = perf_rdr_get_entry(rdr_num); 662 if ((width = tentry->width) == 0) 663 return 0; 664 665 /* Clear out buffer */ 666 i = tentry->num_words; 667 while (i--) { 668 buffer[i] = 0; 669 } 670 671 /* Check for bits an even number of 64 */ 672 if ((xbits = width & 0x03f) != 0) { 673 data_mask = 1; 674 data_mask <<= (64 - xbits); 675 data_mask--; 676 } 677 678 /* Grab all of the data */ 679 i = tentry->num_words; 680 while (i--) { 681 682 if (perf_processor_interface == ONYX_INTF) { 683 data = perf_rdr_shift_in_U(rdr_num, width); 684 } else { 685 data = perf_rdr_shift_in_W(rdr_num, width); 686 } 687 if (xbits) { 688 buffer[i] |= (data << (64 - xbits)); 689 if (i) { 690 buffer[i-1] |= ((data >> xbits) & data_mask); 691 } 692 } else { 693 buffer[i] = data; 694 } 695 } 696 697 return 1; 698 } 699 700 /* 701 * perf_rdr_clear 702 * 703 * Zero out the given RDR register 704 */ 705 static int perf_rdr_clear(uint32_t rdr_num) 706 { 707 const struct rdr_tbl_ent *tentry; 708 int32_t i; 709 710 tentry = perf_rdr_get_entry(rdr_num); 711 712 if (tentry->width == 0) { 713 return -1; 714 } 715 716 i = tentry->num_words; 717 while (i--) { 718 if (perf_processor_interface == ONYX_INTF) { 719 perf_rdr_shift_out_U(rdr_num, 0UL); 720 } else { 721 perf_rdr_shift_out_W(rdr_num, 0UL); 722 } 723 } 724 725 return 0; 726 } 727 728 729 /* 730 * perf_write_image 731 * 732 * Write the given image out to the processor 733 */ 734 static int perf_write_image(uint64_t *memaddr) 735 { 736 uint64_t buffer[MAX_RDR_WORDS]; 737 uint64_t *bptr; 738 uint32_t dwords; 739 const uint32_t *intrigue_rdr; 740 const uint64_t *intrigue_bitmask; 741 uint64_t tmp64; 742 void __iomem *runway; 743 const struct rdr_tbl_ent *tentry; 744 int i; 745 746 /* Clear out counters */ 747 if (perf_processor_interface == ONYX_INTF) { 748 
749 perf_rdr_clear(16); 750 751 /* Toggle performance monitor */ 752 perf_intrigue_enable_perf_counters(); 753 perf_intrigue_disable_perf_counters(); 754 755 intrigue_rdr = perf_rdrs_U; 756 } else { 757 perf_rdr_clear(15); 758 intrigue_rdr = perf_rdrs_W; 759 } 760 761 /* Write all RDRs */ 762 while (*intrigue_rdr != -1) { 763 tentry = perf_rdr_get_entry(*intrigue_rdr); 764 perf_rdr_read_ubuf(*intrigue_rdr, buffer); 765 bptr = &buffer[0]; 766 dwords = tentry->num_words; 767 if (tentry->write_control) { 768 intrigue_bitmask = &bitmask_array[tentry->write_control >> 3]; 769 while (dwords--) { 770 tmp64 = *intrigue_bitmask & *memaddr++; 771 tmp64 |= (~(*intrigue_bitmask++)) & *bptr; 772 *bptr++ = tmp64; 773 } 774 } else { 775 while (dwords--) { 776 *bptr++ = *memaddr++; 777 } 778 } 779 780 perf_rdr_write(*intrigue_rdr, buffer); 781 intrigue_rdr++; 782 } 783 784 /* 785 * Now copy out the Runway stuff which is not in RDRs 786 */ 787 788 if (cpu_device == NULL) 789 { 790 printk(KERN_ERR "write_image: cpu_device not yet initialized!\n"); 791 return -1; 792 } 793 794 runway = ioremap(cpu_device->hpa.start, 4096); 795 if (!runway) { 796 pr_err("perf_write_image: ioremap failed!\n"); 797 return -ENOMEM; 798 } 799 800 /* Merge intrigue bits into Runway STATUS 0 */ 801 tmp64 = __raw_readq(runway + RUNWAY_STATUS) & 0xffecfffffffffffful; 802 __raw_writeq(tmp64 | (*memaddr++ & 0x0013000000000000ul), 803 runway + RUNWAY_STATUS); 804 805 /* Write RUNWAY DEBUG registers */ 806 for (i = 0; i < 8; i++) { 807 __raw_writeq(*memaddr++, runway + RUNWAY_DEBUG); 808 } 809 810 return 0; 811 } 812 813 /* 814 * perf_rdr_write 815 * 816 * Write the given RDR register with the contents 817 * of the given buffer. 
818 */ 819 static void perf_rdr_write(uint32_t rdr_num, uint64_t *buffer) 820 { 821 const struct rdr_tbl_ent *tentry; 822 int32_t i; 823 824 printk("perf_rdr_write\n"); 825 tentry = perf_rdr_get_entry(rdr_num); 826 if (tentry->width == 0) { return; } 827 828 i = tentry->num_words; 829 while (i--) { 830 if (perf_processor_interface == ONYX_INTF) { 831 perf_rdr_shift_out_U(rdr_num, buffer[i]); 832 } else { 833 perf_rdr_shift_out_W(rdr_num, buffer[i]); 834 } 835 } 836 printk("perf_rdr_write done\n"); 837 } 838