1 /* 2 * turbostat -- show CPU frequency and C-state residency 3 * on modern Intel turbo-capable processors. 4 * 5 * Copyright (c) 2012 Intel Corporation. 6 * Len Brown <len.brown@intel.com> 7 * 8 * This program is free software; you can redistribute it and/or modify it 9 * under the terms and conditions of the GNU General Public License, 10 * version 2, as published by the Free Software Foundation. 11 * 12 * This program is distributed in the hope it will be useful, but WITHOUT 13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 15 * more details. 16 * 17 * You should have received a copy of the GNU General Public License along with 18 * this program; if not, write to the Free Software Foundation, Inc., 19 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 20 */ 21 22 #define _GNU_SOURCE 23 #include <stdio.h> 24 #include <unistd.h> 25 #include <sys/types.h> 26 #include <sys/wait.h> 27 #include <sys/stat.h> 28 #include <sys/resource.h> 29 #include <fcntl.h> 30 #include <signal.h> 31 #include <sys/time.h> 32 #include <stdlib.h> 33 #include <dirent.h> 34 #include <string.h> 35 #include <ctype.h> 36 #include <sched.h> 37 38 #define MSR_NEHALEM_PLATFORM_INFO 0xCE 39 #define MSR_NEHALEM_TURBO_RATIO_LIMIT 0x1AD 40 #define MSR_IVT_TURBO_RATIO_LIMIT 0x1AE 41 #define MSR_APERF 0xE8 42 #define MSR_MPERF 0xE7 43 #define MSR_PKG_C2_RESIDENCY 0x60D /* SNB only */ 44 #define MSR_PKG_C3_RESIDENCY 0x3F8 45 #define MSR_PKG_C6_RESIDENCY 0x3F9 46 #define MSR_PKG_C7_RESIDENCY 0x3FA /* SNB only */ 47 #define MSR_CORE_C3_RESIDENCY 0x3FC 48 #define MSR_CORE_C6_RESIDENCY 0x3FD 49 #define MSR_CORE_C7_RESIDENCY 0x3FE /* SNB only */ 50 51 char *proc_stat = "/proc/stat"; 52 unsigned int interval_sec = 5; /* set with -i interval_sec */ 53 unsigned int verbose; /* set with -v */ 54 unsigned int summary_only; /* set with -s */ 55 unsigned int skip_c0; 56 unsigned int skip_c1; 57 unsigned int do_nhm_cstates; 58 unsigned int do_snb_cstates; 59 unsigned int has_aperf; 60 unsigned int units = 1000000000; /* Ghz etc */ 61 unsigned int genuine_intel; 62 unsigned int has_invariant_tsc; 63 unsigned int do_nehalem_platform_info; 64 unsigned int do_nehalem_turbo_ratio_limit; 65 unsigned int do_ivt_turbo_ratio_limit; 66 unsigned int extra_msr_offset32; 67 unsigned int extra_msr_offset64; 68 unsigned int extra_delta_offset32; 69 unsigned int extra_delta_offset64; 70 double bclk; 71 unsigned int show_pkg; 72 unsigned int show_core; 73 unsigned int show_cpu; 74 unsigned int show_pkg_only; 75 unsigned int show_core_only; 76 char *output_buffer, *outp; 77 78 int aperf_mperf_unstable; 79 int backwards_count; 80 char *progname; 81 82 cpu_set_t *cpu_present_set, *cpu_affinity_set; 83 size_t cpu_present_setsize, cpu_affinity_setsize; 84 85 struct thread_data { 86 unsigned long long tsc; 87 unsigned long long aperf; 88 unsigned long long mperf; 89 unsigned long long c1; /* derived */ 90 unsigned long long extra_msr64; 91 unsigned long long extra_delta64; 92 unsigned long long extra_msr32; 93 unsigned long long extra_delta32; 94 unsigned int cpu_id; 95 unsigned int flags; 96 #define CPU_IS_FIRST_THREAD_IN_CORE 0x2 97 #define CPU_IS_FIRST_CORE_IN_PACKAGE 0x4 98 } *thread_even, *thread_odd; 99 100 struct core_data { 101 unsigned long long c3; 102 unsigned long long c6; 103 unsigned long long c7; 104 unsigned int core_id; 105 } *core_even, *core_odd; 106 107 struct pkg_data { 108 unsigned long long pc2; 109 unsigned long long pc3; 110 unsigned long long pc6; 111 unsigned long long pc7; 112 unsigned int package_id; 113 } *package_even, *package_odd; 114 115 #define ODD_COUNTERS thread_odd, core_odd, package_odd 116 #define EVEN_COUNTERS thread_even, core_even, package_even 117 118 #define GET_THREAD(thread_base, thread_no, core_no, pkg_no) \ 119 (thread_base + (pkg_no) * topo.num_cores_per_pkg * \ 120 topo.num_threads_per_core + \ 121 (core_no) * topo.num_threads_per_core + (thread_no)) 122 #define GET_CORE(core_base, core_no, pkg_no) \ 123 (core_base + (pkg_no) * topo.num_cores_per_pkg + (core_no)) 124 #define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no) 125 126 struct system_summary { 127 struct thread_data threads; 128 struct core_data cores; 129 struct pkg_data packages; 130 } sum, average; 131 132 133 struct topo_params { 134 int num_packages; 135 int num_cpus; 136 int num_cores; 137 int max_cpu_num; 138 int num_cores_per_pkg; 139 int num_threads_per_core; 140 } topo; 141 142 struct timeval tv_even, tv_odd, tv_delta; 143 144 void setup_all_buffers(void); 145 146 int cpu_is_not_present(int cpu) 147 { 148 return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set); 149 } 150 /* 151 * run func(thread, core, package) in topology order 152 * skip non-present cpus 153 */ 154 155 int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *), 156 struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base) 157 { 158 int retval, pkg_no, core_no, thread_no; 159 160 for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { 161 for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) { 162 for (thread_no = 0; thread_no < 163 topo.num_threads_per_core; ++thread_no) { 164 struct thread_data *t; 165 struct core_data *c; 166 struct pkg_data *p; 167 168 t = GET_THREAD(thread_base, thread_no, core_no, pkg_no); 169 170 if (cpu_is_not_present(t->cpu_id)) 171 continue; 172 173 c = GET_CORE(core_base, core_no, pkg_no); 174 p = GET_PKG(pkg_base, pkg_no); 175 176 retval = func(t, c, p); 177 if (retval) 178 return retval; 179 } 180 } 181 } 182 return 0; 183 } 184 185 int cpu_migrate(int cpu) 186 { 187 CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); 188 CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set); 189 if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1) 190 return -1; 191 else 192 return 0; 193 } 194 195 int get_msr(int cpu, off_t offset, unsigned long long *msr) 196 { 197 ssize_t retval; 198 char pathname[32]; 199 int fd; 200 201 sprintf(pathname, "/dev/cpu/%d/msr", cpu); 202 fd = open(pathname, O_RDONLY); 203 if (fd < 0) 204 return -1; 205 206 retval = pread(fd, msr, sizeof *msr, offset); 207 close(fd); 208 209 if (retval != sizeof *msr) { 210 fprintf(stderr, "%s offset 0x%zx read failed\n", pathname, offset); 211 return -1; 212 } 213 214 return 0; 215 } 216 217 void print_header(void) 218 { 219 if (show_pkg) 220 outp += sprintf(outp, "pk"); 221 if (show_pkg) 222 outp += sprintf(outp, " "); 223 if (show_core) 224 outp += sprintf(outp, "cor"); 225 if (show_cpu) 226 outp += sprintf(outp, " CPU"); 227 if (show_pkg || show_core || show_cpu) 228 outp += sprintf(outp, " "); 229 if (do_nhm_cstates) 230 outp += sprintf(outp, " %%c0"); 231 if (has_aperf) 232 outp += sprintf(outp, " GHz"); 233 outp += sprintf(outp, " TSC"); 234 if (extra_delta_offset32) 235 outp += sprintf(outp, " count 0x%03X", extra_delta_offset32); 236 if (extra_delta_offset64) 237 outp += sprintf(outp, " COUNT 0x%03X", extra_delta_offset64); 238 if (extra_msr_offset32) 239 outp += sprintf(outp, " MSR 0x%03X", extra_msr_offset32); 240 if (extra_msr_offset64) 241 outp += sprintf(outp, " MSR 0x%03X", extra_msr_offset64); 242 if (do_nhm_cstates) 243 outp += sprintf(outp, " %%c1"); 244 if (do_nhm_cstates) 245 outp += sprintf(outp, " %%c3"); 246 if (do_nhm_cstates) 247 outp += sprintf(outp, " %%c6"); 248 if (do_snb_cstates) 249 outp += sprintf(outp, " %%c7"); 250 if (do_snb_cstates) 251 outp += sprintf(outp, " %%pc2"); 252 if (do_nhm_cstates) 253 outp += sprintf(outp, " %%pc3"); 254 if (do_nhm_cstates) 255 outp += sprintf(outp, " %%pc6"); 256 if (do_snb_cstates) 257 outp += sprintf(outp, " %%pc7"); 258 259 outp += sprintf(outp, "\n"); 260 } 261 262 int dump_counters(struct thread_data *t, struct core_data *c, 263 struct pkg_data *p) 264 { 265 fprintf(stderr, "t %p, c %p, p %p\n", t, c, p); 266 267 if (t) { 268 fprintf(stderr, "CPU: %d flags 0x%x\n", t->cpu_id, t->flags); 269 fprintf(stderr, "TSC: %016llX\n", t->tsc); 270 fprintf(stderr, "aperf: %016llX\n", t->aperf); 271 fprintf(stderr, "mperf: %016llX\n", t->mperf); 272 fprintf(stderr, "c1: %016llX\n", t->c1); 273 fprintf(stderr, "msr0x%x: %08llX\n", 274 extra_delta_offset32, t->extra_delta32); 275 fprintf(stderr, "msr0x%x: %016llX\n", 276 extra_delta_offset64, t->extra_delta64); 277 fprintf(stderr, "msr0x%x: %08llX\n", 278 extra_msr_offset32, t->extra_msr32); 279 fprintf(stderr, "msr0x%x: %016llX\n", 280 extra_msr_offset64, t->extra_msr64); 281 } 282 283 if (c) { 284 fprintf(stderr, "core: %d\n", c->core_id); 285 fprintf(stderr, "c3: %016llX\n", c->c3); 286 fprintf(stderr, "c6: %016llX\n", c->c6); 287 fprintf(stderr, "c7: %016llX\n", c->c7); 288 } 289 290 if (p) { 291 fprintf(stderr, "package: %d\n", p->package_id); 292 fprintf(stderr, "pc2: %016llX\n", p->pc2); 293 fprintf(stderr, "pc3: %016llX\n", p->pc3); 294 fprintf(stderr, "pc6: %016llX\n", p->pc6); 295 fprintf(stderr, "pc7: %016llX\n", p->pc7); 296 } 297 return 0; 298 } 299 300 /* 301 * column formatting convention & formats 302 * package: "pk" 2 columns %2d 303 * core: "cor" 3 columns %3d 304 * CPU: "CPU" 3 columns %3d 305 * GHz: "GHz" 3 columns %3.2 306 * TSC: "TSC" 3 columns %3.2 307 * percentage " %pc3" %6.2 308 */ 309 int format_counters(struct thread_data *t, struct core_data *c, 310 struct pkg_data *p) 311 { 312 double interval_float; 313 314 /* if showing only 1st thread in core and this isn't one, bail out */ 315 if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) 316 return 0; 317 318 /* if showing only 1st thread in pkg and this isn't one, bail out */ 319 if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) 320 return 0; 321 322 interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0; 323 324 /* topo columns, print blanks on 1st (average) line */ 325 if (t == &average.threads) { 326 if (show_pkg) 327 outp += sprintf(outp, " "); 328 if (show_pkg && show_core) 329 outp += sprintf(outp, " "); 330 if (show_core) 331 outp += sprintf(outp, " "); 332 if (show_cpu) 333 outp += sprintf(outp, " " " "); 334 } else { 335 if (show_pkg) { 336 if (p) 337 outp += sprintf(outp, "%2d", p->package_id); 338 else 339 outp += sprintf(outp, " "); 340 } 341 if (show_pkg && show_core) 342 outp += sprintf(outp, " "); 343 if (show_core) { 344 if (c) 345 outp += sprintf(outp, "%3d", c->core_id); 346 else 347 outp += sprintf(outp, " "); 348 } 349 if (show_cpu) 350 outp += sprintf(outp, " %3d", t->cpu_id); 351 } 352 353 /* %c0 */ 354 if (do_nhm_cstates) { 355 if (show_pkg || show_core || show_cpu) 356 outp += sprintf(outp, " "); 357 if (!skip_c0) 358 outp += sprintf(outp, "%6.2f", 100.0 * t->mperf/t->tsc); 359 else 360 outp += sprintf(outp, " ****"); 361 } 362 363 /* GHz */ 364 if (has_aperf) { 365 if (!aperf_mperf_unstable) { 366 outp += sprintf(outp, " %3.2f", 367 1.0 * t->tsc / units * t->aperf / 368 t->mperf / interval_float); 369 } else { 370 if (t->aperf > t->tsc || t->mperf > t->tsc) { 371 outp += sprintf(outp, " ***"); 372 } else { 373 outp += sprintf(outp, "%3.1f*", 374 1.0 * t->tsc / 375 units * t->aperf / 376 t->mperf / interval_float); 377 } 378 } 379 } 380 381 /* TSC */ 382 outp += sprintf(outp, "%5.2f", 1.0 * t->tsc/units/interval_float); 383 384 /* delta */ 385 if (extra_delta_offset32) 386 outp += sprintf(outp, " %11llu", t->extra_delta32); 387 388 /* DELTA */ 389 if (extra_delta_offset64) 390 outp += sprintf(outp, " %11llu", t->extra_delta64); 391 /* msr */ 392 if (extra_msr_offset32) 393 outp += sprintf(outp, " 0x%08llx", t->extra_msr32); 394 395 /* MSR */ 396 if (extra_msr_offset64) 397 outp += sprintf(outp, " 0x%016llx", t->extra_msr64); 398 399 if (do_nhm_cstates) { 400 if (!skip_c1) 401 outp += sprintf(outp, " %6.2f", 100.0 * t->c1/t->tsc); 402 else 403 outp += sprintf(outp, " ****"); 404 } 405 406 /* print per-core data only for 1st thread in core */ 407 if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) 408 goto done; 409 410 if (do_nhm_cstates) 411 outp += sprintf(outp, " %6.2f", 100.0 * c->c3/t->tsc); 412 if (do_nhm_cstates) 413 outp += sprintf(outp, " %6.2f", 100.0 * c->c6/t->tsc); 414 if (do_snb_cstates) 415 outp += sprintf(outp, " %6.2f", 100.0 * c->c7/t->tsc); 416 417 /* print per-package data only for 1st core in package */ 418 if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) 419 goto done; 420 421 if (do_snb_cstates) 422 outp += sprintf(outp, " %6.2f", 100.0 * p->pc2/t->tsc); 423 if (do_nhm_cstates) 424 outp += sprintf(outp, " %6.2f", 100.0 * p->pc3/t->tsc); 425 if (do_nhm_cstates) 426 outp += sprintf(outp, " %6.2f", 100.0 * p->pc6/t->tsc); 427 if (do_snb_cstates) 428 outp += sprintf(outp, " %6.2f", 100.0 * p->pc7/t->tsc); 429 done: 430 outp += sprintf(outp, "\n"); 431 432 return 0; 433 } 434 435 void flush_stdout() 436 { 437 fputs(output_buffer, stdout); 438 outp = output_buffer; 439 } 440 void flush_stderr() 441 { 442 fputs(output_buffer, stderr); 443 outp = output_buffer; 444 } 445 void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 446 { 447 static int printed; 448 449 if (!printed || !summary_only) 450 print_header(); 451 452 if (topo.num_cpus > 1) 453 format_counters(&average.threads, &average.cores, 454 &average.packages); 455 456 printed = 1; 457 458 if (summary_only) 459 return; 460 461 for_all_cpus(format_counters, t, c, p); 462 } 463 464 void 465 delta_package(struct pkg_data *new, struct pkg_data *old) 466 { 467 old->pc2 = new->pc2 - old->pc2; 468 old->pc3 = new->pc3 - old->pc3; 469 old->pc6 = new->pc6 - old->pc6; 470 old->pc7 = new->pc7 - old->pc7; 471 } 472 473 void 474 delta_core(struct core_data *new, struct core_data *old) 475 { 476 old->c3 = new->c3 - old->c3; 477 old->c6 = new->c6 - old->c6; 478 old->c7 = new->c7 - old->c7; 479 } 480 481 /* 482 * old = new - old 483 */ 484 void 485 delta_thread(struct thread_data *new, struct thread_data *old, 486 struct core_data *core_delta) 487 { 488 old->tsc = new->tsc - old->tsc; 489 490 /* check for TSC < 1 Mcycles over interval */ 491 if (old->tsc < (1000 * 1000)) { 492 fprintf(stderr, "Insanely slow TSC rate, TSC stops in idle?\n"); 493 fprintf(stderr, "You can disable all c-states by booting with \"idle=poll\"\n"); 494 fprintf(stderr, "or just the deep ones with \"processor.max_cstate=1\"\n"); 495 exit(-3); 496 } 497 498 old->c1 = new->c1 - old->c1; 499 500 if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) { 501 old->aperf = new->aperf - old->aperf; 502 old->mperf = new->mperf - old->mperf; 503 } else { 504 505 if (!aperf_mperf_unstable) { 506 fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname); 507 fprintf(stderr, "* Frequency results do not cover entire interval *\n"); 508 fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n"); 509 510 aperf_mperf_unstable = 1; 511 } 512 /* 513 * mperf delta is likely a huge "positive" number 514 * can not use it for calculating c0 time 515 */ 516 skip_c0 = 1; 517 skip_c1 = 1; 518 } 519 520 521 /* 522 * As counter collection is not atomic, 523 * it is possible for mperf's non-halted cycles + idle states 524 * to exceed TSC's all cycles: show c1 = 0% in that case. 525 */ 526 if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > old->tsc) 527 old->c1 = 0; 528 else { 529 /* normal case, derive c1 */ 530 old->c1 = old->tsc - old->mperf - core_delta->c3 531 - core_delta->c6 - core_delta->c7; 532 } 533 534 if (old->mperf == 0) { 535 if (verbose > 1) fprintf(stderr, "cpu%d MPERF 0!\n", old->cpu_id); 536 old->mperf = 1; /* divide by 0 protection */ 537 } 538 539 old->extra_delta32 = new->extra_delta32 - old->extra_delta32; 540 old->extra_delta32 &= 0xFFFFFFFF; 541 542 old->extra_delta64 = new->extra_delta64 - old->extra_delta64; 543 544 /* 545 * Extra MSR is just a snapshot, simply copy latest w/o subtracting 546 */ 547 old->extra_msr32 = new->extra_msr32; 548 old->extra_msr64 = new->extra_msr64; 549 } 550 551 int delta_cpu(struct thread_data *t, struct core_data *c, 552 struct pkg_data *p, struct thread_data *t2, 553 struct core_data *c2, struct pkg_data *p2) 554 { 555 /* calculate core delta only for 1st thread in core */ 556 if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE) 557 delta_core(c, c2); 558 559 /* always calculate thread delta */ 560 delta_thread(t, t2, c2); /* c2 is core delta */ 561 562 /* calculate package delta only for 1st core in package */ 563 if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE) 564 delta_package(p, p2); 565 566 return 0; 567 } 568 569 void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 570 { 571 t->tsc = 0; 572 t->aperf = 0; 573 t->mperf = 0; 574 t->c1 = 0; 575 576 t->extra_delta32 = 0; 577 t->extra_delta64 = 0; 578 579 /* tells format_counters to dump all fields from this set */ 580 t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE; 581 582 c->c3 = 0; 583 c->c6 = 0; 584 c->c7 = 0; 585 586 p->pc2 = 0; 587 p->pc3 = 0; 588 p->pc6 = 0; 589 p->pc7 = 0; 590 } 591 int sum_counters(struct thread_data *t, struct core_data *c, 592 struct pkg_data *p) 593 { 594 average.threads.tsc += t->tsc; 595 average.threads.aperf += t->aperf; 596 average.threads.mperf += t->mperf; 597 average.threads.c1 += t->c1; 598 599 average.threads.extra_delta32 += t->extra_delta32; 600 average.threads.extra_delta64 += t->extra_delta64; 601 602 /* sum per-core values only for 1st thread in core */ 603 if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) 604 return 0; 605 606 average.cores.c3 += c->c3; 607 average.cores.c6 += c->c6; 608 average.cores.c7 += c->c7; 609 610 /* sum per-pkg values only for 1st core in pkg */ 611 if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) 612 return 0; 613 614 average.packages.pc2 += p->pc2; 615 average.packages.pc3 += p->pc3; 616 average.packages.pc6 += p->pc6; 617 average.packages.pc7 += p->pc7; 618 619 return 0; 620 } 621 /* 622 * sum the counters for all cpus in the system 623 * compute the weighted average 624 */ 625 void compute_average(struct thread_data *t, struct core_data *c, 626 struct pkg_data *p) 627 { 628 clear_counters(&average.threads, &average.cores, &average.packages); 629 630 for_all_cpus(sum_counters, t, c, p); 631 632 average.threads.tsc /= topo.num_cpus; 633 average.threads.aperf /= topo.num_cpus; 634 average.threads.mperf /= topo.num_cpus; 635 average.threads.c1 /= topo.num_cpus; 636 637 average.threads.extra_delta32 /= topo.num_cpus; 638 average.threads.extra_delta32 &= 0xFFFFFFFF; 639 640 average.threads.extra_delta64 /= topo.num_cpus; 641 642 average.cores.c3 /= topo.num_cores; 643 average.cores.c6 /= topo.num_cores; 644 average.cores.c7 /= topo.num_cores; 645 646 average.packages.pc2 /= topo.num_packages; 647 average.packages.pc3 /= topo.num_packages; 648 average.packages.pc6 /= topo.num_packages; 649 average.packages.pc7 /= topo.num_packages; 650 } 651 652 static unsigned long long rdtsc(void) 653 { 654 unsigned int low, high; 655 656 asm volatile("rdtsc" : "=a" (low), "=d" (high)); 657 658 return low | ((unsigned long long)high) << 32; 659 } 660 661 662 /* 663 * get_counters(...) 664 * migrate to cpu 665 * acquire and record local counters for that cpu 666 */ 667 int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 668 { 669 int cpu = t->cpu_id; 670 671 if (cpu_migrate(cpu)) 672 return -1; 673 674 t->tsc = rdtsc(); /* we are running on local CPU of interest */ 675 676 if (has_aperf) { 677 if (get_msr(cpu, MSR_APERF, &t->aperf)) 678 return -3; 679 if (get_msr(cpu, MSR_MPERF, &t->mperf)) 680 return -4; 681 } 682 683 if (extra_delta_offset32) { 684 if (get_msr(cpu, extra_delta_offset32, &t->extra_delta32)) 685 return -5; 686 t->extra_delta32 &= 0xFFFFFFFF; 687 } 688 689 if (extra_delta_offset64) 690 if (get_msr(cpu, extra_delta_offset64, &t->extra_delta64)) 691 return -5; 692 693 if (extra_msr_offset32) { 694 if (get_msr(cpu, extra_msr_offset32, &t->extra_msr32)) 695 return -5; 696 t->extra_msr32 &= 0xFFFFFFFF; 697 } 698 699 if (extra_msr_offset64) 700 if (get_msr(cpu, extra_msr_offset64, &t->extra_msr64)) 701 return -5; 702 703 /* collect core counters only for 1st thread in core */ 704 if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) 705 return 0; 706 707 if (do_nhm_cstates) { 708 if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3)) 709 return -6; 710 if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6)) 711 return -7; 712 } 713 714 if (do_snb_cstates) 715 if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7)) 716 return -8; 717 718 /* collect package counters only for 1st core in package */ 719 if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) 720 return 0; 721 722 if (do_nhm_cstates) { 723 if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3)) 724 return -9; 725 if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6)) 726 return -10; 727 } 728 if (do_snb_cstates) { 729 if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2)) 730 return -11; 731 if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7)) 732 return -12; 733 } 734 return 0; 735 } 736 737 void print_verbose_header(void) 738 { 739 unsigned long long msr; 740 unsigned int ratio; 741 742 if (!do_nehalem_platform_info) 743 return; 744 745 get_msr(0, MSR_NEHALEM_PLATFORM_INFO, &msr); 746 747 if (verbose > 1) 748 fprintf(stderr, "MSR_NEHALEM_PLATFORM_INFO: 0x%llx\n", msr); 749 750 ratio = (msr >> 40) & 0xFF; 751 fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n", 752 ratio, bclk, ratio * bclk); 753 754 ratio = (msr >> 8) & 0xFF; 755 fprintf(stderr, "%d * %.0f = %.0f MHz TSC frequency\n", 756 ratio, bclk, ratio * bclk); 757 758 if (!do_ivt_turbo_ratio_limit) 759 goto print_nhm_turbo_ratio_limits; 760 761 get_msr(0, MSR_IVT_TURBO_RATIO_LIMIT, &msr); 762 763 if (verbose > 1) 764 fprintf(stderr, "MSR_IVT_TURBO_RATIO_LIMIT: 0x%llx\n", msr); 765 766 ratio = (msr >> 56) & 0xFF; 767 if (ratio) 768 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 16 active cores\n", 769 ratio, bclk, ratio * bclk); 770 771 ratio = (msr >> 48) & 0xFF; 772 if (ratio) 773 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 15 active cores\n", 774 ratio, bclk, ratio * bclk); 775 776 ratio = (msr >> 40) & 0xFF; 777 if (ratio) 778 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 14 active cores\n", 779 ratio, bclk, ratio * bclk); 780 781 ratio = (msr >> 32) & 0xFF; 782 if (ratio) 783 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 13 active cores\n", 784 ratio, bclk, ratio * bclk); 785 786 ratio = (msr >> 24) & 0xFF; 787 if (ratio) 788 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 12 active cores\n", 789 ratio, bclk, ratio * bclk); 790 791 ratio = (msr >> 16) & 0xFF; 792 if (ratio) 793 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 11 active cores\n", 794 ratio, bclk, ratio * bclk); 795 796 ratio = (msr >> 8) & 0xFF; 797 if (ratio) 798 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 10 active cores\n", 799 ratio, bclk, ratio * bclk); 800 801 ratio = (msr >> 0) & 0xFF; 802 if (ratio) 803 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 9 active cores\n", 804 ratio, bclk, ratio * bclk); 805 806 print_nhm_turbo_ratio_limits: 807 808 if (!do_nehalem_turbo_ratio_limit) 809 return; 810 811 get_msr(0, MSR_NEHALEM_TURBO_RATIO_LIMIT, &msr); 812 813 if (verbose > 1) 814 fprintf(stderr, "MSR_NEHALEM_TURBO_RATIO_LIMIT: 0x%llx\n", msr); 815 816 ratio = (msr >> 56) & 0xFF; 817 if (ratio) 818 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 8 active cores\n", 819 ratio, bclk, ratio * bclk); 820 821 ratio = (msr >> 48) & 0xFF; 822 if (ratio) 823 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 7 active cores\n", 824 ratio, bclk, ratio * bclk); 825 826 ratio = (msr >> 40) & 0xFF; 827 if (ratio) 828 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 6 active cores\n", 829 ratio, bclk, ratio * bclk); 830 831 ratio = (msr >> 32) & 0xFF; 832 if (ratio) 833 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 5 active cores\n", 834 ratio, bclk, ratio * bclk); 835 836 ratio = (msr >> 24) & 0xFF; 837 if (ratio) 838 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 4 active cores\n", 839 ratio, bclk, ratio * bclk); 840 841 ratio = (msr >> 16) & 0xFF; 842 if (ratio) 843 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 3 active cores\n", 844 ratio, bclk, ratio * bclk); 845 846 ratio = (msr >> 8) & 0xFF; 847 if (ratio) 848 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 2 active cores\n", 849 ratio, bclk, ratio * bclk); 850 851 ratio = (msr >> 0) & 0xFF; 852 if (ratio) 853 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 1 active cores\n", 854 ratio, bclk, ratio * bclk); 855 } 856 857 void free_all_buffers(void) 858 { 859 CPU_FREE(cpu_present_set); 860 cpu_present_set = NULL; 861 cpu_present_set = 0; 862 863 CPU_FREE(cpu_affinity_set); 864 cpu_affinity_set = NULL; 865 cpu_affinity_setsize = 0; 866 867 free(thread_even); 868 free(core_even); 869 free(package_even); 870 871 thread_even = NULL; 872 core_even = NULL; 873 package_even = NULL; 874 875 free(thread_odd); 876 free(core_odd); 877 free(package_odd); 878 879 thread_odd = NULL; 880 core_odd = NULL; 881 package_odd = NULL; 882 883 free(output_buffer); 884 output_buffer = NULL; 885 outp = NULL; 886 } 887 888 /* 889 * cpu_is_first_sibling_in_core(cpu) 890 * return 1 if given CPU is 1st HT sibling in the core 891 */ 892 int cpu_is_first_sibling_in_core(int cpu) 893 { 894 char path[64]; 895 FILE *filep; 896 int first_cpu; 897 898 sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu); 899 filep = fopen(path, "r"); 900 if (filep == NULL) { 901 perror(path); 902 exit(1); 903 } 904 fscanf(filep, "%d", &first_cpu); 905 fclose(filep); 906 return (cpu == first_cpu); 907 } 908 909 /* 910 * cpu_is_first_core_in_package(cpu) 911 * return 1 if given CPU is 1st core in package 912 */ 913 int cpu_is_first_core_in_package(int cpu) 914 { 915 char path[64]; 916 FILE *filep; 917 int first_cpu; 918 919 sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu); 920 filep = fopen(path, "r"); 921 if (filep == NULL) { 922 perror(path); 923 exit(1); 924 } 925 fscanf(filep, "%d", &first_cpu); 926 fclose(filep); 927 return (cpu == first_cpu); 928 } 929 930 int get_physical_package_id(int cpu) 931 { 932 char path[80]; 933 FILE *filep; 934 int pkg; 935 936 sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu); 937 filep = fopen(path, "r"); 938 if (filep == NULL) { 939 perror(path); 940 exit(1); 941 } 942 fscanf(filep, "%d", &pkg); 943 fclose(filep); 944 return pkg; 945 } 946 947 int get_core_id(int cpu) 948 { 949 char path[80]; 950 FILE *filep; 951 int core; 952 953 sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/core_id", cpu); 954 filep = fopen(path, "r"); 955 if (filep == NULL) { 956 perror(path); 957 exit(1); 958 } 959 fscanf(filep, "%d", &core); 960 fclose(filep); 961 return core; 962 } 963 964 int get_num_ht_siblings(int cpu) 965 { 966 char path[80]; 967 FILE *filep; 968 int sib1, sib2; 969 int matches; 970 char character; 971 972 sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu); 973 filep = fopen(path, "r"); 974 if (filep == NULL) { 975 perror(path); 976 exit(1); 977 } 978 /* 979 * file format: 980 * if a pair of number with a character between: 2 siblings (eg. 1-2, or 1,4) 981 * otherwinse 1 sibling (self). 982 */ 983 matches = fscanf(filep, "%d%c%d\n", &sib1, &character, &sib2); 984 985 fclose(filep); 986 987 if (matches == 3) 988 return 2; 989 else 990 return 1; 991 } 992 993 /* 994 * run func(thread, core, package) in topology order 995 * skip non-present cpus 996 */ 997 998 int for_all_cpus_2(int (func)(struct thread_data *, struct core_data *, 999 struct pkg_data *, struct thread_data *, struct core_data *, 1000 struct pkg_data *), struct thread_data *thread_base, 1001 struct core_data *core_base, struct pkg_data *pkg_base, 1002 struct thread_data *thread_base2, struct core_data *core_base2, 1003 struct pkg_data *pkg_base2) 1004 { 1005 int retval, pkg_no, core_no, thread_no; 1006 1007 for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { 1008 for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) { 1009 for (thread_no = 0; thread_no < 1010 topo.num_threads_per_core; ++thread_no) { 1011 struct thread_data *t, *t2; 1012 struct core_data *c, *c2; 1013 struct pkg_data *p, *p2; 1014 1015 t = GET_THREAD(thread_base, thread_no, core_no, pkg_no); 1016 1017 if (cpu_is_not_present(t->cpu_id)) 1018 continue; 1019 1020 t2 = GET_THREAD(thread_base2, thread_no, core_no, pkg_no); 1021 1022 c = GET_CORE(core_base, core_no, pkg_no); 1023 c2 = GET_CORE(core_base2, core_no, pkg_no); 1024 1025 p = GET_PKG(pkg_base, pkg_no); 1026 p2 = GET_PKG(pkg_base2, pkg_no); 1027 1028 retval = func(t, c, p, t2, c2, p2); 1029 if (retval) 1030 return retval; 1031 } 1032 } 1033 } 1034 return 0; 1035 } 1036 1037 /* 1038 * run func(cpu) on every cpu in /proc/stat 1039 * return max_cpu number 1040 */ 1041 int for_all_proc_cpus(int (func)(int)) 1042 { 1043 FILE *fp; 1044 int cpu_num; 1045 int retval; 1046 1047 fp = fopen(proc_stat, "r"); 1048 if (fp == NULL) { 1049 perror(proc_stat); 1050 exit(1); 1051 } 1052 1053 retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n"); 1054 if (retval != 0) { 1055 perror("/proc/stat format"); 1056 exit(1); 1057 } 1058 1059 while (1) { 1060 retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num); 1061 if (retval != 1) 1062 break; 1063 1064 retval = func(cpu_num); 1065 if (retval) { 1066 fclose(fp); 1067 return(retval); 1068 } 1069 } 1070 fclose(fp); 1071 return 0; 1072 } 1073 1074 void re_initialize(void) 1075 { 1076 free_all_buffers(); 1077 setup_all_buffers(); 1078 printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus); 1079 } 1080 1081 1082 /* 1083 * count_cpus() 1084 * remember the last one seen, it will be the max 1085 */ 1086 int count_cpus(int cpu) 1087 { 1088 if (topo.max_cpu_num < cpu) 1089 topo.max_cpu_num = cpu; 1090 1091 topo.num_cpus += 1; 1092 return 0; 1093 } 1094 int mark_cpu_present(int cpu) 1095 { 1096 CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set); 1097 return 0; 1098 } 1099 1100 void turbostat_loop() 1101 { 1102 int retval; 1103 1104 restart: 1105 retval = for_all_cpus(get_counters, EVEN_COUNTERS); 1106 if (retval < -1) { 1107 exit(retval); 1108 } else if (retval == -1) { 1109 re_initialize(); 1110 goto restart; 1111 } 1112 gettimeofday(&tv_even, (struct timezone *)NULL); 1113 1114 while (1) { 1115 if (for_all_proc_cpus(cpu_is_not_present)) { 1116 re_initialize(); 1117 goto restart; 1118 } 1119 sleep(interval_sec); 1120 retval = for_all_cpus(get_counters, ODD_COUNTERS); 1121 if (retval < -1) { 1122 exit(retval); 1123 } else if (retval == -1) { 1124 re_initialize(); 1125 goto restart; 1126 } 1127 gettimeofday(&tv_odd, (struct timezone *)NULL); 1128 timersub(&tv_odd, &tv_even, &tv_delta); 1129 for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS); 1130 compute_average(EVEN_COUNTERS); 1131 format_all_counters(EVEN_COUNTERS); 1132 flush_stdout(); 1133 sleep(interval_sec); 1134 retval = for_all_cpus(get_counters, EVEN_COUNTERS); 1135 if (retval < -1) { 1136 exit(retval); 1137 } else if (retval == -1) { 1138 re_initialize(); 1139 goto restart; 1140 } 1141 gettimeofday(&tv_even, (struct timezone *)NULL); 1142 timersub(&tv_even, &tv_odd, &tv_delta); 1143 for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS); 1144 compute_average(ODD_COUNTERS); 1145 format_all_counters(ODD_COUNTERS); 1146 flush_stdout(); 1147 } 1148 } 1149 1150 void check_dev_msr() 1151 { 1152 struct stat sb; 1153 1154 if (stat("/dev/cpu/0/msr", &sb)) { 1155 fprintf(stderr, "no /dev/cpu/0/msr\n"); 1156 fprintf(stderr, "Try \"# modprobe msr\"\n"); 1157 exit(-5); 1158 } 1159 } 1160 1161 void check_super_user() 1162 { 1163 if (getuid() != 0) { 1164 fprintf(stderr, "must be root\n"); 1165 exit(-6); 1166 } 1167 } 1168 1169 int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model) 1170 { 1171 if (!genuine_intel) 1172 return 0; 1173 1174 if (family != 6) 1175 return 0; 1176 1177 switch (model) { 1178 case 0x1A: /* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */ 1179 case 0x1E: /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */ 1180 case 0x1F: /* Core i7 and i5 Processor - Nehalem */ 1181 case 0x25: /* Westmere Client - Clarkdale, Arrandale */ 1182 case 0x2C: /* Westmere EP - Gulftown */ 1183 case 0x2A: /* SNB */ 1184 case 0x2D: /* SNB Xeon */ 1185 case 0x3A: /* IVB */ 1186 case 0x3E: /* IVB Xeon */ 1187 return 1; 1188 case 0x2E: /* Nehalem-EX Xeon - Beckton */ 1189 case 0x2F: /* Westmere-EX Xeon - Eagleton */ 1190 default: 1191 return 0; 1192 } 1193 } 1194 int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model) 1195 { 1196 if (!genuine_intel) 1197 return 0; 1198 1199 if (family != 6) 1200 return 0; 1201 1202 switch (model) { 1203 case 0x3E: /* IVB Xeon */ 1204 return 1; 1205 default: 1206 return 0; 1207 } 1208 } 1209 1210 1211 int is_snb(unsigned int family, unsigned int model) 1212 { 1213 if (!genuine_intel) 1214 return 0; 1215 1216 switch (model) { 1217 case 0x2A: 1218 case 0x2D: 1219 case 0x3A: /* IVB */ 1220 case 0x3E: /* IVB Xeon */ 1221 return 1; 1222 } 1223 return 0; 1224 } 1225 1226 double discover_bclk(unsigned int family, unsigned int model) 1227 { 1228 if (is_snb(family, model)) 1229 return 100.00; 1230 else 1231 return 133.33; 1232 } 1233 1234 void check_cpuid() 1235 { 1236 unsigned int eax, ebx, ecx, edx, max_level; 1237 unsigned int fms, family, model, stepping; 1238 1239 eax = ebx = ecx = edx = 0; 1240 1241 asm("cpuid" : "=a" (max_level), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0)); 1242 1243 if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e) 1244 genuine_intel = 1; 1245 1246 if (verbose) 1247 fprintf(stderr, "%.4s%.4s%.4s ", 1248 (char *)&ebx, (char *)&edx, (char *)&ecx); 1249 1250 asm("cpuid" : "=a" (fms), "=c" (ecx), "=d" (edx) : "a" (1) : "ebx"); 1251 family = (fms >> 8) & 0xf; 1252 model = (fms >> 4) & 0xf; 1253 stepping = fms & 0xf; 1254 if (family == 6 || family == 0xf) 1255 model += ((fms >> 16) & 0xf) << 4; 1256 1257 if (verbose) 1258 fprintf(stderr, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n", 1259 max_level, family, model, stepping, family, model, stepping); 1260 1261 if (!(edx & (1 << 5))) { 1262 fprintf(stderr, "CPUID: no MSR\n"); 1263 exit(1); 1264 } 1265 1266 /* 1267 * check max extended function levels of CPUID. 1268 * This is needed to check for invariant TSC. 1269 * This check is valid for both Intel and AMD. 1270 */ 1271 ebx = ecx = edx = 0; 1272 asm("cpuid" : "=a" (max_level), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x80000000)); 1273 1274 if (max_level < 0x80000007) { 1275 fprintf(stderr, "CPUID: no invariant TSC (max_level 0x%x)\n", max_level); 1276 exit(1); 1277 } 1278 1279 /* 1280 * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8 1281 * this check is valid for both Intel and AMD 1282 */ 1283 asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x80000007)); 1284 has_invariant_tsc = edx & (1 << 8); 1285 1286 if (!has_invariant_tsc) { 1287 fprintf(stderr, "No invariant TSC\n"); 1288 exit(1); 1289 } 1290 1291 /* 1292 * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0 1293 * this check is valid for both Intel and AMD 1294 */ 1295 1296 asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x6)); 1297 has_aperf = ecx & (1 << 0); 1298 if (!has_aperf) { 1299 fprintf(stderr, "No APERF MSR\n"); 1300 exit(1); 1301 } 1302 1303 do_nehalem_platform_info = genuine_intel && has_invariant_tsc; 1304 do_nhm_cstates = genuine_intel; /* all Intel w/ non-stop TSC have NHM counters */ 1305 do_snb_cstates = is_snb(family, model); 1306 bclk = discover_bclk(family, model); 1307 1308 do_nehalem_turbo_ratio_limit = has_nehalem_turbo_ratio_limit(family, model); 1309 do_ivt_turbo_ratio_limit = has_ivt_turbo_ratio_limit(family, model); 1310 } 1311 1312 1313 void usage() 1314 { 1315 fprintf(stderr, "%s: [-v][-p|-P|-S][-c MSR# | -s]][-C MSR#][-m MSR#][-M MSR#][-i interval_sec | command ...]\n", 1316 progname); 1317 exit(1); 1318 } 1319 1320 1321 /* 1322 * in /dev/cpu/ return success for names that are numbers 1323 * ie. filter out ".", "..", "microcode". 1324 */ 1325 int dir_filter(const struct dirent *dirp) 1326 { 1327 if (isdigit(dirp->d_name[0])) 1328 return 1; 1329 else 1330 return 0; 1331 } 1332 1333 int open_dev_cpu_msr(int dummy1) 1334 { 1335 return 0; 1336 } 1337 1338 void topology_probe() 1339 { 1340 int i; 1341 int max_core_id = 0; 1342 int max_package_id = 0; 1343 int max_siblings = 0; 1344 struct cpu_topology { 1345 int core_id; 1346 int physical_package_id; 1347 } *cpus; 1348 1349 /* Initialize num_cpus, max_cpu_num */ 1350 topo.num_cpus = 0; 1351 topo.max_cpu_num = 0; 1352 for_all_proc_cpus(count_cpus); 1353 if (!summary_only && topo.num_cpus > 1) 1354 show_cpu = 1; 1355 1356 if (verbose > 1) 1357 fprintf(stderr, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num); 1358 1359 cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology)); 1360 if (cpus == NULL) { 1361 perror("calloc cpus"); 1362 exit(1); 1363 } 1364 1365 /* 1366 * Allocate and initialize cpu_present_set 1367 */ 1368 cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1)); 1369 if (cpu_present_set == NULL) { 1370 perror("CPU_ALLOC"); 1371 exit(3); 1372 } 1373 cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 1374 CPU_ZERO_S(cpu_present_setsize, cpu_present_set); 1375 for_all_proc_cpus(mark_cpu_present); 1376 1377 /* 1378 * Allocate and initialize cpu_affinity_set 1379 */ 1380 cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1)); 1381 if (cpu_affinity_set == NULL) { 1382 perror("CPU_ALLOC"); 1383 exit(3); 1384 } 1385 cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 1386 CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); 1387 1388 1389 /* 1390 * For online cpus 1391 * find max_core_id, max_package_id 1392 */ 1393 for (i = 0; i <= topo.max_cpu_num; ++i) { 1394 int siblings; 1395 1396 if (cpu_is_not_present(i)) { 1397 if (verbose > 1) 1398 fprintf(stderr, "cpu%d NOT PRESENT\n", i); 1399 continue; 1400 } 1401 cpus[i].core_id = get_core_id(i); 1402 if (cpus[i].core_id > max_core_id) 1403 max_core_id = cpus[i].core_id; 1404 1405 cpus[i].physical_package_id = get_physical_package_id(i); 1406 if (cpus[i].physical_package_id > max_package_id) 1407 max_package_id = cpus[i].physical_package_id; 1408 1409 siblings = get_num_ht_siblings(i); 1410 if (siblings > max_siblings) 1411 max_siblings = siblings; 1412 if (verbose > 1) 1413 fprintf(stderr, "cpu %d pkg %d core %d\n", 1414 i, cpus[i].physical_package_id, cpus[i].core_id); 1415 } 1416 topo.num_cores_per_pkg = max_core_id + 1; 1417 if (verbose > 1) 1418 fprintf(stderr, "max_core_id %d, sizing for %d cores per package\n", 1419 max_core_id, topo.num_cores_per_pkg); 1420 if (!summary_only && topo.num_cores_per_pkg > 1) 1421 show_core = 1; 1422 1423 topo.num_packages = max_package_id + 1; 1424 if (verbose > 1) 1425 fprintf(stderr, "max_package_id %d, sizing for %d packages\n", 1426 max_package_id, topo.num_packages); 1427 if (!summary_only && topo.num_packages > 1) 1428 show_pkg = 1; 1429 1430 topo.num_threads_per_core = max_siblings; 1431 if (verbose > 1) 1432 fprintf(stderr, "max_siblings %d\n", max_siblings); 1433 1434 free(cpus); 1435 } 1436 1437 void 1438 allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p) 1439 { 1440 int i; 1441 1442 *t = calloc(topo.num_threads_per_core * topo.num_cores_per_pkg * 1443 topo.num_packages, sizeof(struct thread_data)); 1444 if (*t == NULL) 1445 goto error; 1446 1447 for (i = 0; i < topo.num_threads_per_core * 1448 topo.num_cores_per_pkg * topo.num_packages; i++) 1449 (*t)[i].cpu_id = -1; 1450 1451 *c = calloc(topo.num_cores_per_pkg * topo.num_packages, 1452 sizeof(struct core_data)); 1453 if (*c == NULL) 1454 goto error; 1455 1456 for (i = 0; i < topo.num_cores_per_pkg * topo.num_packages; i++) 1457 (*c)[i].core_id = -1; 1458 1459 *p = calloc(topo.num_packages, sizeof(struct pkg_data)); 1460 if (*p == NULL) 1461 goto error; 1462 1463 for (i = 0; i < topo.num_packages; i++) 1464 (*p)[i].package_id = i; 1465 1466 return; 1467 error: 1468 perror("calloc counters"); 1469 exit(1); 1470 } 1471 /* 1472 * init_counter() 1473 * 1474 * set cpu_id, core_num, pkg_num 1475 * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE 1476 * 1477 * increment topo.num_cores when 1st core in pkg seen 1478 */ 1479 void init_counter(struct thread_data *thread_base, struct core_data *core_base, 1480 struct pkg_data *pkg_base, int thread_num, int core_num, 1481 int pkg_num, int cpu_id) 1482 { 1483 struct thread_data *t; 1484 struct core_data *c; 1485 struct pkg_data *p; 1486 1487 t = GET_THREAD(thread_base, thread_num, core_num, pkg_num); 1488 c = GET_CORE(core_base, core_num, pkg_num); 1489 p = GET_PKG(pkg_base, pkg_num); 1490 1491 t->cpu_id = cpu_id; 1492 if (thread_num == 0) { 1493 t->flags |= CPU_IS_FIRST_THREAD_IN_CORE; 1494 if (cpu_is_first_core_in_package(cpu_id)) 1495 t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE; 1496 } 1497 1498 c->core_id = core_num; 1499 p->package_id = pkg_num; 1500 } 1501 1502 1503 int initialize_counters(int cpu_id) 1504 { 1505 int my_thread_id, my_core_id, my_package_id; 1506 1507 my_package_id = get_physical_package_id(cpu_id); 1508 my_core_id = get_core_id(cpu_id); 1509 1510 if (cpu_is_first_sibling_in_core(cpu_id)) { 1511 my_thread_id = 0; 1512 topo.num_cores++; 1513 } else { 1514 my_thread_id = 1; 1515 } 1516 1517 init_counter(EVEN_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id); 1518 init_counter(ODD_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id); 1519 return 0; 1520 } 1521 1522 void allocate_output_buffer() 1523 { 1524 output_buffer = calloc(1, (1 + topo.num_cpus) * 128); 1525 outp = output_buffer; 1526 if (outp == NULL) { 1527 perror("calloc"); 1528 exit(-1); 1529 } 1530 } 1531 1532 void setup_all_buffers(void) 1533 { 1534 topology_probe(); 1535 allocate_counters(&thread_even, &core_even, &package_even); 1536 allocate_counters(&thread_odd, &core_odd, &package_odd); 1537 allocate_output_buffer(); 1538 for_all_proc_cpus(initialize_counters); 1539 } 1540 void turbostat_init() 1541 { 1542 check_cpuid(); 1543 1544 check_dev_msr(); 1545 check_super_user(); 1546 1547 setup_all_buffers(); 1548 1549 if (verbose) 1550 print_verbose_header(); 1551 } 1552 1553 int fork_it(char **argv) 1554 { 1555 pid_t child_pid; 1556 int status; 1557 1558 status = for_all_cpus(get_counters, EVEN_COUNTERS); 1559 if (status) 1560 exit(status); 1561 /* clear affinity side-effect of get_counters() */ 1562 sched_setaffinity(0, cpu_present_setsize, cpu_present_set); 1563 gettimeofday(&tv_even, (struct timezone *)NULL); 1564 1565 child_pid = fork(); 1566 if (!child_pid) { 1567 /* child */ 1568 execvp(argv[0], argv); 1569 } else { 1570 1571 /* parent */ 1572 if (child_pid == -1) { 1573 perror("fork"); 1574 exit(1); 1575 } 1576 1577 signal(SIGINT, SIG_IGN); 1578 signal(SIGQUIT, SIG_IGN); 1579 if (waitpid(child_pid, &status, 0) == -1) { 1580 perror("wait"); 1581 exit(status); 1582 } 1583 } 1584 /* 1585 * n.b. fork_it() does not check for errors from for_all_cpus() 1586 * because re-starting is problematic when forking 1587 */ 1588 for_all_cpus(get_counters, ODD_COUNTERS); 1589 gettimeofday(&tv_odd, (struct timezone *)NULL); 1590 timersub(&tv_odd, &tv_even, &tv_delta); 1591 for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS); 1592 compute_average(EVEN_COUNTERS); 1593 format_all_counters(EVEN_COUNTERS); 1594 flush_stderr(); 1595 1596 fprintf(stderr, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0); 1597 1598 return status; 1599 } 1600 1601 void cmdline(int argc, char **argv) 1602 { 1603 int opt; 1604 1605 progname = argv[0]; 1606 1607 while ((opt = getopt(argc, argv, "+pPSvi:sc:sC:m:M:")) != -1) { 1608 switch (opt) { 1609 case 'p': 1610 show_core_only++; 1611 break; 1612 case 'P': 1613 show_pkg_only++; 1614 break; 1615 case 'S': 1616 summary_only++; 1617 break; 1618 case 'v': 1619 verbose++; 1620 break; 1621 case 'i': 1622 interval_sec = atoi(optarg); 1623 break; 1624 case 'c': 1625 sscanf(optarg, "%x", &extra_delta_offset32); 1626 break; 1627 case 's': 1628 extra_delta_offset32 = 0x34; /* SMI counter */ 1629 break; 1630 case 'C': 1631 sscanf(optarg, "%x", &extra_delta_offset64); 1632 break; 1633 case 'm': 1634 sscanf(optarg, "%x", &extra_msr_offset32); 1635 break; 1636 case 'M': 1637 sscanf(optarg, "%x", &extra_msr_offset64); 1638 break; 1639 default: 1640 usage(); 1641 } 1642 } 1643 } 1644 1645 int main(int argc, char **argv) 1646 { 1647 cmdline(argc, argv); 1648 1649 if (verbose > 1) 1650 fprintf(stderr, "turbostat v2.1 October 6, 2012" 1651 " - Len Brown <lenb@kernel.org>\n"); 1652 1653 turbostat_init(); 1654 1655 /* 1656 * if any params left, it must be a command to fork 1657 */ 1658 if (argc - optind) 1659 return fork_it(argv + optind); 1660 else 1661 turbostat_loop(); 1662 1663 return 0; 1664 } 1665