1 /* 2 * turbostat -- show CPU frequency and C-state residency 3 * on modern Intel turbo-capable processors. 4 * 5 * Copyright (c) 2012 Intel Corporation. 6 * Len Brown <len.brown@intel.com> 7 * 8 * This program is free software; you can redistribute it and/or modify it 9 * under the terms and conditions of the GNU General Public License, 10 * version 2, as published by the Free Software Foundation. 11 * 12 * This program is distributed in the hope it will be useful, but WITHOUT 13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 15 * more details. 16 * 17 * You should have received a copy of the GNU General Public License along with 18 * this program; if not, write to the Free Software Foundation, Inc., 19 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 20 */ 21 22 #define _GNU_SOURCE 23 #include <stdio.h> 24 #include <unistd.h> 25 #include <sys/types.h> 26 #include <sys/wait.h> 27 #include <sys/stat.h> 28 #include <sys/resource.h> 29 #include <fcntl.h> 30 #include <signal.h> 31 #include <sys/time.h> 32 #include <stdlib.h> 33 #include <dirent.h> 34 #include <string.h> 35 #include <ctype.h> 36 #include <sched.h> 37 38 #define MSR_NEHALEM_PLATFORM_INFO 0xCE 39 #define MSR_NEHALEM_TURBO_RATIO_LIMIT 0x1AD 40 #define MSR_IVT_TURBO_RATIO_LIMIT 0x1AE 41 #define MSR_APERF 0xE8 42 #define MSR_MPERF 0xE7 43 #define MSR_PKG_C2_RESIDENCY 0x60D /* SNB only */ 44 #define MSR_PKG_C3_RESIDENCY 0x3F8 45 #define MSR_PKG_C6_RESIDENCY 0x3F9 46 #define MSR_PKG_C7_RESIDENCY 0x3FA /* SNB only */ 47 #define MSR_CORE_C3_RESIDENCY 0x3FC 48 #define MSR_CORE_C6_RESIDENCY 0x3FD 49 #define MSR_CORE_C7_RESIDENCY 0x3FE /* SNB only */ 50 51 char *proc_stat = "/proc/stat"; 52 unsigned int interval_sec = 5; /* set with -i interval_sec */ 53 unsigned int verbose; /* set with -v */ 54 unsigned int summary_only; /* set with -s */ 55 unsigned int skip_c0; 56 unsigned int skip_c1; 57 unsigned int do_nhm_cstates; 58 unsigned int do_snb_cstates; 59 unsigned int has_aperf; 60 unsigned int units = 1000000000; /* Ghz etc */ 61 unsigned int genuine_intel; 62 unsigned int has_invariant_tsc; 63 unsigned int do_nehalem_platform_info; 64 unsigned int do_nehalem_turbo_ratio_limit; 65 unsigned int do_ivt_turbo_ratio_limit; 66 unsigned int extra_msr_offset32; 67 unsigned int extra_msr_offset64; 68 unsigned int extra_delta_offset32; 69 unsigned int extra_delta_offset64; 70 double bclk; 71 unsigned int show_pkg; 72 unsigned int show_core; 73 unsigned int show_cpu; 74 unsigned int show_pkg_only; 75 unsigned int show_core_only; 76 char *output_buffer, *outp; 77 78 int aperf_mperf_unstable; 79 int backwards_count; 80 char *progname; 81 82 cpu_set_t *cpu_present_set, *cpu_affinity_set; 83 size_t cpu_present_setsize, cpu_affinity_setsize; 84 85 struct thread_data { 86 unsigned long long tsc; 87 unsigned long long aperf; 88 unsigned long long mperf; 89 unsigned long long c1; /* derived */ 90 unsigned long long extra_msr64; 91 unsigned long long extra_delta64; 92 unsigned long long extra_msr32; 93 unsigned long long extra_delta32; 94 unsigned int cpu_id; 95 unsigned int flags; 96 #define CPU_IS_FIRST_THREAD_IN_CORE 0x2 97 #define CPU_IS_FIRST_CORE_IN_PACKAGE 0x4 98 } *thread_even, *thread_odd; 99 100 struct core_data { 101 unsigned long long c3; 102 unsigned long long c6; 103 unsigned long long c7; 104 unsigned int core_id; 105 } *core_even, *core_odd; 106 107 struct pkg_data { 108 unsigned long long pc2; 109 unsigned long long pc3; 110 unsigned long long pc6; 111 unsigned long long pc7; 112 unsigned int package_id; 113 } *package_even, *package_odd; 114 115 #define ODD_COUNTERS thread_odd, core_odd, package_odd 116 #define EVEN_COUNTERS thread_even, core_even, package_even 117 118 #define GET_THREAD(thread_base, thread_no, core_no, pkg_no) \ 119 (thread_base + (pkg_no) * topo.num_cores_per_pkg * \ 120 topo.num_threads_per_core + \ 121 (core_no) * topo.num_threads_per_core + (thread_no)) 122 #define GET_CORE(core_base, core_no, pkg_no) \ 123 (core_base + (pkg_no) * topo.num_cores_per_pkg + (core_no)) 124 #define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no) 125 126 struct system_summary { 127 struct thread_data threads; 128 struct core_data cores; 129 struct pkg_data packages; 130 } sum, average; 131 132 133 struct topo_params { 134 int num_packages; 135 int num_cpus; 136 int num_cores; 137 int max_cpu_num; 138 int num_cores_per_pkg; 139 int num_threads_per_core; 140 } topo; 141 142 struct timeval tv_even, tv_odd, tv_delta; 143 144 void setup_all_buffers(void); 145 146 int cpu_is_not_present(int cpu) 147 { 148 return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set); 149 } 150 /* 151 * run func(thread, core, package) in topology order 152 * skip non-present cpus 153 */ 154 155 int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *), 156 struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base) 157 { 158 int retval, pkg_no, core_no, thread_no; 159 160 for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { 161 for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) { 162 for (thread_no = 0; thread_no < 163 topo.num_threads_per_core; ++thread_no) { 164 struct thread_data *t; 165 struct core_data *c; 166 struct pkg_data *p; 167 168 t = GET_THREAD(thread_base, thread_no, core_no, pkg_no); 169 170 if (cpu_is_not_present(t->cpu_id)) 171 continue; 172 173 c = GET_CORE(core_base, core_no, pkg_no); 174 p = GET_PKG(pkg_base, pkg_no); 175 176 retval = func(t, c, p); 177 if (retval) 178 return retval; 179 } 180 } 181 } 182 return 0; 183 } 184 185 int cpu_migrate(int cpu) 186 { 187 CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); 188 CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set); 189 if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1) 190 return -1; 191 else 192 return 0; 193 } 194 195 int get_msr(int cpu, off_t offset, unsigned long long *msr) 196 { 197 ssize_t retval; 198 char pathname[32]; 199 int fd; 200 201 sprintf(pathname, "/dev/cpu/%d/msr", cpu); 202 fd = open(pathname, O_RDONLY); 203 if (fd < 0) 204 return -1; 205 206 retval = pread(fd, msr, sizeof *msr, offset); 207 close(fd); 208 209 if (retval != sizeof *msr) 210 return -1; 211 212 return 0; 213 } 214 215 void print_header(void) 216 { 217 if (show_pkg) 218 outp += sprintf(outp, "pk"); 219 if (show_pkg) 220 outp += sprintf(outp, " "); 221 if (show_core) 222 outp += sprintf(outp, "cor"); 223 if (show_cpu) 224 outp += sprintf(outp, " CPU"); 225 if (show_pkg || show_core || show_cpu) 226 outp += sprintf(outp, " "); 227 if (do_nhm_cstates) 228 outp += sprintf(outp, " %%c0"); 229 if (has_aperf) 230 outp += sprintf(outp, " GHz"); 231 outp += sprintf(outp, " TSC"); 232 if (extra_delta_offset32) 233 outp += sprintf(outp, " count 0x%03X", extra_delta_offset32); 234 if (extra_delta_offset64) 235 outp += sprintf(outp, " COUNT 0x%03X", extra_delta_offset64); 236 if (extra_msr_offset32) 237 outp += sprintf(outp, " MSR 0x%03X", extra_msr_offset32); 238 if (extra_msr_offset64) 239 outp += sprintf(outp, " MSR 0x%03X", extra_msr_offset64); 240 if (do_nhm_cstates) 241 outp += sprintf(outp, " %%c1"); 242 if (do_nhm_cstates) 243 outp += sprintf(outp, " %%c3"); 244 if (do_nhm_cstates) 245 outp += sprintf(outp, " %%c6"); 246 if (do_snb_cstates) 247 outp += sprintf(outp, " %%c7"); 248 if (do_snb_cstates) 249 outp += sprintf(outp, " %%pc2"); 250 if (do_nhm_cstates) 251 outp += sprintf(outp, " %%pc3"); 252 if (do_nhm_cstates) 253 outp += sprintf(outp, " %%pc6"); 254 if (do_snb_cstates) 255 outp += sprintf(outp, " %%pc7"); 256 257 outp += sprintf(outp, "\n"); 258 } 259 260 int dump_counters(struct thread_data *t, struct core_data *c, 261 struct pkg_data *p) 262 { 263 fprintf(stderr, "t %p, c %p, p %p\n", t, c, p); 264 265 if (t) { 266 fprintf(stderr, "CPU: %d flags 0x%x\n", t->cpu_id, t->flags); 267 fprintf(stderr, "TSC: %016llX\n", t->tsc); 268 fprintf(stderr, "aperf: %016llX\n", t->aperf); 269 fprintf(stderr, "mperf: %016llX\n", t->mperf); 270 fprintf(stderr, "c1: %016llX\n", t->c1); 271 fprintf(stderr, "msr0x%x: %08llX\n", 272 extra_delta_offset32, t->extra_delta32); 273 fprintf(stderr, "msr0x%x: %016llX\n", 274 extra_delta_offset64, t->extra_delta64); 275 fprintf(stderr, "msr0x%x: %08llX\n", 276 extra_msr_offset32, t->extra_msr32); 277 fprintf(stderr, "msr0x%x: %016llX\n", 278 extra_msr_offset64, t->extra_msr64); 279 } 280 281 if (c) { 282 fprintf(stderr, "core: %d\n", c->core_id); 283 fprintf(stderr, "c3: %016llX\n", c->c3); 284 fprintf(stderr, "c6: %016llX\n", c->c6); 285 fprintf(stderr, "c7: %016llX\n", c->c7); 286 } 287 288 if (p) { 289 fprintf(stderr, "package: %d\n", p->package_id); 290 fprintf(stderr, "pc2: %016llX\n", p->pc2); 291 fprintf(stderr, "pc3: %016llX\n", p->pc3); 292 fprintf(stderr, "pc6: %016llX\n", p->pc6); 293 fprintf(stderr, "pc7: %016llX\n", p->pc7); 294 } 295 return 0; 296 } 297 298 /* 299 * column formatting convention & formats 300 * package: "pk" 2 columns %2d 301 * core: "cor" 3 columns %3d 302 * CPU: "CPU" 3 columns %3d 303 * GHz: "GHz" 3 columns %3.2 304 * TSC: "TSC" 3 columns %3.2 305 * percentage " %pc3" %6.2 306 */ 307 int format_counters(struct thread_data *t, struct core_data *c, 308 struct pkg_data *p) 309 { 310 double interval_float; 311 312 /* if showing only 1st thread in core and this isn't one, bail out */ 313 if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) 314 return 0; 315 316 /* if showing only 1st thread in pkg and this isn't one, bail out */ 317 if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) 318 return 0; 319 320 interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0; 321 322 /* topo columns, print blanks on 1st (average) line */ 323 if (t == &average.threads) { 324 if (show_pkg) 325 outp += sprintf(outp, " "); 326 if (show_pkg && show_core) 327 outp += sprintf(outp, " "); 328 if (show_core) 329 outp += sprintf(outp, " "); 330 if (show_cpu) 331 outp += sprintf(outp, " " " "); 332 } else { 333 if (show_pkg) { 334 if (p) 335 outp += sprintf(outp, "%2d", p->package_id); 336 else 337 outp += sprintf(outp, " "); 338 } 339 if (show_pkg && show_core) 340 outp += sprintf(outp, " "); 341 if (show_core) { 342 if (c) 343 outp += sprintf(outp, "%3d", c->core_id); 344 else 345 outp += sprintf(outp, " "); 346 } 347 if (show_cpu) 348 outp += sprintf(outp, " %3d", t->cpu_id); 349 } 350 351 /* %c0 */ 352 if (do_nhm_cstates) { 353 if (show_pkg || show_core || show_cpu) 354 outp += sprintf(outp, " "); 355 if (!skip_c0) 356 outp += sprintf(outp, "%6.2f", 100.0 * t->mperf/t->tsc); 357 else 358 outp += sprintf(outp, " ****"); 359 } 360 361 /* GHz */ 362 if (has_aperf) { 363 if (!aperf_mperf_unstable) { 364 outp += sprintf(outp, " %3.2f", 365 1.0 * t->tsc / units * t->aperf / 366 t->mperf / interval_float); 367 } else { 368 if (t->aperf > t->tsc || t->mperf > t->tsc) { 369 outp += sprintf(outp, " ***"); 370 } else { 371 outp += sprintf(outp, "%3.1f*", 372 1.0 * t->tsc / 373 units * t->aperf / 374 t->mperf / interval_float); 375 } 376 } 377 } 378 379 /* TSC */ 380 outp += sprintf(outp, "%5.2f", 1.0 * t->tsc/units/interval_float); 381 382 /* delta */ 383 if (extra_delta_offset32) 384 outp += sprintf(outp, " %11llu", t->extra_delta32); 385 386 /* DELTA */ 387 if (extra_delta_offset64) 388 outp += sprintf(outp, " %11llu", t->extra_delta64); 389 /* msr */ 390 if (extra_msr_offset32) 391 outp += sprintf(outp, " 0x%08llx", t->extra_msr32); 392 393 /* MSR */ 394 if (extra_msr_offset64) 395 outp += sprintf(outp, " 0x%016llx", t->extra_msr64); 396 397 if (do_nhm_cstates) { 398 if (!skip_c1) 399 outp += sprintf(outp, " %6.2f", 100.0 * t->c1/t->tsc); 400 else 401 outp += sprintf(outp, " ****"); 402 } 403 404 /* print per-core data only for 1st thread in core */ 405 if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) 406 goto done; 407 408 if (do_nhm_cstates) 409 outp += sprintf(outp, " %6.2f", 100.0 * c->c3/t->tsc); 410 if (do_nhm_cstates) 411 outp += sprintf(outp, " %6.2f", 100.0 * c->c6/t->tsc); 412 if (do_snb_cstates) 413 outp += sprintf(outp, " %6.2f", 100.0 * c->c7/t->tsc); 414 415 /* print per-package data only for 1st core in package */ 416 if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) 417 goto done; 418 419 if (do_snb_cstates) 420 outp += sprintf(outp, " %6.2f", 100.0 * p->pc2/t->tsc); 421 if (do_nhm_cstates) 422 outp += sprintf(outp, " %6.2f", 100.0 * p->pc3/t->tsc); 423 if (do_nhm_cstates) 424 outp += sprintf(outp, " %6.2f", 100.0 * p->pc6/t->tsc); 425 if (do_snb_cstates) 426 outp += sprintf(outp, " %6.2f", 100.0 * p->pc7/t->tsc); 427 done: 428 outp += sprintf(outp, "\n"); 429 430 return 0; 431 } 432 433 void flush_stdout() 434 { 435 fputs(output_buffer, stdout); 436 outp = output_buffer; 437 } 438 void flush_stderr() 439 { 440 fputs(output_buffer, stderr); 441 outp = output_buffer; 442 } 443 void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 444 { 445 static int printed; 446 447 if (!printed || !summary_only) 448 print_header(); 449 450 if (topo.num_cpus > 1) 451 format_counters(&average.threads, &average.cores, 452 &average.packages); 453 454 printed = 1; 455 456 if (summary_only) 457 return; 458 459 for_all_cpus(format_counters, t, c, p); 460 } 461 462 void 463 delta_package(struct pkg_data *new, struct pkg_data *old) 464 { 465 old->pc2 = new->pc2 - old->pc2; 466 old->pc3 = new->pc3 - old->pc3; 467 old->pc6 = new->pc6 - old->pc6; 468 old->pc7 = new->pc7 - old->pc7; 469 } 470 471 void 472 delta_core(struct core_data *new, struct core_data *old) 473 { 474 old->c3 = new->c3 - old->c3; 475 old->c6 = new->c6 - old->c6; 476 old->c7 = new->c7 - old->c7; 477 } 478 479 /* 480 * old = new - old 481 */ 482 void 483 delta_thread(struct thread_data *new, struct thread_data *old, 484 struct core_data *core_delta) 485 { 486 old->tsc = new->tsc - old->tsc; 487 488 /* check for TSC < 1 Mcycles over interval */ 489 if (old->tsc < (1000 * 1000)) { 490 fprintf(stderr, "Insanely slow TSC rate, TSC stops in idle?\n"); 491 fprintf(stderr, "You can disable all c-states by booting with \"idle=poll\"\n"); 492 fprintf(stderr, "or just the deep ones with \"processor.max_cstate=1\"\n"); 493 exit(-3); 494 } 495 496 old->c1 = new->c1 - old->c1; 497 498 if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) { 499 old->aperf = new->aperf - old->aperf; 500 old->mperf = new->mperf - old->mperf; 501 } else { 502 503 if (!aperf_mperf_unstable) { 504 fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname); 505 fprintf(stderr, "* Frequency results do not cover entire interval *\n"); 506 fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n"); 507 508 aperf_mperf_unstable = 1; 509 } 510 /* 511 * mperf delta is likely a huge "positive" number 512 * can not use it for calculating c0 time 513 */ 514 skip_c0 = 1; 515 skip_c1 = 1; 516 } 517 518 519 /* 520 * As counter collection is not atomic, 521 * it is possible for mperf's non-halted cycles + idle states 522 * to exceed TSC's all cycles: show c1 = 0% in that case. 523 */ 524 if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > old->tsc) 525 old->c1 = 0; 526 else { 527 /* normal case, derive c1 */ 528 old->c1 = old->tsc - old->mperf - core_delta->c3 529 - core_delta->c6 - core_delta->c7; 530 } 531 532 if (old->mperf == 0) { 533 if (verbose > 1) fprintf(stderr, "cpu%d MPERF 0!\n", old->cpu_id); 534 old->mperf = 1; /* divide by 0 protection */ 535 } 536 537 old->extra_delta32 = new->extra_delta32 - old->extra_delta32; 538 old->extra_delta32 &= 0xFFFFFFFF; 539 540 old->extra_delta64 = new->extra_delta64 - old->extra_delta64; 541 542 /* 543 * Extra MSR is just a snapshot, simply copy latest w/o subtracting 544 */ 545 old->extra_msr32 = new->extra_msr32; 546 old->extra_msr64 = new->extra_msr64; 547 } 548 549 int delta_cpu(struct thread_data *t, struct core_data *c, 550 struct pkg_data *p, struct thread_data *t2, 551 struct core_data *c2, struct pkg_data *p2) 552 { 553 /* calculate core delta only for 1st thread in core */ 554 if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE) 555 delta_core(c, c2); 556 557 /* always calculate thread delta */ 558 delta_thread(t, t2, c2); /* c2 is core delta */ 559 560 /* calculate package delta only for 1st core in package */ 561 if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE) 562 delta_package(p, p2); 563 564 return 0; 565 } 566 567 void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 568 { 569 t->tsc = 0; 570 t->aperf = 0; 571 t->mperf = 0; 572 t->c1 = 0; 573 574 t->extra_delta32 = 0; 575 t->extra_delta64 = 0; 576 577 /* tells format_counters to dump all fields from this set */ 578 t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE; 579 580 c->c3 = 0; 581 c->c6 = 0; 582 c->c7 = 0; 583 584 p->pc2 = 0; 585 p->pc3 = 0; 586 p->pc6 = 0; 587 p->pc7 = 0; 588 } 589 int sum_counters(struct thread_data *t, struct core_data *c, 590 struct pkg_data *p) 591 { 592 average.threads.tsc += t->tsc; 593 average.threads.aperf += t->aperf; 594 average.threads.mperf += t->mperf; 595 average.threads.c1 += t->c1; 596 597 average.threads.extra_delta32 += t->extra_delta32; 598 average.threads.extra_delta64 += t->extra_delta64; 599 600 /* sum per-core values only for 1st thread in core */ 601 if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) 602 return 0; 603 604 average.cores.c3 += c->c3; 605 average.cores.c6 += c->c6; 606 average.cores.c7 += c->c7; 607 608 /* sum per-pkg values only for 1st core in pkg */ 609 if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) 610 return 0; 611 612 average.packages.pc2 += p->pc2; 613 average.packages.pc3 += p->pc3; 614 average.packages.pc6 += p->pc6; 615 average.packages.pc7 += p->pc7; 616 617 return 0; 618 } 619 /* 620 * sum the counters for all cpus in the system 621 * compute the weighted average 622 */ 623 void compute_average(struct thread_data *t, struct core_data *c, 624 struct pkg_data *p) 625 { 626 clear_counters(&average.threads, &average.cores, &average.packages); 627 628 for_all_cpus(sum_counters, t, c, p); 629 630 average.threads.tsc /= topo.num_cpus; 631 average.threads.aperf /= topo.num_cpus; 632 average.threads.mperf /= topo.num_cpus; 633 average.threads.c1 /= topo.num_cpus; 634 635 average.threads.extra_delta32 /= topo.num_cpus; 636 average.threads.extra_delta32 &= 0xFFFFFFFF; 637 638 average.threads.extra_delta64 /= topo.num_cpus; 639 640 average.cores.c3 /= topo.num_cores; 641 average.cores.c6 /= topo.num_cores; 642 average.cores.c7 /= topo.num_cores; 643 644 average.packages.pc2 /= topo.num_packages; 645 average.packages.pc3 /= topo.num_packages; 646 average.packages.pc6 /= topo.num_packages; 647 average.packages.pc7 /= topo.num_packages; 648 } 649 650 static unsigned long long rdtsc(void) 651 { 652 unsigned int low, high; 653 654 asm volatile("rdtsc" : "=a" (low), "=d" (high)); 655 656 return low | ((unsigned long long)high) << 32; 657 } 658 659 660 /* 661 * get_counters(...) 662 * migrate to cpu 663 * acquire and record local counters for that cpu 664 */ 665 int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 666 { 667 int cpu = t->cpu_id; 668 669 if (cpu_migrate(cpu)) 670 return -1; 671 672 t->tsc = rdtsc(); /* we are running on local CPU of interest */ 673 674 if (has_aperf) { 675 if (get_msr(cpu, MSR_APERF, &t->aperf)) 676 return -3; 677 if (get_msr(cpu, MSR_MPERF, &t->mperf)) 678 return -4; 679 } 680 681 if (extra_delta_offset32) { 682 if (get_msr(cpu, extra_delta_offset32, &t->extra_delta32)) 683 return -5; 684 t->extra_delta32 &= 0xFFFFFFFF; 685 } 686 687 if (extra_delta_offset64) 688 if (get_msr(cpu, extra_delta_offset64, &t->extra_delta64)) 689 return -5; 690 691 if (extra_msr_offset32) { 692 if (get_msr(cpu, extra_msr_offset32, &t->extra_msr32)) 693 return -5; 694 t->extra_msr32 &= 0xFFFFFFFF; 695 } 696 697 if (extra_msr_offset64) 698 if (get_msr(cpu, extra_msr_offset64, &t->extra_msr64)) 699 return -5; 700 701 /* collect core counters only for 1st thread in core */ 702 if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) 703 return 0; 704 705 if (do_nhm_cstates) { 706 if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3)) 707 return -6; 708 if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6)) 709 return -7; 710 } 711 712 if (do_snb_cstates) 713 if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7)) 714 return -8; 715 716 /* collect package counters only for 1st core in package */ 717 if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) 718 return 0; 719 720 if (do_nhm_cstates) { 721 if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3)) 722 return -9; 723 if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6)) 724 return -10; 725 } 726 if (do_snb_cstates) { 727 if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2)) 728 return -11; 729 if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7)) 730 return -12; 731 } 732 return 0; 733 } 734 735 void print_verbose_header(void) 736 { 737 unsigned long long msr; 738 unsigned int ratio; 739 740 if (!do_nehalem_platform_info) 741 return; 742 743 get_msr(0, MSR_NEHALEM_PLATFORM_INFO, &msr); 744 745 if (verbose > 1) 746 fprintf(stderr, "MSR_NEHALEM_PLATFORM_INFO: 0x%llx\n", msr); 747 748 ratio = (msr >> 40) & 0xFF; 749 fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n", 750 ratio, bclk, ratio * bclk); 751 752 ratio = (msr >> 8) & 0xFF; 753 fprintf(stderr, "%d * %.0f = %.0f MHz TSC frequency\n", 754 ratio, bclk, ratio * bclk); 755 756 if (!do_ivt_turbo_ratio_limit) 757 goto print_nhm_turbo_ratio_limits; 758 759 get_msr(0, MSR_IVT_TURBO_RATIO_LIMIT, &msr); 760 761 if (verbose > 1) 762 fprintf(stderr, "MSR_IVT_TURBO_RATIO_LIMIT: 0x%llx\n", msr); 763 764 ratio = (msr >> 56) & 0xFF; 765 if (ratio) 766 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 16 active cores\n", 767 ratio, bclk, ratio * bclk); 768 769 ratio = (msr >> 48) & 0xFF; 770 if (ratio) 771 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 15 active cores\n", 772 ratio, bclk, ratio * bclk); 773 774 ratio = (msr >> 40) & 0xFF; 775 if (ratio) 776 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 14 active cores\n", 777 ratio, bclk, ratio * bclk); 778 779 ratio = (msr >> 32) & 0xFF; 780 if (ratio) 781 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 13 active cores\n", 782 ratio, bclk, ratio * bclk); 783 784 ratio = (msr >> 24) & 0xFF; 785 if (ratio) 786 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 12 active cores\n", 787 ratio, bclk, ratio * bclk); 788 789 ratio = (msr >> 16) & 0xFF; 790 if (ratio) 791 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 11 active cores\n", 792 ratio, bclk, ratio * bclk); 793 794 ratio = (msr >> 8) & 0xFF; 795 if (ratio) 796 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 10 active cores\n", 797 ratio, bclk, ratio * bclk); 798 799 ratio = (msr >> 0) & 0xFF; 800 if (ratio) 801 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 9 active cores\n", 802 ratio, bclk, ratio * bclk); 803 804 print_nhm_turbo_ratio_limits: 805 806 if (!do_nehalem_turbo_ratio_limit) 807 return; 808 809 get_msr(0, MSR_NEHALEM_TURBO_RATIO_LIMIT, &msr); 810 811 if (verbose > 1) 812 fprintf(stderr, "MSR_NEHALEM_TURBO_RATIO_LIMIT: 0x%llx\n", msr); 813 814 ratio = (msr >> 56) & 0xFF; 815 if (ratio) 816 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 8 active cores\n", 817 ratio, bclk, ratio * bclk); 818 819 ratio = (msr >> 48) & 0xFF; 820 if (ratio) 821 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 7 active cores\n", 822 ratio, bclk, ratio * bclk); 823 824 ratio = (msr >> 40) & 0xFF; 825 if (ratio) 826 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 6 active cores\n", 827 ratio, bclk, ratio * bclk); 828 829 ratio = (msr >> 32) & 0xFF; 830 if (ratio) 831 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 5 active cores\n", 832 ratio, bclk, ratio * bclk); 833 834 ratio = (msr >> 24) & 0xFF; 835 if (ratio) 836 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 4 active cores\n", 837 ratio, bclk, ratio * bclk); 838 839 ratio = (msr >> 16) & 0xFF; 840 if (ratio) 841 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 3 active cores\n", 842 ratio, bclk, ratio * bclk); 843 844 ratio = (msr >> 8) & 0xFF; 845 if (ratio) 846 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 2 active cores\n", 847 ratio, bclk, ratio * bclk); 848 849 ratio = (msr >> 0) & 0xFF; 850 if (ratio) 851 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 1 active cores\n", 852 ratio, bclk, ratio * bclk); 853 } 854 855 void free_all_buffers(void) 856 { 857 CPU_FREE(cpu_present_set); 858 cpu_present_set = NULL; 859 cpu_present_set = 0; 860 861 CPU_FREE(cpu_affinity_set); 862 cpu_affinity_set = NULL; 863 cpu_affinity_setsize = 0; 864 865 free(thread_even); 866 free(core_even); 867 free(package_even); 868 869 thread_even = NULL; 870 core_even = NULL; 871 package_even = NULL; 872 873 free(thread_odd); 874 free(core_odd); 875 free(package_odd); 876 877 thread_odd = NULL; 878 core_odd = NULL; 879 package_odd = NULL; 880 881 free(output_buffer); 882 output_buffer = NULL; 883 outp = NULL; 884 } 885 886 /* 887 * cpu_is_first_sibling_in_core(cpu) 888 * return 1 if given CPU is 1st HT sibling in the core 889 */ 890 int cpu_is_first_sibling_in_core(int cpu) 891 { 892 char path[64]; 893 FILE *filep; 894 int first_cpu; 895 896 sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu); 897 filep = fopen(path, "r"); 898 if (filep == NULL) { 899 perror(path); 900 exit(1); 901 } 902 fscanf(filep, "%d", &first_cpu); 903 fclose(filep); 904 return (cpu == first_cpu); 905 } 906 907 /* 908 * cpu_is_first_core_in_package(cpu) 909 * return 1 if given CPU is 1st core in package 910 */ 911 int cpu_is_first_core_in_package(int cpu) 912 { 913 char path[64]; 914 FILE *filep; 915 int first_cpu; 916 917 sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu); 918 filep = fopen(path, "r"); 919 if (filep == NULL) { 920 perror(path); 921 exit(1); 922 } 923 fscanf(filep, "%d", &first_cpu); 924 fclose(filep); 925 return (cpu == first_cpu); 926 } 927 928 int get_physical_package_id(int cpu) 929 { 930 char path[80]; 931 FILE *filep; 932 int pkg; 933 934 sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu); 935 filep = fopen(path, "r"); 936 if (filep == NULL) { 937 perror(path); 938 exit(1); 939 } 940 fscanf(filep, "%d", &pkg); 941 fclose(filep); 942 return pkg; 943 } 944 945 int get_core_id(int cpu) 946 { 947 char path[80]; 948 FILE *filep; 949 int core; 950 951 sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/core_id", cpu); 952 filep = fopen(path, "r"); 953 if (filep == NULL) { 954 perror(path); 955 exit(1); 956 } 957 fscanf(filep, "%d", &core); 958 fclose(filep); 959 return core; 960 } 961 962 int get_num_ht_siblings(int cpu) 963 { 964 char path[80]; 965 FILE *filep; 966 int sib1, sib2; 967 int matches; 968 char character; 969 970 sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu); 971 filep = fopen(path, "r"); 972 if (filep == NULL) { 973 perror(path); 974 exit(1); 975 } 976 /* 977 * file format: 978 * if a pair of number with a character between: 2 siblings (eg. 1-2, or 1,4) 979 * otherwinse 1 sibling (self). 980 */ 981 matches = fscanf(filep, "%d%c%d\n", &sib1, &character, &sib2); 982 983 fclose(filep); 984 985 if (matches == 3) 986 return 2; 987 else 988 return 1; 989 } 990 991 /* 992 * run func(thread, core, package) in topology order 993 * skip non-present cpus 994 */ 995 996 int for_all_cpus_2(int (func)(struct thread_data *, struct core_data *, 997 struct pkg_data *, struct thread_data *, struct core_data *, 998 struct pkg_data *), struct thread_data *thread_base, 999 struct core_data *core_base, struct pkg_data *pkg_base, 1000 struct thread_data *thread_base2, struct core_data *core_base2, 1001 struct pkg_data *pkg_base2) 1002 { 1003 int retval, pkg_no, core_no, thread_no; 1004 1005 for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { 1006 for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) { 1007 for (thread_no = 0; thread_no < 1008 topo.num_threads_per_core; ++thread_no) { 1009 struct thread_data *t, *t2; 1010 struct core_data *c, *c2; 1011 struct pkg_data *p, *p2; 1012 1013 t = GET_THREAD(thread_base, thread_no, core_no, pkg_no); 1014 1015 if (cpu_is_not_present(t->cpu_id)) 1016 continue; 1017 1018 t2 = GET_THREAD(thread_base2, thread_no, core_no, pkg_no); 1019 1020 c = GET_CORE(core_base, core_no, pkg_no); 1021 c2 = GET_CORE(core_base2, core_no, pkg_no); 1022 1023 p = GET_PKG(pkg_base, pkg_no); 1024 p2 = GET_PKG(pkg_base2, pkg_no); 1025 1026 retval = func(t, c, p, t2, c2, p2); 1027 if (retval) 1028 return retval; 1029 } 1030 } 1031 } 1032 return 0; 1033 } 1034 1035 /* 1036 * run func(cpu) on every cpu in /proc/stat 1037 * return max_cpu number 1038 */ 1039 int for_all_proc_cpus(int (func)(int)) 1040 { 1041 FILE *fp; 1042 int cpu_num; 1043 int retval; 1044 1045 fp = fopen(proc_stat, "r"); 1046 if (fp == NULL) { 1047 perror(proc_stat); 1048 exit(1); 1049 } 1050 1051 retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n"); 1052 if (retval != 0) { 1053 perror("/proc/stat format"); 1054 exit(1); 1055 } 1056 1057 while (1) { 1058 retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num); 1059 if (retval != 1) 1060 break; 1061 1062 retval = func(cpu_num); 1063 if (retval) { 1064 fclose(fp); 1065 return(retval); 1066 } 1067 } 1068 fclose(fp); 1069 return 0; 1070 } 1071 1072 void re_initialize(void) 1073 { 1074 free_all_buffers(); 1075 setup_all_buffers(); 1076 printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus); 1077 } 1078 1079 1080 /* 1081 * count_cpus() 1082 * remember the last one seen, it will be the max 1083 */ 1084 int count_cpus(int cpu) 1085 { 1086 if (topo.max_cpu_num < cpu) 1087 topo.max_cpu_num = cpu; 1088 1089 topo.num_cpus += 1; 1090 return 0; 1091 } 1092 int mark_cpu_present(int cpu) 1093 { 1094 CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set); 1095 return 0; 1096 } 1097 1098 void turbostat_loop() 1099 { 1100 int retval; 1101 1102 restart: 1103 retval = for_all_cpus(get_counters, EVEN_COUNTERS); 1104 if (retval) { 1105 re_initialize(); 1106 goto restart; 1107 } 1108 gettimeofday(&tv_even, (struct timezone *)NULL); 1109 1110 while (1) { 1111 if (for_all_proc_cpus(cpu_is_not_present)) { 1112 re_initialize(); 1113 goto restart; 1114 } 1115 sleep(interval_sec); 1116 retval = for_all_cpus(get_counters, ODD_COUNTERS); 1117 if (retval) { 1118 re_initialize(); 1119 goto restart; 1120 } 1121 gettimeofday(&tv_odd, (struct timezone *)NULL); 1122 timersub(&tv_odd, &tv_even, &tv_delta); 1123 for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS); 1124 compute_average(EVEN_COUNTERS); 1125 format_all_counters(EVEN_COUNTERS); 1126 flush_stdout(); 1127 sleep(interval_sec); 1128 retval = for_all_cpus(get_counters, EVEN_COUNTERS); 1129 if (retval) { 1130 re_initialize(); 1131 goto restart; 1132 } 1133 gettimeofday(&tv_even, (struct timezone *)NULL); 1134 timersub(&tv_even, &tv_odd, &tv_delta); 1135 for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS); 1136 compute_average(ODD_COUNTERS); 1137 format_all_counters(ODD_COUNTERS); 1138 flush_stdout(); 1139 } 1140 } 1141 1142 void check_dev_msr() 1143 { 1144 struct stat sb; 1145 1146 if (stat("/dev/cpu/0/msr", &sb)) { 1147 fprintf(stderr, "no /dev/cpu/0/msr\n"); 1148 fprintf(stderr, "Try \"# modprobe msr\"\n"); 1149 exit(-5); 1150 } 1151 } 1152 1153 void check_super_user() 1154 { 1155 if (getuid() != 0) { 1156 fprintf(stderr, "must be root\n"); 1157 exit(-6); 1158 } 1159 } 1160 1161 int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model) 1162 { 1163 if (!genuine_intel) 1164 return 0; 1165 1166 if (family != 6) 1167 return 0; 1168 1169 switch (model) { 1170 case 0x1A: /* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */ 1171 case 0x1E: /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */ 1172 case 0x1F: /* Core i7 and i5 Processor - Nehalem */ 1173 case 0x25: /* Westmere Client - Clarkdale, Arrandale */ 1174 case 0x2C: /* Westmere EP - Gulftown */ 1175 case 0x2A: /* SNB */ 1176 case 0x2D: /* SNB Xeon */ 1177 case 0x3A: /* IVB */ 1178 case 0x3E: /* IVB Xeon */ 1179 return 1; 1180 case 0x2E: /* Nehalem-EX Xeon - Beckton */ 1181 case 0x2F: /* Westmere-EX Xeon - Eagleton */ 1182 default: 1183 return 0; 1184 } 1185 } 1186 int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model) 1187 { 1188 if (!genuine_intel) 1189 return 0; 1190 1191 if (family != 6) 1192 return 0; 1193 1194 switch (model) { 1195 case 0x3E: /* IVB Xeon */ 1196 return 1; 1197 default: 1198 return 0; 1199 } 1200 } 1201 1202 1203 int is_snb(unsigned int family, unsigned int model) 1204 { 1205 if (!genuine_intel) 1206 return 0; 1207 1208 switch (model) { 1209 case 0x2A: 1210 case 0x2D: 1211 case 0x3A: /* IVB */ 1212 case 0x3E: /* IVB Xeon */ 1213 return 1; 1214 } 1215 return 0; 1216 } 1217 1218 double discover_bclk(unsigned int family, unsigned int model) 1219 { 1220 if (is_snb(family, model)) 1221 return 100.00; 1222 else 1223 return 133.33; 1224 } 1225 1226 void check_cpuid() 1227 { 1228 unsigned int eax, ebx, ecx, edx, max_level; 1229 unsigned int fms, family, model, stepping; 1230 1231 eax = ebx = ecx = edx = 0; 1232 1233 asm("cpuid" : "=a" (max_level), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0)); 1234 1235 if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e) 1236 genuine_intel = 1; 1237 1238 if (verbose) 1239 fprintf(stderr, "%.4s%.4s%.4s ", 1240 (char *)&ebx, (char *)&edx, (char *)&ecx); 1241 1242 asm("cpuid" : "=a" (fms), "=c" (ecx), "=d" (edx) : "a" (1) : "ebx"); 1243 family = (fms >> 8) & 0xf; 1244 model = (fms >> 4) & 0xf; 1245 stepping = fms & 0xf; 1246 if (family == 6 || family == 0xf) 1247 model += ((fms >> 16) & 0xf) << 4; 1248 1249 if (verbose) 1250 fprintf(stderr, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n", 1251 max_level, family, model, stepping, family, model, stepping); 1252 1253 if (!(edx & (1 << 5))) { 1254 fprintf(stderr, "CPUID: no MSR\n"); 1255 exit(1); 1256 } 1257 1258 /* 1259 * check max extended function levels of CPUID. 1260 * This is needed to check for invariant TSC. 1261 * This check is valid for both Intel and AMD. 1262 */ 1263 ebx = ecx = edx = 0; 1264 asm("cpuid" : "=a" (max_level), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x80000000)); 1265 1266 if (max_level < 0x80000007) { 1267 fprintf(stderr, "CPUID: no invariant TSC (max_level 0x%x)\n", max_level); 1268 exit(1); 1269 } 1270 1271 /* 1272 * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8 1273 * this check is valid for both Intel and AMD 1274 */ 1275 asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x80000007)); 1276 has_invariant_tsc = edx & (1 << 8); 1277 1278 if (!has_invariant_tsc) { 1279 fprintf(stderr, "No invariant TSC\n"); 1280 exit(1); 1281 } 1282 1283 /* 1284 * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0 1285 * this check is valid for both Intel and AMD 1286 */ 1287 1288 asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x6)); 1289 has_aperf = ecx & (1 << 0); 1290 if (!has_aperf) { 1291 fprintf(stderr, "No APERF MSR\n"); 1292 exit(1); 1293 } 1294 1295 do_nehalem_platform_info = genuine_intel && has_invariant_tsc; 1296 do_nhm_cstates = genuine_intel; /* all Intel w/ non-stop TSC have NHM counters */ 1297 do_snb_cstates = is_snb(family, model); 1298 bclk = discover_bclk(family, model); 1299 1300 do_nehalem_turbo_ratio_limit = has_nehalem_turbo_ratio_limit(family, model); 1301 do_ivt_turbo_ratio_limit = has_ivt_turbo_ratio_limit(family, model); 1302 } 1303 1304 1305 void usage() 1306 { 1307 fprintf(stderr, "%s: [-v][-p|-P|-S][-c MSR# | -s]][-C MSR#][-m MSR#][-M MSR#][-i interval_sec | command ...]\n", 1308 progname); 1309 exit(1); 1310 } 1311 1312 1313 /* 1314 * in /dev/cpu/ return success for names that are numbers 1315 * ie. filter out ".", "..", "microcode". 1316 */ 1317 int dir_filter(const struct dirent *dirp) 1318 { 1319 if (isdigit(dirp->d_name[0])) 1320 return 1; 1321 else 1322 return 0; 1323 } 1324 1325 int open_dev_cpu_msr(int dummy1) 1326 { 1327 return 0; 1328 } 1329 1330 void topology_probe() 1331 { 1332 int i; 1333 int max_core_id = 0; 1334 int max_package_id = 0; 1335 int max_siblings = 0; 1336 struct cpu_topology { 1337 int core_id; 1338 int physical_package_id; 1339 } *cpus; 1340 1341 /* Initialize num_cpus, max_cpu_num */ 1342 topo.num_cpus = 0; 1343 topo.max_cpu_num = 0; 1344 for_all_proc_cpus(count_cpus); 1345 if (!summary_only && topo.num_cpus > 1) 1346 show_cpu = 1; 1347 1348 if (verbose > 1) 1349 fprintf(stderr, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num); 1350 1351 cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology)); 1352 if (cpus == NULL) { 1353 perror("calloc cpus"); 1354 exit(1); 1355 } 1356 1357 /* 1358 * Allocate and initialize cpu_present_set 1359 */ 1360 cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1)); 1361 if (cpu_present_set == NULL) { 1362 perror("CPU_ALLOC"); 1363 exit(3); 1364 } 1365 cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 1366 CPU_ZERO_S(cpu_present_setsize, cpu_present_set); 1367 for_all_proc_cpus(mark_cpu_present); 1368 1369 /* 1370 * Allocate and initialize cpu_affinity_set 1371 */ 1372 cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1)); 1373 if (cpu_affinity_set == NULL) { 1374 perror("CPU_ALLOC"); 1375 exit(3); 1376 } 1377 cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); 1378 CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); 1379 1380 1381 /* 1382 * For online cpus 1383 * find max_core_id, max_package_id 1384 */ 1385 for (i = 0; i <= topo.max_cpu_num; ++i) { 1386 int siblings; 1387 1388 if (cpu_is_not_present(i)) { 1389 if (verbose > 1) 1390 fprintf(stderr, "cpu%d NOT PRESENT\n", i); 1391 continue; 1392 } 1393 cpus[i].core_id = get_core_id(i); 1394 if (cpus[i].core_id > max_core_id) 1395 max_core_id = cpus[i].core_id; 1396 1397 cpus[i].physical_package_id = get_physical_package_id(i); 1398 if (cpus[i].physical_package_id > max_package_id) 1399 max_package_id = cpus[i].physical_package_id; 1400 1401 siblings = get_num_ht_siblings(i); 1402 if (siblings > max_siblings) 1403 max_siblings = siblings; 1404 if (verbose > 1) 1405 fprintf(stderr, "cpu %d pkg %d core %d\n", 1406 i, cpus[i].physical_package_id, cpus[i].core_id); 1407 } 1408 topo.num_cores_per_pkg = max_core_id + 1; 1409 if (verbose > 1) 1410 fprintf(stderr, "max_core_id %d, sizing for %d cores per package\n", 1411 max_core_id, topo.num_cores_per_pkg); 1412 if (!summary_only && topo.num_cores_per_pkg > 1) 1413 show_core = 1; 1414 1415 topo.num_packages = max_package_id + 1; 1416 if (verbose > 1) 1417 fprintf(stderr, "max_package_id %d, sizing for %d packages\n", 1418 max_package_id, topo.num_packages); 1419 if (!summary_only && topo.num_packages > 1) 1420 show_pkg = 1; 1421 1422 topo.num_threads_per_core = max_siblings; 1423 if (verbose > 1) 1424 fprintf(stderr, "max_siblings %d\n", max_siblings); 1425 1426 free(cpus); 1427 } 1428 1429 void 1430 allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p) 1431 { 1432 int i; 1433 1434 *t = calloc(topo.num_threads_per_core * topo.num_cores_per_pkg * 1435 topo.num_packages, sizeof(struct thread_data)); 1436 if (*t == NULL) 1437 goto error; 1438 1439 for (i = 0; i < topo.num_threads_per_core * 1440 topo.num_cores_per_pkg * topo.num_packages; i++) 1441 (*t)[i].cpu_id = -1; 1442 1443 *c = calloc(topo.num_cores_per_pkg * topo.num_packages, 1444 sizeof(struct core_data)); 1445 if (*c == NULL) 1446 goto error; 1447 1448 for (i = 0; i < topo.num_cores_per_pkg * topo.num_packages; i++) 1449 (*c)[i].core_id = -1; 1450 1451 *p = calloc(topo.num_packages, sizeof(struct pkg_data)); 1452 if (*p == NULL) 1453 goto error; 1454 1455 for (i = 0; i < topo.num_packages; i++) 1456 (*p)[i].package_id = i; 1457 1458 return; 1459 error: 1460 perror("calloc counters"); 1461 exit(1); 1462 } 1463 /* 1464 * init_counter() 1465 * 1466 * set cpu_id, core_num, pkg_num 1467 * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE 1468 * 1469 * increment topo.num_cores when 1st core in pkg seen 1470 */ 1471 void init_counter(struct thread_data *thread_base, struct core_data *core_base, 1472 struct pkg_data *pkg_base, int thread_num, int core_num, 1473 int pkg_num, int cpu_id) 1474 { 1475 struct thread_data *t; 1476 struct core_data *c; 1477 struct pkg_data *p; 1478 1479 t = GET_THREAD(thread_base, thread_num, core_num, pkg_num); 1480 c = GET_CORE(core_base, core_num, pkg_num); 1481 p = GET_PKG(pkg_base, pkg_num); 1482 1483 t->cpu_id = cpu_id; 1484 if (thread_num == 0) { 1485 t->flags |= CPU_IS_FIRST_THREAD_IN_CORE; 1486 if (cpu_is_first_core_in_package(cpu_id)) 1487 t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE; 1488 } 1489 1490 c->core_id = core_num; 1491 p->package_id = pkg_num; 1492 } 1493 1494 1495 int initialize_counters(int cpu_id) 1496 { 1497 int my_thread_id, my_core_id, my_package_id; 1498 1499 my_package_id = get_physical_package_id(cpu_id); 1500 my_core_id = get_core_id(cpu_id); 1501 1502 if (cpu_is_first_sibling_in_core(cpu_id)) { 1503 my_thread_id = 0; 1504 topo.num_cores++; 1505 } else { 1506 my_thread_id = 1; 1507 } 1508 1509 init_counter(EVEN_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id); 1510 init_counter(ODD_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id); 1511 return 0; 1512 } 1513 1514 void allocate_output_buffer() 1515 { 1516 output_buffer = calloc(1, (1 + topo.num_cpus) * 128); 1517 outp = output_buffer; 1518 if (outp == NULL) { 1519 perror("calloc"); 1520 exit(-1); 1521 } 1522 } 1523 1524 void setup_all_buffers(void) 1525 { 1526 topology_probe(); 1527 allocate_counters(&thread_even, &core_even, &package_even); 1528 allocate_counters(&thread_odd, &core_odd, &package_odd); 1529 allocate_output_buffer(); 1530 for_all_proc_cpus(initialize_counters); 1531 } 1532 void turbostat_init() 1533 { 1534 check_cpuid(); 1535 1536 check_dev_msr(); 1537 check_super_user(); 1538 1539 setup_all_buffers(); 1540 1541 if (verbose) 1542 print_verbose_header(); 1543 } 1544 1545 int fork_it(char **argv) 1546 { 1547 pid_t child_pid; 1548 1549 for_all_cpus(get_counters, EVEN_COUNTERS); 1550 /* clear affinity side-effect of get_counters() */ 1551 sched_setaffinity(0, cpu_present_setsize, cpu_present_set); 1552 gettimeofday(&tv_even, (struct timezone *)NULL); 1553 1554 child_pid = fork(); 1555 if (!child_pid) { 1556 /* child */ 1557 execvp(argv[0], argv); 1558 } else { 1559 int status; 1560 1561 /* parent */ 1562 if (child_pid == -1) { 1563 perror("fork"); 1564 exit(1); 1565 } 1566 1567 signal(SIGINT, SIG_IGN); 1568 signal(SIGQUIT, SIG_IGN); 1569 if (waitpid(child_pid, &status, 0) == -1) { 1570 perror("wait"); 1571 exit(1); 1572 } 1573 } 1574 /* 1575 * n.b. fork_it() does not check for errors from for_all_cpus() 1576 * because re-starting is problematic when forking 1577 */ 1578 for_all_cpus(get_counters, ODD_COUNTERS); 1579 gettimeofday(&tv_odd, (struct timezone *)NULL); 1580 timersub(&tv_odd, &tv_even, &tv_delta); 1581 for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS); 1582 compute_average(EVEN_COUNTERS); 1583 format_all_counters(EVEN_COUNTERS); 1584 flush_stderr(); 1585 1586 fprintf(stderr, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0); 1587 1588 return 0; 1589 } 1590 1591 void cmdline(int argc, char **argv) 1592 { 1593 int opt; 1594 1595 progname = argv[0]; 1596 1597 while ((opt = getopt(argc, argv, "+pPSvisc:sC:m:M:")) != -1) { 1598 switch (opt) { 1599 case 'p': 1600 show_core_only++; 1601 break; 1602 case 'P': 1603 show_pkg_only++; 1604 break; 1605 case 'S': 1606 summary_only++; 1607 break; 1608 case 'v': 1609 verbose++; 1610 break; 1611 case 'i': 1612 interval_sec = atoi(optarg); 1613 break; 1614 case 'c': 1615 sscanf(optarg, "%x", &extra_delta_offset32); 1616 break; 1617 case 's': 1618 extra_delta_offset32 = 0x34; /* SMI counter */ 1619 break; 1620 case 'C': 1621 sscanf(optarg, "%x", &extra_delta_offset64); 1622 break; 1623 case 'm': 1624 sscanf(optarg, "%x", &extra_msr_offset32); 1625 break; 1626 case 'M': 1627 sscanf(optarg, "%x", &extra_msr_offset64); 1628 break; 1629 default: 1630 usage(); 1631 } 1632 } 1633 } 1634 1635 int main(int argc, char **argv) 1636 { 1637 cmdline(argc, argv); 1638 1639 if (verbose > 1) 1640 fprintf(stderr, "turbostat v2.1 October 6, 2012" 1641 " - Len Brown <lenb@kernel.org>\n"); 1642 1643 turbostat_init(); 1644 1645 /* 1646 * if any params left, it must be a command to fork 1647 */ 1648 if (argc - optind) 1649 return fork_it(argv + optind); 1650 else 1651 turbostat_loop(); 1652 1653 return 0; 1654 } 1655