#!/usr/perl5/bin/perl
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
#ident "%Z%%M% %I% %E% SMI"
#

require 5.6.1;
use strict;
use warnings;
use POSIX;
use File::Basename qw(basename);

# Name this program was invoked as; used as the syslog identity.
my $cmdname = basename($0);

my $using_scengen = 0;    # set to 1 when a scenario simulator is loaded
my $debug = 0;            # set to 1 by -D

# Sampling intervals, in seconds.  $sleeptime always holds one of the
# three values declared just above it.
my $normal_sleeptime = 10;         # interval between ordinary samples
my $idle_sleeptime = 45;           # interval while the system is idle
my $onecpu_sleeptime = 15 * 60;    # interval when only 1 CPU is on-line
my $sleeptime = $normal_sleeptime;

my $idle_intrload = 0.1;    # "idle" means interrupt load below 10%

my $timerange_toohi = 0.01;    # upper bound on getstat()'s timerange
my $statslen = 60;             # time period (in secs) to keep in @deltas


# Parse arguments. intrd does not accept any public arguments; the two
# arguments below are meant for testing purposes. -D generates a significant
# amount of syslog output. -S <filename> loads the filename as a perl
# script. That file is expected to implement a kstat "simulator" which
# can be used to feed information to intrd and verify intrd's responses.

# Consume @ARGV.  A lexical loop variable with an explicit defined() test is
# used so that an argument which happens to be false ("0" or "") does not end
# the scan early, and so that the global $_ is left alone.
while (defined(my $arg = shift @ARGV)) {
    if ($arg eq "-S" && @ARGV) {
        my $simfile = $ARGV[0];

        $using_scengen = 1;

        # Load the simulator.  The file name is still at the front of
        # @ARGV while the simulator runs; it is removed afterwards.
        my $loaded = do $simfile;

        # do FILE reports a compile error, or a die inside the file,
        # through $@.  Continuing with a half-loaded simulator would
        # only fail later in a less obvious place.
        die "$cmdname: error in simulator $simfile: $@" if $@;

        # An undefined result with $! set usually means that the file
        # could not be read.  This is only a warning because a
        # simulator may legitimately end with an undefined value.
        warn "$cmdname: could not read simulator $simfile: $!\n"
            if !defined($loaded) && $!;

        shift @ARGV;
    } elsif ($arg eq "-D") {
        $debug = 1;
    }
}

# Without a simulator, pull in the real kstat, interrupt and syslog
# interfaces at run time.  With a simulator, none of them are loaded here;
# presumably the simulator supplies its own replacements.
if ($using_scengen == 0) {
    require Sun::Solaris::Kstat;
    require Sun::Solaris::Intrs;
    Sun::Solaris::Intrs->import(qw(intrmove));
    require Sys::Syslog;
    Sys::Syslog->import();
    openlog($cmdname, 'pid', 'daemon');
    setlogmask(Sys::Syslog::LOG_UPTO($debug > 0 ?
        Sys::Syslog::LOG_DEBUG() : Sys::Syslog::LOG_INFO()));
}


my $asserted = 0;              # number of VERIFY failures seen so far
my $assert_level = 'debug';    # syslog level for assertion failures

#
# VERIFY($condition, $format, @args)
#   Soft assertion.  When $condition is numerically zero, the failure is
#   logged through syslog() at $assert_level using $format and @args, and
#   $asserted is incremented.  The program keeps running either way.
#
#   Returns true if the assertion FAILED and false if it held, so callers
#   can write "if (VERIFY(...)) { recover }".
#
sub VERIFY($@)
{
    my $bad = (shift() == 0);    # $_[0] == 0 means assert failed
    if ($bad) {
        my $msg = shift();
        syslog($assert_level, "VERIFY: $msg", @_);
        $asserted++;
    }
    return ($bad);
}




sub getstat($);
sub generate_delta($$);
sub compress_deltas($);
sub dumpdelta($);

sub goodness($);
sub imbalanced($$);
sub do_reconfig($);

sub goodness_cpu($$);            # private function
sub move_intr($$$$);             # private function
sub ivecs_to_string(@);          # private function
sub do_find_goal($$$$);          # private function
sub find_goal($$);               # private function
sub do_reconfig_cpu2cpu($$$$);   # private function
sub do_reconfig_cpu($$$);        # private function


#
# What follow are the basic data structures routines of intrd.
#
# getstat() is responsible for reading the kstats and generating a "stat" hash.
#
# generate_delta() is responsible for taking two "stat" hashes and creating
# a new "delta" hash that represents what has changed over time.
#
# compress_deltas() is responsible for taking a list of deltas and generating
# a single delta hash that encompasses all the time periods described by the
# deltas.


#
# getstat() is handed a reference to a kstat and generates a hash, returned
# by reference, containing all the fields from the kstats which we need.
# If it returns the scalar 0, it failed to gather the kstats, and the caller
# should react accordingly.
#
# getstat() is also responsible for maintaining a reasonable $sleeptime.
#
# {"snaptime"}            kstat's snaptime
# {<cpuid>}               one hash reference per online cpu
#  ->{"tot"}              == cpu:<cpuid>:sys:cpu_nsec_{user + kernel + idle}
#  ->{"crtime"}           == cpu:<cpuid>:sys:crtime
#  ->{"ivecs"}
#     ->{<cookie#>}       iterates over pci_intrs::<nexus>:cookie
#        ->{"time"}       == pci_intrs:<ivec#>:<nexus>:time (in nsec)
#        ->{"pil"}        == pci_intrs:<ivec#>:<nexus>:pil
#        ->{"crtime"}     == pci_intrs:<ivec#>:<nexus>:crtime
#        ->{"ino"}        == pci_intrs:<ivec#>:<nexus>:ino
#        ->{"buspath"}    == pci_intrs:<ivec#>:<nexus>:buspath
#        ->{"name"}       == pci_intrs:<ivec#>:<nexus>:name
#        ->{"ihs"}        == pci_intrs:<ivec#>:<nexus>:ihs
#

sub getstat($)
{
    my ($ks) = @_;

    my %stat = ();
    my $online_cpus = 0;

    # The kstats are not captured atomically, so every kstat carries its
    # own snaptime.  The earliest and latest snaptimes seen are tracked
    # here; their difference approximates how long the gathering took.
    # If that approaches the interval between samples, the sample is not
    # trustworthy.  -1 marks "nothing seen yet", since a snaptime is
    # always positive.

    my $minsnap = -1;
    my $maxsnap = $minsnap;

    # Pass 1: the cpus under cpu:<cpuid>::.  Only processors whose
    # cpu_info:<cpuid>:cpu_info<cpuid>:state is "on-line" are recorded;
    # the others are not accepting interrupts.

    while (my ($cpu, $cpst) = each %{$ks->{cpu}}) {
        my $infoname = "cpu_info$cpu";

        next if !exists($ks->{cpu_info}{$cpu}{$infoname}{state});
        next if ($ks->{cpu_info}{$cpu}{$infoname}{state} !~ /^on-line\0/);

        my $cpu_sys = $cpst->{sys};

        $stat{$cpu} = {
            tot    => ($cpu_sys->{cpu_nsec_idle} +
                       $cpu_sys->{cpu_nsec_user} +
                       $cpu_sys->{cpu_nsec_kernel}),
            crtime => $cpu_sys->{crtime},
            ivecs  => {},
        };

        my $snap = $cpu_sys->{snaptime};
        $minsnap = $snap if ($minsnap == -1 || $snap < $minsnap);
        $maxsnap = $snap if ($snap > $maxsnap);

        $online_cpus++;
    }

    # Interrupts cannot be balanced across a single CPU; back off to the
    # long sleep interval and report failure.

    if ($online_cpus <= 1) {
        $sleeptime = $onecpu_sleeptime;
        return (0);
    }

    # Pass 2: the interrupt vectors under pci_intrs.  Vectors bound to a
    # cpu that was not recorded above, and vectors whose type is
    # "disabled", are ignored.

    foreach my $inst (values(%{$ks->{pci_intrs}})) {
        my $intrcfg = (values(%$inst))[0];
        my $cpu = $intrcfg->{cpu};

        next unless exists $stat{$cpu};
        next if ($intrcfg->{type} =~ /^disabled\0/);

        my $snap = $intrcfg->{snaptime};
        if ($snap < $minsnap) {
            $minsnap = $snap;
        } elsif ($snap > $maxsnap) {
            $maxsnap = $snap;
        }

        # Vectors are keyed by "<buspath> <ino>"; several kstats may
        # share one such cookie.

        my $cookie = "$intrcfg->{buspath} $intrcfg->{ino}";
        my $shared = $stat{$cpu}{ivecs}{$cookie};

        if (defined($shared)) {
            # Fold this handler into the existing entry.

            $shared->{time} += $intrcfg->{time};
            $shared->{name} .= "/$intrcfg->{name}";

            # Keep the newest crtime of all the components, so
            # that generate_delta() notices when a handler has
            # joined this cookie since the previous getstat().

            $shared->{crtime} = $intrcfg->{crtime}
                if ($intrcfg->{crtime} > $shared->{crtime});
            $shared->{ihs}++;
            next;
        }

        $stat{$cpu}{ivecs}{$cookie} = {
            time    => $intrcfg->{time},
            crtime  => $intrcfg->{crtime},
            pil     => $intrcfg->{pil},
            ino     => $intrcfg->{ino},
            buspath => $intrcfg->{buspath},
            name    => $intrcfg->{name},
            ihs     => 1,
        };
    }

    # The timerange is the time spent gathering the kstats divided by
    # the current sleeptime.  A large timerange means that a delta built
    # from this stat would cover substantially different amounts of time
    # depending upon which interrupt or CPU is being examined.
    #
    # Anything above $timerange_toohi is discarded as garbage.  A stat
    # within the bound is treated as an instant in time rather than the
    # range it really spans, and minsnap is arbitrarily chosen as that
    # instant.

    $stat{snaptime} = $minsnap;

    if (($maxsnap - $minsnap) / $sleeptime > $timerange_toohi) {
        return (0);    # i.e. failure
    }
    return (\%stat);
}

#
# dumpdelta takes a reference to our "delta" structure:
# {"missing"}             "1" if the delta's component stats had inconsistencies
# {"minsnap"}             time of the first kstat snaptime used in this delta
# {"maxsnap"}             time of the last kstat snaptime used in this delta
# {"goodness"}            cost function applied to this delta
# {"avgintrload"}         avg of interrupt load across cpus, as a percentage
# {"avgintrnsec"}         avg number of nsec spent in interrupts, per cpu
# {<cpuid>}               iterates over on-line cpus
#  ->{"intrs"}            cpu's movable intr time (sum of "time" for each ivec)
#  ->{"tot"}              CPU load from all sources in nsec
#  ->{"bigintr"}          largest value of {ivecs}{<ivec#>}{time} from below
#  ->{"intrload"}         intrs / tot
#  ->{"ivecs"}
#     ->{<ivec#>}         iterates over ivecs for this cpu
#        ->{"time"}       time used by this interrupt (in nsec)
#        ->{"pil"}        pil level of this interrupt
#        ->{"ino"}        interrupt number
#        ->{"buspath"}    filename of the directory of the device's bus
#        ->{"name"}       device name
#        ->{"ihs"}        number of different handlers sharing this ino
#
# It prints out the delta structure in a nice, human readable display.
#

sub dumpdelta($)
{
    my ($delta) = @_;

    # Delta-wide fields first.

    syslog('debug', "dumpdelta:");
    if ($delta->{missing} > 0) {
        syslog('debug', " RECONFIGURATION IN DELTA");
    }
    syslog('debug', " avgintrload: %5.2f%% avgintrnsec: %d",
        $delta->{avgintrload} * 100, $delta->{avgintrnsec});
    if (exists($delta->{goodness})) {
        syslog('debug', " goodness: %5.2f%%", $delta->{goodness} * 100);
    }

    # Then one section per cpu.  Only the per-cpu entries of the delta
    # are references, which is how they are told apart from the
    # delta-wide fields.

    while (my ($cpuid, $percpu) = each %$delta) {
        next if !ref($percpu);

        my $total = $percpu->{tot};

        syslog('debug', " cpu %3d intr %7.3f%% (bigintr %7.3f%%)",
            $cpuid, $percpu->{intrload}*100,
            $percpu->{bigintr}*100/$total);
        syslog('debug', " intrs %d, bigintr %d",
            $percpu->{intrs}, $percpu->{bigintr});

        # And one line per interrupt vector on that cpu.  The name
        # is followed by the handler count when the vector is shared.

        while (my ($cookie, $vec) = each %{$percpu->{ivecs}}) {
            my $label;

            if ($vec->{ihs} > 1) {
                $label = "$vec->{name}($vec->{ihs})";
            } else {
                $label = $vec->{name};
            }
            syslog('debug', " %15s:\"%s\": %7.3f%% %d",
                $label, $cookie,
                $vec->{time}*100 / $total, $vec->{time});
        }
    }
}

#
# generate_delta($stat, $newstat) takes two stat references, returned from
# getstat(), and creates a %delta. %delta (not surprisingly) contains the
# same basic info as stat and newstat, but with the timestamps as deltas
# instead of absolute times. We return a reference to the delta.
#
# On any inconsistency between the two stats (time not ascending, a cpu or
# interrupt added, removed or re-created, a counter running backwards) the
# returned delta has {missing} set to 1 and is only partially filled in.
#

sub generate_delta($$)
{
    my ($stat, $newstat) = @_;

    my %delta = ();

    # Accumulators start at zero so that a stat with no cpu entries does
    # not trigger "uninitialized value" warnings further down.
    my $intrload = 0;
    my $intrnsec = 0;
    my $cpus = 0;

    # Take the worstcase timerange
    $delta{minsnap} = $stat->{snaptime};
    $delta{maxsnap} = $newstat->{snaptime};
    if (VERIFY($delta{maxsnap} > $delta{minsnap},
        "generate_delta: stats aren't ascending")) {
        $delta{missing} = 1;
        return (\%delta);
    }

    # if there are a different number of cpus in the stats, set missing
    # (stored as a plain 0/1, like every other assignment to {missing})

    $delta{missing} = (keys(%$stat) != keys(%$newstat)) ? 1 : 0;
    if (VERIFY($delta{missing} == 0,
        "generate_delta: number of CPUs changed")) {
        return (\%delta);
    }

    # scan through every cpu in %newstat and compare against %stat

    while (my ($cpu, $newcpst) = each %$newstat) {
        next if !ref($newcpst);    # skip non-cpuid fields

        # If %stat is missing a cpu from %newstat, then it was just
        # onlined. Mark missing.

        if (VERIFY(exists $stat->{$cpu} &&
            $stat->{$cpu}{crtime} == $newcpst->{crtime},
            "generate_delta: cpu $cpu changed")) {
            $delta{missing} = 1;
            return (\%delta);
        }
        my $cpst = $stat->{$cpu};
        $delta{$cpu}{tot} = $newcpst->{tot} - $cpst->{tot};
        if (VERIFY($delta{$cpu}{tot} >= 0,
            "generate_delta: deltas are not ascending?")) {
            $delta{missing} = 1;
            delete($delta{$cpu});
            return (\%delta);
        }
        # Avoid remote chance of division by zero
        $delta{$cpu}{tot} = 1 if $delta{$cpu}{tot} == 0;
        $delta{$cpu}{intrs} = 0;
        $delta{$cpu}{bigintr} = 0;

        my %ivecs = ();
        $delta{$cpu}{ivecs} = \%ivecs;

        # if the number of ivecs differs, set missing

        if (VERIFY(keys(%{$cpst->{ivecs}}) ==
            keys(%{$newcpst->{ivecs}}),
            "generate_delta: cpu $cpu has more/less".
            " interrupts")) {
            $delta{missing} = 1;
            return (\%delta);
        }

        while (my ($inum, $newivec) = each %{$newcpst->{ivecs}}) {
            # If this ivec doesn't exist in $stat, or if $stat
            # shows a different crtime, set missing.

            if (VERIFY(exists $cpst->{ivecs}{$inum} &&
                $cpst->{ivecs}{$inum}{crtime} ==
                $newivec->{crtime},
                "generate_delta: cpu $cpu inum $inum".
                " has changed")) {
                $delta{missing} = 1;
                return (\%delta);
            }
            my $ivec = $cpst->{ivecs}{$inum};

            # Create $delta{$cpu}{ivecs}{$inum}.

            my %dltivec = ();
            $delta{$cpu}{ivecs}{$inum} = \%dltivec;

            # calculate time used by this interrupt

            my $time = $newivec->{time} - $ivec->{time};
            if (VERIFY($time >= 0,
                "generate_delta: ivec went backwards?")) {
                $delta{missing} = 1;
                delete($delta{$cpu}{ivecs}{$inum});
                return (\%delta);
            }
            $delta{$cpu}{intrs} += $time;
            $dltivec{time} = $time;
            if ($time > $delta{$cpu}{bigintr}) {
                $delta{$cpu}{bigintr} = $time;
            }

            # Transfer over basic info about the kstat. We
            # don't have to worry about discrepancies between
            # ivec and newivec because we verified that both
            # have the same crtime.

            $dltivec{pil} = $newivec->{pil};
            $dltivec{ino} = $newivec->{ino};
            $dltivec{buspath} = $newivec->{buspath};
            $dltivec{name} = $newivec->{name};
            $dltivec{ihs} = $newivec->{ihs};
        }
        if ($delta{$cpu}{tot} < $delta{$cpu}{intrs}) {
            # Ewww! Hopefully just a rounding error.
            # Make something up.
            $delta{$cpu}{tot} = $delta{$cpu}{intrs};
        }
        $delta{$cpu}{intrload} =
            $delta{$cpu}{intrs} / $delta{$cpu}{tot};
        $intrload += $delta{$cpu}{intrload};
        $intrnsec += $delta{$cpu}{intrs};
        $cpus++;
    }
    if ($cpus > 0) {
        $delta{avgintrload} = $intrload / $cpus;
        $delta{avgintrnsec} = $intrnsec / $cpus;
    } else {
        $delta{avgintrload} = 0;
        $delta{avgintrnsec} = 0;
    }
    return (\%delta);
}


# compress_deltas takes a list of deltas, and returns a single new delta
# which represents the combined information from all the deltas. The deltas
# provided are assumed to be sequential in time. The resulting compressed
# delta looks just like any other delta. This new delta is also more accurate
# since its statistics are averaged over a longer period than any of the
# original deltas.
#
# It returns the scalar 0 if the list is empty or if any delta in it has
# {missing} set. As a side effect it sets $sleeptime to $idle_sleeptime when
# the busiest cpu's interrupt load is below $idle_intrload, and to
# $normal_sleeptime otherwise.

sub compress_deltas ($)
{
    my ($deltas) = @_;

    my %newdelta = ();
    my $intrs = 0;    # interrupt nsec summed over all cpus and deltas
    my $tot = 0;      # total nsec summed over all cpus and deltas
    my $cpus = 0;
    my $high_intrload = 0;

    if (VERIFY($#$deltas != -1,
        "compress_deltas: list of delta is empty?")) {
        return (0);
    }
    $newdelta{minsnap} = $deltas->[0]{minsnap};
    $newdelta{maxsnap} = $deltas->[$#$deltas]{maxsnap};
    $newdelta{missing} = 0;

    foreach my $delta (@$deltas) {
        if (VERIFY($delta->{missing} == 0,
            "compressing bad deltas?")) {
            return (0);
        }
        while (my ($cpuid, $cpu) = each %$delta) {
            next if !ref($cpu);

            $intrs += $cpu->{intrs};
            $tot += $cpu->{tot};
            $newdelta{$cpuid}{intrs} += $cpu->{intrs};
            $newdelta{$cpuid}{tot} += $cpu->{tot};
            if (!exists $newdelta{$cpuid}{ivecs}) {
                my %ivecs = ();
                $newdelta{$cpuid}{ivecs} = \%ivecs;
            }
            while (my ($inum, $ivec) = each %{$cpu->{ivecs}}) {
                my $newivecs = $newdelta{$cpuid}{ivecs};
                $newivecs->{$inum}{time} += $ivec->{time};
                $newivecs->{$inum}{pil} = $ivec->{pil};
                $newivecs->{$inum}{ino} = $ivec->{ino};
                $newivecs->{$inum}{buspath} = $ivec->{buspath};
                $newivecs->{$inum}{name} = $ivec->{name};
                $newivecs->{$inum}{ihs} = $ivec->{ihs};
            }
        }
    }
    foreach my $cpu (values(%newdelta)) {
        next if !ref($cpu);    # ignore non-cpu fields
        $cpus++;

        my $bigintr = 0;
        foreach my $ivec (values(%{$cpu->{ivecs}})) {
            if ($ivec->{time} > $bigintr) {
                $bigintr = $ivec->{time};
            }
        }
        $cpu->{bigintr} = $bigintr;

        # Clamp the denominator BEFORE dividing by it; a guard placed
        # after the division cannot prevent a division by zero.
        $cpu->{tot} = 1 if $cpu->{tot} <= 0;
        $cpu->{intrload} = $cpu->{intrs} / $cpu->{tot};
        if ($high_intrload < $cpu->{intrload}) {
            $high_intrload = $cpu->{intrload};
        }
    }
    if ($cpus == 0) {
        $newdelta{avgintrnsec} = 0;
        $newdelta{avgintrload} = 0;
    } else {
        $newdelta{avgintrnsec} = $intrs / $cpus;
        # Same protection for the delta-wide ratio.
        $newdelta{avgintrload} = ($tot > 0) ? $intrs / $tot : 0;
    }
    $sleeptime = ($high_intrload < $idle_intrload) ? $idle_sleeptime :
        $normal_sleeptime;
    return (\%newdelta);
}





# What follow are the core functions responsible for examining the deltas
# generated above and deciding what to do about them.
#
# goodness() and its helper goodness_cpu() return a heuristic which describes
# how good (or bad) the current interrupt balance is. The value returned will
# be between 0 and 1, with 0 representing maximum goodness, and 1 representing
# maximum badness.
#
# imbalanced() compares a current and historical value of goodness, and
# determines if there has been enough change to warrant evaluating a
# reconfiguration of the interrupts
#
# do_reconfig(), and its helpers, do_reconfig_cpu(), do_reconfig_cpu2cpu(),
# find_goal(), do_find_goal(), and move_intr(), are responsible for examining
# a delta and determining the best possible assignment of interrupts to CPUs.
#
# It is important that do_reconfig() be in alignment with goodness().
# If do_reconfig were to generate a new interrupt distribution that worsened
# goodness, we could get into a pathological loop with intrd fighting itself,
# constantly deciding that things are imbalanced, and then changing things
# only to make them worse.



# An interrupt load above $goodness_unsafe_load on a cpu with more than one
# interrupt source is treated as the worst case.
# A reconfiguration must improve goodness by at least $goodness_mindelta.

my $goodness_unsafe_load = 0.9;
my $goodness_mindelta = 0.1;

# goodness(%delta) examines a delta and returns its "goodness". goodness will
# be between 0 (best) and 1 (major bad). goodness is determined by evaluating
# the goodness of each individual cpu, and returning the worst case. This
# helps on systems with many CPUs, where a single pathological CPU might
# otherwise be ignored because the average was OK.
#
# To calculate the goodness of an individual CPU, we start by looking at its
# load due to interrupts. If the load is above a certain high threshold and
# there is more than one interrupt assigned to this CPU, we set goodness
# to worst-case. If the load is below the average interrupt load of all CPUs,
# then we return best-case, since what's to complain about?
#
# Otherwise we look at how much the load is above the average, and return
# that as the goodness, with one caveat: we never return more than the CPU's
# interrupt load ignoring its largest single interrupt source. This is
# because a CPU with one high-load interrupt, and no other interrupts, is
# perfectly balanced. Nothing can be done to improve the situation, and thus
# it is perfectly balanced even if the interrupt's load is 100%.

sub goodness($)
{
    my ($delta) = @_;

    # A delta with inconsistencies is as bad as it gets.
    if ($delta->{missing} > 0) {
        return (1);
    }

    my $worst = 0;

    foreach my $percpu (values(%$delta)) {
        next if !ref($percpu);    # only per-cpu entries are references

        my $score = goodness_cpu($percpu, $delta->{avgintrload});

        if (VERIFY($score >= 0 && $score <= 1,
            "goodness: cpu goodness out of range?")) {
            dumpdelta($delta);
            return (1);
        }

        # Nothing can be worse than 1, so stop looking.
        return (1) if ($score == 1);

        $worst = $score if ($score > $worst);
    }
    return ($worst);
}

#
# goodness_cpu($cpu, $avgintrload) scores a single per-cpu delta entry
# against the average interrupt load: 0 is best, 1 is worst.
#
sub goodness_cpu($$)    # private function
{
    my ($cpu, $avgintrload) = @_;

    my $load = $cpu->{intrs} / $cpu->{tot};

    # A cpu below the average load has nothing to complain about.
    if ($load < $avgintrload) {
        return (0);
    }

    # The load due to interrupts once the single biggest interrupt is
    # set aside.  This is the most that offloading interrupts from this
    # cpu could ever gain.

    my $load_no_bigintr = ($cpu->{intrs} - $cpu->{bigintr}) / $cpu->{tot};

    # A cpu saturated with interrupt handling that has more than one
    # interrupt source is a major imbalance: the other interrupts could
    # be starved if they are of a lower pil.  The worst possible value
    # is returned, which effectively contaminates the entire delta.

    my $cnt = keys(%{$cpu->{ivecs}});

    if ($load > $goodness_unsafe_load && $cnt > 1) {
        return (1);
    }

    # Otherwise the score is the excess over the average, capped at
    # what could actually be moved away.

    my $excess = $load - $avgintrload;

    return ($excess > $load_no_bigintr ? $load_no_bigintr : $excess);
}


# imbalanced() is used by the main routine to determine if the goodness
# has shifted far enough from our last baseline to warrant a reassignment
# of interrupts.
# A very high goodness indicates that a CPU is way out of
# whack. If the goodness has varied too much since the baseline, then
# perhaps a reconfiguration is worth considering.

sub imbalanced ($$)
{
    my ($goodness, $baseline) = @_;

    # Imbalanced means either outright pathological, or drifted from the
    # baseline by more than $goodness_mindelta in either direction.

    if ($goodness > .50 ||
        abs($goodness - $baseline) > $goodness_mindelta) {
        return (1);
    }
    return (0);
}

# do_reconfig(), do_reconfig_cpu(), and do_reconfig_cpu2cpu(), are the
# decision-making functions responsible for generating a new interrupt
# distribution. They are designed with the definition of goodness() in
# mind, i.e. they use the same definition of "good distribution" as does
# goodness().
#
# do_reconfig() is responsible for deciding whether a redistribution is
# actually warranted. If the goodness is already pretty good, it doesn't
# waste the CPU time to generate a new distribution. If it
# calculates a new distribution and finds that it is not sufficiently
# improved from the prior distribution, it will not do the redistribution,
# mainly to avoid the disruption to system performance caused by
# rejuggling interrupts.
#
# Its main loop works by going through a list of cpus sorted from
# highest to lowest interrupt load. It removes the highest-load cpus
# one at a time and hands them off to do_reconfig_cpu(). This function
# then re-sorts the remaining CPUs from lowest to highest interrupt load,
# and one at a time attempts to rejuggle interrupts between the original
# high-load CPU and the low-load CPU. Rejuggling on a high-load CPU is
# considered finished as soon as its interrupt load is within
# $goodness_mindelta of the average interrupt load. Such a CPU will have
# a goodness of below the $goodness_mindelta threshold.

#
# move_intr(\%delta, $inum, $oldcpu, $newcpu)
#   used by reconfiguration code to move an interrupt between cpus within
#   a delta. This manipulates data structures, and does not actually move
#   the interrupt on the running system.
#
#   Both cpu entries have {intrs}, {intrload}, {ivecs} and {bigintr} brought
#   up to date, and the moved ivec has {nowcpu} set to the new cpu id.
#
sub move_intr($$$$)    # private function
{
    my ($delta, $inum, $oldcpuid, $newcpuid) = @_;

    my $ivec = $delta->{$oldcpuid}{ivecs}{$inum};

    # Remove ivec from old cpu

    my $oldcpu = $delta->{$oldcpuid};
    $oldcpu->{intrs} -= $ivec->{time};
    $oldcpu->{intrload} = $oldcpu->{intrs} / $oldcpu->{tot};
    delete($oldcpu->{ivecs}{$inum});

    VERIFY($oldcpu->{intrs} >= 0, "move_intr: intr's time > total time?");
    VERIFY($ivec->{time} <= $oldcpu->{bigintr},
        "move_intr: intr's time > bigintr?");

    # If the interrupt just removed was the old cpu's biggest one, rescan
    # the interrupts that remain there to find the new biggest.

    if ($ivec->{time} >= $oldcpu->{bigintr}) {
        my $bigtime = 0;

        foreach my $ivec (values(%{$oldcpu->{ivecs}})) {
            $bigtime = $ivec->{time} if $ivec->{time} > $bigtime;
        }
        $oldcpu->{bigintr} = $bigtime;
    }

    # Add ivec onto new cpu

    my $newcpu = $delta->{$newcpuid};

    $ivec->{nowcpu} = $newcpuid;
    $newcpu->{intrs} += $ivec->{time};
    $newcpu->{intrload} = $newcpu->{intrs} / $newcpu->{tot};
    $newcpu->{ivecs}{$inum} = $ivec;

    $newcpu->{bigintr} = $ivec->{time}
        if $ivec->{time} > $newcpu->{bigintr};
}

#
# move_intr_check(\%delta, $oldcpuid, $newcpuid)
#   sanity check after interrupts have been moved: asserts (via VERIFY)
#   that neither cpu has more interrupt time than total time. It only
#   logs; it changes nothing in the delta.
#
sub move_intr_check($$$)    # private function
{
    my ($delta, $oldcpuid, $newcpuid) = @_;

    VERIFY($delta->{$oldcpuid}{tot} >= $delta->{$oldcpuid}{intrs},
        "Moved interrupts left 100+%% load on src cpu");
    VERIFY($delta->{$newcpuid}{tot} >= $delta->{$newcpuid}{intrs},
        "Moved interrupts left 100+%% load on tgt cpu");
}

#
# ivecs_to_string(@ivecs)
#   returns the {inum} of every ivec passed in, concatenated into one
#   string with each value preceded by a space.
#
sub ivecs_to_string(@)    # private function
{
    my $str = "";
    foreach my $ivec (@_) {
        $str = "$str $ivec->{inum}";
    }
    return ($str);
}


#
# do_reconfig(\%delta)
#   works out a better interrupt distribution inside the delta and, if it
#   is enough of an improvement, applies it with intrmove().
#
#   Returns  0 if nothing was moved (goodness already good enough, or the
#              improvement found was too small),
#            1 if the new assignments were applied,
#           -1 if the delta has {missing} set, or if at least one
#              intrmove() call failed.
#
#   The delta is modified in place (interrupts are moved between its cpu
#   entries, and {origcpu}, {nowcpu} and {inum} are added to every ivec)
#   even when 0 is returned after a distribution was calculated.
#
sub do_reconfig($)
{
    my ($delta) = @_;

    my $goodness = $delta->{goodness};

    # We can't improve goodness to better than 0. We should stop here
    # if, even if we achieve a goodness of 0, the improvement is still
    # too small to merit the action.

    if ($goodness - 0 < $goodness_mindelta) {
        syslog('debug', "goodness good enough, don't reconfig");
        return (0);
    }

    syslog('notice', "Optimizing interrupt assignments");

    if (VERIFY ($delta->{missing} == 0, "RECONFIG Aborted: should not ".
        "have a delta with missing")) {
        return (-1);
    }

    # Make a list of all cpuids, and also add some extra information
    # to the ivec structures.

    my @cpusortlist = ();

    while (my ($cpuid, $cpu) = each %$delta) {
        next if !ref($cpu);    # skip non-cpu entries

        push(@cpusortlist, $cpuid);
        while (my ($inum, $ivec) = each %{$cpu->{ivecs}}) {
            $ivec->{origcpu} = $cpuid;
            $ivec->{nowcpu} = $cpuid;
            $ivec->{inum} = $inum;
        }
    }

    # Sort the list of CPUs from highest to lowest interrupt load.
    # Remove the top CPU from that list and attempt to redistribute
    # its interrupts. If the CPU has a goodness below a threshold,
    # just ignore the CPU and move to the next one. If the CPU's
    # load falls below the average load plus that same threshold,
    # then there are no CPUs left worth reconfiguring, and we're done.

    while (@cpusortlist) {
        # Re-sort cpusortlist each time, since do_reconfig_cpu can
        # move interrupts around.

        @cpusortlist =
            sort({$delta->{$b}{intrload} <=> $delta->{$a}{intrload}}
            @cpusortlist);

        my $cpu = shift(@cpusortlist);
        if (($delta->{$cpu}{intrload} <= $goodness_unsafe_load) &&
            ($delta->{$cpu}{intrload} <=
            $delta->{avgintrload} + $goodness_mindelta)) {
            syslog('debug', "finished reconfig: cpu $cpu load ".
                "$delta->{$cpu}{intrload} avgload ".
                "$delta->{avgintrload}");
            last;
        }
        if (goodness_cpu($delta->{$cpu}, $delta->{avgintrload}) <
            $goodness_mindelta) {
            next;
        }
        do_reconfig_cpu($delta, \@cpusortlist, $cpu);
    }

    # How good a job did we do? If the improvement was minimal, and
    # our goodness wasn't pathological (and thus needing any help it
    # can get), then don't bother moving the interrupts.

    my $newgoodness = goodness($delta);
    VERIFY($newgoodness <= $goodness,
        "reconfig: result has worse goodness?");

    if (($goodness != 1 || $newgoodness == 1) &&
        $goodness - $newgoodness < $goodness_mindelta) {
        syslog('debug', "goodness already near optimum, ".
            "don't reconfig");
        return (0);
    }
    syslog('debug', "goodness %5.2f%% --> %5.2f%%", $goodness*100,
        $newgoodness*100);

    # Time to move those interrupts!

    my $ret = 1;
    my $warned = 0;    # so that the warning below is logged only once
    while (my ($cpuid, $cpu) = each %$delta) {
        # Skip any key that is not purely digits, i.e. the
        # delta-wide fields.
        next if $cpuid =~ /\D/;
        while (my ($inum, $ivec) = each %{$cpu->{ivecs}}) {
            # Interrupts still on the cpu they started on need
            # no action.
            next if ($ivec->{origcpu} == $cpuid);

            if (!intrmove($ivec->{buspath}, $ivec->{ino},
                $cpuid)) {
                syslog('warning', "Unable to move interrupts")
                    if $warned++ == 0;
                syslog('debug', "Unable to move buspath ".
                    "$ivec->{buspath} ino $ivec->{ino} to ".
                    "cpu $cpuid");
                $ret = -1;
            }
        }
    }

    # NOTE(review): this notice is logged even when $ret is -1, i.e. when
    # one or more intrmove() calls failed - confirm that is intended.
    syslog('notice', "Interrupt assignments optimized");
    return ($ret);
}

#
# do_reconfig_cpu(\%delta, \@cpusortlist, $oldcpuid)
#   tries to lower the interrupt load of $oldcpuid by exchanging interrupts
#   with the cpus named in @cpusortlist, least loaded first. The caller's
#   list is not modified; a reversed copy is consumed instead.
#
sub do_reconfig_cpu($$$)    # private function
{
    my ($delta, $cpusortlist, $oldcpuid) = @_;

    # We have been asked to rejuggle interrupts between $oldcpuid and
    # other CPUs found on $cpusortlist so as to improve the load on
    # $oldcpuid. We reverse $cpusortlist to get our own copy of the
    # list, sorted from lowest to highest interrupt load. One at a
    # time, shift a CPU off of this list of CPUs, and attempt to
    # rejuggle interrupts between the two CPUs. Don't do this if the
    # other CPU has a higher load than oldcpuid. We're done rejuggling
    # once $oldcpuid's goodness falls below a threshold.

    syslog('debug', "reconfiguring $oldcpuid");

    my $cpu = $delta->{$oldcpuid};
    my $avgintrload = $delta->{avgintrload};

    my @cputargetlist = reverse(@$cpusortlist);    # make a copy of the list
    while ($#cputargetlist != -1) {
        last if goodness_cpu($cpu, $avgintrload) < $goodness_mindelta;

        my $tgtcpuid = shift(@cputargetlist);
        my $tgt = $delta->{$tgtcpuid};
        my $load = $cpu->{intrload};
        my $tgtload = $tgt->{intrload};
        last if $tgtload > $load;
        do_reconfig_cpu2cpu($delta, $oldcpuid, $tgtcpuid, $load);
    }
}

#
# do_reconfig_cpu2cpu(\%delta, $srccpuid, $tgtcpuid, $srcload)
#   redistributes the interrupts currently on $srccpuid and $tgtcpuid
#   between those two cpus inside the delta. $srcload is the interrupt load
#   of $srccpuid before the exchange; it is used only by the final sanity
#   check.
#
sub do_reconfig_cpu2cpu($$$$)    # private function
{
    my ($delta, $srccpuid, $tgtcpuid, $srcload) = @_;

    # We've been asked to consider interrupt juggling between srccpuid
    # (with a high interrupt load) and tgtcpuid (with a lower interrupt
    # load). First, make a single list with all of the ivecs from both
    # CPUs, and sort the list from highest to lowest load.

    syslog('debug', "exchanging intrs between $srccpuid and $tgtcpuid");

    # Gather together all the ivecs and sort by load

    my @ivecs = (values(%{$delta->{$srccpuid}{ivecs}}),
        values(%{$delta->{$tgtcpuid}{ivecs}}));
    return if $#ivecs == -1;

    @ivecs = sort({$b->{time} <=> $a->{time}} @ivecs);

    # Our "goal" load for srccpuid is the average load across all CPUs.
    # find_goal() will determine the optimum selection of the
    # available interrupts which comes closest to this goal without
    # falling below the goal.

    my $goal = $delta->{avgintrnsec};

    # We know that the interrupt load on tgtcpuid is less than that on
    # srccpuid, but its load could still be above avgintrnsec. Don't
    # choose a goal which would bring srccpuid below the load on tgtcpuid.

    my $avgnsec =
        ($delta->{$srccpuid}{intrs} + $delta->{$tgtcpuid}{intrs}) / 2;
    if ($goal < $avgnsec) {
        $goal = $avgnsec;
    }

    # If the largest of the interrupts is on srccpuid, leave it there.
    # This can help minimize the disruption caused by moving interrupts.

    if ($ivecs[0]->{origcpu} == $srccpuid) {
        syslog('debug', "Keeping $ivecs[0]->{inum} on $srccpuid");
        $goal -= $ivecs[0]->{time};
        shift(@ivecs);
    }

    # NOTE(review): @ivecs can be empty at this point if the interrupt
    # kept above was the only one on either cpu; find_goal() is not
    # visible here, so its handling of an empty list should be confirmed.
    syslog('debug', "GOAL: inums should total $goal");
    find_goal(\@ivecs, $goal);

    # find_goal() returned its results to us by setting $ivec->{goal} if
    # the ivec should be on srccpuid, or clearing it for tgtcpuid.
    # Call move_intr() to update our $delta with the new results.

    foreach my $ivec (@ivecs) {
        syslog('debug', "ivec $ivec->{inum} goal $ivec->{goal}");
        VERIFY($ivec->{nowcpu} == $srccpuid ||
            $ivec->{nowcpu} == $tgtcpuid, "cpu2cpu found an ".
            "interrupt not currently on src or tgt cpu");

        if ($ivec->{goal} && $ivec->{nowcpu} != $srccpuid) {
            move_intr($delta, $ivec->{inum}, $ivec->{nowcpu},
                $srccpuid);
        } elsif ($ivec->{goal} == 0 && $ivec->{nowcpu} != $tgtcpuid) {
            move_intr($delta, $ivec->{inum}, $ivec->{nowcpu},
                $tgtcpuid);
        }
    }
    move_intr_check($delta, $srccpuid, $tgtcpuid);    # asserts

    my $newload = $delta->{$srccpuid}{intrs} / $delta->{$srccpuid}{tot};
    VERIFY($newload <= $srcload && $newload > $delta->{avgintrload},
        "cpu2cpu: new load didn't end up in expected range");
}


# find_goal() and its helper do_find_goal() are used to find the best
# combination of interrupts in order to generate a load that is as close
# as possible to a goal load without falling below that goal.
# Before returning
# to its caller, find_goal() sets a new value in the hash of each interrupt,
# {goal}, which if set signifies that this interrupt is one of the interrupts
# identified as part of the set of interrupts which best meet the goal.
#
# The arguments to find_goal are a list of ivecs (hash references), sorted
# by descending {time}, and the goal load. The goal is relative to {time}.
# The best fit is determined by performing a depth-first search. do_find_goal
# is the recursive subroutine which carries out the search.
#
# It is passed an index as an argument, originally 0. On a given invocation,
# it is only to consider interrupts in the ivecs array starting at that index.
# It then considers two possibilities:
#	1) What is the best goal-fit if I include ivecs[index]?
#	2) What is the best goal-fit if I exclude ivecs[index]?
# To determine case 1, it subtracts the load of ivecs[index] from the goal,
# and calls itself recursively with that new goal and index + 1.
# To determine case 2, it calls itself recursively with the same goal and
# index + 1.
#
# It then compares the two results, decides which one best meets the goals,
# and returns the result. The return value is the best-fit's interrupt load,
# followed by a list of all the interrupts which make up that best-fit.
#
# As an optimization, a second array loads[] is created which mirrors ivecs[].
# loads[i] will equal the total loads of all ivecs[i..$#ivecs]. This is used
# by do_find_goal to avoid recursing all the way to the end of the ivecs
# array if including all remaining interrupts will still leave the best-fit
# at or below the goal load. If so, it then includes all remaining interrupts
# on the goal list and returns.
#
# find_goal($ivecs, $goal)
#	$ivecs	reference to a list of ivec hashes, sorted by descending {time}
#	$goal	target load, in the same units as {time}
#
# Sets $ivec->{goal} to 1 on every ivec chosen for the source CPU and to 0
# on every other ivec of @$ivecs. Returns nothing useful.
#
sub find_goal($$)	# private function
{
	my ($ivecs, $goal) = @_;

	my @goals;
	my $load;
	my $ivec;

	if ($goal <= 0) {
		@goals = ();	# the empty set will best meet the goal
	} else {
		syslog('debug', "finding goal from intrs %s",
		    ivecs_to_string(@$ivecs));

		# Generate @loads array: $loads[i] is the summed {time} of
		# ivecs i through the end of the list.

		my $tot = 0;
		foreach $ivec (@$ivecs) {
			$tot += $ivec->{time};
		}
		my @loads = ();
		foreach $ivec (@$ivecs) {
			push(@loads, $tot);
			$tot -= $ivec->{time};
		}
		($load, @goals) = do_find_goal($ivecs, \@loads, $goal, 0);
		VERIFY($load >= $goal, "find_goal didn't meet goals");
	}
	syslog('debug', "goals found: %s", ivecs_to_string(@goals));

	# Set or clear $ivec->{goal} for each ivec, based on returned @goals.
	# @goals lists its members in the same order as @$ivecs, so a single
	# pass comparing against the head of @goals is sufficient.

	foreach $ivec (@$ivecs) {
		if ($#goals > -1 && $ivec == $goals[0]) {
			syslog('debug', "inum $ivec->{inum} on source cpu");
			$ivec->{goal} = 1;
			shift(@goals);
		} else {
			syslog('debug', "inum $ivec->{inum} on target cpu");
			$ivec->{goal} = 0;
		}
	}
}


#
# do_find_goal($ivecs, $loads, $goal, $idx)
#	$ivecs	reference to the list of ivec hashes being searched
#	$loads	reference to the list of suffix sums built by find_goal()
#	$goal	load still to be reached using ivecs $idx and later
#	$idx	index of the first ivec this invocation may consider
#
# Returns a list: the load of the best selection found, followed by the
# ivecs which make up that selection. Past the end of the list it returns
# just (0).
#
sub do_find_goal($$$$)	# private function
{
	my ($ivecs, $loads, $goal, $idx) = @_;

	if ($idx > $#{$ivecs}) {
		return (0);
	}
	syslog('debug', "$idx: finding goal $goal inum $ivecs->[$idx]{inum}");

	my $load = $ivecs->[$idx]{time};
	my @goals_with = ();
	my @goals_without = ();
	my ($with, $without);

	# If we include all remaining items and we're still below goal,
	# stop here. We can just return a result that includes $idx and all
	# subsequent ivecs. Since this will still be below goal, there's
	# nothing better to be done.

	if ($loads->[$idx] <= $goal) {
		syslog('debug',
		    "$idx: including all remaining intrs %s with load %d",
		    ivecs_to_string(@$ivecs[$idx .. $#{$ivecs}]),
		    $loads->[$idx]);
		return ($loads->[$idx], @$ivecs[$idx .. $#{$ivecs}]);
	}

	# Evaluate the "with" option, i.e. the best matching goal which
	# includes $ivecs->[$idx]. If idx's load is more than our goal load,
	# stop here. Once we're above the goal, there is no need to consider
	# further interrupts since they'll only take us further from the goal.

	if ($goal <= $load) {
		$with = $load;	# stop here
	} else {
		($with, @goals_with) =
		    do_find_goal($ivecs, $loads, $goal - $load, $idx + 1);
		$with += $load;
	}
	syslog('debug', "$idx: with-load $with intrs %s",
	    ivecs_to_string($ivecs->[$idx], @goals_with));

	# Evaluate the "without" option, i.e. the best matching goal which
	# excludes $ivecs->[$idx].

	($without, @goals_without) =
	    do_find_goal($ivecs, $loads, $goal, $idx + 1);
	syslog('debug', "$idx: without-load $without intrs %s",
	    ivecs_to_string(@goals_without));

	# We now have our "with" and "without" options, and we choose which
	# best fits the goal. If one is greater than goal and the other is
	# below goal, we choose the one that is greater. If they are both
	# below goal, then we choose the one that is greater. If they are
	# both above goal, then we choose the smaller.

	my $which;	# 0 == with, 1 == without
	if ($with >= $goal && $without < $goal) {
		$which = 0;
	} elsif ($with < $goal && $without >= $goal) {
		$which = 1;
	} elsif ($with >= $goal && $without >= $goal) {
		$which = ($without < $with);
	} else {
		$which = ($without > $with);
	}

	# Return the load of our best case scenario, followed by all the ivecs
	# which compose that goal.

	if ($which == 1) {	# without
		syslog('debug', "$idx: going without");
		return ($without, @goals_without);
	} else {
		syslog('debug', "$idx: going with");
		return ($with, $ivecs->[$idx], @goals_with);
	}
	# Not reached
}




#
# Main program: sample the kstats, maintain a window of recent deltas, and
# reconfigure interrupt assignments when the window says it is worthwhile.
#

syslog('debug', "intrd is starting".($debug ? " (debug)" : ""));

my @deltas = ();
my $deltas_tottime = 0;		# sum of maxsnap-minsnap across @deltas
my $avggoodness;
my $baseline_goodness = 0;
my $compdelta;

my $do_reconfig;

# temp variables
my $goodness;
my $deltatime;
my $olddelta;
my $olddeltatime;
my $delta;
my $newstat;
my $below_statslen;
my $newtime;
my $ret;


my $gotsig = 0;
$SIG{INT} = sub { $gotsig = 1; };	# don't die in the middle of retargeting
$SIG{HUP} = $SIG{INT};
$SIG{TERM} = $SIG{INT};

my $ks;
if ($using_scengen == 0) {
	$ks = Sun::Solaris::Kstat->new();
} else {
	$ks = myks_update();		# supplied by the simulator
}

# If no pci_intrs kstats were found, we need to exit, but we can't because
# SMF will restart us and/or report an error to the administrator. But
# there's nothing an administrator can do. So print out a message for SMF
# logs and silently pause forever.

if (!exists($ks->{pci_intrs})) {
	print STDERR "$cmdname: no interrupts were found; ".
	    "your PCI bus may not yet be supported\n";
	pause() while $gotsig == 0;
	exit 0;
}

my $stat = getstat($ks);


# clear_deltas() throws away the accumulated statistics so that the main
# loop starts collecting from scratch.

sub clear_deltas {
	@deltas = ();
	$deltas_tottime = 0;
	$stat = 0;   # prevent next gen_delta() from setting {missing}
}


for (;;) {
	# 1. Sleep, update the kstats, and save the new stats in $newstat.

	exit 0 if $gotsig;		# if we got ^C / SIGTERM, exit
	if ($using_scengen == 0) {
		sleep($sleeptime);
		exit 0 if $gotsig;	# if we got ^C / SIGTERM, exit
		$ks->update();
	} else {
		$ks = myks_update();
	}
	$newstat = getstat($ks);

	# $stat or $newstat could be zero if they're uninitialized, or if
	# getstat() failed. If $stat is zero, move $newstat to $stat, sleep
	# and try again. If $newstat is zero, then we also sleep and try
	# again, hoping the problem will clear up.

	next if (!ref $newstat);
	if (!ref $stat) {
		$stat = $newstat;
		next;
	}


	# 2. Compare $newstat with the prior set of values, result in %$delta.

	$delta = generate_delta($stat, $newstat);
	dumpdelta($delta) if $debug;	# Dump most recent stats to stdout.
	$stat = $newstat;	# The new stats now become the old stats.


	# 3. If $delta->{missing}, then there has been a reconfiguration of
	# either cpus or interrupts (probably both). We need to toss out our
	# old set of statistics and start from scratch.
	#
	# Also, if the delta covers a very long range of time, then we've
	# been experiencing a system overload that has resulted in intrd
	# not being allowed to run effectively for a while now. As above,
	# toss our old statistics and start from scratch.

	$deltatime = $delta->{maxsnap} - $delta->{minsnap};
	if ($delta->{missing} > 0 || $deltatime > $statslen) {
		clear_deltas();
		syslog('debug', "evaluating interrupt assignments");
		next;
	}


	# 4. Incorporate new delta into the list of deltas, and associated
	# statistics. If we've just now received $statslen deltas, then it's
	# time to evaluate a reconfiguration.

	$below_statslen = ($deltas_tottime < $statslen);
	$deltas_tottime += $deltatime;
	$do_reconfig = ($below_statslen && $deltas_tottime >= $statslen);
	push(@deltas, $delta);

	# 5. Remove old deltas if total time is more than $statslen. We use
	# @deltas as a moving average of the last $statslen seconds. Shift
	# off the oldest deltas, but only if that doesn't cause us to fall
	# below $statslen seconds.

	while (@deltas > 1) {
		$olddelta = $deltas[0];
		$olddeltatime = $olddelta->{maxsnap} - $olddelta->{minsnap};
		$newtime = $deltas_tottime - $olddeltatime;
		last if ($newtime < $statslen);

		shift(@deltas);
		$deltas_tottime = $newtime;
	}

	# 6. The brains of the operation are here. First, check if we're
	# imbalanced, and if so set $do_reconfig. If $do_reconfig is set,
	# either because of imbalance or above in step 4, we evaluate a
	# new configuration.
	#
	# First, take @deltas and generate a single "compressed" delta
	# which summarizes them all. Pass that to do_reconfig and see
	# what it does with it:
	#
	# $ret == -1 : failure
	# $ret ==  0 : current config is optimal (or close enough)
	# $ret ==  1 : reconfiguration has occurred
	#
	# If $ret is -1 or 1, dump all our deltas and start from scratch.
	# Step 4 above will set do_reconfig soon thereafter.
	#
	# If $ret is 0, then nothing has happened because we're already
	# good enough. Set baseline_goodness to current goodness.

	$compdelta = compress_deltas(\@deltas);
	if (VERIFY(ref($compdelta) eq "HASH", "couldn't compress deltas")) {
		clear_deltas();
		next;
	}
	$compdelta->{goodness} = goodness($compdelta);
	dumpdelta($compdelta) if $debug;

	$goodness = $compdelta->{goodness};
	syslog('debug', "GOODNESS: %5.2f%%", $goodness * 100);

	if ($deltas_tottime >= $statslen &&
	    imbalanced($goodness, $baseline_goodness)) {
		$do_reconfig = 1;
	}

	if ($do_reconfig) {
		$ret = do_reconfig($compdelta);

		if ($ret != 0) {
			clear_deltas();
			syslog('debug', "do_reconfig FAILED!") if $ret == -1;
		} else {
			syslog('debug', "setting new baseline of $goodness");
			$baseline_goodness = $goodness;
		}
	}
	syslog('debug', "---------------------------------------");
}