1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * PSMI 1.1 extensions are supported only in 2.6 and later versions. 30 * PSMI 1.2 extensions are supported only in 2.7 and later versions. 31 * PSMI 1.3 and 1.4 extensions are supported in Solaris 10. 32 * PSMI 1.5 extensions are supported in Solaris Nevada. 
33 */ 34 #define PSMI_1_5 35 36 #include <sys/processor.h> 37 #include <sys/time.h> 38 #include <sys/psm.h> 39 #include <sys/smp_impldefs.h> 40 #include <sys/cram.h> 41 #include <sys/acpi/acpi.h> 42 #include <sys/acpica.h> 43 #include <sys/psm_common.h> 44 #include "apic.h" 45 #include <sys/pit.h> 46 #include <sys/ddi.h> 47 #include <sys/sunddi.h> 48 #include <sys/ddi_impldefs.h> 49 #include <sys/pci.h> 50 #include <sys/promif.h> 51 #include <sys/x86_archext.h> 52 #include <sys/cpc_impl.h> 53 #include <sys/uadmin.h> 54 #include <sys/panic.h> 55 #include <sys/debug.h> 56 #include <sys/archsystm.h> 57 #include <sys/trap.h> 58 #include <sys/machsystm.h> 59 #include <sys/cpuvar.h> 60 #include <sys/rm_platter.h> 61 #include <sys/privregs.h> 62 #include <sys/cyclic.h> 63 #include <sys/note.h> 64 #include <sys/pci_intr_lib.h> 65 66 /* 67 * Local Function Prototypes 68 */ 69 static void apic_init_intr(); 70 static void apic_ret(); 71 static int apic_handle_defconf(); 72 static int apic_parse_mpct(caddr_t mpct, int bypass); 73 static struct apic_mpfps_hdr *apic_find_fps_sig(caddr_t fptr, int size); 74 static int apic_checksum(caddr_t bptr, int len); 75 static int get_apic_cmd1(); 76 static int get_apic_pri(); 77 static int apic_find_bus_type(char *bus); 78 static int apic_find_bus(int busid); 79 static int apic_find_bus_id(int bustype); 80 static struct apic_io_intr *apic_find_io_intr(int irqno); 81 int apic_allocate_irq(int irq); 82 static int apic_find_free_irq(int start, int end); 83 static uchar_t apic_allocate_vector(int ipl, int irq, int pri); 84 static void apic_modify_vector(uchar_t vector, int irq); 85 static void apic_mark_vector(uchar_t oldvector, uchar_t newvector); 86 static uchar_t apic_xlate_vector(uchar_t oldvector); 87 static void apic_xlate_vector_free_timeout_handler(void *arg); 88 static void apic_free_vector(uchar_t vector); 89 static void apic_reprogram_timeout_handler(void *arg); 90 static int apic_check_stuck_interrupt(apic_irq_t *irq_ptr, int 
old_bind_cpu, 91 int new_bind_cpu, volatile int32_t *ioapic, int intin_no, int which_irq, 92 int iflag, boolean_t *restore_intrp); 93 static int apic_setup_io_intr(apic_irq_t *irqptr, int irq); 94 static int apic_setup_io_intr_deferred(apic_irq_t *irqptr, int irq); 95 static void apic_record_rdt_entry(apic_irq_t *irqptr, int irq); 96 static struct apic_io_intr *apic_find_io_intr_w_busid(int irqno, int busid); 97 static int apic_find_intin(uchar_t ioapic, uchar_t intin); 98 static int apic_handle_pci_pci_bridge(dev_info_t *idip, int child_devno, 99 int child_ipin, struct apic_io_intr **intrp); 100 static int apic_setup_irq_table(dev_info_t *dip, int irqno, 101 struct apic_io_intr *intrp, struct intrspec *ispec, iflag_t *intr_flagp, 102 int type); 103 static int apic_setup_sci_irq_table(int irqno, uchar_t ipl, 104 iflag_t *intr_flagp); 105 static void apic_nmi_intr(caddr_t arg); 106 uchar_t apic_bind_intr(dev_info_t *dip, int irq, uchar_t ioapicid, 107 uchar_t intin); 108 static int apic_rebind(apic_irq_t *irq_ptr, int bind_cpu, int acquire_lock, 109 int when); 110 int apic_rebind_all(apic_irq_t *irq_ptr, int bind_cpu, int safe); 111 static void apic_intr_redistribute(); 112 static void apic_cleanup_busy(); 113 static void apic_set_pwroff_method_from_mpcnfhdr(struct apic_mp_cnf_hdr *hdrp); 114 int apic_introp_xlate(dev_info_t *dip, struct intrspec *ispec, int type); 115 116 /* ACPI support routines */ 117 static int acpi_probe(void); 118 static int apic_acpi_irq_configure(acpi_psm_lnk_t *acpipsmlnkp, dev_info_t *dip, 119 int *pci_irqp, iflag_t *intr_flagp); 120 121 static int apic_acpi_translate_pci_irq(dev_info_t *dip, int busid, int devid, 122 int ipin, int *pci_irqp, iflag_t *intr_flagp); 123 static uchar_t acpi_find_ioapic(int irq); 124 static int acpi_intr_compatible(iflag_t iflag1, iflag_t iflag2); 125 126 /* 127 * standard MP entries 128 */ 129 static int apic_probe(); 130 static int apic_clkinit(); 131 static int apic_getclkirq(int ipl); 132 static uint_t 
apic_calibrate(volatile uint32_t *addr, 133 uint16_t *pit_ticks_adj); 134 static hrtime_t apic_gettime(); 135 static hrtime_t apic_gethrtime(); 136 static void apic_init(); 137 static void apic_picinit(void); 138 static void apic_cpu_start(processorid_t cpun, caddr_t rm_code); 139 static int apic_post_cpu_start(void); 140 static void apic_send_ipi(int cpun, int ipl); 141 static void apic_set_softintr(int softintr); 142 static void apic_set_idlecpu(processorid_t cpun); 143 static void apic_unset_idlecpu(processorid_t cpun); 144 static int apic_softlvl_to_irq(int ipl); 145 static int apic_intr_enter(int ipl, int *vect); 146 static void apic_intr_exit(int ipl, int vect); 147 static void apic_setspl(int ipl); 148 static int apic_addspl(int ipl, int vector, int min_ipl, int max_ipl); 149 static int apic_delspl(int ipl, int vector, int min_ipl, int max_ipl); 150 static void apic_shutdown(int cmd, int fcn); 151 static void apic_preshutdown(int cmd, int fcn); 152 static int apic_disable_intr(processorid_t cpun); 153 static void apic_enable_intr(processorid_t cpun); 154 static processorid_t apic_get_next_processorid(processorid_t cpun); 155 static int apic_get_ipivect(int ipl, int type); 156 static void apic_timer_reprogram(hrtime_t time); 157 static void apic_timer_enable(void); 158 static void apic_timer_disable(void); 159 static void apic_post_cyclic_setup(void *arg); 160 extern int apic_intr_ops(dev_info_t *, ddi_intr_handle_impl_t *, 161 psm_intr_op_t, int *); 162 163 static int apic_oneshot = 0; 164 int apic_oneshot_enable = 1; /* to allow disabling one-shot capability */ 165 166 /* 167 * These variables are frequently accessed in apic_intr_enter(), 168 * apic_intr_exit and apic_setspl, so group them together 169 */ 170 volatile uint32_t *apicadr = NULL; /* virtual addr of local APIC */ 171 int apic_setspl_delay = 1; /* apic_setspl - delay enable */ 172 int apic_clkvect; 173 174 /* ACPI SCI interrupt configuration; -1 if SCI not used */ 175 int apic_sci_vect = -1; 176 
iflag_t apic_sci_flags; 177 178 /* vector at which error interrupts come in */ 179 int apic_errvect; 180 int apic_enable_error_intr = 1; 181 int apic_error_display_delay = 100; 182 183 /* vector at which performance counter overflow interrupts come in */ 184 int apic_cpcovf_vect; 185 int apic_enable_cpcovf_intr = 1; 186 187 /* Max wait time (in microsecs) for flags to clear in an RDT entry. */ 188 static int apic_max_usecs_clear_pending = 1000; 189 190 /* Amt of usecs to wait before checking if RDT flags have reset. */ 191 #define APIC_USECS_PER_WAIT_INTERVAL 100 192 193 /* Maximum number of times to retry reprogramming via the timeout */ 194 #define APIC_REPROGRAM_MAX_TIMEOUTS 10 195 196 /* timeout delay for IOAPIC delayed reprogramming */ 197 #define APIC_REPROGRAM_TIMEOUT_DELAY 5 /* microseconds */ 198 199 /* Parameter to apic_rebind(): Should reprogramming be done now or later? */ 200 #define DEFERRED 1 201 #define IMMEDIATE 0 202 203 /* 204 * largest value an unsigned char can hold; NBBY (number of bits per byte) is from <sys/param.h> 205 */ 206 #define UCHAR_MAX ((1 << NBBY) - 1) 207 208 uchar_t apic_reserved_irqlist[MAX_ISA_IRQ + 1]; 209 210 /* 211 * The following vector assignments influence the value of ipltopri and 212 * vectortoipl. Note that vectors 0 - 0x1f are not used. We can program 213 * idle to 0 and IPL 0 to 0x10 to differentiate idle in case 214 * we care to do so in future. Note some IPLs which are rarely used 215 * will share the vector ranges and heavily used IPLs (5 and 6) have 216 * a wide range. 217 * IPL Vector range. as passed to intr_enter 218 * 0 none. 219 * 1,2,3 0x20-0x2f 0x0-0xf 220 * 4 0x30-0x3f 0x10-0x1f 221 * 5 0x40-0x5f 0x20-0x3f 222 * 6 0x60-0x7f 0x40-0x5f 223 * 7,8,9 0x80-0x8f 0x60-0x6f 224 * 10 0x90-0x9f 0x70-0x7f 225 * 11 0xa0-0xaf 0x80-0x8f 226 * ... ... 
227 * 16 0xf0-0xff 0xd0-0xdf 228 */ 229 uchar_t apic_vectortoipl[APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL] = { 230 3, 4, 5, 5, 6, 6, 9, 10, 11, 12, 13, 14, 15, 16 231 }; 232 /* 233 * The ipl of an ISR at vector X is apic_vectortoipl[X>>4] 234 * NOTE that this is vector as passed into intr_enter which is 235 * programmed vector - 0x20 (APIC_BASE_VECT) 236 */ 237 238 uchar_t apic_ipltopri[MAXIPL + 1]; /* unix ipl to apic pri */ 239 /* The taskpri to be programmed into apic to mask given ipl */ 240 241 #if defined(__amd64) 242 uchar_t apic_cr8pri[MAXIPL + 1]; /* unix ipl to cr8 pri */ 243 #endif 244 245 /* 246 * Patchable global variables. 247 */ 248 int apic_forceload = 0; 249 250 #define INTR_ROUND_ROBIN_WITH_AFFINITY 0 251 #define INTR_ROUND_ROBIN 1 252 #define INTR_LOWEST_PRIORITY 2 253 254 int apic_intr_policy = INTR_ROUND_ROBIN_WITH_AFFINITY; 255 256 static int apic_next_bind_cpu = 1; /* For round robin assignment */ 257 /* start with cpu 1 */ 258 259 int apic_coarse_hrtime = 1; /* 0 - use accurate slow gethrtime() */ 260 /* 1 - use gettime() for performance */ 261 int apic_flat_model = 0; /* 0 - clustered. 
1 - flat */ 262 int apic_enable_hwsoftint = 0; /* 0 - disable, 1 - enable */ 263 int apic_enable_bind_log = 1; /* 1 - display interrupt binding log */ 264 int apic_panic_on_nmi = 0; 265 int apic_panic_on_apic_error = 0; 266 267 int apic_verbose = 0; 268 269 /* Flag definitions for apic_verbose */ 270 #define APIC_VERBOSE_IOAPIC_FLAG 0x00000001 271 #define APIC_VERBOSE_IRQ_FLAG 0x00000002 272 #define APIC_VERBOSE_POWEROFF_FLAG 0x00000004 273 #define APIC_VERBOSE_POWEROFF_PAUSE_FLAG 0x00000008 274 275 276 #define APIC_VERBOSE_IOAPIC(fmt) \ 277 if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) \ 278 cmn_err fmt; 279 280 #define APIC_VERBOSE_IRQ(fmt) \ 281 if (apic_verbose & APIC_VERBOSE_IRQ_FLAG) \ 282 cmn_err fmt; 283 284 #define APIC_VERBOSE_POWEROFF(fmt) \ 285 if (apic_verbose & APIC_VERBOSE_POWEROFF_FLAG) \ 286 prom_printf fmt; 287 288 289 /* Now the ones for Dynamic Interrupt distribution */ 290 int apic_enable_dynamic_migration = 0; 291 292 /* 293 * If enabled, the distribution works as follows: 294 * On every interrupt entry, the current ipl for the CPU is set in cpu_info 295 * and the irq corresponding to the ipl is also set in the aci_current array. 296 * interrupt exit and setspl (due to soft interrupts) will cause the current 297 * ipl to be changed. This is cache friendly as these frequently used 298 * paths write into a per cpu structure. 299 * 300 * Sampling is done by checking the structures for all CPUs and incrementing 301 * the busy field of the irq (if any) executing on each CPU and the busy field 302 * of the corresponding CPU. 303 * In periodic mode this is done on every clock interrupt. 304 * In one-shot mode, this is done thru a cyclic with an interval of 305 * apic_redistribute_sample_interval (default 10 milli sec). 306 * 307 * Every apic_sample_factor_redistribution times we sample, we do computations 308 * to decide which interrupt needs to be migrated (see comments 309 * before apic_intr_redistribute()). 
310 */ 311 312 /* 313 * Following 3 variables start as % and can be patched or set using an 314 * API to be defined in future. They will be scaled to 315 * sample_factor_redistribution which is in turn set to hertz+1 (in periodic 316 * mode), or 101 in one-shot mode to stagger it away from one sec processing 317 */ 318 319 int apic_int_busy_mark = 60; 320 int apic_int_free_mark = 20; 321 int apic_diff_for_redistribution = 10; 322 323 /* sampling interval for interrupt redistribution for dynamic migration */ 324 int apic_redistribute_sample_interval = NANOSEC / 100; /* 10 millisec */ 325 326 /* 327 * number of times we sample before deciding to redistribute interrupts 328 * for dynamic migration 329 */ 330 int apic_sample_factor_redistribution = 101; 331 332 /* timeout for xlate_vector, mark_vector */ 333 int apic_revector_timeout = 16 * 10000; /* 160 millisec */ 334 335 int apic_redist_cpu_skip = 0; 336 int apic_num_imbalance = 0; 337 int apic_num_rebind = 0; 338 339 int apic_nproc = 0; 340 int apic_defconf = 0; 341 int apic_irq_translate = 0; 342 int apic_spec_rev = 0; 343 int apic_imcrp = 0; 344 345 int apic_use_acpi = 1; /* 1 = use ACPI, 0 = don't use ACPI */ 346 int apic_use_acpi_madt_only = 0; /* 1=ONLY use MADT from ACPI */ 347 348 /* 349 * For interrupt link devices, if apic_unconditional_srs is set, an irq resource 350 * will be assigned (via _SRS). If it is not set, use the current 351 * irq setting (via _CRS), but only if that irq is in the set of possible 352 * irqs (returned by _PRS) for the device. 353 */ 354 int apic_unconditional_srs = 1; 355 356 /* 357 * For interrupt link devices, if apic_prefer_crs is set when we are 358 * assigning an IRQ resource to a device, prefer the current IRQ setting 359 * over other possible irq settings under same conditions. 
360 */ 361 362 int apic_prefer_crs = 1; 363 364 365 /* minimum number of timer ticks to program to */ 366 int apic_min_timer_ticks = 1; 367 /* 368 * Local static data 369 */ 370 static struct psm_ops apic_ops = { 371 apic_probe, 372 373 apic_init, 374 apic_picinit, 375 apic_intr_enter, 376 apic_intr_exit, 377 apic_setspl, 378 apic_addspl, 379 apic_delspl, 380 apic_disable_intr, 381 apic_enable_intr, 382 apic_softlvl_to_irq, 383 apic_set_softintr, 384 385 apic_set_idlecpu, 386 apic_unset_idlecpu, 387 388 apic_clkinit, 389 apic_getclkirq, 390 (void (*)(void))NULL, /* psm_hrtimeinit */ 391 apic_gethrtime, 392 393 apic_get_next_processorid, 394 apic_cpu_start, 395 apic_post_cpu_start, 396 apic_shutdown, 397 apic_get_ipivect, 398 apic_send_ipi, 399 400 (int (*)(dev_info_t *, int))NULL, /* psm_translate_irq */ 401 (int (*)(todinfo_t *))NULL, /* psm_tod_get */ 402 (int (*)(todinfo_t *))NULL, /* psm_tod_set */ 403 (void (*)(int, char *))NULL, /* psm_notify_error */ 404 (void (*)(int))NULL, /* psm_notify_func */ 405 apic_timer_reprogram, 406 apic_timer_enable, 407 apic_timer_disable, 408 apic_post_cyclic_setup, 409 apic_preshutdown, 410 apic_intr_ops /* Advanced DDI Interrupt framework */ 411 }; 412 413 414 static struct psm_info apic_psm_info = { 415 PSM_INFO_VER01_5, /* version */ 416 PSM_OWN_EXCLUSIVE, /* ownership */ 417 (struct psm_ops *)&apic_ops, /* operation */ 418 "pcplusmp", /* machine name */ 419 "pcplusmp v1.4 compatible %I%", 420 }; 421 422 static void *apic_hdlp; 423 424 #ifdef DEBUG 425 #define DENT 0x0001 426 int apic_debug = 0; 427 /* 428 * set apic_restrict_vector to the # of vectors we want to allow per range 429 * useful in testing shared interrupt logic by setting it to 2 or 3 430 */ 431 int apic_restrict_vector = 0; 432 433 #define APIC_DEBUG_MSGBUFSIZE 2048 434 int apic_debug_msgbuf[APIC_DEBUG_MSGBUFSIZE]; 435 int apic_debug_msgbufindex = 0; 436 437 /* 438 * Put "int" info into debug buffer. No MP consistency, but light weight. 
439 * Good enough for most debugging. 440 */ 441 #define APIC_DEBUG_BUF_PUT(x) \ 442 apic_debug_msgbuf[apic_debug_msgbufindex++] = x; \ 443 if (apic_debug_msgbufindex >= (APIC_DEBUG_MSGBUFSIZE - NCPU)) \ 444 apic_debug_msgbufindex = 0; 445 446 #endif /* DEBUG */ 447 448 apic_cpus_info_t *apic_cpus; 449 450 static cpuset_t apic_cpumask; 451 static uint_t apic_flag; 452 453 /* Flag to indicate that we need to shut down all processors */ 454 static uint_t apic_shutdown_processors; 455 456 uint_t apic_nsec_per_intr = 0; 457 458 /* 459 * apic_let_idle_redistribute can have the following values: 460 * 0 - If clock decremented it from 1 to 0, clock has to call redistribute. 461 * apic_redistribute_lock prevents multiple idle cpus from redistributing 462 */ 463 int apic_num_idle_redistributions = 0; 464 static int apic_let_idle_redistribute = 0; 465 static uint_t apic_nticks = 0; 466 static uint_t apic_skipped_redistribute = 0; 467 468 /* to gather intr data and redistribute */ 469 static void apic_redistribute_compute(void); 470 471 static uint_t last_count_read = 0; 472 static lock_t apic_gethrtime_lock; 473 volatile int apic_hrtime_stamp = 0; 474 volatile hrtime_t apic_nsec_since_boot = 0; 475 static uint_t apic_hertz_count; 476 477 uint64_t apic_ticks_per_SFnsecs; /* # of ticks in SF nsecs */ 478 479 static hrtime_t apic_nsec_max; 480 481 static hrtime_t apic_last_hrtime = 0; 482 int apic_hrtime_error = 0; 483 int apic_remote_hrterr = 0; 484 int apic_num_nmis = 0; 485 int apic_apic_error = 0; 486 int apic_num_apic_errors = 0; 487 int apic_num_cksum_errors = 0; 488 489 static uchar_t apic_io_id[MAX_IO_APIC]; 490 static uchar_t apic_io_ver[MAX_IO_APIC]; 491 static uchar_t apic_io_vectbase[MAX_IO_APIC]; 492 static uchar_t apic_io_vectend[MAX_IO_APIC]; 493 volatile int32_t *apicioadr[MAX_IO_APIC]; 494 495 /* 496 * First available slot to be used as IRQ index into the apic_irq_table 497 * for those interrupts (like MSI/X) that don't have a physical IRQ. 
498 */ 499 int apic_first_avail_irq = APIC_FIRST_FREE_IRQ; 500 501 /* 502 * apic_ioapic_lock protects the ioapics (reg select), the status, temp_bound 503 * and bound elements of cpus_info and the temp_cpu element of irq_struct 504 */ 505 lock_t apic_ioapic_lock; 506 507 /* 508 * apic_ioapic_reprogram_lock prevents a CPU from exiting 509 * apic_intr_exit before IOAPIC reprogramming information 510 * is collected. 511 */ 512 static lock_t apic_ioapic_reprogram_lock; 513 static int apic_io_max = 0; /* no. of i/o apics enabled */ 514 515 static struct apic_io_intr *apic_io_intrp = 0; 516 static struct apic_bus *apic_busp; 517 518 uchar_t apic_vector_to_irq[APIC_MAX_VECTOR+1]; 519 static uchar_t apic_resv_vector[MAXIPL+1]; 520 521 static char apic_level_intr[APIC_MAX_VECTOR+1]; 522 static int apic_error = 0; 523 /* values which apic_error can take. Not catastrophic, but may help debug */ 524 #define APIC_ERR_BOOT_EOI 0x1 525 #define APIC_ERR_GET_IPIVECT_FAIL 0x2 526 #define APIC_ERR_INVALID_INDEX 0x4 527 #define APIC_ERR_MARK_VECTOR_FAIL 0x8 528 #define APIC_ERR_APIC_ERROR 0x40000000 529 #define APIC_ERR_NMI 0x80000000 530 531 static int apic_cmos_ssb_set = 0; 532 533 static uint32_t eisa_level_intr_mask = 0; 534 /* At least MSB will be set if EISA bus */ 535 536 static int apic_pci_bus_total = 0; 537 static uchar_t apic_single_pci_busid = 0; 538 539 540 /* 541 * airq_mutex protects additions to the apic_irq_table - the first 542 * pointer and any airq_nexts off of that one. It also protects 543 * apic_max_device_irq & apic_min_device_irq. It also guarantees 544 * that share_id is unique as new ids are generated only when new 545 * irq_t structs are linked in. Once linked in the structs are never 546 * deleted. temp_cpu & mps_intr_index field indicate if it is programmed 547 * or allocated. Note that there is a slight gap between allocating in 548 * apic_introp_xlate and programming in addspl. 
549 */ 550 kmutex_t airq_mutex; 551 apic_irq_t *apic_irq_table[APIC_MAX_VECTOR+1]; 552 int apic_max_device_irq = 0; 553 int apic_min_device_irq = APIC_MAX_VECTOR; 554 555 /* use to make sure only one cpu handles the nmi */ 556 static lock_t apic_nmi_lock; 557 /* use to make sure only one cpu handles the error interrupt */ 558 static lock_t apic_error_lock; 559 560 /* 561 * Following declarations are for revectoring; used when ISRs at different 562 * IPLs share an irq. 563 */ 564 static lock_t apic_revector_lock; 565 static int apic_revector_pending = 0; 566 static uchar_t *apic_oldvec_to_newvec; 567 static uchar_t *apic_newvec_to_oldvec; 568 569 /* Ensures that the IOAPIC-reprogramming timeout is not reentrant */ 570 static kmutex_t apic_reprogram_timeout_mutex; 571 572 static struct ioapic_reprogram_data { 573 int valid; /* This entry is valid */ 574 int bindcpu; /* The CPU to which the int will be bound */ 575 unsigned timeouts; /* # times the reprogram timeout was called */ 576 } apic_reprogram_info[APIC_MAX_VECTOR+1]; 577 /* 578 * APIC_MAX_VECTOR + 1 is the maximum # of IRQs as well. apic_reprogram_info 579 * is indexed by IRQ number, NOT by vector number. 580 */ 581 582 583 /* 584 * The following added to identify a software poweroff method if available. 585 */ 586 587 static struct { 588 int poweroff_method; 589 char oem_id[APIC_MPS_OEM_ID_LEN + 1]; /* MAX + 1 for NULL */ 590 char prod_id[APIC_MPS_PROD_ID_LEN + 1]; /* MAX + 1 for NULL */ 591 } apic_mps_ids[] = { 592 { APIC_POWEROFF_VIA_RTC, "INTEL", "ALDER" }, /* 4300 */ 593 { APIC_POWEROFF_VIA_RTC, "NCR", "AMC" }, /* 4300 */ 594 { APIC_POWEROFF_VIA_ASPEN_BMC, "INTEL", "A450NX" }, /* 4400? */ 595 { APIC_POWEROFF_VIA_ASPEN_BMC, "INTEL", "AD450NX" }, /* 4400 */ 596 { APIC_POWEROFF_VIA_ASPEN_BMC, "INTEL", "AC450NX" }, /* 4400R */ 597 { APIC_POWEROFF_VIA_SITKA_BMC, "INTEL", "S450NX" }, /* S50 */ 598 { APIC_POWEROFF_VIA_SITKA_BMC, "INTEL", "SC450NX" } /* S50? 
*/ 599 }; 600 601 int apic_poweroff_method = APIC_POWEROFF_NONE; 602 603 static struct { 604 uchar_t cntl; 605 uchar_t data; 606 } aspen_bmc[] = { 607 { CC_SMS_WR_START, 0x18 }, /* NetFn/LUN */ 608 { CC_SMS_WR_NEXT, 0x24 }, /* Cmd SET_WATCHDOG_TIMER */ 609 { CC_SMS_WR_NEXT, 0x84 }, /* DataByte 1: SMS/OS no log */ 610 { CC_SMS_WR_NEXT, 0x2 }, /* DataByte 2: Power Down */ 611 { CC_SMS_WR_NEXT, 0x0 }, /* DataByte 3: no pre-timeout */ 612 { CC_SMS_WR_NEXT, 0x0 }, /* DataByte 4: timer expir. */ 613 { CC_SMS_WR_NEXT, 0xa }, /* DataByte 5: init countdown */ 614 { CC_SMS_WR_END, 0x0 }, /* DataByte 6: init countdown */ 615 616 { CC_SMS_WR_START, 0x18 }, /* NetFn/LUN */ 617 { CC_SMS_WR_END, 0x22 } /* Cmd RESET_WATCHDOG_TIMER */ 618 }; 619 620 static struct { 621 int port; 622 uchar_t data; 623 } sitka_bmc[] = { 624 { SMS_COMMAND_REGISTER, SMS_WRITE_START }, 625 { SMS_DATA_REGISTER, 0x18 }, /* NetFn/LUN */ 626 { SMS_DATA_REGISTER, 0x24 }, /* Cmd SET_WATCHDOG_TIMER */ 627 { SMS_DATA_REGISTER, 0x84 }, /* DataByte 1: SMS/OS no log */ 628 { SMS_DATA_REGISTER, 0x2 }, /* DataByte 2: Power Down */ 629 { SMS_DATA_REGISTER, 0x0 }, /* DataByte 3: no pre-timeout */ 630 { SMS_DATA_REGISTER, 0x0 }, /* DataByte 4: timer expir. */ 631 { SMS_DATA_REGISTER, 0xa }, /* DataByte 5: init countdown */ 632 { SMS_COMMAND_REGISTER, SMS_WRITE_END }, 633 { SMS_DATA_REGISTER, 0x0 }, /* DataByte 6: init countdown */ 634 635 { SMS_COMMAND_REGISTER, SMS_WRITE_START }, 636 { SMS_DATA_REGISTER, 0x18 }, /* NetFn/LUN */ 637 { SMS_COMMAND_REGISTER, SMS_WRITE_END }, 638 { SMS_DATA_REGISTER, 0x22 } /* Cmd RESET_WATCHDOG_TIMER */ 639 }; 640 641 642 /* Patchable global variables. 
*/ 643 int apic_kmdb_on_nmi = 0; /* 0 - no, 1 - yes enter kmdb */ 644 int apic_debug_mps_id = 0; /* 1 - print MPS ID strings */ 645 uint32_t apic_divide_reg_init = 0; /* 0 - divide by 2 */ 646 647 /* 648 * ACPI definitions 649 */ 650 /* _PIC method arguments */ 651 #define ACPI_PIC_MODE 0 652 #define ACPI_APIC_MODE 1 653 654 /* APIC error flags we care about */ 655 #define APIC_SEND_CS_ERROR 0x01 656 #define APIC_RECV_CS_ERROR 0x02 657 #define APIC_CS_ERRORS (APIC_SEND_CS_ERROR|APIC_RECV_CS_ERROR) 658 659 /* 660 * ACPI variables 661 */ 662 /* 1 = acpi is enabled & working, 0 = acpi is not enabled or not there */ 663 static int apic_enable_acpi = 0; 664 665 /* ACPI Multiple APIC Description Table ptr */ 666 static MULTIPLE_APIC_TABLE *acpi_mapic_dtp = NULL; 667 668 /* ACPI Interrupt Source Override Structure ptr */ 669 static MADT_INTERRUPT_OVERRIDE *acpi_isop = NULL; 670 static int acpi_iso_cnt = 0; 671 672 /* ACPI Non-maskable Interrupt Sources ptr */ 673 static MADT_NMI_SOURCE *acpi_nmi_sp = NULL; 674 static int acpi_nmi_scnt = 0; 675 static MADT_LOCAL_APIC_NMI *acpi_nmi_cp = NULL; 676 static int acpi_nmi_ccnt = 0; 677 678 /* 679 * extern declarations 680 */ 681 extern int intr_clear(void); 682 extern void intr_restore(uint_t); 683 #if defined(__amd64) 684 extern int intpri_use_cr8; 685 #endif /* __amd64 */ 686 687 extern int apic_pci_msi_enable_vector(dev_info_t *, int, int, 688 int, int, int); 689 extern apic_irq_t *apic_find_irq(dev_info_t *, struct intrspec *, int); 690 extern int apic_pci_msi_unconfigure(dev_info_t *, int, int); 691 extern int apic_pci_msi_disable_mode(dev_info_t *, int, int); 692 extern int apic_pci_msi_enable_mode(dev_info_t *, int, int); 693 694 /* 695 * This is the loadable module wrapper 696 */ 697 698 int 699 _init(void) 700 { 701 if (apic_coarse_hrtime) 702 apic_ops.psm_gethrtime = &apic_gettime; 703 return (psm_mod_init(&apic_hdlp, &apic_psm_info)); 704 } 705 706 int 707 _fini(void) 708 { 709 return (psm_mod_fini(&apic_hdlp, 
&apic_psm_info)); 710 } 711 712 int 713 _info(struct modinfo *modinfop) 714 { 715 return (psm_mod_info(&apic_hdlp, &apic_psm_info, modinfop)); 716 } 717 718 /* 719 * Auto-configuration routines 720 */ 721 722 /* 723 * Look at MPSpec 1.4 (Intel Order # 242016-005) for details of what we do here 724 * May work with 1.1 - but not guaranteed. 725 * According to the MP Spec, the MP floating pointer structure 726 * will be searched in the order described below: 727 * 1. In the first kilobyte of Extended BIOS Data Area (EBDA) 728 * 2. Within the last kilobyte of system base memory 729 * 3. In the BIOS ROM address space between 0F0000h and 0FFFFh 730 * Once we find the right signature with proper checksum, we call 731 * either handle_defconf or parse_mpct to get all info necessary for 732 * subsequent operations. 733 */ 734 static int 735 apic_probe() 736 { 737 uint32_t mpct_addr, ebda_start = 0, base_mem_end; 738 caddr_t biosdatap; 739 caddr_t mpct; 740 caddr_t fptr; 741 int i, mpct_size, mapsize, retval = PSM_FAILURE; 742 ushort_t ebda_seg, base_mem_size; 743 struct apic_mpfps_hdr *fpsp; 744 struct apic_mp_cnf_hdr *hdrp; 745 int bypass_cpu_and_ioapics_in_mptables; 746 int acpi_user_options; 747 748 if (apic_forceload < 0) 749 return (retval); 750 751 /* Allow override for MADT-only mode */ 752 acpi_user_options = ddi_prop_get_int(DDI_DEV_T_ANY, ddi_root_node(), 0, 753 "acpi-user-options", 0); 754 apic_use_acpi_madt_only = ((acpi_user_options & ACPI_OUSER_MADT) != 0); 755 756 /* Allow apic_use_acpi to override MADT-only mode */ 757 if (!apic_use_acpi) 758 apic_use_acpi_madt_only = 0; 759 760 retval = acpi_probe(); 761 762 /* 763 * mapin the bios data area 40:0 764 * 40:13h - two-byte location reports the base memory size 765 * 40:0Eh - two-byte location for the exact starting address of 766 * the EBDA segment for EISA 767 */ 768 biosdatap = psm_map_phys(0x400, 0x20, PROT_READ); 769 if (!biosdatap) 770 return (retval); 771 fpsp = (struct apic_mpfps_hdr *)NULL; 772 mapsize 
= MPFPS_RAM_WIN_LEN; 773 /*LINTED: pointer cast may result in improper alignment */ 774 ebda_seg = *((ushort_t *)(biosdatap+0xe)); 775 /* check the 1k of EBDA */ 776 if (ebda_seg) { 777 ebda_start = ((uint32_t)ebda_seg) << 4; 778 fptr = psm_map_phys(ebda_start, MPFPS_RAM_WIN_LEN, PROT_READ); 779 if (fptr) { 780 if (!(fpsp = 781 apic_find_fps_sig(fptr, MPFPS_RAM_WIN_LEN))) 782 psm_unmap_phys(fptr, MPFPS_RAM_WIN_LEN); 783 } 784 } 785 /* If not in EBDA, check the last k of system base memory */ 786 if (!fpsp) { 787 /*LINTED: pointer cast may result in improper alignment */ 788 base_mem_size = *((ushort_t *)(biosdatap + 0x13)); 789 790 if (base_mem_size > 512) 791 base_mem_end = 639 * 1024; 792 else 793 base_mem_end = 511 * 1024; 794 /* if ebda == last k of base mem, skip to check BIOS ROM */ 795 if (base_mem_end != ebda_start) { 796 797 fptr = psm_map_phys(base_mem_end, MPFPS_RAM_WIN_LEN, 798 PROT_READ); 799 800 if (fptr) { 801 if (!(fpsp = apic_find_fps_sig(fptr, 802 MPFPS_RAM_WIN_LEN))) 803 psm_unmap_phys(fptr, MPFPS_RAM_WIN_LEN); 804 } 805 } 806 } 807 psm_unmap_phys(biosdatap, 0x20); 808 809 /* If still cannot find it, check the BIOS ROM space */ 810 if (!fpsp) { 811 mapsize = MPFPS_ROM_WIN_LEN; 812 fptr = psm_map_phys(MPFPS_ROM_WIN_START, 813 MPFPS_ROM_WIN_LEN, PROT_READ); 814 if (fptr) { 815 if (!(fpsp = 816 apic_find_fps_sig(fptr, MPFPS_ROM_WIN_LEN))) { 817 psm_unmap_phys(fptr, MPFPS_ROM_WIN_LEN); 818 return (retval); 819 } 820 } 821 } 822 823 if (apic_checksum((caddr_t)fpsp, fpsp->mpfps_length * 16) != 0) { 824 psm_unmap_phys(fptr, MPFPS_ROM_WIN_LEN); 825 return (retval); 826 } 827 828 apic_spec_rev = fpsp->mpfps_spec_rev; 829 if ((apic_spec_rev != 04) && (apic_spec_rev != 01)) { 830 psm_unmap_phys(fptr, MPFPS_ROM_WIN_LEN); 831 return (retval); 832 } 833 834 /* check IMCR is present or not */ 835 apic_imcrp = fpsp->mpfps_featinfo2 & MPFPS_FEATINFO2_IMCRP; 836 837 /* check default configuration (dual CPUs) */ 838 if ((apic_defconf = fpsp->mpfps_featinfo1) != 0) 
{ 839 psm_unmap_phys(fptr, mapsize); 840 return (apic_handle_defconf()); 841 } 842 843 /* MP Configuration Table */ 844 mpct_addr = (uint32_t)(fpsp->mpfps_mpct_paddr); 845 846 psm_unmap_phys(fptr, mapsize); /* unmap floating ptr struct */ 847 848 /* 849 * Map in enough memory for the MP Configuration Table Header. 850 * Use this table to read the total length of the BIOS data and 851 * map in all the info 852 */ 853 /*LINTED: pointer cast may result in improper alignment */ 854 hdrp = (struct apic_mp_cnf_hdr *)psm_map_phys(mpct_addr, 855 sizeof (struct apic_mp_cnf_hdr), PROT_READ); 856 if (!hdrp) 857 return (retval); 858 859 /* check mp configuration table signature PCMP */ 860 if (hdrp->mpcnf_sig != 0x504d4350) { 861 psm_unmap_phys((caddr_t)hdrp, sizeof (struct apic_mp_cnf_hdr)); 862 return (retval); 863 } 864 mpct_size = (int)hdrp->mpcnf_tbl_length; 865 866 apic_set_pwroff_method_from_mpcnfhdr(hdrp); 867 868 psm_unmap_phys((caddr_t)hdrp, sizeof (struct apic_mp_cnf_hdr)); 869 870 if ((retval == PSM_SUCCESS) && !apic_use_acpi_madt_only) { 871 /* This is an ACPI machine No need for further checks */ 872 return (retval); 873 } 874 875 /* 876 * Map in the entries for this machine, ie. Processor 877 * Entry Tables, Bus Entry Tables, etc. 
878 * They are in fixed order following one another 879 */ 880 mpct = psm_map_phys(mpct_addr, mpct_size, PROT_READ); 881 if (!mpct) 882 return (retval); 883 884 if (apic_checksum(mpct, mpct_size) != 0) 885 goto apic_fail1; 886 887 888 /*LINTED: pointer cast may result in improper alignment */ 889 hdrp = (struct apic_mp_cnf_hdr *)mpct; 890 /*LINTED: pointer cast may result in improper alignment */ 891 apicadr = (uint32_t *)psm_map_phys((uint32_t)hdrp->mpcnf_local_apic, 892 APIC_LOCAL_MEMLEN, PROT_READ | PROT_WRITE); 893 if (!apicadr) 894 goto apic_fail1; 895 896 /* Parse all information in the tables */ 897 bypass_cpu_and_ioapics_in_mptables = (retval == PSM_SUCCESS); 898 if (apic_parse_mpct(mpct, bypass_cpu_and_ioapics_in_mptables) == 899 PSM_SUCCESS) 900 return (PSM_SUCCESS); 901 902 for (i = 0; i < apic_io_max; i++) 903 psm_unmap_phys((caddr_t)apicioadr[i], APIC_IO_MEMLEN); 904 if (apic_cpus) 905 kmem_free(apic_cpus, sizeof (*apic_cpus) * apic_nproc); 906 if (apicadr) 907 psm_unmap_phys((caddr_t)apicadr, APIC_LOCAL_MEMLEN); 908 apic_fail1: 909 psm_unmap_phys(mpct, mpct_size); 910 return (retval); 911 } 912 913 static void 914 apic_set_pwroff_method_from_mpcnfhdr(struct apic_mp_cnf_hdr *hdrp) 915 { 916 int i; 917 918 for (i = 0; i < (sizeof (apic_mps_ids) / sizeof (apic_mps_ids[0])); 919 i++) { 920 if ((strncmp(hdrp->mpcnf_oem_str, apic_mps_ids[i].oem_id, 921 strlen(apic_mps_ids[i].oem_id)) == 0) && 922 (strncmp(hdrp->mpcnf_prod_str, apic_mps_ids[i].prod_id, 923 strlen(apic_mps_ids[i].prod_id)) == 0)) { 924 925 apic_poweroff_method = apic_mps_ids[i].poweroff_method; 926 break; 927 } 928 } 929 930 if (apic_debug_mps_id != 0) { 931 cmn_err(CE_CONT, "pcplusmp: MPS OEM ID = '%c%c%c%c%c%c%c%c'" 932 "Product ID = '%c%c%c%c%c%c%c%c%c%c%c%c'\n", 933 hdrp->mpcnf_oem_str[0], 934 hdrp->mpcnf_oem_str[1], 935 hdrp->mpcnf_oem_str[2], 936 hdrp->mpcnf_oem_str[3], 937 hdrp->mpcnf_oem_str[4], 938 hdrp->mpcnf_oem_str[5], 939 hdrp->mpcnf_oem_str[6], 940 hdrp->mpcnf_oem_str[7], 941 
hdrp->mpcnf_prod_str[0], 942 hdrp->mpcnf_prod_str[1], 943 hdrp->mpcnf_prod_str[2], 944 hdrp->mpcnf_prod_str[3], 945 hdrp->mpcnf_prod_str[4], 946 hdrp->mpcnf_prod_str[5], 947 hdrp->mpcnf_prod_str[6], 948 hdrp->mpcnf_prod_str[7], 949 hdrp->mpcnf_prod_str[8], 950 hdrp->mpcnf_prod_str[9], 951 hdrp->mpcnf_prod_str[10], 952 hdrp->mpcnf_prod_str[11]); 953 } 954 } 955 956 static int 957 acpi_probe(void) 958 { 959 int i, id, intmax, ver, index, rv; 960 int acpi_verboseflags = 0; 961 int madt_seen, madt_size; 962 APIC_HEADER *ap; 963 MADT_PROCESSOR_APIC *mpa; 964 MADT_IO_APIC *mia; 965 MADT_IO_SAPIC *misa; 966 MADT_INTERRUPT_OVERRIDE *mio; 967 MADT_NMI_SOURCE *mns; 968 MADT_INTERRUPT_SOURCE *mis; 969 MADT_LOCAL_APIC_NMI *mlan; 970 MADT_ADDRESS_OVERRIDE *mao; 971 ACPI_OBJECT_LIST arglist; 972 ACPI_OBJECT arg; 973 int sci; 974 iflag_t sci_flags; 975 volatile int32_t *ioapic; 976 char local_ids[NCPU]; 977 char proc_ids[NCPU]; 978 uchar_t hid; 979 980 if (!apic_use_acpi) 981 return (PSM_FAILURE); 982 983 if (AcpiGetFirmwareTable(APIC_SIG, 1, ACPI_LOGICAL_ADDRESSING, 984 (ACPI_TABLE_HEADER **) &acpi_mapic_dtp) != AE_OK) 985 return (PSM_FAILURE); 986 987 apicadr = (uint32_t *)psm_map_phys( 988 (uint32_t)acpi_mapic_dtp->LocalApicAddress, 989 APIC_LOCAL_MEMLEN, PROT_READ | PROT_WRITE); 990 if (!apicadr) 991 return (PSM_FAILURE); 992 993 id = apicadr[APIC_LID_REG]; 994 local_ids[0] = (uchar_t)(((uint_t)id) >> 24); 995 apic_nproc = index = 1; 996 CPUSET_ONLY(apic_cpumask, 0); 997 apic_io_max = 0; 998 999 ap = (APIC_HEADER *) (acpi_mapic_dtp + 1); 1000 madt_size = acpi_mapic_dtp->Length; 1001 madt_seen = sizeof (*acpi_mapic_dtp); 1002 1003 while (madt_seen < madt_size) { 1004 switch (ap->Type) { 1005 case APIC_PROCESSOR: 1006 mpa = (MADT_PROCESSOR_APIC *) ap; 1007 if (mpa->ProcessorEnabled) { 1008 if (mpa->LocalApicId == local_ids[0]) 1009 proc_ids[0] = mpa->ProcessorId; 1010 else if (apic_nproc < NCPU) { 1011 local_ids[index] = mpa->LocalApicId; 1012 proc_ids[index] = 
mpa->ProcessorId; 1013 CPUSET_ADD(apic_cpumask, index); 1014 index++; 1015 apic_nproc++; 1016 } else 1017 cmn_err(CE_WARN, "pcplusmp: exceeded " 1018 "maximum no. of CPUs (= %d)", NCPU); 1019 } 1020 break; 1021 1022 case APIC_IO: 1023 mia = (MADT_IO_APIC *) ap; 1024 if (apic_io_max < MAX_IO_APIC) { 1025 apic_io_id[apic_io_max] = mia->IoApicId; 1026 apic_io_vectbase[apic_io_max] = 1027 mia->Interrupt; 1028 ioapic = apicioadr[apic_io_max] = 1029 (int32_t *)psm_map_phys( 1030 (uint32_t)mia->Address, 1031 APIC_IO_MEMLEN, PROT_READ | PROT_WRITE); 1032 if (!ioapic) 1033 goto cleanup; 1034 apic_io_max++; 1035 } 1036 break; 1037 1038 case APIC_XRUPT_OVERRIDE: 1039 mio = (MADT_INTERRUPT_OVERRIDE *) ap; 1040 if (acpi_isop == NULL) 1041 acpi_isop = mio; 1042 acpi_iso_cnt++; 1043 break; 1044 1045 case APIC_NMI: 1046 /* UNIMPLEMENTED */ 1047 mns = (MADT_NMI_SOURCE *) ap; 1048 if (acpi_nmi_sp == NULL) 1049 acpi_nmi_sp = mns; 1050 acpi_nmi_scnt++; 1051 1052 cmn_err(CE_NOTE, "!apic: nmi source: %d %d %d\n", 1053 mns->Interrupt, mns->Polarity, 1054 mns->TriggerMode); 1055 break; 1056 1057 case APIC_LOCAL_NMI: 1058 /* UNIMPLEMENTED */ 1059 mlan = (MADT_LOCAL_APIC_NMI *) ap; 1060 if (acpi_nmi_cp == NULL) 1061 acpi_nmi_cp = mlan; 1062 acpi_nmi_ccnt++; 1063 1064 cmn_err(CE_NOTE, "!apic: local nmi: %d %d %d %d\n", 1065 mlan->ProcessorId, mlan->Polarity, 1066 mlan->TriggerMode, mlan->Lint); 1067 break; 1068 1069 case APIC_ADDRESS_OVERRIDE: 1070 /* UNIMPLEMENTED */ 1071 mao = (MADT_ADDRESS_OVERRIDE *) ap; 1072 cmn_err(CE_NOTE, "!apic: address override: %lx\n", 1073 (long)mao->Address); 1074 break; 1075 1076 case APIC_IO_SAPIC: 1077 /* UNIMPLEMENTED */ 1078 misa = (MADT_IO_SAPIC *) ap; 1079 1080 cmn_err(CE_NOTE, "!apic: io sapic: %d %d %lx\n", 1081 misa->IoSapicId, misa->InterruptBase, 1082 (long)misa->Address); 1083 break; 1084 1085 case APIC_XRUPT_SOURCE: 1086 /* UNIMPLEMENTED */ 1087 mis = (MADT_INTERRUPT_SOURCE *) ap; 1088 1089 cmn_err(CE_NOTE, 1090 "!apic: irq source: %d %d %d %d %d 
%d %d\n", 1091 mis->ProcessorId, mis->ProcessorEid, 1092 mis->Interrupt, mis->Polarity, 1093 mis->TriggerMode, mis->InterruptType, 1094 mis->IoSapicVector); 1095 break; 1096 case APIC_RESERVED: 1097 default: 1098 break; /* ignore unknown items as per ACPI spec */ 1099 } 1100 1101 /* advance to next entry */ 1102 madt_seen += ap->Length; 1103 ap = (APIC_HEADER *)(((char *)ap) + ap->Length); 1104 } 1105 1106 if ((apic_cpus = kmem_zalloc(sizeof (*apic_cpus) * apic_nproc, 1107 KM_NOSLEEP)) == NULL) 1108 goto cleanup; 1109 1110 /* 1111 * ACPI doesn't provide the local apic ver, get it directly from the 1112 * local apic 1113 */ 1114 ver = apicadr[APIC_VERS_REG]; 1115 for (i = 0; i < apic_nproc; i++) { 1116 apic_cpus[i].aci_local_id = local_ids[i]; 1117 apic_cpus[i].aci_local_ver = (uchar_t)(ver & 0xFF); 1118 } 1119 for (i = 0; i < apic_io_max; i++) { 1120 ioapic = apicioadr[i]; 1121 1122 /* 1123 * need to check Sitka on the following acpi problem 1124 * On the Sitka, the ioapic's apic_id field isn't reporting 1125 * the actual io apic id. We have reported this problem 1126 * to Intel. Until they fix the problem, we will get the 1127 * actual id directly from the ioapic. 
1128 */ 1129 ioapic[APIC_IO_REG] = APIC_ID_CMD; 1130 id = ioapic[APIC_IO_DATA]; 1131 hid = (uchar_t)(((uint_t)id) >> 24); 1132 1133 if (hid != apic_io_id[i]) { 1134 if (apic_io_id[i] == 0) 1135 apic_io_id[i] = hid; 1136 else { /* set ioapic id to whatever reported by ACPI */ 1137 id = ((int32_t)apic_io_id[i]) << 24; 1138 ioapic[APIC_IO_REG] = APIC_ID_CMD; 1139 ioapic[APIC_IO_DATA] = id; 1140 } 1141 } 1142 ioapic[APIC_IO_REG] = APIC_VERS_CMD; 1143 ver = ioapic[APIC_IO_DATA]; 1144 apic_io_ver[i] = (uchar_t)(ver & 0xff); 1145 intmax = (ver >> 16) & 0xff; 1146 apic_io_vectend[i] = apic_io_vectbase[i] + intmax; 1147 if (apic_first_avail_irq <= apic_io_vectend[i]) 1148 apic_first_avail_irq = apic_io_vectend[i] + 1; 1149 } 1150 1151 1152 /* 1153 * Process SCI configuration here 1154 * An error may be returned here if 1155 * acpi-user-options specifies legacy mode 1156 * (no SCI, no ACPI mode) 1157 */ 1158 if (acpica_get_sci(&sci, &sci_flags) != AE_OK) 1159 sci = -1; 1160 1161 /* 1162 * Now call acpi_init() to generate namespaces 1163 * If this fails, we don't attempt to use ACPI 1164 * even if we were able to get a MADT above 1165 */ 1166 if (acpica_init() != AE_OK) 1167 goto cleanup; 1168 1169 /* 1170 * Squirrel away the SCI and flags for later on 1171 * in apic_picinit() when we're ready 1172 */ 1173 apic_sci_vect = sci; 1174 apic_sci_flags = sci_flags; 1175 1176 if (apic_verbose & APIC_VERBOSE_IRQ_FLAG) 1177 acpi_verboseflags |= PSM_VERBOSE_IRQ_FLAG; 1178 1179 if (apic_verbose & APIC_VERBOSE_POWEROFF_FLAG) 1180 acpi_verboseflags |= PSM_VERBOSE_POWEROFF_FLAG; 1181 1182 if (apic_verbose & APIC_VERBOSE_POWEROFF_PAUSE_FLAG) 1183 acpi_verboseflags |= PSM_VERBOSE_POWEROFF_PAUSE_FLAG; 1184 1185 if (acpi_psm_init(apic_psm_info.p_mach_idstring, acpi_verboseflags) == 1186 ACPI_PSM_FAILURE) 1187 goto cleanup; 1188 1189 /* Enable ACPI APIC interrupt routing */ 1190 arglist.Count = 1; 1191 arglist.Pointer = &arg; 1192 arg.Type = ACPI_TYPE_INTEGER; 1193 arg.Integer.Value = 
ACPI_APIC_MODE; /* 1 */ 1194 rv = AcpiEvaluateObject(NULL, "\\_PIC", &arglist, NULL); 1195 if (rv == AE_OK) { 1196 build_reserved_irqlist((uchar_t *)apic_reserved_irqlist); 1197 apic_enable_acpi = 1; 1198 if (apic_use_acpi_madt_only) { 1199 cmn_err(CE_CONT, 1200 "?Using ACPI for CPU/IOAPIC information ONLY\n"); 1201 } 1202 return (PSM_SUCCESS); 1203 } 1204 /* if setting APIC mode failed above, we fall through to cleanup */ 1205 1206 cleanup: 1207 if (apicadr != NULL) { 1208 psm_unmap_phys((caddr_t)apicadr, APIC_LOCAL_MEMLEN); 1209 apicadr = NULL; 1210 } 1211 apic_nproc = 0; 1212 for (i = 0; i < apic_io_max; i++) { 1213 psm_unmap_phys((caddr_t)apicioadr[i], APIC_IO_MEMLEN); 1214 apicioadr[i] = NULL; 1215 } 1216 apic_io_max = 0; 1217 acpi_isop = NULL; 1218 acpi_iso_cnt = 0; 1219 acpi_nmi_sp = NULL; 1220 acpi_nmi_scnt = 0; 1221 acpi_nmi_cp = NULL; 1222 acpi_nmi_ccnt = 0; 1223 return (PSM_FAILURE); 1224 } 1225 1226 /* 1227 * Handle default configuration. Fill in reqd global variables & tables 1228 * Fill all details as MP table does not give any more info 1229 */ 1230 static int 1231 apic_handle_defconf() 1232 { 1233 uint_t lid; 1234 1235 /*LINTED: pointer cast may result in improper alignment */ 1236 apicioadr[0] = (int32_t *)psm_map_phys(APIC_IO_ADDR, 1237 APIC_IO_MEMLEN, PROT_READ | PROT_WRITE); 1238 /*LINTED: pointer cast may result in improper alignment */ 1239 apicadr = (uint32_t *)psm_map_phys(APIC_LOCAL_ADDR, 1240 APIC_LOCAL_MEMLEN, PROT_READ | PROT_WRITE); 1241 apic_cpus = (apic_cpus_info_t *) 1242 kmem_zalloc(sizeof (*apic_cpus) * 2, KM_NOSLEEP); 1243 if ((!apicadr) || (!apicioadr[0]) || (!apic_cpus)) 1244 goto apic_handle_defconf_fail; 1245 CPUSET_ONLY(apic_cpumask, 0); 1246 CPUSET_ADD(apic_cpumask, 1); 1247 apic_nproc = 2; 1248 lid = apicadr[APIC_LID_REG]; 1249 apic_cpus[0].aci_local_id = (uchar_t)(lid >> APIC_ID_BIT_OFFSET); 1250 /* 1251 * According to the PC+MP spec 1.1, the local ids 1252 * for the default configuration has to be 0 or 1 1253 */ 1254 if 
(apic_cpus[0].aci_local_id == 1) 1255 apic_cpus[1].aci_local_id = 0; 1256 else if (apic_cpus[0].aci_local_id == 0) 1257 apic_cpus[1].aci_local_id = 1; 1258 else 1259 goto apic_handle_defconf_fail; 1260 1261 apic_io_id[0] = 2; 1262 apic_io_max = 1; 1263 if (apic_defconf >= 5) { 1264 apic_cpus[0].aci_local_ver = APIC_INTEGRATED_VERS; 1265 apic_cpus[1].aci_local_ver = APIC_INTEGRATED_VERS; 1266 apic_io_ver[0] = APIC_INTEGRATED_VERS; 1267 } else { 1268 apic_cpus[0].aci_local_ver = 0; /* 82489 DX */ 1269 apic_cpus[1].aci_local_ver = 0; 1270 apic_io_ver[0] = 0; 1271 } 1272 if (apic_defconf == 2 || apic_defconf == 3 || apic_defconf == 6) 1273 eisa_level_intr_mask = (inb(EISA_LEVEL_CNTL + 1) << 8) | 1274 inb(EISA_LEVEL_CNTL) | ((uint_t)INT32_MAX + 1); 1275 return (PSM_SUCCESS); 1276 1277 apic_handle_defconf_fail: 1278 if (apic_cpus) 1279 kmem_free(apic_cpus, sizeof (*apic_cpus) * 2); 1280 if (apicadr) 1281 psm_unmap_phys((caddr_t)apicadr, APIC_LOCAL_MEMLEN); 1282 if (apicioadr[0]) 1283 psm_unmap_phys((caddr_t)apicioadr[0], APIC_IO_MEMLEN); 1284 return (PSM_FAILURE); 1285 } 1286 1287 /* Parse the entries in MP configuration table and collect info that we need */ 1288 static int 1289 apic_parse_mpct(caddr_t mpct, int bypass_cpus_and_ioapics) 1290 { 1291 struct apic_procent *procp; 1292 struct apic_bus *busp; 1293 struct apic_io_entry *ioapicp; 1294 struct apic_io_intr *intrp; 1295 volatile int32_t *ioapic; 1296 uint_t lid; 1297 int id; 1298 uchar_t hid; 1299 1300 /*LINTED: pointer cast may result in improper alignment */ 1301 procp = (struct apic_procent *)(mpct + sizeof (struct apic_mp_cnf_hdr)); 1302 1303 /* No need to count cpu entries if we won't use them */ 1304 if (!bypass_cpus_and_ioapics) { 1305 1306 /* Find max # of CPUS and allocate structure accordingly */ 1307 apic_nproc = 0; 1308 CPUSET_ZERO(apic_cpumask); 1309 while (procp->proc_entry == APIC_CPU_ENTRY) { 1310 if (procp->proc_cpuflags & CPUFLAGS_EN) { 1311 if (apic_nproc < NCPU) 1312 CPUSET_ADD(apic_cpumask, 
apic_nproc); 1313 apic_nproc++; 1314 } 1315 procp++; 1316 } 1317 if (apic_nproc > NCPU) 1318 cmn_err(CE_WARN, "pcplusmp: exceeded " 1319 "maximum no. of CPUs (= %d)", NCPU); 1320 if (!apic_nproc || !(apic_cpus = (apic_cpus_info_t *) 1321 kmem_zalloc(sizeof (*apic_cpus)*apic_nproc, KM_NOSLEEP))) 1322 return (PSM_FAILURE); 1323 } 1324 1325 /*LINTED: pointer cast may result in improper alignment */ 1326 procp = (struct apic_procent *)(mpct + sizeof (struct apic_mp_cnf_hdr)); 1327 1328 /* 1329 * start with index 1 as 0 needs to be filled in with Boot CPU, but 1330 * if we're bypassing this information, it has already been filled 1331 * in by acpi_probe(), so don't overwrite it. 1332 */ 1333 if (!bypass_cpus_and_ioapics) 1334 apic_nproc = 1; 1335 1336 while (procp->proc_entry == APIC_CPU_ENTRY) { 1337 /* check whether the cpu exists or not */ 1338 if (!bypass_cpus_and_ioapics && 1339 procp->proc_cpuflags & CPUFLAGS_EN) { 1340 if (procp->proc_cpuflags & CPUFLAGS_BP) { /* Boot CPU */ 1341 lid = apicadr[APIC_LID_REG]; 1342 apic_cpus[0].aci_local_id = procp->proc_apicid; 1343 if (apic_cpus[0].aci_local_id != 1344 (uchar_t)(lid >> APIC_ID_BIT_OFFSET)) { 1345 return (PSM_FAILURE); 1346 } 1347 apic_cpus[0].aci_local_ver = 1348 procp->proc_version; 1349 } else { 1350 1351 apic_cpus[apic_nproc].aci_local_id = 1352 procp->proc_apicid; 1353 apic_cpus[apic_nproc].aci_local_ver = 1354 procp->proc_version; 1355 apic_nproc++; 1356 1357 } 1358 } 1359 procp++; 1360 } 1361 1362 /* 1363 * Save start of bus entries for later use. 1364 * Get EISA level cntrl if EISA bus is present. 
1365 * Also get the CPI bus id for single CPI bus case 1366 */ 1367 apic_busp = busp = (struct apic_bus *)procp; 1368 while (busp->bus_entry == APIC_BUS_ENTRY) { 1369 lid = apic_find_bus_type((char *)&busp->bus_str1); 1370 if (lid == BUS_EISA) { 1371 eisa_level_intr_mask = (inb(EISA_LEVEL_CNTL + 1) << 8) | 1372 inb(EISA_LEVEL_CNTL) | ((uint_t)INT32_MAX + 1); 1373 } else if (lid == BUS_PCI) { 1374 /* 1375 * apic_single_pci_busid will be used only if 1376 * apic_pic_bus_total is equal to 1 1377 */ 1378 apic_pci_bus_total++; 1379 apic_single_pci_busid = busp->bus_id; 1380 } 1381 busp++; 1382 } 1383 1384 ioapicp = (struct apic_io_entry *)busp; 1385 1386 if (!bypass_cpus_and_ioapics) 1387 apic_io_max = 0; 1388 do { 1389 if (!bypass_cpus_and_ioapics && apic_io_max < MAX_IO_APIC) { 1390 if (ioapicp->io_flags & IOAPIC_FLAGS_EN) { 1391 apic_io_id[apic_io_max] = ioapicp->io_apicid; 1392 apic_io_ver[apic_io_max] = ioapicp->io_version; 1393 /*LINTED: pointer cast may result in improper alignment */ 1394 apicioadr[apic_io_max] = 1395 (int32_t *)psm_map_phys( 1396 (uint32_t)ioapicp->io_apic_addr, 1397 APIC_IO_MEMLEN, PROT_READ | PROT_WRITE); 1398 1399 if (!apicioadr[apic_io_max]) 1400 return (PSM_FAILURE); 1401 1402 ioapic = apicioadr[apic_io_max]; 1403 ioapic[APIC_IO_REG] = APIC_ID_CMD; 1404 id = ioapic[APIC_IO_DATA]; 1405 hid = (uchar_t)(((uint_t)id) >> 24); 1406 1407 if (hid != apic_io_id[apic_io_max]) { 1408 if (apic_io_id[apic_io_max] == 0) 1409 apic_io_id[apic_io_max] = hid; 1410 else { 1411 /* 1412 * set ioapic id to whatever 1413 * reported by MPS 1414 * 1415 * may not need to set index 1416 * again ??? 
1417 * take it out and try 1418 */ 1419 1420 id = ((int32_t) 1421 apic_io_id[apic_io_max]) << 1422 24; 1423 1424 ioapic[APIC_IO_REG] = 1425 APIC_ID_CMD; 1426 1427 ioapic[APIC_IO_DATA] = id; 1428 1429 } 1430 } 1431 apic_io_max++; 1432 } 1433 } 1434 ioapicp++; 1435 } while (ioapicp->io_entry == APIC_IO_ENTRY); 1436 1437 apic_io_intrp = (struct apic_io_intr *)ioapicp; 1438 1439 intrp = apic_io_intrp; 1440 while (intrp->intr_entry == APIC_IO_INTR_ENTRY) { 1441 if ((intrp->intr_irq > APIC_MAX_ISA_IRQ) || 1442 (apic_find_bus(intrp->intr_busid) == BUS_PCI)) { 1443 apic_irq_translate = 1; 1444 break; 1445 } 1446 intrp++; 1447 } 1448 1449 return (PSM_SUCCESS); 1450 } 1451 1452 boolean_t 1453 apic_cpu_in_range(int cpu) 1454 { 1455 return ((cpu & ~IRQ_USER_BOUND) < apic_nproc); 1456 } 1457 1458 static struct apic_mpfps_hdr * 1459 apic_find_fps_sig(caddr_t cptr, int len) 1460 { 1461 int i; 1462 1463 /* Look for the pattern "_MP_" */ 1464 for (i = 0; i < len; i += 16) { 1465 if ((*(cptr+i) == '_') && 1466 (*(cptr+i+1) == 'M') && 1467 (*(cptr+i+2) == 'P') && 1468 (*(cptr+i+3) == '_')) 1469 /*LINTED: pointer cast may result in improper alignment */ 1470 return ((struct apic_mpfps_hdr *)(cptr + i)); 1471 } 1472 return (NULL); 1473 } 1474 1475 static int 1476 apic_checksum(caddr_t bptr, int len) 1477 { 1478 int i; 1479 uchar_t cksum; 1480 1481 cksum = 0; 1482 for (i = 0; i < len; i++) 1483 cksum += *bptr++; 1484 return ((int)cksum); 1485 } 1486 1487 1488 /* 1489 * Initialise vector->ipl and ipl->pri arrays. level_intr and irqtable 1490 * are also set to NULL. vector->irq is set to a value which cannot map 1491 * to a real irq to show that it is free. 
1492 */ 1493 void 1494 apic_init() 1495 { 1496 int i; 1497 int *iptr; 1498 1499 int j = 1; 1500 apic_ipltopri[0] = APIC_VECTOR_PER_IPL; /* leave 0 for idle */ 1501 for (i = 0; i < (APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL); i++) { 1502 if ((i < ((APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL) - 1)) && 1503 (apic_vectortoipl[i + 1] == apic_vectortoipl[i])) 1504 /* get to highest vector at the same ipl */ 1505 continue; 1506 for (; j <= apic_vectortoipl[i]; j++) { 1507 apic_ipltopri[j] = (i << APIC_IPL_SHIFT) + 1508 APIC_BASE_VECT; 1509 } 1510 } 1511 for (; j < MAXIPL + 1; j++) 1512 /* fill up any empty ipltopri slots */ 1513 apic_ipltopri[j] = (i << APIC_IPL_SHIFT) + APIC_BASE_VECT; 1514 1515 /* cpu 0 is always up */ 1516 apic_cpus[0].aci_status = APIC_CPU_ONLINE | APIC_CPU_INTR_ENABLE; 1517 1518 iptr = (int *)&apic_irq_table[0]; 1519 for (i = 0; i <= APIC_MAX_VECTOR; i++) { 1520 apic_level_intr[i] = 0; 1521 *iptr++ = NULL; 1522 apic_vector_to_irq[i] = APIC_RESV_IRQ; 1523 apic_reprogram_info[i].valid = 0; 1524 apic_reprogram_info[i].bindcpu = 0; 1525 apic_reprogram_info[i].timeouts = 0; 1526 } 1527 1528 /* 1529 * Allocate a dummy irq table entry for the reserved entry. 1530 * This takes care of the race between removing an irq and 1531 * clock detecting a CPU in that irq during interrupt load 1532 * sampling. 1533 */ 1534 apic_irq_table[APIC_RESV_IRQ] = 1535 kmem_zalloc(sizeof (apic_irq_t), KM_NOSLEEP); 1536 1537 mutex_init(&airq_mutex, NULL, MUTEX_DEFAULT, NULL); 1538 mutex_init(&apic_reprogram_timeout_mutex, NULL, MUTEX_DEFAULT, NULL); 1539 #if defined(__amd64) 1540 /* 1541 * Make cpu-specific interrupt info point to cr8pri vector 1542 */ 1543 for (i = 0; i <= MAXIPL; i++) 1544 apic_cr8pri[i] = apic_ipltopri[i] >> APIC_IPL_SHIFT; 1545 CPU->cpu_pri_data = apic_cr8pri; 1546 intpri_use_cr8 = 1; 1547 #endif /* __amd64 */ 1548 } 1549 1550 /* 1551 * handler for APIC Error interrupt. 
Just print a warning and continue 1552 */ 1553 static int 1554 apic_error_intr() 1555 { 1556 uint_t error0, error1, error; 1557 uint_t i; 1558 1559 /* 1560 * We need to write before read as per 7.4.17 of system prog manual. 1561 * We do both and or the results to be safe 1562 */ 1563 error0 = apicadr[APIC_ERROR_STATUS]; 1564 apicadr[APIC_ERROR_STATUS] = 0; 1565 error1 = apicadr[APIC_ERROR_STATUS]; 1566 error = error0 | error1; 1567 1568 /* 1569 * Clear the APIC error status (do this on all cpus that enter here) 1570 * (two writes are required due to the semantics of accessing the 1571 * error status register.) 1572 */ 1573 apicadr[APIC_ERROR_STATUS] = 0; 1574 apicadr[APIC_ERROR_STATUS] = 0; 1575 1576 /* 1577 * Prevent more than 1 CPU from handling error interrupt causing 1578 * double printing (interleave of characters from multiple 1579 * CPU's when using prom_printf) 1580 */ 1581 if (lock_try(&apic_error_lock) == 0) 1582 return (error ? DDI_INTR_CLAIMED : DDI_INTR_UNCLAIMED); 1583 if (error) { 1584 #if DEBUG 1585 if (apic_debug) 1586 debug_enter("pcplusmp: APIC Error interrupt received"); 1587 #endif /* DEBUG */ 1588 if (apic_panic_on_apic_error) 1589 cmn_err(CE_PANIC, 1590 "APIC Error interrupt on CPU %d. Status = %x\n", 1591 psm_get_cpu_id(), error); 1592 else { 1593 if ((error & ~APIC_CS_ERRORS) == 0) { 1594 /* cksum error only */ 1595 apic_error |= APIC_ERR_APIC_ERROR; 1596 apic_apic_error |= error; 1597 apic_num_apic_errors++; 1598 apic_num_cksum_errors++; 1599 } else { 1600 /* 1601 * prom_printf is the best shot we have of 1602 * something which is problem free from 1603 * high level/NMI type of interrupts 1604 */ 1605 prom_printf("APIC Error interrupt on CPU %d. 
" 1606 "Status 0 = %x, Status 1 = %x\n", 1607 psm_get_cpu_id(), error0, error1); 1608 apic_error |= APIC_ERR_APIC_ERROR; 1609 apic_apic_error |= error; 1610 apic_num_apic_errors++; 1611 for (i = 0; i < apic_error_display_delay; i++) { 1612 tenmicrosec(); 1613 } 1614 /* 1615 * provide more delay next time limited to 1616 * roughly 1 clock tick time 1617 */ 1618 if (apic_error_display_delay < 500) 1619 apic_error_display_delay *= 2; 1620 } 1621 } 1622 lock_clear(&apic_error_lock); 1623 return (DDI_INTR_CLAIMED); 1624 } else { 1625 lock_clear(&apic_error_lock); 1626 return (DDI_INTR_UNCLAIMED); 1627 } 1628 /* NOTREACHED */ 1629 } 1630 1631 /* 1632 * Turn off the mask bit in the performance counter Local Vector Table entry. 1633 */ 1634 static void 1635 apic_cpcovf_mask_clear(void) 1636 { 1637 apicadr[APIC_PCINT_VECT] &= ~APIC_LVT_MASK; 1638 } 1639 1640 static void 1641 apic_init_intr() 1642 { 1643 processorid_t cpun = psm_get_cpu_id(); 1644 1645 #if defined(__amd64) 1646 setcr8((ulong_t)(APIC_MASK_ALL >> APIC_IPL_SHIFT)); 1647 #else 1648 apicadr[APIC_TASK_REG] = APIC_MASK_ALL; 1649 #endif 1650 1651 if (apic_flat_model) 1652 apicadr[APIC_FORMAT_REG] = APIC_FLAT_MODEL; 1653 else 1654 apicadr[APIC_FORMAT_REG] = APIC_CLUSTER_MODEL; 1655 apicadr[APIC_DEST_REG] = AV_HIGH_ORDER >> cpun; 1656 1657 /* need to enable APIC before unmasking NMI */ 1658 apicadr[APIC_SPUR_INT_REG] = AV_UNIT_ENABLE | APIC_SPUR_INTR; 1659 1660 apicadr[APIC_LOCAL_TIMER] = AV_MASK; 1661 apicadr[APIC_INT_VECT0] = AV_MASK; /* local intr reg 0 */ 1662 apicadr[APIC_INT_VECT1] = AV_NMI; /* enable NMI */ 1663 1664 if (apic_cpus[cpun].aci_local_ver < APIC_INTEGRATED_VERS) 1665 return; 1666 1667 /* Enable performance counter overflow interrupt */ 1668 1669 if ((x86_feature & X86_MSR) != X86_MSR) 1670 apic_enable_cpcovf_intr = 0; 1671 if (apic_enable_cpcovf_intr) { 1672 if (apic_cpcovf_vect == 0) { 1673 int ipl = APIC_PCINT_IPL; 1674 int irq = apic_get_ipivect(ipl, -1); 1675 1676 ASSERT(irq != -1); 1677 
/*
 * Program the local APIC of the CPU this runs on (psm_get_cpu_id()).
 *
 * All priorities are masked first, the destination format and logical
 * destination are set, the APIC is enabled through the spurious interrupt
 * register, and the timer and LINT0 entries are masked while LINT1 is set
 * to deliver NMI.  For local APICs whose recorded version is at least
 * APIC_INTEGRATED_VERS the performance counter overflow and error
 * interrupt entries are then set up, allocating their vectors and
 * registering their handlers the first time through.
 */
static void
apic_init_intr()
{
	processorid_t	cpun = psm_get_cpu_id();

	/* block everything while the APIC is being programmed */
#if defined(__amd64)
	setcr8((ulong_t)(APIC_MASK_ALL >> APIC_IPL_SHIFT));
#else
	apicadr[APIC_TASK_REG] = APIC_MASK_ALL;
#endif

	if (apic_flat_model)
		apicadr[APIC_FORMAT_REG] = APIC_FLAT_MODEL;
	else
		apicadr[APIC_FORMAT_REG] = APIC_CLUSTER_MODEL;
	/* one bit per cpu, counted down from AV_HIGH_ORDER */
	apicadr[APIC_DEST_REG] = AV_HIGH_ORDER >> cpun;

	/* need to enable APIC before unmasking NMI */
	apicadr[APIC_SPUR_INT_REG] = AV_UNIT_ENABLE | APIC_SPUR_INTR;

	apicadr[APIC_LOCAL_TIMER] = AV_MASK;
	apicadr[APIC_INT_VECT0]	= AV_MASK;	/* local intr reg 0 */
	apicadr[APIC_INT_VECT1] = AV_NMI;	/* enable NMI */

	/* the remaining entries are only programmed on integrated APICs */
	if (apic_cpus[cpun].aci_local_ver < APIC_INTEGRATED_VERS)
		return;

	/* Enable performance counter overflow interrupt */

	if ((x86_feature & X86_MSR) != X86_MSR)
		apic_enable_cpcovf_intr = 0;
	if (apic_enable_cpcovf_intr) {
		/* the vector is allocated once and reused afterwards */
		if (apic_cpcovf_vect == 0) {
			int ipl = APIC_PCINT_IPL;
			int irq = apic_get_ipivect(ipl, -1);

			/*
			 * NOTE(review): a failed allocation is only caught
			 * by these ASSERTs, i.e. presumably only in DEBUG
			 * builds -- confirm.
			 */
			ASSERT(irq != -1);
			apic_cpcovf_vect = apic_irq_table[irq]->airq_vector;
			ASSERT(apic_cpcovf_vect);
			(void) add_avintr(NULL, ipl,
			    (avfunc)kcpc_hw_overflow_intr,
			    "apic pcint", irq, NULL, NULL, NULL, NULL);
			kcpc_hw_overflow_intr_installed = 1;
			kcpc_hw_enable_cpc_intr = apic_cpcovf_mask_clear;
		}
		apicadr[APIC_PCINT_VECT] = apic_cpcovf_vect;
	}

	/* Enable error interrupt */

	if (apic_enable_error_intr) {
		/* the vector is allocated once and reused afterwards */
		if (apic_errvect == 0) {
			int ipl = 0xf;	/* get highest priority intr */
			int irq = apic_get_ipivect(ipl, -1);

			ASSERT(irq != -1);
			apic_errvect = apic_irq_table[irq]->airq_vector;
			ASSERT(apic_errvect);
			/*
			 * Not PSMI compliant, but we are going to merge
			 * with ON anyway
			 */
			(void) add_avintr((void *)NULL, ipl,
			    (avfunc)apic_error_intr, "apic error intr",
			    irq, NULL, NULL, NULL, NULL);
		}
		apicadr[APIC_ERR_VECT] = apic_errvect;
		/* two writes, as in apic_error_intr(), to clear the status */
		apicadr[APIC_ERROR_STATUS] = 0;
		apicadr[APIC_ERROR_STATUS] = 0;
	}
}

/*
 * Quiesce the local APIC: mask all priorities, mask every local vector
 * table entry programmed by apic_init_intr() (timer, LINT0, LINT1/NMI,
 * error, performance counter) and finally rewrite the spurious interrupt
 * register without AV_UNIT_ENABLE.
 */
static void
apic_disable_local_apic()
{
	apicadr[APIC_TASK_REG] = APIC_MASK_ALL;
	apicadr[APIC_LOCAL_TIMER] = AV_MASK;
	apicadr[APIC_INT_VECT0] = AV_MASK;	/* local intr reg 0 */
	apicadr[APIC_INT_VECT1] = AV_MASK;	/* disable NMI */
	apicadr[APIC_ERR_VECT] = AV_MASK;	/* and error interrupt */
	apicadr[APIC_PCINT_VECT] = AV_MASK;	/* and perf counter intr */
	apicadr[APIC_SPUR_INT_REG] = APIC_SPUR_INTR;
}
1739 * If not, EOIs are issued to clear the bits. 1740 */ 1741 for (i = 7; i >= 1; i--) { 1742 if ((isr = apicadr[APIC_ISR_REG + (i * 4)]) != 0) 1743 for (j = 0; ((j < 32) && (isr != 0)); j++) 1744 if (isr & (1 << j)) { 1745 apicadr[APIC_EOI_REG] = 0; 1746 isr &= ~(1 << j); 1747 apic_error |= APIC_ERR_BOOT_EOI; 1748 } 1749 } 1750 1751 /* set a flag so we know we have run apic_picinit() */ 1752 apic_flag = 1; 1753 LOCK_INIT_CLEAR(&apic_gethrtime_lock); 1754 LOCK_INIT_CLEAR(&apic_ioapic_lock); 1755 LOCK_INIT_CLEAR(&apic_revector_lock); 1756 LOCK_INIT_CLEAR(&apic_ioapic_reprogram_lock); 1757 LOCK_INIT_CLEAR(&apic_error_lock); 1758 1759 picsetup(); /* initialise the 8259 */ 1760 1761 /* add nmi handler - least priority nmi handler */ 1762 LOCK_INIT_CLEAR(&apic_nmi_lock); 1763 1764 if (!psm_add_nmintr(0, (avfunc) apic_nmi_intr, 1765 "pcplusmp NMI handler", (caddr_t)NULL)) 1766 cmn_err(CE_WARN, "pcplusmp: Unable to add nmi handler"); 1767 1768 apic_init_intr(); 1769 1770 /* enable apic mode if imcr present */ 1771 if (apic_imcrp) { 1772 outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT); 1773 outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_APIC); 1774 } 1775 1776 /* mask interrupt vectors */ 1777 for (j = 0; j < apic_io_max; j++) { 1778 int intin_max; 1779 ioapic = apicioadr[j]; 1780 ioapic[APIC_IO_REG] = APIC_VERS_CMD; 1781 /* Bits 23-16 define the maximum redirection entries */ 1782 intin_max = (ioapic[APIC_IO_DATA] >> 16) & 0xff; 1783 for (i = 0; i < intin_max; i++) { 1784 ioapic[APIC_IO_REG] = APIC_RDT_CMD + 2 * i; 1785 ioapic[APIC_IO_DATA] = AV_MASK; 1786 } 1787 } 1788 1789 /* 1790 * Hack alert: deal with ACPI SCI interrupt chicken/egg here 1791 */ 1792 if (apic_sci_vect > 0) { 1793 /* 1794 * acpica has already done add_avintr(); we just 1795 * to finish the job by mimicing translate_irq() 1796 * 1797 * Fake up an intrspec and setup the tables 1798 */ 1799 ispec.intrspec_vec = apic_sci_vect; 1800 ispec.intrspec_pri = SCI_IPL; 1801 1802 if (apic_setup_irq_table(NULL, apic_sci_vect, 
NULL, 1803 &ispec, &apic_sci_flags, DDI_INTR_TYPE_FIXED) < 0) { 1804 cmn_err(CE_WARN, "!apic: SCI setup failed"); 1805 return; 1806 } 1807 irqptr = apic_irq_table[apic_sci_vect]; 1808 1809 /* Program I/O APIC */ 1810 (void) apic_setup_io_intr(irqptr, apic_sci_vect); 1811 1812 irqptr->airq_share++; 1813 } 1814 } 1815 1816 1817 static void 1818 apic_cpu_start(processorid_t cpun, caddr_t rm_code) 1819 { 1820 int loop_count; 1821 uint32_t vector; 1822 uint_t cpu_id, iflag; 1823 1824 cpu_id = apic_cpus[cpun].aci_local_id; 1825 1826 apic_cmos_ssb_set = 1; 1827 1828 /* 1829 * Interrupts on BSP cpu will be disabled during these startup 1830 * steps in order to avoid unwanted side effects from 1831 * executing interrupt handlers on a problematic BIOS. 1832 */ 1833 1834 iflag = intr_clear(); 1835 outb(CMOS_ADDR, SSB); 1836 outb(CMOS_DATA, BIOS_SHUTDOWN); 1837 1838 while (get_apic_cmd1() & AV_PENDING) 1839 apic_ret(); 1840 1841 /* for integrated - make sure there is one INIT IPI in buffer */ 1842 /* for external - it will wake up the cpu */ 1843 apicadr[APIC_INT_CMD2] = cpu_id << APIC_ICR_ID_BIT_OFFSET; 1844 apicadr[APIC_INT_CMD1] = AV_ASSERT | AV_RESET; 1845 1846 /* If only 1 CPU is installed, PENDING bit will not go low */ 1847 for (loop_count = 0x1000; loop_count; loop_count--) 1848 if (get_apic_cmd1() & AV_PENDING) 1849 apic_ret(); 1850 else 1851 break; 1852 1853 apicadr[APIC_INT_CMD2] = cpu_id << APIC_ICR_ID_BIT_OFFSET; 1854 apicadr[APIC_INT_CMD1] = AV_DEASSERT | AV_RESET; 1855 1856 drv_usecwait(20000); /* 20 milli sec */ 1857 1858 if (apic_cpus[cpun].aci_local_ver >= APIC_INTEGRATED_VERS) { 1859 /* integrated apic */ 1860 1861 rm_code = (caddr_t)(uintptr_t)rm_platter_pa; 1862 vector = (rm_platter_pa >> MMU_PAGESHIFT) & 1863 (APIC_VECTOR_MASK | APIC_IPL_MASK); 1864 1865 /* to offset the INIT IPI queue up in the buffer */ 1866 apicadr[APIC_INT_CMD2] = cpu_id << APIC_ICR_ID_BIT_OFFSET; 1867 apicadr[APIC_INT_CMD1] = vector | AV_STARTUP; 1868 1869 drv_usecwait(200); /* 20 
/*
 * Start the processor whose index in apic_cpus[] is cpun.
 *
 * With interrupts disabled on the calling CPU, the CMOS shutdown byte is
 * set to BIOS_SHUTDOWN, then an INIT IPI is asserted and de-asserted at
 * the target's local APIC id.  For integrated local APICs this is followed
 * by two STARTUP IPIs whose vector is derived from rm_platter_pa.
 *
 * rm_code is overwritten with rm_platter_pa on the integrated path, but
 * its value is not read afterwards in this function.
 */
static void
apic_cpu_start(processorid_t cpun, caddr_t rm_code)
{
	int		loop_count;
	uint32_t	vector;
	uint_t		cpu_id, iflag;

	cpu_id = apic_cpus[cpun].aci_local_id;

	apic_cmos_ssb_set = 1;

	/*
	 * Interrupts on BSP cpu will be disabled during these startup
	 * steps in order to avoid unwanted side effects from
	 * executing interrupt handlers on a problematic BIOS.
	 */

	iflag = intr_clear();
	outb(CMOS_ADDR, SSB);
	outb(CMOS_DATA, BIOS_SHUTDOWN);

	/* wait for any previously issued IPI to be sent */
	while (get_apic_cmd1() & AV_PENDING)
		apic_ret();

	/* for integrated - make sure there is one INIT IPI in buffer */
	/* for external - it will wake up the cpu */
	apicadr[APIC_INT_CMD2] = cpu_id << APIC_ICR_ID_BIT_OFFSET;
	apicadr[APIC_INT_CMD1] = AV_ASSERT | AV_RESET;

	/* If only 1 CPU is installed, PENDING bit will not go low */
	for (loop_count = 0x1000; loop_count; loop_count--)
		if (get_apic_cmd1() & AV_PENDING)
			apic_ret();
		else
			break;

	apicadr[APIC_INT_CMD2] = cpu_id << APIC_ICR_ID_BIT_OFFSET;
	apicadr[APIC_INT_CMD1] = AV_DEASSERT | AV_RESET;

	drv_usecwait(20000);		/* 20 milli sec */

	if (apic_cpus[cpun].aci_local_ver >= APIC_INTEGRATED_VERS) {
		/* integrated apic */

		rm_code = (caddr_t)(uintptr_t)rm_platter_pa;
		/* the STARTUP vector is the page number of the platter */
		vector = (rm_platter_pa >> MMU_PAGESHIFT) &
		    (APIC_VECTOR_MASK | APIC_IPL_MASK);

		/* to offset the INIT IPI queue up in the buffer */
		apicadr[APIC_INT_CMD2] = cpu_id << APIC_ICR_ID_BIT_OFFSET;
		apicadr[APIC_INT_CMD1] = vector | AV_STARTUP;

		drv_usecwait(200);		/* 200 micro sec */

		apicadr[APIC_INT_CMD2] = cpu_id << APIC_ICR_ID_BIT_OFFSET;
		apicadr[APIC_INT_CMD1] = vector | AV_STARTUP;

		drv_usecwait(200);		/* 200 micro sec */
	}
	intr_restore(iflag);
}


#ifdef	DEBUG
/*
 * Debugging aids used by apic_intr_enter(): apic_break() is called when an
 * interrupt is taken on cpu apic_break_on_cpu, and interrupts whose ipl bit
 * is set in apic_stretch_ISR are delayed by apic_stretch_interrupts
 * microseconds when that value is non-zero.
 */
int	apic_break_on_cpu = 9;
int	apic_stretch_interrupts = 0;
int	apic_stretch_ISR = 1 << 3;	/* IPL of 3 matches nothing now */

/* intentionally empty; presumably a convenient place for a breakpoint */
void
apic_break()
{
}
#endif /* DEBUG */
/*
 * platform_intr_enter
 *
 *	Called at the beginning of the interrupt service routine to
 *	mask all level equal to and below the interrupt priority
 *	of the interrupting vector.  An EOI should be given to
 *	the interrupt controller to enable other HW interrupts.
 *
 *	On entry *vectorp holds the vector (already reduced by
 *	APIC_BASE_VECT); on return it holds the irq that vector maps to.
 *	The return value is the new ipl, or APIC_INT_SPURIOUS for
 *	spurious interrupts.  The ipl argument is not used.
 *
 */
/*ARGSUSED*/
static int
apic_intr_enter(int ipl, int *vectorp)
{
	uchar_t vector;
	int nipl;
	int irq, iflag;
	apic_cpus_info_t *cpu_infop;

	/*
	 * The real vector programmed in APIC is *vectorp + 0x20
	 * But, cmnint code subtracts 0x20 before pushing it.
	 * Hence APIC_BASE_VECT is 0x20.
	 */

	vector = (uchar_t)*vectorp;

	/* if interrupted by the clock, increment apic_nsec_since_boot */
	if (vector == apic_clkvect) {
		if (!apic_oneshot) {
			/* NOTE: this is not MT aware */
			/*
			 * The stamp is bumped before and after the update
			 * (presumably so readers can detect an update in
			 * progress -- confirm against the reader).
			 */
			apic_hrtime_stamp++;
			apic_nsec_since_boot += apic_nsec_per_intr;
			apic_hrtime_stamp++;
			last_count_read = apic_hertz_count;
			apic_redistribute_compute();
		}

		/* We will avoid all the book keeping overhead for clock */
		nipl = apic_vectortoipl[vector >> APIC_IPL_SHIFT];
#if defined(__amd64)
		setcr8((ulong_t)apic_cr8pri[nipl]);
#else
		apicadr[APIC_TASK_REG] = apic_ipltopri[nipl];
#endif
		*vectorp = apic_vector_to_irq[vector + APIC_BASE_VECT];
		apicadr[APIC_EOI_REG] = 0;
		return (nipl);
	}

	cpu_infop = &apic_cpus[psm_get_cpu_id()];

	/* spurious interrupts are only counted; no EOI is written for them */
	if (vector == (APIC_SPUR_INTR - APIC_BASE_VECT)) {
		cpu_infop->aci_spur_cnt++;
		return (APIC_INT_SPURIOUS);
	}

	/* Check if the vector we got is really what we need */
	if (apic_revector_pending) {
		/*
		 * Disable interrupts for the duration of
		 * the vector translation to prevent a self-race for
		 * the apic_revector_lock.  This cannot be done
		 * in apic_xlate_vector because it is recursive and
		 * we want the vector translation to be atomic with
		 * respect to other (higher-priority) interrupts.
		 */
		iflag = intr_clear();
		vector = apic_xlate_vector(vector + APIC_BASE_VECT) -
		    APIC_BASE_VECT;
		intr_restore(iflag);
	}

	nipl = apic_vectortoipl[vector >> APIC_IPL_SHIFT];
	*vectorp = irq = apic_vector_to_irq[vector + APIC_BASE_VECT];

	/* raise the priority to that of the interrupt being serviced */
#if defined(__amd64)
	setcr8((ulong_t)apic_cr8pri[nipl]);
#else
	apicadr[APIC_TASK_REG] = apic_ipltopri[nipl];
#endif

	/* per-cpu bookkeeping: irq in service at this ipl, current ipl */
	cpu_infop->aci_current[nipl] = (uchar_t)irq;
	cpu_infop->aci_curipl = (uchar_t)nipl;
	cpu_infop->aci_ISR_in_progress |= 1 << nipl;

	/*
	 * apic_level_intr could have been assimilated into the irq struct.
	 * but, having it as a character array is more efficient in terms of
	 * cache usage. So, we leave it as is.
	 *
	 * Interrupts not marked in apic_level_intr[] get their EOI here;
	 * the marked ones get it in apic_intr_exit().
	 */
	if (!apic_level_intr[irq])
		apicadr[APIC_EOI_REG] = 0;

#ifdef	DEBUG
	APIC_DEBUG_BUF_PUT(vector);
	APIC_DEBUG_BUF_PUT(irq);
	APIC_DEBUG_BUF_PUT(nipl);
	APIC_DEBUG_BUF_PUT(psm_get_cpu_id());
	if ((apic_stretch_interrupts) && (apic_stretch_ISR & (1 << nipl)))
		drv_usecwait(apic_stretch_interrupts);

	if (apic_break_on_cpu == psm_get_cpu_id())
		apic_break();
#endif /* DEBUG */
	return (nipl);
}
/*
 * Counterpart of apic_intr_enter(): restore the priority to prev_ipl, send
 * the EOI for interrupts marked in apic_level_intr[] (the others were
 * acknowledged on entry) and update the per-cpu bookkeeping.
 */
static void
apic_intr_exit(int prev_ipl, int irq)
{
	apic_cpus_info_t *cpu_infop;

#if defined(__amd64)
	setcr8((ulong_t)apic_cr8pri[prev_ipl]);
#else
	apicadr[APIC_TASK_REG] = apic_ipltopri[prev_ipl];
#endif

	cpu_infop = &apic_cpus[psm_get_cpu_id()];
	if (apic_level_intr[irq])
		apicadr[APIC_EOI_REG] = 0;

	cpu_infop->aci_curipl = (uchar_t)prev_ipl;
	/* ISR above current pri could not be in progress */
	/* (2 << prev_ipl) - 1 keeps bits 0 .. prev_ipl and clears the rest */
	cpu_infop->aci_ISR_in_progress &= (2 << prev_ipl) - 1;
}

/*
 * Mask all interrupts below or equal to the given IPL
 */
static void
apic_setspl(int ipl)
{

#if defined(__amd64)
	setcr8((ulong_t)apic_cr8pri[ipl]);
#else
	apicadr[APIC_TASK_REG] = apic_ipltopri[ipl];
#endif

	/* interrupts at ipl above this cannot be in progress */
	apic_cpus[psm_get_cpu_id()].aci_ISR_in_progress &= (2 << ipl) - 1;
	/*
	 * this is a patch fix for the ALR QSMP P5 machine, so that interrupts
	 * have enough time to come in before the priority is raised again
	 * during the idle() loop.
	 */
	if (apic_setspl_delay)
		(void) get_apic_pri();
}
2029 #else 2030 apicadr[APIC_TASK_REG] = apic_ipltopri[ipl]; 2031 #endif 2032 2033 /* interrupts at ipl above this cannot be in progress */ 2034 apic_cpus[psm_get_cpu_id()].aci_ISR_in_progress &= (2 << ipl) - 1; 2035 /* 2036 * this is a patch fix for the ALR QSMP P5 machine, so that interrupts 2037 * have enough time to come in before the priority is raised again 2038 * during the idle() loop. 2039 */ 2040 if (apic_setspl_delay) 2041 (void) get_apic_pri(); 2042 } 2043 2044 /* 2045 * trigger a software interrupt at the given IPL 2046 */ 2047 static void 2048 apic_set_softintr(int ipl) 2049 { 2050 int vector; 2051 uint_t flag; 2052 2053 vector = apic_resv_vector[ipl]; 2054 2055 flag = intr_clear(); 2056 2057 while (get_apic_cmd1() & AV_PENDING) 2058 apic_ret(); 2059 2060 /* generate interrupt at vector on itself only */ 2061 apicadr[APIC_INT_CMD1] = AV_SH_SELF | vector; 2062 2063 intr_restore(flag); 2064 } 2065 2066 /* 2067 * generates an interprocessor interrupt to another CPU 2068 */ 2069 static void 2070 apic_send_ipi(int cpun, int ipl) 2071 { 2072 int vector; 2073 uint_t flag; 2074 2075 vector = apic_resv_vector[ipl]; 2076 2077 flag = intr_clear(); 2078 2079 while (get_apic_cmd1() & AV_PENDING) 2080 apic_ret(); 2081 2082 apicadr[APIC_INT_CMD2] = 2083 apic_cpus[cpun].aci_local_id << APIC_ICR_ID_BIT_OFFSET; 2084 apicadr[APIC_INT_CMD1] = vector; 2085 2086 intr_restore(flag); 2087 } 2088 2089 2090 /*ARGSUSED*/ 2091 static void 2092 apic_set_idlecpu(processorid_t cpun) 2093 { 2094 } 2095 2096 /*ARGSUSED*/ 2097 static void 2098 apic_unset_idlecpu(processorid_t cpun) 2099 { 2100 } 2101 2102 2103 static void 2104 apic_ret() 2105 { 2106 } 2107 2108 static int 2109 get_apic_cmd1() 2110 { 2111 return (apicadr[APIC_INT_CMD1]); 2112 } 2113 2114 static int 2115 get_apic_pri() 2116 { 2117 #if defined(__amd64) 2118 return ((int)getcr8()); 2119 #else 2120 return (apicadr[APIC_TASK_REG]); 2121 #endif 2122 } 2123 2124 /* 2125 * If apic_coarse_time == 1, then apic_gettime() is used 
instead of 2126 * apic_gethrtime(). This is used for performance instead of accuracy. 2127 */ 2128 2129 static hrtime_t 2130 apic_gettime() 2131 { 2132 int old_hrtime_stamp; 2133 hrtime_t temp; 2134 2135 /* 2136 * In one-shot mode, we do not keep time, so if anyone 2137 * calls psm_gettime() directly, we vector over to 2138 * gethrtime(). 2139 * one-shot mode MUST NOT be enabled if this psm is the source of 2140 * hrtime. 2141 */ 2142 2143 if (apic_oneshot) 2144 return (gethrtime()); 2145 2146 2147 gettime_again: 2148 while ((old_hrtime_stamp = apic_hrtime_stamp) & 1) 2149 apic_ret(); 2150 2151 temp = apic_nsec_since_boot; 2152 2153 if (apic_hrtime_stamp != old_hrtime_stamp) { /* got an interrupt */ 2154 goto gettime_again; 2155 } 2156 return (temp); 2157 } 2158 2159 /* 2160 * Here we return the number of nanoseconds since booting. Note every 2161 * clock interrupt increments apic_nsec_since_boot by the appropriate 2162 * amount. 2163 */ 2164 static hrtime_t 2165 apic_gethrtime() 2166 { 2167 int curr_timeval, countval, elapsed_ticks, oflags; 2168 int old_hrtime_stamp, status; 2169 hrtime_t temp; 2170 uchar_t cpun; 2171 2172 2173 /* 2174 * In one-shot mode, we do not keep time, so if anyone 2175 * calls psm_gethrtime() directly, we vector over to 2176 * gethrtime(). 2177 * one-shot mode MUST NOT be enabled if this psm is the source of 2178 * hrtime. 2179 */ 2180 2181 if (apic_oneshot) 2182 return (gethrtime()); 2183 2184 oflags = intr_clear(); /* prevent migration */ 2185 2186 cpun = (uchar_t)((uint_t)apicadr[APIC_LID_REG] >> APIC_ID_BIT_OFFSET); 2187 2188 lock_set(&apic_gethrtime_lock); 2189 2190 gethrtime_again: 2191 while ((old_hrtime_stamp = apic_hrtime_stamp) & 1) 2192 apic_ret(); 2193 2194 /* 2195 * Check to see which CPU we are on. Note the time is kept on 2196 * the local APIC of CPU 0. If on CPU 0, simply read the current 2197 * counter. If on another CPU, issue a remote read command to CPU 0. 
2198 */ 2199 if (cpun == apic_cpus[0].aci_local_id) { 2200 countval = apicadr[APIC_CURR_COUNT]; 2201 } else { 2202 while (get_apic_cmd1() & AV_PENDING) 2203 apic_ret(); 2204 2205 apicadr[APIC_INT_CMD2] = 2206 apic_cpus[0].aci_local_id << APIC_ICR_ID_BIT_OFFSET; 2207 apicadr[APIC_INT_CMD1] = APIC_CURR_ADD|AV_REMOTE; 2208 2209 while ((status = get_apic_cmd1()) & AV_READ_PENDING) 2210 apic_ret(); 2211 2212 if (status & AV_REMOTE_STATUS) /* 1 = valid */ 2213 countval = apicadr[APIC_REMOTE_READ]; 2214 else { /* 0 = invalid */ 2215 apic_remote_hrterr++; 2216 /* 2217 * return last hrtime right now, will need more 2218 * testing if change to retry 2219 */ 2220 temp = apic_last_hrtime; 2221 2222 lock_clear(&apic_gethrtime_lock); 2223 2224 intr_restore(oflags); 2225 2226 return (temp); 2227 } 2228 } 2229 if (countval > last_count_read) 2230 countval = 0; 2231 else 2232 last_count_read = countval; 2233 2234 elapsed_ticks = apic_hertz_count - countval; 2235 2236 curr_timeval = APIC_TICKS_TO_NSECS(elapsed_ticks); 2237 temp = apic_nsec_since_boot + curr_timeval; 2238 2239 if (apic_hrtime_stamp != old_hrtime_stamp) { /* got an interrupt */ 2240 /* we might have clobbered last_count_read. 
Restore it */ 2241 last_count_read = apic_hertz_count; 2242 goto gethrtime_again; 2243 } 2244 2245 if (temp < apic_last_hrtime) { 2246 /* return last hrtime if error occurs */ 2247 apic_hrtime_error++; 2248 temp = apic_last_hrtime; 2249 } 2250 else 2251 apic_last_hrtime = temp; 2252 2253 lock_clear(&apic_gethrtime_lock); 2254 intr_restore(oflags); 2255 2256 return (temp); 2257 } 2258 2259 /* apic NMI handler */ 2260 /*ARGSUSED*/ 2261 static void 2262 apic_nmi_intr(caddr_t arg) 2263 { 2264 if (apic_shutdown_processors) { 2265 apic_disable_local_apic(); 2266 return; 2267 } 2268 2269 if (lock_try(&apic_nmi_lock)) { 2270 if (apic_kmdb_on_nmi) { 2271 if (psm_debugger() == 0) { 2272 cmn_err(CE_PANIC, 2273 "NMI detected, kmdb is not available."); 2274 } else { 2275 debug_enter("\nNMI detected, entering kmdb.\n"); 2276 } 2277 } else { 2278 if (apic_panic_on_nmi) { 2279 /* Keep panic from entering kmdb. */ 2280 nopanicdebug = 1; 2281 cmn_err(CE_PANIC, "pcplusmp: NMI received"); 2282 } else { 2283 /* 2284 * prom_printf is the best shot we have 2285 * of something which is problem free from 2286 * high level/NMI type of interrupts 2287 */ 2288 prom_printf("pcplusmp: NMI received\n"); 2289 apic_error |= APIC_ERR_NMI; 2290 apic_num_nmis++; 2291 } 2292 } 2293 lock_clear(&apic_nmi_lock); 2294 } 2295 } 2296 2297 /* 2298 * Add mask bits to disable interrupt vector from happening 2299 * at or above IPL. In addition, it should remove mask bits 2300 * to enable interrupt vectors below the given IPL. 2301 * 2302 * Both add and delspl are complicated by the fact that different interrupts 2303 * may share IRQs. This can happen in two ways. 2304 * 1. The same H/W line is shared by more than 1 device 2305 * 1a. with interrupts at different IPLs 2306 * 1b. with interrupts at same IPL 2307 * 2. We ran out of vectors at a given IPL and started sharing vectors. 
2308 * 1b and 2 should be handled gracefully, except for the fact some ISRs 2309 * will get called often when no interrupt is pending for the device. 2310 * For 1a, we just hope that the machine blows up with the person who 2311 * set it up that way!. In the meantime, we handle it at the higher IPL. 2312 */ 2313 /*ARGSUSED*/ 2314 static int 2315 apic_addspl(int irqno, int ipl, int min_ipl, int max_ipl) 2316 { 2317 uchar_t vector; 2318 int iflag; 2319 apic_irq_t *irqptr, *irqheadptr; 2320 int irqindex; 2321 2322 ASSERT(max_ipl <= UCHAR_MAX); 2323 irqindex = IRQINDEX(irqno); 2324 2325 if ((irqindex == -1) || (!apic_irq_table[irqindex])) 2326 return (PSM_FAILURE); 2327 2328 irqptr = irqheadptr = apic_irq_table[irqindex]; 2329 2330 DDI_INTR_IMPLDBG((CE_CONT, "apic_addspl: dip=0x%p type=%d irqno=0x%x " 2331 "vector=0x%x\n", (void *)irqptr->airq_dip, 2332 irqptr->airq_mps_intr_index, irqno, irqptr->airq_vector)); 2333 2334 while (irqptr) { 2335 if (VIRTIRQ(irqindex, irqptr->airq_share_id) == irqno) 2336 break; 2337 irqptr = irqptr->airq_next; 2338 } 2339 irqptr->airq_share++; 2340 2341 /* return if it is not hardware interrupt */ 2342 if (irqptr->airq_mps_intr_index == RESERVE_INDEX) 2343 return (PSM_SUCCESS); 2344 2345 /* Or if there are more interupts at a higher IPL */ 2346 if (ipl != max_ipl) 2347 return (PSM_SUCCESS); 2348 2349 /* 2350 * if apic_picinit() has not been called yet, just return. 2351 * At the end of apic_picinit(), we will call setup_io_intr(). 2352 */ 2353 2354 if (!apic_flag) 2355 return (PSM_SUCCESS); 2356 2357 iflag = intr_clear(); 2358 2359 /* 2360 * Upgrade vector if max_ipl is not earlier ipl. If we cannot allocate, 2361 * return failure. Not very elegant, but then we hope the 2362 * machine will blow up with ... 
2363 */ 2364 if (irqptr->airq_ipl != max_ipl) { 2365 vector = apic_allocate_vector(max_ipl, irqindex, 1); 2366 if (vector == 0) { 2367 intr_restore(iflag); 2368 irqptr->airq_share--; 2369 return (PSM_FAILURE); 2370 } 2371 irqptr = irqheadptr; 2372 apic_mark_vector(irqptr->airq_vector, vector); 2373 while (irqptr) { 2374 irqptr->airq_vector = vector; 2375 irqptr->airq_ipl = (uchar_t)max_ipl; 2376 /* 2377 * reprogram irq being added and every one else 2378 * who is not in the UNINIT state 2379 */ 2380 if ((VIRTIRQ(irqindex, irqptr->airq_share_id) == 2381 irqno) || (irqptr->airq_temp_cpu != IRQ_UNINIT)) { 2382 apic_record_rdt_entry(irqptr, irqindex); 2383 (void) apic_setup_io_intr(irqptr, irqindex); 2384 } 2385 irqptr = irqptr->airq_next; 2386 } 2387 intr_restore(iflag); 2388 return (PSM_SUCCESS); 2389 } 2390 2391 ASSERT(irqptr); 2392 (void) apic_setup_io_intr(irqptr, irqindex); 2393 intr_restore(iflag); 2394 return (PSM_SUCCESS); 2395 } 2396 2397 /* 2398 * Recompute mask bits for the given interrupt vector. 2399 * If there is no interrupt servicing routine for this 2400 * vector, this function should disable interrupt vector 2401 * from happening at all IPLs. If there are still 2402 * handlers using the given vector, this function should 2403 * disable the given vector from happening below the lowest 2404 * IPL of the remaining hadlers. 
2405 */ 2406 /*ARGSUSED*/ 2407 static int 2408 apic_delspl(int irqno, int ipl, int min_ipl, int max_ipl) 2409 { 2410 uchar_t vector, bind_cpu; 2411 int iflag, intin, irqindex; 2412 volatile int32_t *ioapic; 2413 apic_irq_t *irqptr, *irqheadptr; 2414 2415 irqindex = IRQINDEX(irqno); 2416 irqptr = irqheadptr = apic_irq_table[irqindex]; 2417 2418 DDI_INTR_IMPLDBG((CE_CONT, "apic_delspl: dip=0x%p type=%d irqno=0x%x " 2419 "vector=0x%x\n", (void *)irqptr->airq_dip, 2420 irqptr->airq_mps_intr_index, irqno, irqptr->airq_vector)); 2421 2422 while (irqptr) { 2423 if (VIRTIRQ(irqindex, irqptr->airq_share_id) == irqno) 2424 break; 2425 irqptr = irqptr->airq_next; 2426 } 2427 ASSERT(irqptr); 2428 2429 irqptr->airq_share--; 2430 2431 if (ipl < max_ipl) 2432 return (PSM_SUCCESS); 2433 2434 /* return if it is not hardware interrupt */ 2435 if (irqptr->airq_mps_intr_index == RESERVE_INDEX) 2436 return (PSM_SUCCESS); 2437 2438 if (!apic_flag) { 2439 /* 2440 * Clear irq_struct. If two devices shared an intpt 2441 * line & 1 unloaded before picinit, we are hosed. But, then 2442 * we hope the machine will ... 2443 */ 2444 irqptr->airq_mps_intr_index = FREE_INDEX; 2445 irqptr->airq_temp_cpu = IRQ_UNINIT; 2446 apic_free_vector(irqptr->airq_vector); 2447 return (PSM_SUCCESS); 2448 } 2449 /* 2450 * Downgrade vector to new max_ipl if needed.If we cannot allocate, 2451 * use old IPL. Not very elegant, but then we hope ... 
2452 */ 2453 if ((irqptr->airq_ipl != max_ipl) && (max_ipl != PSM_INVALID_IPL)) { 2454 apic_irq_t *irqp; 2455 if (vector = apic_allocate_vector(max_ipl, irqno, 1)) { 2456 apic_mark_vector(irqheadptr->airq_vector, vector); 2457 irqp = irqheadptr; 2458 while (irqp) { 2459 irqp->airq_vector = vector; 2460 irqp->airq_ipl = (uchar_t)max_ipl; 2461 if (irqp->airq_temp_cpu != IRQ_UNINIT) { 2462 apic_record_rdt_entry(irqp, irqindex); 2463 (void) apic_setup_io_intr(irqp, 2464 irqindex); 2465 } 2466 irqp = irqp->airq_next; 2467 } 2468 } 2469 } 2470 2471 if (irqptr->airq_share) 2472 return (PSM_SUCCESS); 2473 2474 iflag = intr_clear(); 2475 lock_set(&apic_ioapic_lock); 2476 2477 /* Disable the MSI/X vector */ 2478 if (APIC_IS_MSI_OR_MSIX_INDEX(irqptr->airq_mps_intr_index)) { 2479 int type = (irqptr->airq_mps_intr_index == MSI_INDEX) ? 2480 DDI_INTR_TYPE_MSI : DDI_INTR_TYPE_MSIX; 2481 2482 /* 2483 * Make sure we only disable on the last 2484 * of the multi-MSI support 2485 */ 2486 if (i_ddi_intr_get_current_nintrs(irqptr->airq_dip) == 1) { 2487 (void) apic_pci_msi_unconfigure(irqptr->airq_dip, 2488 type, irqptr->airq_ioapicindex); 2489 (void) apic_pci_msi_disable_mode(irqptr->airq_dip, 2490 type, irqptr->airq_ioapicindex); 2491 } 2492 } else { 2493 ioapic = apicioadr[irqptr->airq_ioapicindex]; 2494 intin = irqptr->airq_intin_no; 2495 ioapic[APIC_IO_REG] = APIC_RDT_CMD + 2 * intin; 2496 ioapic[APIC_IO_DATA] = AV_MASK; 2497 } 2498 2499 if (max_ipl == PSM_INVALID_IPL) { 2500 ASSERT(irqheadptr == irqptr); 2501 bind_cpu = irqptr->airq_temp_cpu; 2502 if (((uchar_t)bind_cpu != IRQ_UNBOUND) && 2503 ((uchar_t)bind_cpu != IRQ_UNINIT)) { 2504 ASSERT((bind_cpu & ~IRQ_USER_BOUND) < apic_nproc); 2505 if (bind_cpu & IRQ_USER_BOUND) { 2506 /* If hardbound, temp_cpu == cpu */ 2507 bind_cpu &= ~IRQ_USER_BOUND; 2508 apic_cpus[bind_cpu].aci_bound--; 2509 } else 2510 apic_cpus[bind_cpu].aci_temp_bound--; 2511 } 2512 lock_clear(&apic_ioapic_lock); 2513 intr_restore(iflag); 2514 irqptr->airq_temp_cpu 
= IRQ_UNINIT; 2515 irqptr->airq_mps_intr_index = FREE_INDEX; 2516 apic_free_vector(irqptr->airq_vector); 2517 return (PSM_SUCCESS); 2518 } 2519 lock_clear(&apic_ioapic_lock); 2520 intr_restore(iflag); 2521 2522 mutex_enter(&airq_mutex); 2523 if ((irqptr == apic_irq_table[irqindex])) { 2524 apic_irq_t *oldirqptr; 2525 /* Move valid irq entry to the head */ 2526 irqheadptr = oldirqptr = irqptr; 2527 irqptr = irqptr->airq_next; 2528 ASSERT(irqptr); 2529 while (irqptr) { 2530 if (irqptr->airq_mps_intr_index != FREE_INDEX) 2531 break; 2532 oldirqptr = irqptr; 2533 irqptr = irqptr->airq_next; 2534 } 2535 /* remove all invalid ones from the beginning */ 2536 apic_irq_table[irqindex] = irqptr; 2537 /* 2538 * and link them back after the head. The invalid ones 2539 * begin with irqheadptr and end at oldirqptr 2540 */ 2541 oldirqptr->airq_next = irqptr->airq_next; 2542 irqptr->airq_next = irqheadptr; 2543 } 2544 mutex_exit(&airq_mutex); 2545 2546 irqptr->airq_temp_cpu = IRQ_UNINIT; 2547 irqptr->airq_mps_intr_index = FREE_INDEX; 2548 return (PSM_SUCCESS); 2549 } 2550 2551 /* 2552 * Return HW interrupt number corresponding to the given IPL 2553 */ 2554 /*ARGSUSED*/ 2555 static int 2556 apic_softlvl_to_irq(int ipl) 2557 { 2558 /* 2559 * Do not use apic to trigger soft interrupt. 2560 * It will cause the system to hang when 2 hardware interrupts 2561 * at the same priority with the softint are already accepted 2562 * by the apic. Cause the AV_PENDING bit will not be cleared 2563 * until one of the hardware interrupt is eoi'ed. If we need 2564 * to send an ipi at this time, we will end up looping forever 2565 * to wait for the AV_PENDING bit to clear. 
2566 */ 2567 return (PSM_SV_SOFTWARE); 2568 } 2569 2570 static int 2571 apic_post_cpu_start() 2572 { 2573 int i, cpun; 2574 apic_irq_t *irq_ptr; 2575 2576 apic_init_intr(); 2577 2578 /* 2579 * since some systems don't enable the internal cache on the non-boot 2580 * cpus, so we have to enable them here 2581 */ 2582 setcr0(getcr0() & ~(0x60000000)); 2583 2584 while (get_apic_cmd1() & AV_PENDING) 2585 apic_ret(); 2586 2587 cpun = psm_get_cpu_id(); 2588 apic_cpus[cpun].aci_status = APIC_CPU_ONLINE | APIC_CPU_INTR_ENABLE; 2589 2590 for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) { 2591 irq_ptr = apic_irq_table[i]; 2592 if ((irq_ptr == NULL) || 2593 ((irq_ptr->airq_cpu & ~IRQ_USER_BOUND) != cpun)) 2594 continue; 2595 2596 while (irq_ptr) { 2597 if (irq_ptr->airq_temp_cpu != IRQ_UNINIT) 2598 (void) apic_rebind(irq_ptr, cpun, 1, IMMEDIATE); 2599 irq_ptr = irq_ptr->airq_next; 2600 } 2601 } 2602 2603 apicadr[APIC_DIVIDE_REG] = apic_divide_reg_init; 2604 return (PSM_SUCCESS); 2605 } 2606 2607 processorid_t 2608 apic_get_next_processorid(processorid_t cpu_id) 2609 { 2610 2611 int i; 2612 2613 if (cpu_id == -1) 2614 return ((processorid_t)0); 2615 2616 for (i = cpu_id + 1; i < NCPU; i++) { 2617 if (CPU_IN_SET(apic_cpumask, i)) 2618 return (i); 2619 } 2620 2621 return ((processorid_t)-1); 2622 } 2623 2624 2625 /* 2626 * type == -1 indicates it is an internal request. 
Do not change 2627 * resv_vector for these requests 2628 */ 2629 static int 2630 apic_get_ipivect(int ipl, int type) 2631 { 2632 uchar_t vector; 2633 int irq; 2634 2635 if (irq = apic_allocate_irq(APIC_VECTOR(ipl))) { 2636 if (vector = apic_allocate_vector(ipl, irq, 1)) { 2637 apic_irq_table[irq]->airq_mps_intr_index = 2638 RESERVE_INDEX; 2639 apic_irq_table[irq]->airq_vector = vector; 2640 if (type != -1) { 2641 apic_resv_vector[ipl] = vector; 2642 } 2643 return (irq); 2644 } 2645 } 2646 apic_error |= APIC_ERR_GET_IPIVECT_FAIL; 2647 return (-1); /* shouldn't happen */ 2648 } 2649 2650 static int 2651 apic_getclkirq(int ipl) 2652 { 2653 int irq; 2654 2655 if ((irq = apic_get_ipivect(ipl, -1)) == -1) 2656 return (-1); 2657 /* 2658 * Note the vector in apic_clkvect for per clock handling. 2659 */ 2660 apic_clkvect = apic_irq_table[irq]->airq_vector - APIC_BASE_VECT; 2661 APIC_VERBOSE_IOAPIC((CE_NOTE, "get_clkirq: vector = %x\n", 2662 apic_clkvect)); 2663 return (irq); 2664 } 2665 2666 2667 /* 2668 * Return the number of APIC clock ticks elapsed for 8245 to decrement 2669 * (APIC_TIME_COUNT + pit_ticks_adj) ticks. 2670 */ 2671 static uint_t 2672 apic_calibrate(volatile uint32_t *addr, uint16_t *pit_ticks_adj) 2673 { 2674 uint8_t pit_tick_lo; 2675 uint16_t pit_tick, target_pit_tick; 2676 uint32_t start_apic_tick, end_apic_tick; 2677 int iflag; 2678 2679 addr += APIC_CURR_COUNT; 2680 2681 iflag = intr_clear(); 2682 2683 do { 2684 pit_tick_lo = inb(PITCTR0_PORT); 2685 pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo; 2686 } while (pit_tick < APIC_TIME_MIN || 2687 pit_tick_lo <= APIC_LB_MIN || pit_tick_lo >= APIC_LB_MAX); 2688 2689 /* 2690 * Wait for the 8254 to decrement by 5 ticks to ensure 2691 * we didn't start in the middle of a tick. 2692 * Compare with 0x10 for the wrap around case. 
2693 */ 2694 target_pit_tick = pit_tick - 5; 2695 do { 2696 pit_tick_lo = inb(PITCTR0_PORT); 2697 pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo; 2698 } while (pit_tick > target_pit_tick || pit_tick_lo < 0x10); 2699 2700 start_apic_tick = *addr; 2701 2702 /* 2703 * Wait for the 8254 to decrement by 2704 * (APIC_TIME_COUNT + pit_ticks_adj) ticks 2705 */ 2706 target_pit_tick = pit_tick - APIC_TIME_COUNT; 2707 do { 2708 pit_tick_lo = inb(PITCTR0_PORT); 2709 pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo; 2710 } while (pit_tick > target_pit_tick || pit_tick_lo < 0x10); 2711 2712 end_apic_tick = *addr; 2713 2714 *pit_ticks_adj = target_pit_tick - pit_tick; 2715 2716 intr_restore(iflag); 2717 2718 return (start_apic_tick - end_apic_tick); 2719 } 2720 2721 /* 2722 * Initialise the APIC timer on the local APIC of CPU 0 to the desired 2723 * frequency. Note at this stage in the boot sequence, the boot processor 2724 * is the only active processor. 2725 * hertz value of 0 indicates a one-shot mode request. In this case 2726 * the function returns the resolution (in nanoseconds) for the hardware 2727 * timer interrupt. If one-shot mode capability is not available, 2728 * the return value will be 0. apic_enable_oneshot is a global switch 2729 * for disabling the functionality. 2730 * A non-zero positive value for hertz indicates a periodic mode request. 2731 * In this case the hardware will be programmed to generate clock interrupts 2732 * at hertz frequency and returns the resolution of interrupts in 2733 * nanosecond. 
2734 */ 2735 2736 static int 2737 apic_clkinit(int hertz) 2738 { 2739 2740 uint_t apic_ticks = 0; 2741 uint_t pit_ticks; 2742 int ret; 2743 uint16_t pit_ticks_adj; 2744 static int firsttime = 1; 2745 2746 if (firsttime) { 2747 /* first time calibrate on CPU0 only */ 2748 2749 apicadr[APIC_DIVIDE_REG] = apic_divide_reg_init; 2750 apicadr[APIC_INIT_COUNT] = APIC_MAXVAL; /* start counting */ 2751 apic_ticks = apic_calibrate(apicadr, &pit_ticks_adj); 2752 2753 /* total number of PIT ticks corresponding to apic_ticks */ 2754 pit_ticks = APIC_TIME_COUNT + pit_ticks_adj; 2755 2756 /* 2757 * Determine the number of nanoseconds per APIC clock tick 2758 * and then determine how many APIC ticks to interrupt at the 2759 * desired frequency 2760 * apic_ticks / (pitticks / PIT_HZ) = apic_ticks_per_s 2761 * (apic_ticks * PIT_HZ) / pitticks = apic_ticks_per_s 2762 * apic_ticks_per_ns = (apic_ticks * PIT_HZ) / (pitticks * 10^9) 2763 * apic_ticks_per_SFns = 2764 * (SF * apic_ticks * PIT_HZ) / (pitticks * 10^9) 2765 */ 2766 apic_ticks_per_SFnsecs = 2767 ((SF * apic_ticks * PIT_HZ) / 2768 ((uint64_t)pit_ticks * NANOSEC)); 2769 2770 /* the interval timer initial count is 32 bit max */ 2771 apic_nsec_max = APIC_TICKS_TO_NSECS(APIC_MAXVAL); 2772 firsttime = 0; 2773 } 2774 2775 if (hertz != 0) { 2776 /* periodic */ 2777 apic_nsec_per_intr = NANOSEC / hertz; 2778 apic_hertz_count = APIC_NSECS_TO_TICKS(apic_nsec_per_intr); 2779 } 2780 2781 apic_int_busy_mark = (apic_int_busy_mark * 2782 apic_sample_factor_redistribution) / 100; 2783 apic_int_free_mark = (apic_int_free_mark * 2784 apic_sample_factor_redistribution) / 100; 2785 apic_diff_for_redistribution = (apic_diff_for_redistribution * 2786 apic_sample_factor_redistribution) / 100; 2787 2788 if (hertz == 0) { 2789 /* requested one_shot */ 2790 if (!apic_oneshot_enable) 2791 return (0); 2792 apic_oneshot = 1; 2793 ret = (int)APIC_TICKS_TO_NSECS(1); 2794 } else { 2795 /* program the local APIC to interrupt at the given frequency */ 2796 
apicadr[APIC_INIT_COUNT] = apic_hertz_count; 2797 apicadr[APIC_LOCAL_TIMER] = 2798 (apic_clkvect + APIC_BASE_VECT) | AV_TIME; 2799 apic_oneshot = 0; 2800 ret = NANOSEC / hertz; 2801 } 2802 2803 return (ret); 2804 2805 } 2806 2807 /* 2808 * apic_preshutdown: 2809 * Called early in shutdown whilst we can still access filesystems to do 2810 * things like loading modules which will be required to complete shutdown 2811 * after filesystems are all unmounted. 2812 */ 2813 static void 2814 apic_preshutdown(int cmd, int fcn) 2815 { 2816 APIC_VERBOSE_POWEROFF(("apic_preshutdown(%d,%d); m=%d a=%d\n", 2817 cmd, fcn, apic_poweroff_method, apic_enable_acpi)); 2818 2819 if ((cmd != A_SHUTDOWN) || (fcn != AD_POWEROFF)) { 2820 return; 2821 } 2822 } 2823 2824 static void 2825 apic_shutdown(int cmd, int fcn) 2826 { 2827 int iflag, restarts, attempts; 2828 int i, j; 2829 volatile int32_t *ioapic; 2830 uchar_t byte; 2831 2832 /* Send NMI to all CPUs except self to do per processor shutdown */ 2833 iflag = intr_clear(); 2834 while (get_apic_cmd1() & AV_PENDING) 2835 apic_ret(); 2836 apic_shutdown_processors = 1; 2837 apicadr[APIC_INT_CMD1] = AV_NMI | AV_LEVEL | AV_SH_ALL_EXCSELF; 2838 2839 /* restore cmos shutdown byte before reboot */ 2840 if (apic_cmos_ssb_set) { 2841 outb(CMOS_ADDR, SSB); 2842 outb(CMOS_DATA, 0); 2843 } 2844 /* Disable the I/O APIC redirection entries */ 2845 for (j = 0; j < apic_io_max; j++) { 2846 int intin_max; 2847 ioapic = apicioadr[j]; 2848 ioapic[APIC_IO_REG] = APIC_VERS_CMD; 2849 /* Bits 23-16 define the maximum redirection entries */ 2850 intin_max = (ioapic[APIC_IO_DATA] >> 16) & 0xff; 2851 for (i = 0; i < intin_max; i++) { 2852 ioapic[APIC_IO_REG] = APIC_RDT_CMD + 2 * i; 2853 ioapic[APIC_IO_DATA] = AV_MASK; 2854 } 2855 } 2856 2857 /* disable apic mode if imcr present */ 2858 if (apic_imcrp) { 2859 outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT); 2860 outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_PIC); 2861 } 2862 2863 apic_disable_local_apic(); 2864 2865 
intr_restore(iflag); 2866 2867 if ((cmd != A_SHUTDOWN) || (fcn != AD_POWEROFF)) { 2868 return; 2869 } 2870 2871 switch (apic_poweroff_method) { 2872 case APIC_POWEROFF_VIA_RTC: 2873 2874 /* select the extended NVRAM bank in the RTC */ 2875 outb(CMOS_ADDR, RTC_REGA); 2876 byte = inb(CMOS_DATA); 2877 outb(CMOS_DATA, (byte | EXT_BANK)); 2878 2879 outb(CMOS_ADDR, PFR_REG); 2880 2881 /* for Predator must toggle the PAB bit */ 2882 byte = inb(CMOS_DATA); 2883 2884 /* 2885 * clear power active bar, wakeup alarm and 2886 * kickstart 2887 */ 2888 byte &= ~(PAB_CBIT | WF_FLAG | KS_FLAG); 2889 outb(CMOS_DATA, byte); 2890 2891 /* delay before next write */ 2892 drv_usecwait(1000); 2893 2894 /* for S40 the following would suffice */ 2895 byte = inb(CMOS_DATA); 2896 2897 /* power active bar control bit */ 2898 byte |= PAB_CBIT; 2899 outb(CMOS_DATA, byte); 2900 2901 break; 2902 2903 case APIC_POWEROFF_VIA_ASPEN_BMC: 2904 restarts = 0; 2905 restart_aspen_bmc: 2906 if (++restarts == 3) 2907 break; 2908 attempts = 0; 2909 do { 2910 byte = inb(MISMIC_FLAG_REGISTER); 2911 byte &= MISMIC_BUSY_MASK; 2912 if (byte != 0) { 2913 drv_usecwait(1000); 2914 if (attempts >= 3) 2915 goto restart_aspen_bmc; 2916 ++attempts; 2917 } 2918 } while (byte != 0); 2919 outb(MISMIC_CNTL_REGISTER, CC_SMS_GET_STATUS); 2920 byte = inb(MISMIC_FLAG_REGISTER); 2921 byte |= 0x1; 2922 outb(MISMIC_FLAG_REGISTER, byte); 2923 i = 0; 2924 for (; i < (sizeof (aspen_bmc)/sizeof (aspen_bmc[0])); 2925 i++) { 2926 attempts = 0; 2927 do { 2928 byte = inb(MISMIC_FLAG_REGISTER); 2929 byte &= MISMIC_BUSY_MASK; 2930 if (byte != 0) { 2931 drv_usecwait(1000); 2932 if (attempts >= 3) 2933 goto restart_aspen_bmc; 2934 ++attempts; 2935 } 2936 } while (byte != 0); 2937 outb(MISMIC_CNTL_REGISTER, aspen_bmc[i].cntl); 2938 outb(MISMIC_DATA_REGISTER, aspen_bmc[i].data); 2939 byte = inb(MISMIC_FLAG_REGISTER); 2940 byte |= 0x1; 2941 outb(MISMIC_FLAG_REGISTER, byte); 2942 } 2943 break; 2944 2945 case APIC_POWEROFF_VIA_SITKA_BMC: 2946 
restarts = 0; 2947 restart_sitka_bmc: 2948 if (++restarts == 3) 2949 break; 2950 attempts = 0; 2951 do { 2952 byte = inb(SMS_STATUS_REGISTER); 2953 byte &= SMS_STATE_MASK; 2954 if ((byte == SMS_READ_STATE) || 2955 (byte == SMS_WRITE_STATE)) { 2956 drv_usecwait(1000); 2957 if (attempts >= 3) 2958 goto restart_sitka_bmc; 2959 ++attempts; 2960 } 2961 } while ((byte == SMS_READ_STATE) || 2962 (byte == SMS_WRITE_STATE)); 2963 outb(SMS_COMMAND_REGISTER, SMS_GET_STATUS); 2964 i = 0; 2965 for (; i < (sizeof (sitka_bmc)/sizeof (sitka_bmc[0])); 2966 i++) { 2967 attempts = 0; 2968 do { 2969 byte = inb(SMS_STATUS_REGISTER); 2970 byte &= SMS_IBF_MASK; 2971 if (byte != 0) { 2972 drv_usecwait(1000); 2973 if (attempts >= 3) 2974 goto restart_sitka_bmc; 2975 ++attempts; 2976 } 2977 } while (byte != 0); 2978 outb(sitka_bmc[i].port, sitka_bmc[i].data); 2979 } 2980 break; 2981 2982 case APIC_POWEROFF_NONE: 2983 2984 /* If no APIC direct method, we will try using ACPI */ 2985 if (apic_enable_acpi) { 2986 if (acpi_poweroff() == 1) 2987 return; 2988 } else 2989 return; 2990 2991 break; 2992 } 2993 /* 2994 * Wait a limited time here for power to go off. 2995 * If the power does not go off, then there was a 2996 * problem and we should continue to the halt which 2997 * prints a message for the user to press a key to 2998 * reboot. 2999 */ 3000 drv_usecwait(7000000); /* wait seven seconds */ 3001 3002 } 3003 3004 /* 3005 * Try and disable all interrupts. We just assign interrupts to other 3006 * processors based on policy. If any were bound by user request, we 3007 * let them continue and return failure. We do not bother to check 3008 * for cache affinity while rebinding. 
3009 */ 3010 3011 static int 3012 apic_disable_intr(processorid_t cpun) 3013 { 3014 int bind_cpu = 0, i, hardbound = 0, iflag; 3015 apic_irq_t *irq_ptr; 3016 3017 iflag = intr_clear(); 3018 lock_set(&apic_ioapic_lock); 3019 apic_cpus[cpun].aci_status &= ~APIC_CPU_INTR_ENABLE; 3020 lock_clear(&apic_ioapic_lock); 3021 intr_restore(iflag); 3022 apic_cpus[cpun].aci_curipl = 0; 3023 i = apic_min_device_irq; 3024 for (; i <= apic_max_device_irq; i++) { 3025 /* 3026 * If there are bound interrupts on this cpu, then 3027 * rebind them to other processors. 3028 */ 3029 if ((irq_ptr = apic_irq_table[i]) != NULL) { 3030 ASSERT((irq_ptr->airq_temp_cpu == IRQ_UNBOUND) || 3031 (irq_ptr->airq_temp_cpu == IRQ_UNINIT) || 3032 ((irq_ptr->airq_temp_cpu & ~IRQ_USER_BOUND) < 3033 apic_nproc)); 3034 3035 if (irq_ptr->airq_temp_cpu == (cpun | IRQ_USER_BOUND)) { 3036 hardbound = 1; 3037 continue; 3038 } 3039 3040 if (irq_ptr->airq_temp_cpu == cpun) { 3041 do { 3042 apic_next_bind_cpu += 2; 3043 bind_cpu = apic_next_bind_cpu / 2; 3044 if (bind_cpu >= apic_nproc) { 3045 apic_next_bind_cpu = 1; 3046 bind_cpu = 0; 3047 3048 } 3049 } while (apic_rebind_all(irq_ptr, bind_cpu, 1)); 3050 } 3051 } 3052 } 3053 if (hardbound) { 3054 cmn_err(CE_WARN, "Could not disable interrupts on %d" 3055 "due to user bound interrupts", cpun); 3056 return (PSM_FAILURE); 3057 } 3058 else 3059 return (PSM_SUCCESS); 3060 } 3061 3062 static void 3063 apic_enable_intr(processorid_t cpun) 3064 { 3065 int i, iflag; 3066 apic_irq_t *irq_ptr; 3067 3068 iflag = intr_clear(); 3069 lock_set(&apic_ioapic_lock); 3070 apic_cpus[cpun].aci_status |= APIC_CPU_INTR_ENABLE; 3071 lock_clear(&apic_ioapic_lock); 3072 intr_restore(iflag); 3073 3074 i = apic_min_device_irq; 3075 for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) { 3076 if ((irq_ptr = apic_irq_table[i]) != NULL) { 3077 if ((irq_ptr->airq_cpu & ~IRQ_USER_BOUND) == cpun) { 3078 (void) apic_rebind_all(irq_ptr, 3079 irq_ptr->airq_cpu, 1); 3080 } 3081 } 3082 } 3083 } 
3084 3085 /* 3086 * apic_introp_xlate() replaces apic_translate_irq() and is 3087 * called only from apic_intr_ops(). With the new ADII framework, 3088 * the priority can no longer be retrived through i_ddi_get_intrspec(). 3089 * It has to be passed in from the caller. 3090 */ 3091 int 3092 apic_introp_xlate(dev_info_t *dip, struct intrspec *ispec, int type) 3093 { 3094 char dev_type[16]; 3095 int dev_len, pci_irq, newirq, bustype, devid, busid, i; 3096 int irqno = ispec->intrspec_vec; 3097 ddi_acc_handle_t cfg_handle; 3098 uchar_t ipin; 3099 struct apic_io_intr *intrp; 3100 iflag_t intr_flag; 3101 APIC_HEADER *hp; 3102 MADT_INTERRUPT_OVERRIDE *isop; 3103 apic_irq_t *airqp; 3104 int parent_is_pci_or_pciex = 0; 3105 int child_is_pciex = 0; 3106 3107 DDI_INTR_IMPLDBG((CE_CONT, "apic_introp_xlate: dip=0x%p name=%s " 3108 "type=%d irqno=0x%x\n", (void *)dip, ddi_get_name(dip), type, 3109 irqno)); 3110 3111 dev_len = sizeof (dev_type); 3112 if (ddi_getlongprop_buf(DDI_DEV_T_ANY, ddi_get_parent(dip), 3113 DDI_PROP_DONTPASS, "device_type", (caddr_t)dev_type, 3114 &dev_len) == DDI_PROP_SUCCESS) { 3115 if ((strcmp(dev_type, "pci") == 0) || 3116 (strcmp(dev_type, "pciex") == 0)) 3117 parent_is_pci_or_pciex = 1; 3118 } 3119 3120 if (parent_is_pci_or_pciex && ddi_prop_get_int(DDI_DEV_T_ANY, dip, 3121 DDI_PROP_DONTPASS, "pcie-capid-pointer", PCI_CAP_NEXT_PTR_NULL) != 3122 PCI_CAP_NEXT_PTR_NULL) { 3123 child_is_pciex = 1; 3124 } 3125 3126 if (DDI_INTR_IS_MSI_OR_MSIX(type)) { 3127 if ((airqp = apic_find_irq(dip, ispec, type)) != NULL) { 3128 airqp->airq_iflag.bustype = 3129 child_is_pciex ? 
BUS_PCIE : BUS_PCI; 3130 return (apic_vector_to_irq[airqp->airq_vector]); 3131 } 3132 return (apic_setup_irq_table(dip, irqno, NULL, ispec, 3133 NULL, type)); 3134 } 3135 3136 bustype = 0; 3137 3138 /* check if we have already translated this irq */ 3139 mutex_enter(&airq_mutex); 3140 newirq = apic_min_device_irq; 3141 for (; newirq <= apic_max_device_irq; newirq++) { 3142 airqp = apic_irq_table[newirq]; 3143 while (airqp) { 3144 if ((airqp->airq_dip == dip) && 3145 (airqp->airq_origirq == irqno) && 3146 (airqp->airq_mps_intr_index != FREE_INDEX)) { 3147 3148 mutex_exit(&airq_mutex); 3149 return (VIRTIRQ(newirq, airqp->airq_share_id)); 3150 } 3151 airqp = airqp->airq_next; 3152 } 3153 } 3154 mutex_exit(&airq_mutex); 3155 3156 if (apic_defconf) 3157 goto defconf; 3158 3159 if ((dip == NULL) || (!apic_irq_translate && !apic_enable_acpi)) 3160 goto nonpci; 3161 3162 if (parent_is_pci_or_pciex) { 3163 /* pci device */ 3164 if (acpica_get_bdf(dip, &busid, &devid, NULL) != 0) 3165 goto nonpci; 3166 if (busid == 0 && apic_pci_bus_total == 1) 3167 busid = (int)apic_single_pci_busid; 3168 3169 if (pci_config_setup(dip, &cfg_handle) != DDI_SUCCESS) 3170 goto nonpci; 3171 ipin = pci_config_get8(cfg_handle, PCI_CONF_IPIN) - PCI_INTA; 3172 pci_config_teardown(&cfg_handle); 3173 if (apic_enable_acpi && !apic_use_acpi_madt_only) { 3174 if (apic_acpi_translate_pci_irq(dip, busid, devid, 3175 ipin, &pci_irq, &intr_flag) != ACPI_PSM_SUCCESS) 3176 goto nonpci; 3177 3178 intr_flag.bustype = child_is_pciex ? 
BUS_PCIE : BUS_PCI; 3179 if ((newirq = apic_setup_irq_table(dip, pci_irq, NULL, 3180 ispec, &intr_flag, type)) == -1) 3181 goto nonpci; 3182 return (newirq); 3183 } else { 3184 pci_irq = ((devid & 0x1f) << 2) | (ipin & 0x3); 3185 if ((intrp = apic_find_io_intr_w_busid(pci_irq, busid)) 3186 == NULL) { 3187 if ((pci_irq = apic_handle_pci_pci_bridge(dip, 3188 devid, ipin, &intrp)) == -1) 3189 goto nonpci; 3190 } 3191 if ((newirq = apic_setup_irq_table(dip, pci_irq, intrp, 3192 ispec, NULL, type)) == -1) 3193 goto nonpci; 3194 return (newirq); 3195 } 3196 } else if (strcmp(dev_type, "isa") == 0) 3197 bustype = BUS_ISA; 3198 else if (strcmp(dev_type, "eisa") == 0) 3199 bustype = BUS_EISA; 3200 3201 nonpci: 3202 if (apic_enable_acpi && !apic_use_acpi_madt_only) { 3203 /* search iso entries first */ 3204 if (acpi_iso_cnt != 0) { 3205 hp = (APIC_HEADER *)acpi_isop; 3206 i = 0; 3207 while (i < acpi_iso_cnt) { 3208 if (hp->Type == APIC_XRUPT_OVERRIDE) { 3209 isop = (MADT_INTERRUPT_OVERRIDE *)hp; 3210 if (isop->Bus == 0 && 3211 isop->Source == irqno) { 3212 newirq = isop->Interrupt; 3213 intr_flag.intr_po = 3214 isop->Polarity; 3215 intr_flag.intr_el = 3216 isop->TriggerMode; 3217 intr_flag.bustype = BUS_ISA; 3218 3219 return (apic_setup_irq_table( 3220 dip, newirq, NULL, ispec, 3221 &intr_flag, type)); 3222 3223 } 3224 i++; 3225 } 3226 hp = (APIC_HEADER *)(((char *)hp) + 3227 hp->Length); 3228 } 3229 } 3230 intr_flag.intr_po = INTR_PO_ACTIVE_HIGH; 3231 intr_flag.intr_el = INTR_EL_EDGE; 3232 intr_flag.bustype = BUS_ISA; 3233 return (apic_setup_irq_table(dip, irqno, NULL, ispec, 3234 &intr_flag, type)); 3235 } else { 3236 if (bustype == 0) 3237 bustype = eisa_level_intr_mask ? 
BUS_EISA : BUS_ISA; 3238 for (i = 0; i < 2; i++) { 3239 if (((busid = apic_find_bus_id(bustype)) != -1) && 3240 ((intrp = apic_find_io_intr_w_busid(irqno, busid)) 3241 != NULL)) { 3242 if ((newirq = apic_setup_irq_table(dip, irqno, 3243 intrp, ispec, NULL, type)) != -1) { 3244 return (newirq); 3245 } 3246 goto defconf; 3247 } 3248 bustype = (bustype == BUS_EISA) ? BUS_ISA : BUS_EISA; 3249 } 3250 } 3251 3252 /* MPS default configuration */ 3253 defconf: 3254 newirq = apic_setup_irq_table(dip, irqno, NULL, ispec, NULL, type); 3255 if (newirq == -1) 3256 return (newirq); 3257 ASSERT(IRQINDEX(newirq) == irqno); 3258 ASSERT(apic_irq_table[irqno]); 3259 return (newirq); 3260 } 3261 3262 3263 3264 3265 3266 3267 /* 3268 * On machines with PCI-PCI bridges, a device behind a PCI-PCI bridge 3269 * needs special handling. We may need to chase up the device tree, 3270 * using the PCI-PCI Bridge specification's "rotating IPIN assumptions", 3271 * to find the IPIN at the root bus that relates to the IPIN on the 3272 * subsidiary bus (for ACPI or MP). We may, however, have an entry 3273 * in the MP table or the ACPI namespace for this device itself. 3274 * We handle both cases in the search below. 
3275 */ 3276 /* this is the non-acpi version */ 3277 static int 3278 apic_handle_pci_pci_bridge(dev_info_t *idip, int child_devno, int child_ipin, 3279 struct apic_io_intr **intrp) 3280 { 3281 dev_info_t *dipp, *dip; 3282 int pci_irq; 3283 ddi_acc_handle_t cfg_handle; 3284 int bridge_devno, bridge_bus; 3285 int ipin; 3286 3287 dip = idip; 3288 3289 /*CONSTCOND*/ 3290 while (1) { 3291 if ((dipp = ddi_get_parent(dip)) == (dev_info_t *)NULL) 3292 return (-1); 3293 if ((pci_config_setup(dipp, &cfg_handle) == DDI_SUCCESS) && 3294 (pci_config_get8(cfg_handle, PCI_CONF_BASCLASS) == 3295 PCI_CLASS_BRIDGE) && (pci_config_get8(cfg_handle, 3296 PCI_CONF_SUBCLASS) == PCI_BRIDGE_PCI)) { 3297 pci_config_teardown(&cfg_handle); 3298 if (acpica_get_bdf(dipp, &bridge_bus, &bridge_devno, 3299 NULL) != 0) 3300 return (-1); 3301 /* 3302 * This is the rotating scheme that Compaq is using 3303 * and documented in the pci to pci spec. Also, if 3304 * the pci to pci bridge is behind another pci to 3305 * pci bridge, then it need to keep transversing 3306 * up until an interrupt entry is found or reach 3307 * the top of the tree 3308 */ 3309 ipin = (child_devno + child_ipin) % PCI_INTD; 3310 if (bridge_bus == 0 && apic_pci_bus_total == 1) 3311 bridge_bus = (int)apic_single_pci_busid; 3312 pci_irq = ((bridge_devno & 0x1f) << 2) | 3313 (ipin & 0x3); 3314 if ((*intrp = apic_find_io_intr_w_busid(pci_irq, 3315 bridge_bus)) != NULL) { 3316 return (pci_irq); 3317 } 3318 dip = dipp; 3319 child_devno = bridge_devno; 3320 child_ipin = ipin; 3321 } else 3322 return (-1); 3323 } 3324 /*LINTED: function will not fall off the bottom */ 3325 } 3326 3327 3328 3329 3330 static uchar_t 3331 acpi_find_ioapic(int irq) 3332 { 3333 int i; 3334 3335 for (i = 0; i < apic_io_max; i++) { 3336 if (irq >= apic_io_vectbase[i] && irq <= apic_io_vectend[i]) 3337 return (i); 3338 } 3339 return (0xFF); /* shouldn't happen */ 3340 } 3341 3342 /* 3343 * See if two irqs are compatible for sharing a vector. 
3344 * Currently we only support sharing of PCI devices. 3345 */ 3346 static int 3347 acpi_intr_compatible(iflag_t iflag1, iflag_t iflag2) 3348 { 3349 uint_t level1, po1; 3350 uint_t level2, po2; 3351 3352 /* Assume active high by default */ 3353 po1 = 0; 3354 po2 = 0; 3355 3356 if (iflag1.bustype != iflag2.bustype || iflag1.bustype != BUS_PCI) 3357 return (0); 3358 3359 if (iflag1.intr_el == INTR_EL_CONFORM) 3360 level1 = AV_LEVEL; 3361 else 3362 level1 = (iflag1.intr_el == INTR_EL_LEVEL) ? AV_LEVEL : 0; 3363 3364 if (level1 && ((iflag1.intr_po == INTR_PO_ACTIVE_LOW) || 3365 (iflag1.intr_po == INTR_PO_CONFORM))) 3366 po1 = AV_ACTIVE_LOW; 3367 3368 if (iflag2.intr_el == INTR_EL_CONFORM) 3369 level2 = AV_LEVEL; 3370 else 3371 level2 = (iflag2.intr_el == INTR_EL_LEVEL) ? AV_LEVEL : 0; 3372 3373 if (level2 && ((iflag2.intr_po == INTR_PO_ACTIVE_LOW) || 3374 (iflag2.intr_po == INTR_PO_CONFORM))) 3375 po2 = AV_ACTIVE_LOW; 3376 3377 if ((level1 == level2) && (po1 == po2)) 3378 return (1); 3379 3380 return (0); 3381 } 3382 3383 /* 3384 * Attempt to share vector with someone else 3385 */ 3386 static int 3387 apic_share_vector(int irqno, iflag_t *intr_flagp, short intr_index, int ipl, 3388 uchar_t ioapicindex, uchar_t ipin, apic_irq_t **irqptrp) 3389 { 3390 #ifdef DEBUG 3391 apic_irq_t *tmpirqp = NULL; 3392 #endif /* DEBUG */ 3393 apic_irq_t *irqptr, dummyirq; 3394 int newirq, chosen_irq = -1, share = 127; 3395 int lowest, highest, i; 3396 uchar_t share_id; 3397 3398 DDI_INTR_IMPLDBG((CE_CONT, "apic_share_vector: irqno=0x%x " 3399 "intr_index=0x%x ipl=0x%x\n", irqno, intr_index, ipl)); 3400 3401 highest = apic_ipltopri[ipl] + APIC_VECTOR_MASK; 3402 lowest = apic_ipltopri[ipl-1] + APIC_VECTOR_PER_IPL; 3403 3404 if (highest < lowest) /* Both ipl and ipl-1 map to same pri */ 3405 lowest -= APIC_VECTOR_PER_IPL; 3406 dummyirq.airq_mps_intr_index = intr_index; 3407 dummyirq.airq_ioapicindex = ioapicindex; 3408 dummyirq.airq_intin_no = ipin; 3409 if (intr_flagp) 3410 
dummyirq.airq_iflag = *intr_flagp; 3411 apic_record_rdt_entry(&dummyirq, irqno); 3412 for (i = lowest; i <= highest; i++) { 3413 newirq = apic_vector_to_irq[i]; 3414 if (newirq == APIC_RESV_IRQ) 3415 continue; 3416 irqptr = apic_irq_table[newirq]; 3417 3418 if ((dummyirq.airq_rdt_entry & 0xFF00) != 3419 (irqptr->airq_rdt_entry & 0xFF00)) 3420 /* not compatible */ 3421 continue; 3422 3423 if (irqptr->airq_share < share) { 3424 share = irqptr->airq_share; 3425 chosen_irq = newirq; 3426 } 3427 } 3428 if (chosen_irq != -1) { 3429 /* 3430 * Assign a share id which is free or which is larger 3431 * than the largest one. 3432 */ 3433 share_id = 1; 3434 mutex_enter(&airq_mutex); 3435 irqptr = apic_irq_table[chosen_irq]; 3436 while (irqptr) { 3437 if (irqptr->airq_mps_intr_index == FREE_INDEX) { 3438 share_id = irqptr->airq_share_id; 3439 break; 3440 } 3441 if (share_id <= irqptr->airq_share_id) 3442 share_id = irqptr->airq_share_id + 1; 3443 #ifdef DEBUG 3444 tmpirqp = irqptr; 3445 #endif /* DEBUG */ 3446 irqptr = irqptr->airq_next; 3447 } 3448 if (!irqptr) { 3449 irqptr = kmem_zalloc(sizeof (apic_irq_t), KM_SLEEP); 3450 irqptr->airq_temp_cpu = IRQ_UNINIT; 3451 irqptr->airq_next = 3452 apic_irq_table[chosen_irq]->airq_next; 3453 apic_irq_table[chosen_irq]->airq_next = irqptr; 3454 #ifdef DEBUG 3455 tmpirqp = apic_irq_table[chosen_irq]; 3456 #endif /* DEBUG */ 3457 } 3458 irqptr->airq_mps_intr_index = intr_index; 3459 irqptr->airq_ioapicindex = ioapicindex; 3460 irqptr->airq_intin_no = ipin; 3461 if (intr_flagp) 3462 irqptr->airq_iflag = *intr_flagp; 3463 irqptr->airq_vector = apic_irq_table[chosen_irq]->airq_vector; 3464 irqptr->airq_share_id = share_id; 3465 apic_record_rdt_entry(irqptr, irqno); 3466 *irqptrp = irqptr; 3467 #ifdef DEBUG 3468 /* shuffle the pointers to test apic_delspl path */ 3469 if (tmpirqp) { 3470 tmpirqp->airq_next = irqptr->airq_next; 3471 irqptr->airq_next = apic_irq_table[chosen_irq]; 3472 apic_irq_table[chosen_irq] = irqptr; 3473 } 3474 #endif /* 
DEBUG */ 3475 mutex_exit(&airq_mutex); 3476 return (VIRTIRQ(chosen_irq, share_id)); 3477 } 3478 return (-1); 3479 } 3480 3481 /* 3482 * 3483 */ 3484 static int 3485 apic_setup_irq_table(dev_info_t *dip, int irqno, struct apic_io_intr *intrp, 3486 struct intrspec *ispec, iflag_t *intr_flagp, int type) 3487 { 3488 int origirq = ispec->intrspec_vec; 3489 uchar_t ipl = ispec->intrspec_pri; 3490 int newirq, intr_index; 3491 uchar_t ipin, ioapic, ioapicindex, vector; 3492 apic_irq_t *irqptr; 3493 major_t major; 3494 dev_info_t *sdip; 3495 3496 DDI_INTR_IMPLDBG((CE_CONT, "apic_setup_irq_table: dip=0x%p type=%d " 3497 "irqno=0x%x origirq=0x%x\n", (void *)dip, type, irqno, origirq)); 3498 3499 ASSERT(ispec != NULL); 3500 3501 major = (dip != NULL) ? ddi_name_to_major(ddi_get_name(dip)) : 0; 3502 3503 if (DDI_INTR_IS_MSI_OR_MSIX(type)) { 3504 /* MSI/X doesn't need to setup ioapic stuffs */ 3505 ioapicindex = 0xff; 3506 ioapic = 0xff; 3507 ipin = (uchar_t)0xff; 3508 intr_index = (type == DDI_INTR_TYPE_MSI) ? MSI_INDEX : 3509 MSIX_INDEX; 3510 mutex_enter(&airq_mutex); 3511 if ((irqno = apic_allocate_irq(apic_first_avail_irq)) == -1) { 3512 mutex_exit(&airq_mutex); 3513 /* need an irq for MSI/X to index into autovect[] */ 3514 cmn_err(CE_WARN, "No interrupt irq: %s instance %d", 3515 ddi_get_name(dip), ddi_get_instance(dip)); 3516 return (-1); 3517 } 3518 mutex_exit(&airq_mutex); 3519 3520 } else if (intrp != NULL) { 3521 intr_index = (int)(intrp - apic_io_intrp); 3522 ioapic = intrp->intr_destid; 3523 ipin = intrp->intr_destintin; 3524 /* Find ioapicindex. If destid was ALL, we will exit with 0. 
*/ 3525 for (ioapicindex = apic_io_max - 1; ioapicindex; ioapicindex--) 3526 if (apic_io_id[ioapicindex] == ioapic) 3527 break; 3528 ASSERT((ioapic == apic_io_id[ioapicindex]) || 3529 (ioapic == INTR_ALL_APIC)); 3530 3531 /* check whether this intin# has been used by another irqno */ 3532 if ((newirq = apic_find_intin(ioapicindex, ipin)) != -1) { 3533 return (newirq); 3534 } 3535 3536 } else if (intr_flagp != NULL) { 3537 /* ACPI case */ 3538 intr_index = ACPI_INDEX; 3539 ioapicindex = acpi_find_ioapic(irqno); 3540 ASSERT(ioapicindex != 0xFF); 3541 ioapic = apic_io_id[ioapicindex]; 3542 ipin = irqno - apic_io_vectbase[ioapicindex]; 3543 if (apic_irq_table[irqno] && 3544 apic_irq_table[irqno]->airq_mps_intr_index == ACPI_INDEX) { 3545 ASSERT(apic_irq_table[irqno]->airq_intin_no == ipin && 3546 apic_irq_table[irqno]->airq_ioapicindex == 3547 ioapicindex); 3548 return (irqno); 3549 } 3550 3551 } else { 3552 /* default configuration */ 3553 ioapicindex = 0; 3554 ioapic = apic_io_id[ioapicindex]; 3555 ipin = (uchar_t)irqno; 3556 intr_index = DEFAULT_INDEX; 3557 } 3558 3559 if (ispec == NULL) { 3560 APIC_VERBOSE_IOAPIC((CE_WARN, "No intrspec for irqno = %x\n", 3561 irqno)); 3562 } else if ((vector = apic_allocate_vector(ipl, irqno, 0)) == 0) { 3563 if ((newirq = apic_share_vector(irqno, intr_flagp, intr_index, 3564 ipl, ioapicindex, ipin, &irqptr)) != -1) { 3565 irqptr->airq_ipl = ipl; 3566 irqptr->airq_origirq = (uchar_t)origirq; 3567 irqptr->airq_dip = dip; 3568 irqptr->airq_major = major; 3569 sdip = apic_irq_table[IRQINDEX(newirq)]->airq_dip; 3570 /* This is OK to do really */ 3571 if (sdip == NULL) { 3572 cmn_err(CE_WARN, "Sharing vectors: %s" 3573 " instance %d and SCI", 3574 ddi_get_name(dip), ddi_get_instance(dip)); 3575 } else { 3576 cmn_err(CE_WARN, "Sharing vectors: %s" 3577 " instance %d and %s instance %d", 3578 ddi_get_name(sdip), ddi_get_instance(sdip), 3579 ddi_get_name(dip), ddi_get_instance(dip)); 3580 } 3581 return (newirq); 3582 } 3583 /* try high 
priority allocation now that share has failed */ 3584 if ((vector = apic_allocate_vector(ipl, irqno, 1)) == 0) { 3585 cmn_err(CE_WARN, "No interrupt vector: %s instance %d", 3586 ddi_get_name(dip), ddi_get_instance(dip)); 3587 return (-1); 3588 } 3589 } 3590 3591 mutex_enter(&airq_mutex); 3592 if (apic_irq_table[irqno] == NULL) { 3593 irqptr = kmem_zalloc(sizeof (apic_irq_t), KM_SLEEP); 3594 irqptr->airq_temp_cpu = IRQ_UNINIT; 3595 apic_irq_table[irqno] = irqptr; 3596 } else { 3597 irqptr = apic_irq_table[irqno]; 3598 if (irqptr->airq_mps_intr_index != FREE_INDEX) { 3599 /* 3600 * The slot is used by another irqno, so allocate 3601 * a free irqno for this interrupt 3602 */ 3603 newirq = apic_allocate_irq(apic_first_avail_irq); 3604 if (newirq == -1) { 3605 mutex_exit(&airq_mutex); 3606 return (-1); 3607 } 3608 irqno = newirq; 3609 irqptr = apic_irq_table[irqno]; 3610 if (irqptr == NULL) { 3611 irqptr = kmem_zalloc(sizeof (apic_irq_t), 3612 KM_SLEEP); 3613 irqptr->airq_temp_cpu = IRQ_UNINIT; 3614 apic_irq_table[irqno] = irqptr; 3615 } 3616 apic_modify_vector(vector, newirq); 3617 } 3618 } 3619 apic_max_device_irq = max(irqno, apic_max_device_irq); 3620 apic_min_device_irq = min(irqno, apic_min_device_irq); 3621 mutex_exit(&airq_mutex); 3622 irqptr->airq_ioapicindex = ioapicindex; 3623 irqptr->airq_intin_no = ipin; 3624 irqptr->airq_ipl = ipl; 3625 irqptr->airq_vector = vector; 3626 irqptr->airq_origirq = (uchar_t)origirq; 3627 irqptr->airq_share_id = 0; 3628 irqptr->airq_mps_intr_index = (short)intr_index; 3629 irqptr->airq_dip = dip; 3630 irqptr->airq_major = major; 3631 irqptr->airq_cpu = apic_bind_intr(dip, irqno, ioapic, ipin); 3632 if (intr_flagp) 3633 irqptr->airq_iflag = *intr_flagp; 3634 3635 if (!DDI_INTR_IS_MSI_OR_MSIX(type)) { 3636 /* setup I/O APIC entry for non-MSI/X interrupts */ 3637 apic_record_rdt_entry(irqptr, irqno); 3638 } 3639 return (irqno); 3640 } 3641 3642 /* 3643 * return the cpu to which this intr should be bound. 
3644 * Check properties or any other mechanism to see if user wants it 3645 * bound to a specific CPU. If so, return the cpu id with high bit set. 3646 * If not, use the policy to choose a cpu and return the id. 3647 */ 3648 uchar_t 3649 apic_bind_intr(dev_info_t *dip, int irq, uchar_t ioapicid, uchar_t intin) 3650 { 3651 int instance, instno, prop_len, bind_cpu, count; 3652 uint_t i, rc; 3653 uchar_t cpu; 3654 major_t major; 3655 char *name, *drv_name, *prop_val, *cptr; 3656 char prop_name[32]; 3657 3658 3659 if (apic_intr_policy == INTR_LOWEST_PRIORITY) 3660 return (IRQ_UNBOUND); 3661 3662 drv_name = NULL; 3663 rc = DDI_PROP_NOT_FOUND; 3664 major = (major_t)-1; 3665 if (dip != NULL) { 3666 name = ddi_get_name(dip); 3667 major = ddi_name_to_major(name); 3668 drv_name = ddi_major_to_name(major); 3669 instance = ddi_get_instance(dip); 3670 if (apic_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) { 3671 i = apic_min_device_irq; 3672 for (; i <= apic_max_device_irq; i++) { 3673 3674 if ((i == irq) || (apic_irq_table[i] == NULL) || 3675 (apic_irq_table[i]->airq_mps_intr_index 3676 == FREE_INDEX)) 3677 continue; 3678 3679 if ((apic_irq_table[i]->airq_major == major) && 3680 (!(apic_irq_table[i]->airq_cpu & 3681 IRQ_USER_BOUND))) { 3682 3683 cpu = apic_irq_table[i]->airq_cpu; 3684 3685 cmn_err(CE_CONT, 3686 "!pcplusmp: %s (%s) instance #%d " 3687 "vector 0x%x ioapic 0x%x " 3688 "intin 0x%x is bound to cpu %d\n", 3689 name, drv_name, instance, irq, 3690 ioapicid, intin, cpu); 3691 return (cpu); 3692 } 3693 } 3694 } 3695 /* 3696 * search for "drvname"_intpt_bind_cpus property first, the 3697 * syntax of the property should be "a[,b,c,...]" where 3698 * instance 0 binds to cpu a, instance 1 binds to cpu b, 3699 * instance 3 binds to cpu c... 3700 * ddi_getlongprop() will search /option first, then / 3701 * if "drvname"_intpt_bind_cpus doesn't exist, then find 3702 * intpt_bind_cpus property. 
The syntax is the same, and 3703 * it applies to all the devices if its "drvname" specific 3704 * property doesn't exist 3705 */ 3706 (void) strcpy(prop_name, drv_name); 3707 (void) strcat(prop_name, "_intpt_bind_cpus"); 3708 rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, 0, prop_name, 3709 (caddr_t)&prop_val, &prop_len); 3710 if (rc != DDI_PROP_SUCCESS) { 3711 rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, 0, 3712 "intpt_bind_cpus", (caddr_t)&prop_val, &prop_len); 3713 } 3714 } 3715 if (rc == DDI_PROP_SUCCESS) { 3716 for (i = count = 0; i < (prop_len - 1); i++) 3717 if (prop_val[i] == ',') 3718 count++; 3719 if (prop_val[i-1] != ',') 3720 count++; 3721 /* 3722 * if somehow the binding instances defined in the 3723 * property are not enough for this instno., then 3724 * reuse the pattern for the next instance until 3725 * it reaches the requested instno 3726 */ 3727 instno = instance % count; 3728 i = 0; 3729 cptr = prop_val; 3730 while (i < instno) 3731 if (*cptr++ == ',') 3732 i++; 3733 bind_cpu = stoi(&cptr); 3734 kmem_free(prop_val, prop_len); 3735 /* if specific cpu is bogus, then default to cpu 0 */ 3736 if (bind_cpu >= apic_nproc) { 3737 cmn_err(CE_WARN, "pcplusmp: %s=%s: CPU %d not present", 3738 prop_name, prop_val, bind_cpu); 3739 bind_cpu = 0; 3740 } else { 3741 /* indicate that we are bound at user request */ 3742 bind_cpu |= IRQ_USER_BOUND; 3743 } 3744 /* 3745 * no need to check apic_cpus[].aci_status, if specific cpu is 3746 * not up, then post_cpu_start will handle it. 
3747 */ 3748 } else { 3749 bind_cpu = apic_next_bind_cpu++; 3750 if (bind_cpu >= apic_nproc) { 3751 apic_next_bind_cpu = 1; 3752 bind_cpu = 0; 3753 } 3754 } 3755 if (drv_name != NULL) 3756 cmn_err(CE_CONT, "!pcplusmp: %s (%s) instance %d " 3757 "vector 0x%x ioapic 0x%x intin 0x%x is bound to cpu %d\n", 3758 name, drv_name, instance, 3759 irq, ioapicid, intin, bind_cpu & ~IRQ_USER_BOUND); 3760 else 3761 cmn_err(CE_CONT, "!pcplusmp: " 3762 "vector 0x%x ioapic 0x%x intin 0x%x is bound to cpu %d\n", 3763 irq, ioapicid, intin, bind_cpu & ~IRQ_USER_BOUND); 3764 3765 return ((uchar_t)bind_cpu); 3766 } 3767 3768 static struct apic_io_intr * 3769 apic_find_io_intr_w_busid(int irqno, int busid) 3770 { 3771 struct apic_io_intr *intrp; 3772 3773 /* 3774 * It can have more than 1 entry with same source bus IRQ, 3775 * but unique with the source bus id 3776 */ 3777 intrp = apic_io_intrp; 3778 if (intrp != NULL) { 3779 while (intrp->intr_entry == APIC_IO_INTR_ENTRY) { 3780 if (intrp->intr_irq == irqno && 3781 intrp->intr_busid == busid && 3782 intrp->intr_type == IO_INTR_INT) 3783 return (intrp); 3784 intrp++; 3785 } 3786 } 3787 APIC_VERBOSE_IOAPIC((CE_NOTE, "Did not find io intr for irqno:" 3788 "busid %x:%x\n", irqno, busid)); 3789 return ((struct apic_io_intr *)NULL); 3790 } 3791 3792 3793 struct mps_bus_info { 3794 char *bus_name; 3795 int bus_id; 3796 } bus_info_array[] = { 3797 "ISA ", BUS_ISA, 3798 "PCI ", BUS_PCI, 3799 "EISA ", BUS_EISA, 3800 "XPRESS", BUS_XPRESS, 3801 "PCMCIA", BUS_PCMCIA, 3802 "VL ", BUS_VL, 3803 "CBUS ", BUS_CBUS, 3804 "CBUSII", BUS_CBUSII, 3805 "FUTURE", BUS_FUTURE, 3806 "INTERN", BUS_INTERN, 3807 "MBI ", BUS_MBI, 3808 "MBII ", BUS_MBII, 3809 "MPI ", BUS_MPI, 3810 "MPSA ", BUS_MPSA, 3811 "NUBUS ", BUS_NUBUS, 3812 "TC ", BUS_TC, 3813 "VME ", BUS_VME, 3814 "PCI-E ", BUS_PCIE 3815 }; 3816 3817 static int 3818 apic_find_bus_type(char *bus) 3819 { 3820 int i = 0; 3821 3822 for (; i < sizeof (bus_info_array)/sizeof (struct mps_bus_info); i++) 3823 if 
(strncmp(bus, bus_info_array[i].bus_name, 3824 strlen(bus_info_array[i].bus_name)) == 0) 3825 return (bus_info_array[i].bus_id); 3826 APIC_VERBOSE_IOAPIC((CE_WARN, "Did not find bus type for bus %s", bus)); 3827 return (0); 3828 } 3829 3830 static int 3831 apic_find_bus(int busid) 3832 { 3833 struct apic_bus *busp; 3834 3835 busp = apic_busp; 3836 while (busp->bus_entry == APIC_BUS_ENTRY) { 3837 if (busp->bus_id == busid) 3838 return (apic_find_bus_type((char *)&busp->bus_str1)); 3839 busp++; 3840 } 3841 APIC_VERBOSE_IOAPIC((CE_WARN, "Did not find bus for bus id %x", busid)); 3842 return (0); 3843 } 3844 3845 static int 3846 apic_find_bus_id(int bustype) 3847 { 3848 struct apic_bus *busp; 3849 3850 busp = apic_busp; 3851 while (busp->bus_entry == APIC_BUS_ENTRY) { 3852 if (apic_find_bus_type((char *)&busp->bus_str1) == bustype) 3853 return (busp->bus_id); 3854 busp++; 3855 } 3856 APIC_VERBOSE_IOAPIC((CE_WARN, "Did not find bus id for bustype %x", 3857 bustype)); 3858 return (-1); 3859 } 3860 3861 /* 3862 * Check if a particular irq need to be reserved for any io_intr 3863 */ 3864 static struct apic_io_intr * 3865 apic_find_io_intr(int irqno) 3866 { 3867 struct apic_io_intr *intrp; 3868 3869 intrp = apic_io_intrp; 3870 if (intrp != NULL) { 3871 while (intrp->intr_entry == APIC_IO_INTR_ENTRY) { 3872 if (intrp->intr_irq == irqno && 3873 intrp->intr_type == IO_INTR_INT) 3874 return (intrp); 3875 intrp++; 3876 } 3877 } 3878 return ((struct apic_io_intr *)NULL); 3879 } 3880 3881 /* 3882 * Check if the given ioapicindex intin combination has already been assigned 3883 * an irq. If so return irqno. 
Else -1 3884 */ 3885 static int 3886 apic_find_intin(uchar_t ioapic, uchar_t intin) 3887 { 3888 apic_irq_t *irqptr; 3889 int i; 3890 3891 /* find ioapic and intin in the apic_irq_table[] and return the index */ 3892 for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) { 3893 irqptr = apic_irq_table[i]; 3894 while (irqptr) { 3895 if ((irqptr->airq_mps_intr_index >= 0) && 3896 (irqptr->airq_intin_no == intin) && 3897 (irqptr->airq_ioapicindex == ioapic)) { 3898 APIC_VERBOSE_IOAPIC((CE_NOTE, "!Found irq " 3899 "entry for ioapic:intin %x:%x " 3900 "shared interrupts ?", ioapic, intin)); 3901 return (i); 3902 } 3903 irqptr = irqptr->airq_next; 3904 } 3905 } 3906 return (-1); 3907 } 3908 3909 int 3910 apic_allocate_irq(int irq) 3911 { 3912 int freeirq, i; 3913 3914 if ((freeirq = apic_find_free_irq(irq, (APIC_RESV_IRQ - 1))) == -1) 3915 if ((freeirq = apic_find_free_irq(APIC_FIRST_FREE_IRQ, 3916 (irq - 1))) == -1) { 3917 /* 3918 * if BIOS really defines every single irq in the mps 3919 * table, then don't worry about conflicting with 3920 * them, just use any free slot in apic_irq_table 3921 */ 3922 for (i = APIC_FIRST_FREE_IRQ; i < APIC_RESV_IRQ; i++) { 3923 if ((apic_irq_table[i] == NULL) || 3924 apic_irq_table[i]->airq_mps_intr_index == 3925 FREE_INDEX) { 3926 freeirq = i; 3927 break; 3928 } 3929 } 3930 if (freeirq == -1) { 3931 /* This shouldn't happen, but just in case */ 3932 cmn_err(CE_WARN, "pcplusmp: NO available IRQ"); 3933 return (-1); 3934 } 3935 } 3936 if (apic_irq_table[freeirq] == NULL) { 3937 apic_irq_table[freeirq] = 3938 kmem_zalloc(sizeof (apic_irq_t), KM_NOSLEEP); 3939 if (apic_irq_table[freeirq] == NULL) { 3940 cmn_err(CE_WARN, "pcplusmp: NO memory to allocate IRQ"); 3941 return (-1); 3942 } 3943 apic_irq_table[freeirq]->airq_mps_intr_index = FREE_INDEX; 3944 } 3945 return (freeirq); 3946 } 3947 3948 static int 3949 apic_find_free_irq(int start, int end) 3950 { 3951 int i; 3952 3953 for (i = start; i <= end; i++) 3954 /* Check if any I/O 
entry needs this IRQ */ 3955 if (apic_find_io_intr(i) == NULL) { 3956 /* Then see if it is free */ 3957 if ((apic_irq_table[i] == NULL) || 3958 (apic_irq_table[i]->airq_mps_intr_index == 3959 FREE_INDEX)) { 3960 return (i); 3961 } 3962 } 3963 return (-1); 3964 } 3965 3966 /* 3967 * Allocate a free vector for irq at ipl. Takes care of merging of multiple 3968 * IPLs into a single APIC level as well as stretching some IPLs onto multiple 3969 * levels. APIC_HI_PRI_VECTS interrupts are reserved for high priority 3970 * requests and allocated only when pri is set. 3971 */ 3972 static uchar_t 3973 apic_allocate_vector(int ipl, int irq, int pri) 3974 { 3975 int lowest, highest, i; 3976 3977 highest = apic_ipltopri[ipl] + APIC_VECTOR_MASK; 3978 lowest = apic_ipltopri[ipl - 1] + APIC_VECTOR_PER_IPL; 3979 3980 if (highest < lowest) /* Both ipl and ipl - 1 map to same pri */ 3981 lowest -= APIC_VECTOR_PER_IPL; 3982 3983 #ifdef DEBUG 3984 if (apic_restrict_vector) /* for testing shared interrupt logic */ 3985 highest = lowest + apic_restrict_vector + APIC_HI_PRI_VECTS; 3986 #endif /* DEBUG */ 3987 if (pri == 0) 3988 highest -= APIC_HI_PRI_VECTS; 3989 3990 for (i = lowest; i < highest; i++) { 3991 if (APIC_CHECK_RESERVE_VECTORS(i)) 3992 continue; 3993 if (apic_vector_to_irq[i] == APIC_RESV_IRQ) { 3994 apic_vector_to_irq[i] = (uchar_t)irq; 3995 return (i); 3996 } 3997 } 3998 3999 return (0); 4000 } 4001 4002 static void 4003 apic_modify_vector(uchar_t vector, int irq) 4004 { 4005 apic_vector_to_irq[vector] = (uchar_t)irq; 4006 } 4007 4008 /* 4009 * Mark vector as being in the process of being deleted. Interrupts 4010 * may still come in on some CPU. The moment an interrupt comes with 4011 * the new vector, we know we can free the old one. Called only from 4012 * addspl and delspl with interrupts disabled. 
Because an interrupt
 * can be shared, but no interrupt from either device may come in,
 * we also use a timeout mechanism, which we arbitrarily set to
 * apic_revector_timeout microseconds.
 */
/*
 * Records the oldvector -> newvector pair (and its reverse) in the
 * translation tables, which are allocated on first use as one array of
 * 2 * APIC_MAX_VECTOR entries, and counts it in apic_revector_pending.
 * The tables are updated under apic_revector_lock with interrupts
 * cleared. If the tables cannot be allocated the function sets
 * APIC_ERR_MARK_VECTOR_FAIL in apic_error and returns without arming
 * the timeout.
 */
static void
apic_mark_vector(uchar_t oldvector, uchar_t newvector)
{
	int	iflag = intr_clear();
	lock_set(&apic_revector_lock);
	if (!apic_oldvec_to_newvec) {
		/* one allocation holds both tables, back to back */
		apic_oldvec_to_newvec =
		    kmem_zalloc(sizeof (newvector) * APIC_MAX_VECTOR * 2,
		    KM_NOSLEEP);

		if (!apic_oldvec_to_newvec) {
			/*
			 * This failure is not catastrophic.
			 * But, the oldvec will never be freed.
			 */
			apic_error |= APIC_ERR_MARK_VECTOR_FAIL;
			lock_clear(&apic_revector_lock);
			intr_restore(iflag);
			return;
		}
		/* the reverse table is the second half of the allocation */
		apic_newvec_to_oldvec = &apic_oldvec_to_newvec[APIC_MAX_VECTOR];
	}

	/* See if we already did this for drivers which do double addintrs */
	if (apic_oldvec_to_newvec[oldvector] != newvector) {
		apic_oldvec_to_newvec[oldvector] = newvector;
		apic_newvec_to_oldvec[newvector] = oldvector;
		apic_revector_pending++;
	}
	lock_clear(&apic_revector_lock);
	intr_restore(iflag);
	/* free the old vector later even if no interrupt ever arrives */
	(void) timeout(apic_xlate_vector_free_timeout_handler,
	    (void *)(uintptr_t)oldvector, drv_usectohz(apic_revector_timeout));
}

/*
 * xlate_vector is called from intr_enter if revector_pending is set.
 * It will xlate it if needed and mark the old vector as free.
 *
 * Returns the vector to use: the argument itself when no translation
 * involving it is pending, otherwise the new vector of the pair, looked
 * up again in case that one has been retargeted as well.
 */
static uchar_t
apic_xlate_vector(uchar_t vector)
{
	uchar_t	newvector, oldvector = 0;

	lock_set(&apic_revector_lock);
	/* Do we really need to do this ? */
	if (!apic_revector_pending) {
		lock_clear(&apic_revector_lock);
		return (vector);
	}
	/* a non-zero table entry means a pair is pending for this vector */
	if ((newvector = apic_oldvec_to_newvec[vector]) != 0)
		oldvector = vector;
	else {
		/*
		 * The incoming vector is new . See if a stale entry is
		 * remaining
		 */
		if ((oldvector = apic_newvec_to_oldvec[vector]) != 0)
			newvector = vector;
	}

	if (oldvector) {
		/* retire the pair and release the old vector */
		apic_revector_pending--;
		apic_oldvec_to_newvec[oldvector] = 0;
		apic_newvec_to_oldvec[newvector] = 0;
		apic_free_vector(oldvector);
		lock_clear(&apic_revector_lock);
		/* There could have been more than one reprogramming! */
		return (apic_xlate_vector(newvector));
	}
	lock_clear(&apic_revector_lock);
	return (vector);
}

/*
 * timeout() callback armed by apic_mark_vector(); arg carries the old
 * vector. If the old -> new pair is still recorded, the old vector is
 * freed and the pair is removed from both tables, under
 * apic_revector_lock with interrupts cleared.
 */
void
apic_xlate_vector_free_timeout_handler(void *arg)
{
	int	iflag;
	uchar_t	oldvector, newvector;

	oldvector = (uchar_t)(uintptr_t)arg;
	iflag = intr_clear();
	lock_set(&apic_revector_lock);
	/* still recorded, i.e. apic_xlate_vector() has not retired it */
	if ((newvector = apic_oldvec_to_newvec[oldvector]) != 0) {
		apic_free_vector(oldvector);
		apic_oldvec_to_newvec[oldvector] = 0;
		apic_newvec_to_oldvec[newvector] = 0;
		apic_revector_pending--;
	}

	lock_clear(&apic_revector_lock);
	intr_restore(iflag);
}


/* Mark vector as not being used by any irq */
static void
apic_free_vector(uchar_t vector)
{
	apic_vector_to_irq[vector] = APIC_RESV_IRQ;
}

/*
 * compute the polarity, trigger mode and vector for programming into
 * the I/O apic and record in airq_rdt_entry.
4122 */ 4123 static void 4124 apic_record_rdt_entry(apic_irq_t *irqptr, int irq) 4125 { 4126 int ioapicindex, bus_type, vector; 4127 short intr_index; 4128 uint_t level, po, io_po; 4129 struct apic_io_intr *iointrp; 4130 4131 intr_index = irqptr->airq_mps_intr_index; 4132 DDI_INTR_IMPLDBG((CE_CONT, "apic_record_rdt_entry: intr_index=%d " 4133 "irq = 0x%x dip = 0x%p vector = 0x%x\n", intr_index, irq, 4134 (void *)irqptr->airq_dip, irqptr->airq_vector)); 4135 4136 if (intr_index == RESERVE_INDEX) { 4137 apic_error |= APIC_ERR_INVALID_INDEX; 4138 return; 4139 } else if (APIC_IS_MSI_OR_MSIX_INDEX(intr_index)) { 4140 return; 4141 } 4142 4143 vector = irqptr->airq_vector; 4144 ioapicindex = irqptr->airq_ioapicindex; 4145 /* Assume edge triggered by default */ 4146 level = 0; 4147 /* Assume active high by default */ 4148 po = 0; 4149 4150 if (intr_index == DEFAULT_INDEX || intr_index == FREE_INDEX) { 4151 ASSERT(irq < 16); 4152 if (eisa_level_intr_mask & (1 << irq)) 4153 level = AV_LEVEL; 4154 if (intr_index == FREE_INDEX && apic_defconf == 0) 4155 apic_error |= APIC_ERR_INVALID_INDEX; 4156 } else if (intr_index == ACPI_INDEX) { 4157 bus_type = irqptr->airq_iflag.bustype; 4158 if (irqptr->airq_iflag.intr_el == INTR_EL_CONFORM) { 4159 if (bus_type == BUS_PCI) 4160 level = AV_LEVEL; 4161 } else 4162 level = (irqptr->airq_iflag.intr_el == INTR_EL_LEVEL) ? 4163 AV_LEVEL : 0; 4164 if (level && 4165 ((irqptr->airq_iflag.intr_po == INTR_PO_ACTIVE_LOW) || 4166 (irqptr->airq_iflag.intr_po == INTR_PO_CONFORM && 4167 bus_type == BUS_PCI))) 4168 po = AV_ACTIVE_LOW; 4169 } else { 4170 iointrp = apic_io_intrp + intr_index; 4171 bus_type = apic_find_bus(iointrp->intr_busid); 4172 if (iointrp->intr_el == INTR_EL_CONFORM) { 4173 if ((irq < 16) && (eisa_level_intr_mask & (1 << irq))) 4174 level = AV_LEVEL; 4175 else if (bus_type == BUS_PCI) 4176 level = AV_LEVEL; 4177 } else 4178 level = (iointrp->intr_el == INTR_EL_LEVEL) ? 
4179 AV_LEVEL : 0; 4180 if (level && ((iointrp->intr_po == INTR_PO_ACTIVE_LOW) || 4181 (iointrp->intr_po == INTR_PO_CONFORM && 4182 bus_type == BUS_PCI))) 4183 po = AV_ACTIVE_LOW; 4184 } 4185 if (level) 4186 apic_level_intr[irq] = 1; 4187 /* 4188 * The 82489DX External APIC cannot do active low polarity interrupts. 4189 */ 4190 if (po && (apic_io_ver[ioapicindex] != IOAPIC_VER_82489DX)) 4191 io_po = po; 4192 else 4193 io_po = 0; 4194 4195 if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) 4196 printf("setio: ioapic=%x intin=%x level=%x po=%x vector=%x\n", 4197 ioapicindex, irqptr->airq_intin_no, level, io_po, vector); 4198 4199 irqptr->airq_rdt_entry = level|io_po|vector; 4200 } 4201 4202 /* 4203 * Call rebind to do the actual programming. 4204 */ 4205 static int 4206 apic_setup_io_intr(apic_irq_t *irqptr, int irq) 4207 { 4208 int rv; 4209 4210 if (rv = apic_rebind(irqptr, apic_irq_table[irq]->airq_cpu, 1, 4211 IMMEDIATE)) 4212 /* CPU is not up or interrupt is disabled. Fall back to 0 */ 4213 rv = apic_rebind(irqptr, 0, 1, IMMEDIATE); 4214 4215 return (rv); 4216 } 4217 4218 /* 4219 * Deferred reprogramming: Call apic_rebind to do the real work. 4220 */ 4221 static int 4222 apic_setup_io_intr_deferred(apic_irq_t *irqptr, int irq) 4223 { 4224 int rv; 4225 4226 if (rv = apic_rebind(irqptr, apic_irq_table[irq]->airq_cpu, 1, 4227 DEFERRED)) 4228 /* CPU is not up or interrupt is disabled. Fall back to 0 */ 4229 rv = apic_rebind(irqptr, 0, 1, DEFERRED); 4230 4231 return (rv); 4232 } 4233 4234 /* 4235 * Bind interrupt corresponding to irq_ptr to bind_cpu. acquire_lock 4236 * if false (0) means lock is already held (e.g: in rebind_all). 
*/
/*
 * Additional notes on apic_rebind():
 *
 * Parameters:
 *	irq_ptr		interrupt to (re)program.
 *	bind_cpu	target CPU; may carry the IRQ_USER_BOUND bit, or be
 *			IRQ_UNBOUND for "no specific CPU".
 *	acquire_lock	non-zero: take and release apic_ioapic_lock here;
 *			zero: the caller already holds it.
 *	when		IMMEDIATE or DEFERRED.  A DEFERRED call is a no-op
 *			if apic_reprogram_info[] no longer marks the irq
 *			as valid.
 *
 * Returns:
 *	1 if the target CPU does not have APIC_CPU_INTR_ENABLE set;
 *	0 in every other case, including when the reprogramming was
 *	deferred by apic_check_stuck_interrupt() or skipped as stale.
 *
 * Interrupts are disabled (intr_clear) for the duration and restored on
 * every return path, except when apic_check_stuck_interrupt() reports
 * through restore_iflag that it has already restored them.
 */
static int
apic_rebind(apic_irq_t *irq_ptr, int bind_cpu, int acquire_lock, int when)
{
	int			intin_no;
	volatile int32_t	*ioapic;
	uchar_t			airq_temp_cpu;
	apic_cpus_info_t	*cpu_infop;
	int			iflag;
	int		which_irq = apic_vector_to_irq[irq_ptr->airq_vector];
	boolean_t		restore_iflag = B_TRUE;

	intin_no = irq_ptr->airq_intin_no;
	ioapic = apicioadr[irq_ptr->airq_ioapicindex];
	/* Remember the current (old) binding so its counters can be fixed up */
	airq_temp_cpu = irq_ptr->airq_temp_cpu;
	if (airq_temp_cpu != IRQ_UNINIT && airq_temp_cpu != IRQ_UNBOUND) {
		if (airq_temp_cpu & IRQ_USER_BOUND)
			/* Mask off high bit so it can be used as array index */
			airq_temp_cpu &= ~IRQ_USER_BOUND;

		ASSERT(airq_temp_cpu < apic_nproc);
	}

	iflag = intr_clear();

	if (acquire_lock)
		lock_set(&apic_ioapic_lock);

	/*
	 * Can't bind to a CPU that's not online:
	 *
	 * NOTE(review): this lookup is performed before the IRQ_UNBOUND
	 * test further down, so for bind_cpu == IRQ_UNBOUND the index is
	 * (IRQ_UNBOUND & ~IRQ_USER_BOUND) -- confirm that this is always a
	 * valid apic_cpus[] index.
	 */
	cpu_infop = &apic_cpus[bind_cpu & ~IRQ_USER_BOUND];
	if (!(cpu_infop->aci_status & APIC_CPU_INTR_ENABLE)) {

		if (acquire_lock)
			lock_clear(&apic_ioapic_lock);

		intr_restore(iflag);
		return (1);
	}

	/*
	 * If this is a deferred reprogramming attempt, ensure we have
	 * not been passed stale data:
	 */
	if ((when == DEFERRED) &&
	    (apic_reprogram_info[which_irq].valid == 0)) {
		/* stale info, so just return */
		if (acquire_lock)
			lock_clear(&apic_ioapic_lock);

		intr_restore(iflag);
		return (0);
	}

	/*
	 * If this interrupt has been delivered to a CPU and that CPU
	 * has not handled it yet, we cannot reprogram the IOAPIC now:
	 * (the check is skipped entirely for MSI/MSI-X interrupts)
	 */
	if (!APIC_IS_MSI_OR_MSIX_INDEX(irq_ptr->airq_mps_intr_index) &&
	    apic_check_stuck_interrupt(irq_ptr, airq_temp_cpu, bind_cpu,
	    ioapic, intin_no, which_irq, iflag, &restore_iflag) != 0) {

		if (acquire_lock)
			lock_clear(&apic_ioapic_lock);

		/* The callee may already have restored the interrupt flag */
		if (restore_iflag)
			intr_restore(iflag);
		return (0);
	}

	/*
	 * NOTE: We do not unmask the RDT here, as an interrupt MAY still
	 * come in before we have a chance to reprogram it below. The
	 * reprogramming below will simultaneously change and unmask the
	 * RDT entry.
	 */

	if ((uchar_t)bind_cpu == IRQ_UNBOUND) {
		/* Write the RDT entry -- no specific CPU binding */
		WRITE_IOAPIC_RDT_ENTRY_HIGH_DWORD(ioapic, intin_no, AV_TOALL);

		/* The old CPU no longer carries this interrupt */
		if (airq_temp_cpu != IRQ_UNINIT && airq_temp_cpu != IRQ_UNBOUND)
			apic_cpus[airq_temp_cpu].aci_temp_bound--;

		/* Write the vector, trigger, and polarity portion of the RDT */
		WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no,
		    AV_LDEST | AV_LOPRI | irq_ptr->airq_rdt_entry);
		if (acquire_lock)
			lock_clear(&apic_ioapic_lock);
		irq_ptr->airq_temp_cpu = IRQ_UNBOUND;
		intr_restore(iflag);
		return (0);
	}

	/* Account for the new binding on the target CPU */
	if (bind_cpu & IRQ_USER_BOUND) {
		cpu_infop->aci_bound++;
	} else {
		cpu_infop->aci_temp_bound++;
	}
	ASSERT((bind_cpu & ~IRQ_USER_BOUND) < apic_nproc);
	if (!APIC_IS_MSI_OR_MSIX_INDEX(irq_ptr->airq_mps_intr_index)) {
		/* Write the RDT entry -- bind to a specific CPU: */
		WRITE_IOAPIC_RDT_ENTRY_HIGH_DWORD(ioapic, intin_no,
		    cpu_infop->aci_local_id << APIC_ID_BIT_OFFSET);
	}
	/*
	 * Drop the old binding's count.
	 * NOTE(review): aci_temp_bound is decremented even when the old
	 * binding carried IRQ_USER_BOUND (the bit was masked off above);
	 * confirm that aci_bound is adjusted elsewhere for that case.
	 */
	if ((airq_temp_cpu != IRQ_UNBOUND) && (airq_temp_cpu != IRQ_UNINIT)) {
		apic_cpus[airq_temp_cpu].aci_temp_bound--;
	}
	if (!APIC_IS_MSI_OR_MSIX_INDEX(irq_ptr->airq_mps_intr_index)) {
		/* Write the vector, trigger, and polarity portion of the RDT */
		WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no,
		    AV_PDEST | AV_FIXED | irq_ptr->airq_rdt_entry);
	} else {
		/*
		 * MSI/MSI-X: no IOAPIC RDT is written; the device is
		 * reprogrammed through the apic_pci_msi_* helpers instead.
		 * NOTE(review): airq_ioapicindex, airq_intin_no and
		 * airq_origirq appear to be reused here to describe a group
		 * of MSI vectors (first irq / count / this irq) -- confirm
		 * against the code that fills them in.
		 */
		int type = (irq_ptr->airq_mps_intr_index == MSI_INDEX) ?
		    DDI_INTR_TYPE_MSI : DDI_INTR_TYPE_MSIX;
		(void) apic_pci_msi_disable_mode(irq_ptr->airq_dip, type,
		    irq_ptr->airq_ioapicindex);
		if (irq_ptr->airq_ioapicindex == irq_ptr->airq_origirq) {
			/* first one */
			DDI_INTR_IMPLDBG((CE_CONT, "apic_rebind: call "
			    "apic_pci_msi_enable_vector\n"));
			if (apic_pci_msi_enable_vector(irq_ptr->airq_dip, type,
			    which_irq, irq_ptr->airq_vector,
			    irq_ptr->airq_intin_no,
			    cpu_infop->aci_local_id) != PSM_SUCCESS) {
				cmn_err(CE_WARN, "pcplusmp: "
				    "apic_pci_msi_enable_vector "
				    "returned PSM_FAILURE");
			}
		}
		if ((irq_ptr->airq_ioapicindex + irq_ptr->airq_intin_no - 1) ==
		    irq_ptr->airq_origirq) { /* last one */
			DDI_INTR_IMPLDBG((CE_CONT, "apic_rebind: call "
			    "pci_msi_enable_mode\n"));
			if (apic_pci_msi_enable_mode(irq_ptr->airq_dip,
			    type, which_irq) != PSM_SUCCESS) {
				DDI_INTR_IMPLDBG((CE_CONT, "pcplusmp: "
				    "pci_msi_enable failed\n"));
				(void) apic_pci_msi_unconfigure(
				    irq_ptr->airq_dip, type, which_irq);
			}
		}
	}
	if (acquire_lock)
		lock_clear(&apic_ioapic_lock);
	irq_ptr->airq_temp_cpu = (uchar_t)bind_cpu;
	/* The target CPU becomes eligible for redistribution again */
	apic_redist_cpu_skip &= ~(1 << (bind_cpu & ~IRQ_USER_BOUND));
	intr_restore(iflag);
	return (0);
}

/*
 * Checks to see if the IOAPIC interrupt entry specified has its Remote IRR
 * bit set. Sets up a timeout to perform the reprogramming at a later time
 * if it cannot wait for the Remote IRR bit to clear (or if waiting did not
 * result in the bit's clearing).
 *
 * This function will mask the RDT entry if the Remote IRR bit is set.
 *
 * Returns non-zero if the caller should defer IOAPIC reprogramming.
*/
/*
 * Additional notes on apic_check_stuck_interrupt():
 *
 * Parameters:
 *	irq_ptr		interrupt being reprogrammed (used for messages).
 *	old_bind_cpu	CPU the interrupt is currently bound to, or
 *			IRQ_UNBOUND / IRQ_UNINIT.
 *	new_bind_cpu	CPU recorded in apic_reprogram_info[] if the
 *			reprogramming has to be deferred.
 *	ioapic, intin_no	IOAPIC register base and input pin of the RDT
 *			entry to examine.
 *	which_irq	index into apic_reprogram_info[].
 *	iflag		interrupt flag saved by the caller's intr_clear().
 *	intr_restorep	set to B_FALSE when this function has already
 *			called intr_restore(iflag) itself.
 *
 * As written, the entry is masked up front whenever it is level-triggered
 * and not already masked; all later checks only act on level-triggered
 * entries.
 *
 * Returns 1 only on the deferral path (a timeout has been scheduled and
 * interrupts were restored); returns 0 otherwise.
 */
static int
apic_check_stuck_interrupt(apic_irq_t *irq_ptr, int old_bind_cpu,
    int new_bind_cpu, volatile int32_t *ioapic, int intin_no, int which_irq,
    int iflag, boolean_t *intr_restorep)
{
	int32_t rdt_entry;
	int waited;

	/* Mask the RDT entry, but only if it's a level-triggered interrupt */
	rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no);
	if ((rdt_entry & (AV_LEVEL|AV_MASK)) == AV_LEVEL) {

		/* Mask it */
		WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no,
		    AV_MASK | rdt_entry);
	}

	/*
	 * Wait for the delivery pending bit to clear.
	 */
	if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no) &
	    (AV_LEVEL|AV_PENDING)) == (AV_LEVEL|AV_PENDING)) {

		/*
		 * If we're still waiting on the delivery of this interrupt,
		 * continue to wait here until it is delivered (this should be
		 * a very small amount of time, but include a timeout just in
		 * case).
		 */
		for (waited = 0; waited < apic_max_usecs_clear_pending;
		    waited += APIC_USECS_PER_WAIT_INTERVAL) {
			if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no)
			    & AV_PENDING) == 0) {
				break;
			}
			drv_usecwait(APIC_USECS_PER_WAIT_INTERVAL);
		}

		/* Still pending after the bounded wait: warn and carry on */
		if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no) &
		    AV_PENDING) != 0) {
			cmn_err(CE_WARN, "!IOAPIC %d intin %d: Could not "
			    "deliver interrupt to local APIC within "
			    "%d usecs.", irq_ptr->airq_ioapicindex,
			    irq_ptr->airq_intin_no,
			    apic_max_usecs_clear_pending);
		}
	}

	/*
	 * If the remote IRR bit is set, then the interrupt has been sent
	 * to a CPU for processing. We have no choice but to wait for
	 * that CPU to process the interrupt, at which point the remote IRR
	 * bit will be cleared.
	 */
	if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no) &
	    (AV_LEVEL|AV_REMOTE_IRR)) == (AV_LEVEL|AV_REMOTE_IRR)) {

		/*
		 * If the CPU that this RDT is bound to is NOT the current
		 * CPU, wait until that CPU handles the interrupt and ACKs
		 * it. If this interrupt is not bound to any CPU (that is,
		 * if it's bound to the logical destination of "anyone"), it
		 * may have been delivered to the current CPU so handle that
		 * case by deferring the reprogramming (below).
		 */
		kpreempt_disable();
		if ((old_bind_cpu != IRQ_UNBOUND) &&
		    (old_bind_cpu != IRQ_UNINIT) &&
		    (old_bind_cpu != psm_get_cpu_id())) {
			for (waited = 0; waited < apic_max_usecs_clear_pending;
			    waited += APIC_USECS_PER_WAIT_INTERVAL) {
				if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic,
				    intin_no) & AV_REMOTE_IRR) == 0) {

					/* Clear the reprogramming state: */
					lock_set(&apic_ioapic_reprogram_lock);

					apic_reprogram_info[which_irq].valid
					    = 0;
					apic_reprogram_info[which_irq].bindcpu
					    = 0;
					apic_reprogram_info[which_irq].timeouts
					    = 0;

					lock_clear(&apic_ioapic_reprogram_lock);

					/* Remote IRR has cleared! */
					kpreempt_enable();
					return (0);
				}
				drv_usecwait(APIC_USECS_PER_WAIT_INTERVAL);
			}
		}
		kpreempt_enable();

		/*
		 * If we waited and the Remote IRR bit is still not cleared,
		 * AND if we've invoked the timeout APIC_REPROGRAM_MAX_TIMEOUTS
		 * times for this interrupt, try the last-ditch workarounds:
		 *
		 * NOTE(review): .timeouts is read here without holding
		 * apic_ioapic_reprogram_lock, unlike the updates below.
		 */
		if (apic_reprogram_info[which_irq].timeouts >=
		    APIC_REPROGRAM_MAX_TIMEOUTS) {

			if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no)
			    & AV_REMOTE_IRR) != 0) {
				/*
				 * Trying to clear the bit through normal
				 * channels has failed. So as a last-ditch
				 * effort, try to set the trigger mode to
				 * edge, then to level. This has been
				 * observed to work on many systems.
				 */
				WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic,
				    intin_no,
				    READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic,
				    intin_no) & ~AV_LEVEL);

				WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic,
				    intin_no,
				    READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic,
				    intin_no) | AV_LEVEL);

				/*
				 * If the bit's STILL set, declare total and
				 * utter failure
				 */
				if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic,
				    intin_no) & AV_REMOTE_IRR) != 0) {
					cmn_err(CE_WARN, "!IOAPIC %d intin %d: "
					    "Remote IRR failed to reset "
					    "within %d usecs. Interrupts to "
					    "this pin may cease to function.",
					    irq_ptr->airq_ioapicindex,
					    irq_ptr->airq_intin_no,
					    apic_max_usecs_clear_pending);
				}
			}
			/*
			 * Clear the reprogramming state.  Falling out of this
			 * branch returns 0, so the caller goes ahead and
			 * reprograms the entry now.
			 */
			lock_set(&apic_ioapic_reprogram_lock);

			apic_reprogram_info[which_irq].valid = 0;
			apic_reprogram_info[which_irq].bindcpu = 0;
			apic_reprogram_info[which_irq].timeouts = 0;

			lock_clear(&apic_ioapic_reprogram_lock);
		} else {
#ifdef DEBUG
			cmn_err(CE_WARN, "Deferring reprogramming of irq %d",
			    which_irq);
#endif	/* DEBUG */
			/*
			 * If waiting for the Remote IRR bit (above) didn't
			 * allow it to clear, defer the reprogramming:
			 */
			lock_set(&apic_ioapic_reprogram_lock);

			apic_reprogram_info[which_irq].valid = 1;
			apic_reprogram_info[which_irq].bindcpu = new_bind_cpu;
			apic_reprogram_info[which_irq].timeouts++;

			lock_clear(&apic_ioapic_reprogram_lock);

			/*
			 * Interrupts are restored here, before timeout() is
			 * called; tell the caller not to restore them again.
			 */
			*intr_restorep = B_FALSE;
			intr_restore(iflag);

			/* Fire up a timeout to handle this later */
			(void) timeout(apic_reprogram_timeout_handler,
			    (void *) 0,
			    drv_usectohz(APIC_REPROGRAM_TIMEOUT_DELAY));

			/* Inform caller to defer IOAPIC programming: */
			return (1);
		}
	}
	return (0);
}

/*
 * Timeout handler that performs the APIC reprogramming.
 *
 * Walks apic_reprogram_info[] and retries, via
 * apic_setup_io_intr_deferred(), every irq whose entry is still marked
 * valid.  The whole walk is serialized by apic_reprogram_timeout_mutex.
 * arg is unused.
 */
/*ARGSUSED*/
static void
apic_reprogram_timeout_handler(void *arg)
{
	/*LINTED: set but not used in function*/
	int i, result;

	/* Serialize access to this function */
	mutex_enter(&apic_reprogram_timeout_mutex);

	/*
	 * For each entry in the reprogramming state that's valid,
	 * try the reprogramming again:
	 */
	for (i = 0; i < APIC_MAX_VECTOR; i++) {
		if (apic_reprogram_info[i].valid == 0)
			continue;
		/*
		 * Though we can't really do anything about errors
		 * at this point, keep track of them for reporting.
		 * Note that it is very possible for
		 * apic_setup_io_intr_deferred to re-register this very
		 * timeout if the Remote IRR bit has not yet cleared.
		 */
		result = apic_setup_io_intr_deferred(apic_irq_table[i], i);

#ifdef DEBUG
		if (result)
			cmn_err(CE_WARN, "apic_reprogram_timeout: "
			    "apic_setup_io_intr returned nonzero for "
			    "irq=%d!", i);
#endif	/* DEBUG */
	}

	mutex_exit(&apic_reprogram_timeout_mutex);
}


/*
 * Called to migrate all interrupts at an irq to another cpu. safe
 * if true means we are not being called from an interrupt
 * context and hence it is safe to do a lock_set.
If false 4621 * do only a lock_try and return failure ( non 0 ) if we cannot get it 4622 */ 4623 int 4624 apic_rebind_all(apic_irq_t *irq_ptr, int bind_cpu, int safe) 4625 { 4626 apic_irq_t *irqptr = irq_ptr; 4627 int retval = 0; 4628 int iflag; 4629 4630 iflag = intr_clear(); 4631 if (!safe) { 4632 if (lock_try(&apic_ioapic_lock) == 0) { 4633 intr_restore(iflag); 4634 return (1); 4635 } 4636 } else 4637 lock_set(&apic_ioapic_lock); 4638 4639 while (irqptr) { 4640 if (irqptr->airq_temp_cpu != IRQ_UNINIT) 4641 retval |= apic_rebind(irqptr, bind_cpu, 0, IMMEDIATE); 4642 irqptr = irqptr->airq_next; 4643 } 4644 lock_clear(&apic_ioapic_lock); 4645 intr_restore(iflag); 4646 return (retval); 4647 } 4648 4649 /* 4650 * apic_intr_redistribute does all the messy computations for identifying 4651 * which interrupt to move to which CPU. Currently we do just one interrupt 4652 * at a time. This reduces the time we spent doing all this within clock 4653 * interrupt. When it is done in idle, we could do more than 1. 4654 * First we find the most busy and the most free CPU (time in ISR only) 4655 * skipping those CPUs that has been identified as being ineligible (cpu_skip) 4656 * Then we look for IRQs which are closest to the difference between the 4657 * most busy CPU and the average ISR load. We try to find one whose load 4658 * is less than difference.If none exists, then we chose one larger than the 4659 * difference, provided it does not make the most idle CPU worse than the 4660 * most busy one. In the end, we clear all the busy fields for CPUs. For 4661 * IRQs, they are cleared as they are scanned. 
4662 */ 4663 static void 4664 apic_intr_redistribute() 4665 { 4666 int busiest_cpu, most_free_cpu; 4667 int cpu_free, cpu_busy, max_busy, min_busy; 4668 int min_free, diff; 4669 int average_busy, cpus_online; 4670 int i, busy; 4671 apic_cpus_info_t *cpu_infop; 4672 apic_irq_t *min_busy_irq = NULL; 4673 apic_irq_t *max_busy_irq = NULL; 4674 4675 busiest_cpu = most_free_cpu = -1; 4676 cpu_free = cpu_busy = max_busy = average_busy = 0; 4677 min_free = apic_sample_factor_redistribution; 4678 cpus_online = 0; 4679 /* 4680 * Below we will check for CPU_INTR_ENABLE, bound, temp_bound, temp_cpu 4681 * without ioapic_lock. That is OK as we are just doing statistical 4682 * sampling anyway and any inaccuracy now will get corrected next time 4683 * The call to rebind which actually changes things will make sure 4684 * we are consistent. 4685 */ 4686 for (i = 0; i < apic_nproc; i++) { 4687 if (!(apic_redist_cpu_skip & (1 << i)) && 4688 (apic_cpus[i].aci_status & APIC_CPU_INTR_ENABLE)) { 4689 4690 cpu_infop = &apic_cpus[i]; 4691 /* 4692 * If no unbound interrupts or only 1 total on this 4693 * CPU, skip 4694 */ 4695 if (!cpu_infop->aci_temp_bound || 4696 (cpu_infop->aci_bound + cpu_infop->aci_temp_bound) 4697 == 1) { 4698 apic_redist_cpu_skip |= 1 << i; 4699 continue; 4700 } 4701 4702 busy = cpu_infop->aci_busy; 4703 average_busy += busy; 4704 cpus_online++; 4705 if (max_busy < busy) { 4706 max_busy = busy; 4707 busiest_cpu = i; 4708 } 4709 if (min_free > busy) { 4710 min_free = busy; 4711 most_free_cpu = i; 4712 } 4713 if (busy > apic_int_busy_mark) { 4714 cpu_busy |= 1 << i; 4715 } else { 4716 if (busy < apic_int_free_mark) 4717 cpu_free |= 1 << i; 4718 } 4719 } 4720 } 4721 if ((cpu_busy && cpu_free) || 4722 (max_busy >= (min_free + apic_diff_for_redistribution))) { 4723 4724 apic_num_imbalance++; 4725 #ifdef DEBUG 4726 if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) { 4727 prom_printf( 4728 "redistribute busy=%x free=%x max=%x min=%x", 4729 cpu_busy, cpu_free, max_busy, 
min_free); 4730 } 4731 #endif /* DEBUG */ 4732 4733 4734 average_busy /= cpus_online; 4735 4736 diff = max_busy - average_busy; 4737 min_busy = max_busy; /* start with the max possible value */ 4738 max_busy = 0; 4739 min_busy_irq = max_busy_irq = NULL; 4740 i = apic_min_device_irq; 4741 for (; i < apic_max_device_irq; i++) { 4742 apic_irq_t *irq_ptr; 4743 /* Change to linked list per CPU ? */ 4744 if ((irq_ptr = apic_irq_table[i]) == NULL) 4745 continue; 4746 /* Check for irq_busy & decide which one to move */ 4747 /* Also zero them for next round */ 4748 if ((irq_ptr->airq_temp_cpu == busiest_cpu) && 4749 irq_ptr->airq_busy) { 4750 if (irq_ptr->airq_busy < diff) { 4751 /* 4752 * Check for least busy CPU, 4753 * best fit or what ? 4754 */ 4755 if (max_busy < irq_ptr->airq_busy) { 4756 /* 4757 * Most busy within the 4758 * required differential 4759 */ 4760 max_busy = irq_ptr->airq_busy; 4761 max_busy_irq = irq_ptr; 4762 } 4763 } else { 4764 if (min_busy > irq_ptr->airq_busy) { 4765 /* 4766 * least busy, but more than 4767 * the reqd diff 4768 */ 4769 if (min_busy < 4770 (diff + average_busy - 4771 min_free)) { 4772 /* 4773 * Making sure new cpu 4774 * will not end up 4775 * worse 4776 */ 4777 min_busy = 4778 irq_ptr->airq_busy; 4779 4780 min_busy_irq = irq_ptr; 4781 } 4782 } 4783 } 4784 } 4785 irq_ptr->airq_busy = 0; 4786 } 4787 4788 if (max_busy_irq != NULL) { 4789 #ifdef DEBUG 4790 if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) { 4791 prom_printf("rebinding %x to %x", 4792 max_busy_irq->airq_vector, most_free_cpu); 4793 } 4794 #endif /* DEBUG */ 4795 if (apic_rebind_all(max_busy_irq, most_free_cpu, 0) 4796 == 0) 4797 /* Make change permenant */ 4798 max_busy_irq->airq_cpu = (uchar_t)most_free_cpu; 4799 } else if (min_busy_irq != NULL) { 4800 #ifdef DEBUG 4801 if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) { 4802 prom_printf("rebinding %x to %x", 4803 min_busy_irq->airq_vector, most_free_cpu); 4804 } 4805 #endif /* DEBUG */ 4806 4807 if (apic_rebind_all(min_busy_irq, 
most_free_cpu, 0) == 4808 0) 4809 /* Make change permenant */ 4810 min_busy_irq->airq_cpu = (uchar_t)most_free_cpu; 4811 } else { 4812 if (cpu_busy != (1 << busiest_cpu)) { 4813 apic_redist_cpu_skip |= 1 << busiest_cpu; 4814 /* 4815 * We leave cpu_skip set so that next time we 4816 * can choose another cpu 4817 */ 4818 } 4819 } 4820 apic_num_rebind++; 4821 } else { 4822 /* 4823 * found nothing. Could be that we skipped over valid CPUs 4824 * or we have balanced everything. If we had a variable 4825 * ticks_for_redistribution, it could be increased here. 4826 * apic_int_busy, int_free etc would also need to be 4827 * changed. 4828 */ 4829 if (apic_redist_cpu_skip) 4830 apic_redist_cpu_skip = 0; 4831 } 4832 for (i = 0; i < apic_nproc; i++) { 4833 apic_cpus[i].aci_busy = 0; 4834 } 4835 } 4836 4837 static void 4838 apic_cleanup_busy() 4839 { 4840 int i; 4841 apic_irq_t *irq_ptr; 4842 4843 for (i = 0; i < apic_nproc; i++) { 4844 apic_cpus[i].aci_busy = 0; 4845 } 4846 4847 for (i = apic_min_device_irq; i < apic_max_device_irq; i++) { 4848 if ((irq_ptr = apic_irq_table[i]) != NULL) 4849 irq_ptr->airq_busy = 0; 4850 } 4851 apic_skipped_redistribute = 0; 4852 } 4853 4854 4855 /* 4856 * This function will reprogram the timer. 4857 * 4858 * When in oneshot mode the argument is the absolute time in future to 4859 * generate the interrupt at. 4860 * 4861 * When in periodic mode, the argument is the interval at which the 4862 * interrupts should be generated. There is no need to support the periodic 4863 * mode timer change at this time. 4864 */ 4865 static void 4866 apic_timer_reprogram(hrtime_t time) 4867 { 4868 hrtime_t now; 4869 uint_t ticks; 4870 int64_t delta; 4871 4872 /* 4873 * We should be called from high PIL context (CBE_HIGH_PIL), 4874 * so kpreempt is disabled. 
4875 */ 4876 4877 if (!apic_oneshot) { 4878 /* time is the interval for periodic mode */ 4879 ticks = APIC_NSECS_TO_TICKS(time); 4880 } else { 4881 /* one shot mode */ 4882 4883 now = gethrtime(); 4884 delta = time - now; 4885 4886 if (delta <= 0) { 4887 /* 4888 * requested to generate an interrupt in the past 4889 * generate an interrupt as soon as possible 4890 */ 4891 ticks = apic_min_timer_ticks; 4892 } else if (delta > apic_nsec_max) { 4893 /* 4894 * requested to generate an interrupt at a time 4895 * further than what we are capable of. Set to max 4896 * the hardware can handle 4897 */ 4898 4899 ticks = APIC_MAXVAL; 4900 #ifdef DEBUG 4901 cmn_err(CE_CONT, "apic_timer_reprogram, request at" 4902 " %lld too far in future, current time" 4903 " %lld \n", time, now); 4904 #endif 4905 } else 4906 ticks = APIC_NSECS_TO_TICKS(delta); 4907 } 4908 4909 if (ticks < apic_min_timer_ticks) 4910 ticks = apic_min_timer_ticks; 4911 4912 apicadr[APIC_INIT_COUNT] = ticks; 4913 4914 } 4915 4916 /* 4917 * This function will enable timer interrupts. 4918 */ 4919 static void 4920 apic_timer_enable(void) 4921 { 4922 /* 4923 * We should be Called from high PIL context (CBE_HIGH_PIL), 4924 * so kpreempt is disabled. 4925 */ 4926 4927 if (!apic_oneshot) 4928 apicadr[APIC_LOCAL_TIMER] = 4929 (apic_clkvect + APIC_BASE_VECT) | AV_TIME; 4930 else { 4931 /* one shot */ 4932 apicadr[APIC_LOCAL_TIMER] = (apic_clkvect + APIC_BASE_VECT); 4933 } 4934 } 4935 4936 /* 4937 * This function will disable timer interrupts. 4938 */ 4939 static void 4940 apic_timer_disable(void) 4941 { 4942 /* 4943 * We should be Called from high PIL context (CBE_HIGH_PIL), 4944 * so kpreempt is disabled. 
4945 */ 4946 4947 apicadr[APIC_LOCAL_TIMER] = (apic_clkvect + APIC_BASE_VECT) | AV_MASK; 4948 } 4949 4950 4951 cyclic_id_t apic_cyclic_id; 4952 4953 /* 4954 * If this module needs to be a consumer of cyclic subsystem, they 4955 * can be added here, since at this time kernel cyclic subsystem is initialized 4956 * argument is not currently used, and is reserved for future. 4957 */ 4958 static void 4959 apic_post_cyclic_setup(void *arg) 4960 { 4961 _NOTE(ARGUNUSED(arg)) 4962 cyc_handler_t hdlr; 4963 cyc_time_t when; 4964 4965 /* cpu_lock is held */ 4966 4967 /* set up cyclics for intr redistribution */ 4968 4969 /* 4970 * In peridoc mode intr redistribution processing is done in 4971 * apic_intr_enter during clk intr processing 4972 */ 4973 if (!apic_oneshot) 4974 return; 4975 4976 hdlr.cyh_level = CY_LOW_LEVEL; 4977 hdlr.cyh_func = (cyc_func_t)apic_redistribute_compute; 4978 hdlr.cyh_arg = NULL; 4979 4980 when.cyt_when = 0; 4981 when.cyt_interval = apic_redistribute_sample_interval; 4982 apic_cyclic_id = cyclic_add(&hdlr, &when); 4983 4984 4985 } 4986 4987 static void 4988 apic_redistribute_compute(void) 4989 { 4990 int i, j, max_busy; 4991 4992 if (apic_enable_dynamic_migration) { 4993 if (++apic_nticks == apic_sample_factor_redistribution) { 4994 /* 4995 * Time to call apic_intr_redistribute(). 4996 * reset apic_nticks. 
This will cause max_busy 4997 * to be calculated below and if it is more than 4998 * apic_int_busy, we will do the whole thing 4999 */ 5000 apic_nticks = 0; 5001 } 5002 max_busy = 0; 5003 for (i = 0; i < apic_nproc; i++) { 5004 5005 /* 5006 * Check if curipl is non zero & if ISR is in 5007 * progress 5008 */ 5009 if (((j = apic_cpus[i].aci_curipl) != 0) && 5010 (apic_cpus[i].aci_ISR_in_progress & (1 << j))) { 5011 5012 int irq; 5013 apic_cpus[i].aci_busy++; 5014 irq = apic_cpus[i].aci_current[j]; 5015 apic_irq_table[irq]->airq_busy++; 5016 } 5017 5018 if (!apic_nticks && 5019 (apic_cpus[i].aci_busy > max_busy)) 5020 max_busy = apic_cpus[i].aci_busy; 5021 } 5022 if (!apic_nticks) { 5023 if (max_busy > apic_int_busy_mark) { 5024 /* 5025 * We could make the following check be 5026 * skipped > 1 in which case, we get a 5027 * redistribution at half the busy mark (due to 5028 * double interval). Need to be able to collect 5029 * more empirical data to decide if that is a 5030 * good strategy. Punt for now. 
5031 */ 5032 if (apic_skipped_redistribute) 5033 apic_cleanup_busy(); 5034 else 5035 apic_intr_redistribute(); 5036 } else 5037 apic_skipped_redistribute++; 5038 } 5039 } 5040 } 5041 5042 5043 static int 5044 apic_acpi_translate_pci_irq(dev_info_t *dip, int busid, int devid, 5045 int ipin, int *pci_irqp, iflag_t *intr_flagp) 5046 { 5047 5048 int status; 5049 acpi_psm_lnk_t acpipsmlnk; 5050 5051 if ((status = acpi_get_irq_cache_ent(busid, devid, ipin, pci_irqp, 5052 intr_flagp)) == ACPI_PSM_SUCCESS) { 5053 APIC_VERBOSE_IRQ((CE_CONT, "!pcplusmp: Found irqno %d " 5054 "from cache for device %s, instance #%d\n", *pci_irqp, 5055 ddi_get_name(dip), ddi_get_instance(dip))); 5056 return (status); 5057 } 5058 5059 bzero(&acpipsmlnk, sizeof (acpi_psm_lnk_t)); 5060 5061 if ((status = acpi_translate_pci_irq(dip, ipin, pci_irqp, intr_flagp, 5062 &acpipsmlnk)) == ACPI_PSM_FAILURE) { 5063 APIC_VERBOSE_IRQ((CE_WARN, "pcplusmp: " 5064 " acpi_translate_pci_irq failed for device %s, instance" 5065 " #%d", ddi_get_name(dip), ddi_get_instance(dip))); 5066 return (status); 5067 } 5068 5069 if (status == ACPI_PSM_PARTIAL && acpipsmlnk.lnkobj != NULL) { 5070 status = apic_acpi_irq_configure(&acpipsmlnk, dip, pci_irqp, 5071 intr_flagp); 5072 if (status != ACPI_PSM_SUCCESS) { 5073 status = acpi_get_current_irq_resource(&acpipsmlnk, 5074 pci_irqp, intr_flagp); 5075 } 5076 } 5077 5078 if (status == ACPI_PSM_SUCCESS) { 5079 acpi_new_irq_cache_ent(busid, devid, ipin, *pci_irqp, 5080 intr_flagp, &acpipsmlnk); 5081 5082 APIC_VERBOSE_IRQ((CE_CONT, "pcplusmp: [ACPI] " 5083 "new irq %d for device %s, instance #%d\n", 5084 *pci_irqp, ddi_get_name(dip), ddi_get_instance(dip))); 5085 } 5086 5087 return (status); 5088 } 5089 5090 /* 5091 * Configures the irq for the interrupt link device identified by 5092 * acpipsmlnkp. 5093 * 5094 * Gets the current and the list of possible irq settings for the 5095 * device. 
If apic_unconditional_srs is not set, and the current
 * resource setting is in the list of possible irq settings,
 * current irq resource setting is passed to the caller.
 *
 * Otherwise, picks an irq number from the list of possible irq
 * settings, and sets the irq of the device to this value.
 * If apic_prefer_crs is set, among a set of irq numbers in the list that
 * have the least number of devices sharing the interrupt, we pick current
 * irq resource setting if it is a member of this set.
 *
 * Passes the irq number in the value pointed to by pci_irqp, and
 * polarity and sensitivity in the structure pointed to by dipintr_flagp
 * to the caller.
 *
 * Note that if setting the irq resource failed, but successfully obtained
 * the current irq resource settings, passes the current irq resources
 * and considers it a success.
 *
 * Returns:
 * ACPI_PSM_SUCCESS on success.
 *
 * ACPI_PSM_FAILURE if an error occurred during the configuration or
 * if a suitable irq was not found for this device, or if setting the
 * irq resource and obtaining the current resource fails.
*
 * Selection order used by the code below:
 *  1. An irq from the possible list that has no apic_irq_table[] entry,
 *     or whose entry already belongs to dip, is taken as chosen_irq.
 *  2. Otherwise the compatible, non-edge irq with the smallest airq_share
 *     is remembered as share_irq.
 * irq 0, and irqs below 16 marked in apic_reserved_irqlist[], are never
 * considered.
 */
static int
apic_acpi_irq_configure(acpi_psm_lnk_t *acpipsmlnkp, dev_info_t *dip,
    int *pci_irqp, iflag_t *dipintr_flagp)
{

	int i, min_share, foundnow, done = 0;
	int32_t irq;
	int32_t share_irq = -1;
	int32_t chosen_irq = -1;
	int cur_irq = -1;
	acpi_irqlist_t *irqlistp;
	acpi_irqlist_t *irqlistent;

	if ((acpi_get_possible_irq_resources(acpipsmlnkp, &irqlistp))
	    == ACPI_PSM_FAILURE) {
		APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: Unable to determine "
		    "or assign IRQ for device %s, instance #%d: The system was "
		    "unable to get the list of potential IRQs from ACPI.",
		    ddi_get_name(dip), ddi_get_instance(dip)));

		return (ACPI_PSM_FAILURE);
	}

	if ((acpi_get_current_irq_resource(acpipsmlnkp, &cur_irq,
	    dipintr_flagp) == ACPI_PSM_SUCCESS) && (!apic_unconditional_srs) &&
	    (cur_irq > 0)) {
		/*
		 * If an IRQ is set in CRS and that IRQ exists in the set
		 * returned from _PRS, return that IRQ, otherwise print
		 * a warning
		 */

		if (acpi_irqlist_find_irq(irqlistp, cur_irq, NULL)
		    == ACPI_PSM_SUCCESS) {

			acpi_free_irqlist(irqlistp);
			ASSERT(pci_irqp != NULL);
			*pci_irqp = cur_irq;
			return (ACPI_PSM_SUCCESS);
		}

		APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: Could not find the "
		    "current irq %d for device %s, instance #%d in ACPI's "
		    "list of possible irqs for this device. Picking one from "
		    " the latter list.", cur_irq, ddi_get_name(dip),
		    ddi_get_instance(dip)));
	}

	irqlistent = irqlistp;
	/* Any real share count below this initial value replaces it */
	min_share = 255;

	/* Outer loop: one irq list entry at a time; inner loop: its irqs */
	while (irqlistent != NULL) {
		irqlistent->intr_flags.bustype = BUS_PCI;

		for (foundnow = 0, i = 0; i < irqlistent->num_irqs; i++) {

			irq = irqlistent->irqs[i];

			if ((irq < 16) && (apic_reserved_irqlist[irq]))
				continue;

			if (irq == 0) {
				/* invalid irq number */
				continue;
			}

			/*
			 * NOTE(review): irq indexes apic_irq_table[] without
			 * an upper-bound check -- confirm the ACPI list
			 * cannot contain values beyond the table.
			 */
			if ((apic_irq_table[irq] == NULL) ||
			    (apic_irq_table[irq]->airq_dip == dip)) {
				chosen_irq = irq;
				foundnow = 1;
				/*
				 * If we do not prefer current irq from crs
				 * or if we do and this irq is the same as
				 * current irq from crs, this is the one
				 * to pick.
				 */
				if (!(apic_prefer_crs) || (irq == cur_irq)) {
					done = 1;
					break;
				}
				continue;
			}

			/* An irq already in use is not shared as edge */
			if (irqlistent->intr_flags.intr_el == INTR_EL_EDGE)
				continue;

			if (!acpi_intr_compatible(irqlistent->intr_flags,
			    apic_irq_table[irq]->airq_iflag))
				continue;

			/*
			 * Track the least-shared irq; on a tie the current
			 * CRS irq wins when apic_prefer_crs is set.
			 */
			if ((apic_irq_table[irq]->airq_share < min_share) ||
			    ((apic_irq_table[irq]->airq_share == min_share) &&
			    (cur_irq == irq) && (apic_prefer_crs))) {
				min_share = apic_irq_table[irq]->airq_share;
				share_irq = irq;
				foundnow = 1;
			}
		}

		/*
		 * If we found an IRQ in the inner loop this time, save the
		 * details from the irqlist for later use.
		 */
		if (foundnow && ((chosen_irq != -1) || (share_irq != -1))) {
			/*
			 * Copy the acpi_prs_private_t and flags from this
			 * irq list entry, since we found an irq from this
			 * entry.
			 *
			 * NOTE(review): the list this entry belongs to is
			 * released by acpi_free_irqlist() below -- confirm
			 * that acpi_prs_prv remains valid afterwards.
			 */
			acpipsmlnkp->acpi_prs_prv = irqlistent->acpi_prs_prv;
			*dipintr_flagp = irqlistent->intr_flags;
		}

		if (done)
			break;

		/* Go to the next irqlist entry */
		irqlistent = irqlistent->next;
	}


	acpi_free_irqlist(irqlistp);
	/* An unused (or own) irq is preferred over a shared one */
	if (chosen_irq != -1)
		irq = chosen_irq;
	else if (share_irq != -1)
		irq = share_irq;
	else {
		APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: Could not find a "
		    "suitable irq from the list of possible irqs for device "
		    "%s, instance #%d in ACPI's list of possible irqs",
		    ddi_get_name(dip), ddi_get_instance(dip)));
		return (ACPI_PSM_FAILURE);
	}

	APIC_VERBOSE_IRQ((CE_CONT, "!pcplusmp: Setting irq %d for device %s "
	    "instance #%d\n", irq, ddi_get_name(dip), ddi_get_instance(dip)));

	if ((acpi_set_irq_resource(acpipsmlnkp, irq)) == ACPI_PSM_SUCCESS) {
		/*
		 * setting irq was successful, check to make sure CRS
		 * reflects that. If CRS does not agree with what we
		 * set, return the irq that was set.
		 */

		if (acpi_get_current_irq_resource(acpipsmlnkp, &cur_irq,
		    dipintr_flagp) == ACPI_PSM_SUCCESS) {

			if (cur_irq != irq)
				APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: "
				    "IRQ resource set (irqno %d) for device %s "
				    "instance #%d, differs from current "
				    "setting irqno %d",
				    irq, ddi_get_name(dip),
				    ddi_get_instance(dip), cur_irq));
		}

		/*
		 * return the irq that was set, and not what CRS reports,
		 * since CRS has been seen to be bogus on some systems
		 */
		cur_irq = irq;
	} else {
		APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: set resource irq %d "
		    "failed for device %s instance #%d",
		    irq, ddi_get_name(dip), ddi_get_instance(dip)));

		/*
		 * cur_irq is still -1 only if it was never filled in by
		 * the CRS query at the top of this function.
		 */
		if (cur_irq == -1)
			return (ACPI_PSM_FAILURE);
	}

	ASSERT(pci_irqp != NULL);
	*pci_irqp = cur_irq;
	return (ACPI_PSM_SUCCESS);
}