/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * PSMI 1.1 extensions are supported only in 2.6 and later versions.
 * PSMI 1.2 extensions are supported only in 2.7 and later versions.
 * PSMI 1.3 and 1.4 extensions are supported in Solaris 10.
 * PSMI 1.5 extensions are supported in Solaris Nevada.
 */
#define	PSMI_1_5

#include <sys/processor.h>
#include <sys/time.h>
#include <sys/psm.h>
#include <sys/smp_impldefs.h>
#include <sys/cram.h>
#include <sys/acpi/acpi.h>
#include <sys/acpica.h>
#include <sys/psm_common.h>
#include "apic.h"
#include <sys/pit.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ddi_impldefs.h>
#include <sys/pci.h>
#include <sys/promif.h>
#include <sys/x86_archext.h>
#include <sys/cpc_impl.h>
#include <sys/uadmin.h>
#include <sys/panic.h>
#include <sys/debug.h>
#include <sys/archsystm.h>
#include <sys/trap.h>
#include <sys/machsystm.h>
#include <sys/cpuvar.h>
#include <sys/rm_platter.h>
#include <sys/privregs.h>
#include <sys/cyclic.h>
#include <sys/note.h>
#include <sys/pci_intr_lib.h>
#include <sys/sunndi.h>

struct ioapic_reprogram_data;

/*
 * Local Function Prototypes
 */
static void apic_init_intr();
static void apic_ret();
static int apic_handle_defconf();
static int apic_parse_mpct(caddr_t mpct, int bypass);
static struct apic_mpfps_hdr *apic_find_fps_sig(caddr_t fptr, int size);
static int apic_checksum(caddr_t bptr, int len);
static int get_apic_cmd1();
static int get_apic_pri();
static int apic_find_bus_type(char *bus);
static int apic_find_bus(int busid);
static int apic_find_bus_id(int bustype);
static struct apic_io_intr *apic_find_io_intr(int irqno);
int apic_allocate_irq(int irq);
static int apic_find_free_irq(int start, int end);
static uchar_t apic_allocate_vector(int ipl, int irq, int pri);
static void apic_modify_vector(uchar_t vector, int irq);
static void apic_mark_vector(uchar_t oldvector, uchar_t newvector);
static uchar_t apic_xlate_vector(uchar_t oldvector);
static void apic_xlate_vector_free_timeout_handler(void *arg);
static void apic_free_vector(uchar_t vector);
static void apic_reprogram_timeout_handler(void *arg);
static int apic_check_stuck_interrupt(apic_irq_t *irq_ptr, int old_bind_cpu,
    int new_bind_cpu, volatile int32_t *ioapic, int intin_no, int which_irq,
    struct ioapic_reprogram_data *drep);
static int apic_setup_io_intr(void *p, int irq, boolean_t deferred);
static void apic_record_rdt_entry(apic_irq_t *irqptr, int irq);
static struct apic_io_intr *apic_find_io_intr_w_busid(int irqno, int busid);
static int apic_find_intin(uchar_t ioapic, uchar_t intin);
static int apic_handle_pci_pci_bridge(dev_info_t *idip, int child_devno,
    int child_ipin, struct apic_io_intr **intrp);
static int apic_setup_irq_table(dev_info_t *dip, int irqno,
    struct apic_io_intr *intrp, struct intrspec *ispec, iflag_t *intr_flagp,
    int type);
static int apic_setup_sci_irq_table(int irqno, uchar_t ipl,
    iflag_t *intr_flagp);
static void apic_nmi_intr(caddr_t arg);
uchar_t apic_bind_intr(dev_info_t *dip, int irq, uchar_t ioapicid,
    uchar_t intin);
static int apic_rebind(apic_irq_t *irq_ptr, int bind_cpu,
    struct ioapic_reprogram_data *drep);
int apic_rebind_all(apic_irq_t *irq_ptr, int bind_cpu);
static void apic_intr_redistribute();
static void apic_cleanup_busy();
static void apic_set_pwroff_method_from_mpcnfhdr(struct apic_mp_cnf_hdr *hdrp);
int apic_introp_xlate(dev_info_t *dip, struct intrspec *ispec, int type);
static void apic_try_deferred_reprogram(int ipl, int vect);
static void delete_defer_repro_ent(int which_irq);
static void apic_ioapic_wait_pending_clear(volatile int32_t *ioapic,
    int intin_no);

/* ACPI support routines */
static int acpi_probe(void);
static int apic_acpi_irq_configure(acpi_psm_lnk_t *acpipsmlnkp, dev_info_t *dip,
    int *pci_irqp, iflag_t *intr_flagp);

static int apic_acpi_translate_pci_irq(dev_info_t *dip, int busid, int devid,
    int ipin, int *pci_irqp, iflag_t *intr_flagp);
static uchar_t acpi_find_ioapic(int irq);
static int acpi_intr_compatible(iflag_t iflag1, iflag_t iflag2);

/*
 * standard MP entries
 */
static int apic_probe();
static int apic_clkinit();
static int apic_getclkirq(int ipl);
static uint_t apic_calibrate(volatile uint32_t *addr,
    uint16_t *pit_ticks_adj);
static hrtime_t apic_gettime();
static hrtime_t apic_gethrtime();
static void apic_init();
static void apic_picinit(void);
static void apic_cpu_start(processorid_t cpun, caddr_t rm_code);
static int apic_post_cpu_start(void);
static void apic_send_ipi(int cpun, int ipl);
static void apic_set_softintr(int softintr);
static void apic_set_idlecpu(processorid_t cpun);
static void apic_unset_idlecpu(processorid_t cpun);
static int apic_softlvl_to_irq(int ipl);
static int apic_intr_enter(int ipl, int *vect);
static void apic_intr_exit(int ipl, int vect);
static void apic_setspl(int ipl);
static int apic_addspl(int ipl, int vector, int min_ipl, int max_ipl);
static int apic_delspl(int ipl, int vector, int min_ipl, int max_ipl);
static void apic_shutdown(int cmd, int fcn);
static void apic_preshutdown(int cmd, int fcn);
static int apic_disable_intr(processorid_t cpun);
static void apic_enable_intr(processorid_t cpun);
static processorid_t apic_get_next_processorid(processorid_t cpun);
static int apic_get_ipivect(int ipl, int type);
static void apic_timer_reprogram(hrtime_t time);
static void apic_timer_enable(void);
static void apic_timer_disable(void);
static void apic_post_cyclic_setup(void *arg);
extern int apic_intr_ops(dev_info_t *, ddi_intr_handle_impl_t *,
    psm_intr_op_t, int *);

static int apic_oneshot = 0;
int apic_oneshot_enable = 1;	/* to allow disabling one-shot capability */

/*
 * These variables are frequently accessed in apic_intr_enter(),
 * apic_intr_exit and apic_setspl, so group them together
 */
volatile uint32_t *apicadr =  NULL;	/* virtual addr of local APIC	*/
int apic_setspl_delay = 1;		/* apic_setspl - delay enable	*/
int apic_clkvect;

/* ACPI SCI interrupt configuration; -1 if SCI not used */
int apic_sci_vect = -1;
iflag_t apic_sci_flags;

/* vector at which error interrupts come in */
int apic_errvect;
int apic_enable_error_intr = 1;
int apic_error_display_delay = 100;

/* vector at which performance counter overflow interrupts come in */
int apic_cpcovf_vect;
int apic_enable_cpcovf_intr = 1;

/* Max wait time (in repetitions) for flags to clear in an RDT entry. */
static int apic_max_reps_clear_pending = 1000;

/* Maximum number of times to retry reprogramming at apic_intr_exit time */
#define	APIC_REPROGRAM_MAX_TRIES 10000

/*
 * maximum value of an unsigned char (NBBY, the number of bits per byte,
 * is from <sys/param.h>)
 */
#define	UCHAR_MAX	((1 << NBBY) - 1)

uchar_t	apic_reserved_irqlist[MAX_ISA_IRQ + 1];

/*
 * The following vector assignments influence the value of ipltopri and
 * vectortoipl. Note that vectors 0 - 0x1f are not used. We can program
 * idle to 0 and IPL 0 to 0x10 to differentiate idle in case
 * we care to do so in future. Note some IPLs which are rarely used
 * will share the vector ranges and heavily used IPLs (5 and 6) have
 * a wide range.
 *	IPL	Vector range.		as passed to intr_enter
 *	0	none.
 *	1,2,3	0x20-0x2f		0x0-0xf
 *	4	0x30-0x3f		0x10-0x1f
 *	5	0x40-0x5f		0x20-0x3f
 *	6	0x60-0x7f		0x40-0x5f
 *	7,8,9	0x80-0x8f		0x60-0x6f
 *	10	0x90-0x9f		0x70-0x7f
 *	11	0xa0-0xaf		0x80-0x8f
 *	...	...
 *	16	0xf0-0xff		0xd0-0xdf
 */
uchar_t apic_vectortoipl[APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL] = {
	3, 4, 5, 5, 6, 6, 9, 10, 11, 12, 13, 14, 15, 16
};
/*
 * The ipl of an ISR at vector X is apic_vectortoipl[X >> 4]
 * NOTE that this is vector as passed into intr_enter which is
 * programmed vector - 0x20 (APIC_BASE_VECT)
 */

uchar_t	apic_ipltopri[MAXIPL + 1];	/* unix ipl to apic pri	*/
	/* The taskpri to be programmed into apic to mask given ipl */

#if defined(__amd64)
uchar_t	apic_cr8pri[MAXIPL + 1];	/* unix ipl to cr8 pri	*/
#endif

/*
 * Patchable global variables.
 */
int	apic_forceload = 0;

#define	INTR_ROUND_ROBIN_WITH_AFFINITY	0
#define	INTR_ROUND_ROBIN		1
#define	INTR_LOWEST_PRIORITY		2

int	apic_intr_policy = INTR_ROUND_ROBIN_WITH_AFFINITY;

static int apic_next_bind_cpu = 1; /* For round robin assignment */
					/* start with cpu 1 */

int	apic_coarse_hrtime = 1;	/* 0 - use accurate slow gethrtime() */
				/* 1 - use gettime() for performance */
int	apic_flat_model = 0;		/* 0 - clustered, 1 - flat */
int	apic_enable_hwsoftint = 0;	/* 0 - disable, 1 - enable	*/
int	apic_enable_bind_log = 1;	/* 1 - display interrupt binding log */
int	apic_panic_on_nmi = 0;
int	apic_panic_on_apic_error = 0;

int	apic_verbose = 0;

/* Flag definitions for apic_verbose */
#define	APIC_VERBOSE_IOAPIC_FLAG		0x00000001
#define	APIC_VERBOSE_IRQ_FLAG			0x00000002
#define	APIC_VERBOSE_POWEROFF_FLAG		0x00000004
#define	APIC_VERBOSE_POWEROFF_PAUSE_FLAG	0x00000008


#define	APIC_VERBOSE_IOAPIC(fmt) \
	if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) \
		cmn_err fmt;

#define	APIC_VERBOSE_IRQ(fmt) \
	if (apic_verbose & APIC_VERBOSE_IRQ_FLAG) \
		cmn_err fmt;

#define	APIC_VERBOSE_POWEROFF(fmt) \
	if (apic_verbose & APIC_VERBOSE_POWEROFF_FLAG) \
		prom_printf fmt;


/* Now the ones for Dynamic Interrupt distribution */
int	apic_enable_dynamic_migration = 0;

/*
 * If enabled, the distribution works as follows:
 * On every interrupt entry, the current ipl for the CPU is set in cpu_info
 * and the irq corresponding to the ipl is also set in the aci_current array.
 * Interrupt exit and setspl (due to soft interrupts) will cause the current
 * ipl to be changed. This is cache friendly as these frequently used
 * paths write into a per cpu structure.
 *
 * Sampling is done by checking the structures for all CPUs and incrementing
 * the busy field of the irq (if any) executing on each CPU and the busy field
 * of the corresponding CPU.
 * In periodic mode this is done on every clock interrupt.
 * In one-shot mode, this is done thru a cyclic with an interval of
 * apic_redistribute_sample_interval (default 10 milli sec).
 *
 * Every apic_sample_factor_redistribution times we sample, we do computations
 * to decide which interrupt needs to be migrated (see comments
 * before apic_intr_redistribute()).
 */

/*
 * Following 3 variables start as % and can be patched or set using an
 * API to be defined in future. They will be scaled to
 * sample_factor_redistribution which is in turn set to hertz+1 (in periodic
 * mode), or 101 in one-shot mode to stagger it away from one sec processing
 */

int	apic_int_busy_mark = 60;
int	apic_int_free_mark = 20;
int	apic_diff_for_redistribution = 10;

/* sampling interval for interrupt redistribution for dynamic migration */
int	apic_redistribute_sample_interval = NANOSEC / 100; /* 10 millisec */

/*
 * number of times we sample before deciding to redistribute interrupts
 * for dynamic migration
 */
int	apic_sample_factor_redistribution = 101;

/* timeout for xlate_vector, mark_vector */
int	apic_revector_timeout = 16 * 10000; /* 160 millisec */

int	apic_redist_cpu_skip = 0;
int	apic_num_imbalance = 0;
int	apic_num_rebind = 0;

int	apic_nproc = 0;
int	apic_defconf = 0;
int	apic_irq_translate = 0;
int	apic_spec_rev = 0;
int	apic_imcrp = 0;

int	apic_use_acpi = 1;	/* 1 = use ACPI, 0 = don't use ACPI */
int	apic_use_acpi_madt_only = 0;	/* 1=ONLY use MADT from ACPI */

/*
 * For interrupt link devices, if apic_unconditional_srs is set, an irq
 * resource will be assigned (via _SRS). If it is not set, use the current
 * irq setting (via _CRS), but only if that irq is in the set of possible
 * irqs (returned by _PRS) for the device.
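 *
 * (In ACPI terms: _CRS reports a link device's current resource settings,
 * _PRS the set of possible settings, and _SRS selects one of them.)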
 */
int apic_unconditional_srs = 1;

/*
 * For interrupt link devices, if apic_prefer_crs is set when we are
 * assigning an IRQ resource to a device, prefer the current IRQ setting
 * over other possible irq settings under the same conditions.
 */

int apic_prefer_crs = 1;


/* minimum number of timer ticks to program to */
int apic_min_timer_ticks = 1;
/*
 *	Local static data
 */
static struct	psm_ops apic_ops = {
	apic_probe,

	apic_init,
	apic_picinit,
	apic_intr_enter,
	apic_intr_exit,
	apic_setspl,
	apic_addspl,
	apic_delspl,
	apic_disable_intr,
	apic_enable_intr,
	apic_softlvl_to_irq,
	apic_set_softintr,

	apic_set_idlecpu,
	apic_unset_idlecpu,

	apic_clkinit,
	apic_getclkirq,
	(void (*)(void))NULL,			/* psm_hrtimeinit */
	apic_gethrtime,

	apic_get_next_processorid,
	apic_cpu_start,
	apic_post_cpu_start,
	apic_shutdown,
	apic_get_ipivect,
	apic_send_ipi,

	(int (*)(dev_info_t *, int))NULL,	/* psm_translate_irq */
	(int (*)(todinfo_t *))NULL,		/* psm_tod_get */
	(int (*)(todinfo_t *))NULL,		/* psm_tod_set */
	(void (*)(int, char *))NULL,		/* psm_notify_error */
	(void (*)(int))NULL,			/* psm_notify_func */
	apic_timer_reprogram,
	apic_timer_enable,
	apic_timer_disable,
	apic_post_cyclic_setup,
	apic_preshutdown,
	apic_intr_ops			/* Advanced DDI Interrupt framework */
};


static struct	psm_info apic_psm_info = {
	PSM_INFO_VER01_5,			/* version */
	PSM_OWN_EXCLUSIVE,			/* ownership */
	(struct psm_ops *)&apic_ops,		/* operation */
	"pcplusmp",				/* machine name */
	"pcplusmp v1.4 compatible %I%",
};

static void *apic_hdlp;

#ifdef DEBUG
#define	DENT		0x0001
int	apic_debug = 0;
/*
 * set apic_restrict_vector to the # of vectors we want to allow per range
 * useful in testing shared interrupt logic by setting it to 2 or 3
 */
int	apic_restrict_vector = 0;

#define	APIC_DEBUG_MSGBUFSIZE	2048
int	apic_debug_msgbuf[APIC_DEBUG_MSGBUFSIZE];
int	apic_debug_msgbufindex = 0;

/*
 * Put "int" info into debug buffer. No MP consistency, but light weight.
 * Good enough for most debugging.
 */
#define	APIC_DEBUG_BUF_PUT(x) \
	apic_debug_msgbuf[apic_debug_msgbufindex++] = x; \
	if (apic_debug_msgbufindex >= (APIC_DEBUG_MSGBUFSIZE - NCPU)) \
		apic_debug_msgbufindex = 0;

#endif /* DEBUG */

apic_cpus_info_t	*apic_cpus;

static cpuset_t	apic_cpumask;
static uint_t	apic_flag;

/* Flag to indicate that we need to shut down all processors */
static uint_t	apic_shutdown_processors;

uint_t apic_nsec_per_intr = 0;

/*
 * apic_let_idle_redistribute can have the following values:
 * 0 - If clock decremented it from 1 to 0, clock has to call redistribute.
 * apic_redistribute_lock prevents multiple idle cpus from redistributing
 */
int	apic_num_idle_redistributions = 0;
static	int apic_let_idle_redistribute = 0;
static	uint_t apic_nticks = 0;
static	uint_t apic_skipped_redistribute = 0;

/* to gather intr data and redistribute */
static void apic_redistribute_compute(void);

static	uint_t last_count_read = 0;
static	lock_t	apic_gethrtime_lock;
volatile int	apic_hrtime_stamp = 0;
volatile hrtime_t apic_nsec_since_boot = 0;
static uint_t apic_hertz_count;

uint64_t apic_ticks_per_SFnsecs;	/* # of ticks in SF nsecs */

static hrtime_t apic_nsec_max;

static	hrtime_t	apic_last_hrtime = 0;
int		apic_hrtime_error = 0;
int		apic_remote_hrterr = 0;
int		apic_num_nmis = 0;
int		apic_apic_error = 0;
int		apic_num_apic_errors = 0;
int		apic_num_cksum_errors = 0;

static	uchar_t	apic_io_id[MAX_IO_APIC];
static	uchar_t	apic_io_ver[MAX_IO_APIC];
static	uchar_t	apic_io_vectbase[MAX_IO_APIC];
static	uchar_t	apic_io_vectend[MAX_IO_APIC];
volatile int32_t *apicioadr[MAX_IO_APIC];

/*
 * First available slot to be used as IRQ index into the apic_irq_table
 * for those interrupts (like MSI/X) that don't have a physical IRQ.
 */
int apic_first_avail_irq  = APIC_FIRST_FREE_IRQ;

/*
 * apic_ioapic_lock protects the ioapics (reg select), the status, temp_bound
 * and bound elements of cpus_info and the temp_cpu element of irq_struct
 */
lock_t	apic_ioapic_lock;

/*
 * apic_defer_reprogram_lock ensures that only one processor is handling
 * deferred interrupt programming at apic_intr_exit time.
 */
static	lock_t	apic_defer_reprogram_lock;

/*
 * The current number of deferred reprogrammings outstanding
 */
uint_t	apic_reprogram_outstanding = 0;

#ifdef DEBUG
/*
 * Counters that keep track of deferred reprogramming stats
 */
uint_t	apic_intr_deferrals = 0;
uint_t	apic_intr_deliver_timeouts = 0;
uint_t	apic_last_ditch_reprogram_failures = 0;
uint_t	apic_deferred_setup_failures = 0;
uint_t	apic_defer_repro_total_retries = 0;
uint_t	apic_defer_repro_successes = 0;
uint_t	apic_deferred_spurious_enters = 0;
#endif

static	int	apic_io_max = 0;	/* no. of i/o apics enabled	*/

static	struct apic_io_intr *apic_io_intrp = 0;
static	struct apic_bus	*apic_busp;

uchar_t	apic_vector_to_irq[APIC_MAX_VECTOR+1];
static	uchar_t	apic_resv_vector[MAXIPL+1];

static	char	apic_level_intr[APIC_MAX_VECTOR+1];
static	int	apic_error = 0;
/* values which apic_error can take. Not catastrophic, but may help debug */
#define	APIC_ERR_BOOT_EOI		0x1
#define	APIC_ERR_GET_IPIVECT_FAIL	0x2
#define	APIC_ERR_INVALID_INDEX		0x4
#define	APIC_ERR_MARK_VECTOR_FAIL	0x8
#define	APIC_ERR_APIC_ERROR		0x40000000
#define	APIC_ERR_NMI			0x80000000

static	int	apic_cmos_ssb_set = 0;

static	uint32_t eisa_level_intr_mask = 0;
	/* At least MSB will be set if EISA bus */

static	int	apic_pci_bus_total = 0;
static	uchar_t	apic_single_pci_busid = 0;


/*
 * airq_mutex protects additions to the apic_irq_table - the first
 * pointer and any airq_nexts off of that one. It also protects
 * apic_max_device_irq & apic_min_device_irq. It also guarantees
 * that share_id is unique as new ids are generated only when new
 * irq_t structs are linked in. Once linked in the structs are never
 * deleted. temp_cpu & mps_intr_index field indicate if it is programmed
 * or allocated. Note that there is a slight gap between allocating in
 * apic_introp_xlate and programming in addspl.
 */
kmutex_t	airq_mutex;
apic_irq_t *apic_irq_table[APIC_MAX_VECTOR+1];
int	apic_max_device_irq = 0;
int	apic_min_device_irq = APIC_MAX_VECTOR;

/* used to make sure only one cpu handles the nmi */
static	lock_t	apic_nmi_lock;
/* used to make sure only one cpu handles the error interrupt */
static	lock_t	apic_error_lock;

/*
 * Following declarations are for revectoring; used when ISRs at different
 * IPLs share an irq.
 */
static	lock_t	apic_revector_lock;
static	int	apic_revector_pending = 0;
static	uchar_t	*apic_oldvec_to_newvec;
static	uchar_t	*apic_newvec_to_oldvec;

static struct ioapic_reprogram_data {
	boolean_t done;
	apic_irq_t *irqp;
	/* The CPU to which the int will be bound */
	int bindcpu;
	/* # times the reprogram timeout was called */
	unsigned tries;

	/* The irq # is implicit in the array index: */
} apic_reprogram_info[APIC_MAX_VECTOR + 1];

/*
 * APIC_MAX_VECTOR + 1 is the maximum # of IRQs as well. apic_reprogram_info
 * is indexed by IRQ number, NOT by vector number.
 */

typedef struct prs_irq_list_ent {
	int			list_prio;
	int32_t			irq;
	iflag_t			intrflags;
	acpi_prs_private_t	prsprv;
	struct prs_irq_list_ent	*next;
} prs_irq_list_t;

/*
 * The following is used to identify a software poweroff method, if available.
 */

static struct {
	int	poweroff_method;
	char	oem_id[APIC_MPS_OEM_ID_LEN + 1];	/* MAX + 1 for NULL */
	char	prod_id[APIC_MPS_PROD_ID_LEN + 1];	/* MAX + 1 for NULL */
} apic_mps_ids[] = {
	{ APIC_POWEROFF_VIA_RTC,	"INTEL",	"ALDER" },   /* 4300 */
	{ APIC_POWEROFF_VIA_RTC,	"NCR",		"AMC" },     /* 4300 */
	{ APIC_POWEROFF_VIA_ASPEN_BMC,	"INTEL",	"A450NX" },  /* 4400? */
	{ APIC_POWEROFF_VIA_ASPEN_BMC,	"INTEL",	"AD450NX" }, /* 4400 */
	{ APIC_POWEROFF_VIA_ASPEN_BMC,	"INTEL",	"AC450NX" }, /* 4400R */
	{ APIC_POWEROFF_VIA_SITKA_BMC,	"INTEL",	"S450NX" },  /* S50  */
	{ APIC_POWEROFF_VIA_SITKA_BMC,	"INTEL",	"SC450NX" }  /* S50? */
};

int	apic_poweroff_method = APIC_POWEROFF_NONE;

static struct {
	uchar_t	cntl;
	uchar_t	data;
} aspen_bmc[] = {
	{ CC_SMS_WR_START,	0x18 },		/* NetFn/LUN */
	{ CC_SMS_WR_NEXT,	0x24 },		/* Cmd SET_WATCHDOG_TIMER */
	{ CC_SMS_WR_NEXT,	0x84 },		/* DataByte 1: SMS/OS no log */
	{ CC_SMS_WR_NEXT,	0x2 },		/* DataByte 2: Power Down */
	{ CC_SMS_WR_NEXT,	0x0 },		/* DataByte 3: no pre-timeout */
	{ CC_SMS_WR_NEXT,	0x0 },		/* DataByte 4: timer expir. */
	{ CC_SMS_WR_NEXT,	0xa },		/* DataByte 5: init countdown */
	{ CC_SMS_WR_END,	0x0 },		/* DataByte 6: init countdown */

	{ CC_SMS_WR_START,	0x18 },		/* NetFn/LUN */
	{ CC_SMS_WR_END,	0x22 }		/* Cmd RESET_WATCHDOG_TIMER */
};

static struct {
	int	port;
	uchar_t	data;
} sitka_bmc[] = {
	{ SMS_COMMAND_REGISTER,	SMS_WRITE_START },
	{ SMS_DATA_REGISTER,	0x18 },		/* NetFn/LUN */
	{ SMS_DATA_REGISTER,	0x24 },		/* Cmd SET_WATCHDOG_TIMER */
	{ SMS_DATA_REGISTER,	0x84 },		/* DataByte 1: SMS/OS no log */
	{ SMS_DATA_REGISTER,	0x2 },		/* DataByte 2: Power Down */
	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 3: no pre-timeout */
	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 4: timer expir. */
	{ SMS_DATA_REGISTER,	0xa },		/* DataByte 5: init countdown */
	{ SMS_COMMAND_REGISTER,	SMS_WRITE_END },
	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 6: init countdown */

	{ SMS_COMMAND_REGISTER,	SMS_WRITE_START },
	{ SMS_DATA_REGISTER,	0x18 },		/* NetFn/LUN */
	{ SMS_COMMAND_REGISTER,	SMS_WRITE_END },
	{ SMS_DATA_REGISTER,	0x22 }		/* Cmd RESET_WATCHDOG_TIMER */
};


/* Patchable global variables. */
int	apic_kmdb_on_nmi = 0;		/* 0 - no, 1 - yes enter kmdb */
int	apic_debug_mps_id = 0;		/* 1 - print MPS ID strings */
uint32_t apic_divide_reg_init = 0;	/* 0 - divide by 2 */

/*
 * ACPI definitions
 */
/* _PIC method arguments */
#define	ACPI_PIC_MODE	0
#define	ACPI_APIC_MODE	1

/* APIC error flags we care about */
#define	APIC_SEND_CS_ERROR	0x01
#define	APIC_RECV_CS_ERROR	0x02
#define	APIC_CS_ERRORS		(APIC_SEND_CS_ERROR|APIC_RECV_CS_ERROR)

/*
 * ACPI variables
 */
/* 1 = acpi is enabled & working, 0 = acpi is not enabled or not there */
static	int apic_enable_acpi = 0;

/* ACPI Multiple APIC Description Table ptr */
static	MULTIPLE_APIC_TABLE *acpi_mapic_dtp = NULL;

/* ACPI Interrupt Source Override Structure ptr */
static	MADT_INTERRUPT_OVERRIDE *acpi_isop = NULL;
static	int acpi_iso_cnt = 0;

/* ACPI Non-maskable Interrupt Sources ptr */
static	MADT_NMI_SOURCE *acpi_nmi_sp = NULL;
static	int acpi_nmi_scnt = 0;
static	MADT_LOCAL_APIC_NMI *acpi_nmi_cp = NULL;
static	int acpi_nmi_ccnt = 0;

/*
 * extern declarations
 */
extern	int	intr_clear(void);
extern	void	intr_restore(uint_t);
#if defined(__amd64)
extern	int	intpri_use_cr8;
#endif	/* __amd64 */

extern int apic_pci_msi_enable_vector(dev_info_t *, int, int,
	int, int, int);
extern apic_irq_t *apic_find_irq(dev_info_t *, struct intrspec *, int);
extern int apic_pci_msi_unconfigure(dev_info_t *, int, int);
extern int apic_pci_msi_disable_mode(dev_info_t *, int, int);
extern int apic_pci_msi_enable_mode(dev_info_t *, int, int);

/*
 * This is the loadable module wrapper
 */

int
_init(void)
{
	if (apic_coarse_hrtime)
		apic_ops.psm_gethrtime = &apic_gettime;
	return (psm_mod_init(&apic_hdlp, &apic_psm_info));
}

int
_fini(void)
{
	return (psm_mod_fini(&apic_hdlp, &apic_psm_info));
}

int
_info(struct modinfo *modinfop)
{
	return (psm_mod_info(&apic_hdlp, &apic_psm_info, modinfop));
}

/*
 * Auto-configuration routines
 */

/*
 * Look at MPSpec 1.4 (Intel Order # 242016-005) for details of what we do here
 * May work with 1.1 - but not guaranteed.
 * According to the MP Spec, the MP floating pointer structure
 * will be searched in the order described below:
 * 1. In the first kilobyte of Extended BIOS Data Area (EBDA)
 * 2. Within the last kilobyte of system base memory
 * 3. In the BIOS ROM address space between 0F0000h and 0FFFFFh
 * Once we find the right signature with proper checksum, we call
 * either handle_defconf or parse_mpct to get all info necessary for
 * subsequent operations.
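 * ("Proper checksum" here means that all bytes of the structure,
 * including the checksum byte itself, sum to zero modulo 256; see
 * apic_checksum() below.)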
 */
static int
apic_probe()
{
	uint32_t mpct_addr, ebda_start = 0, base_mem_end;
	caddr_t	biosdatap;
	caddr_t	mpct;
	caddr_t	fptr;
	int	i, mpct_size, mapsize, retval = PSM_FAILURE;
	ushort_t	ebda_seg, base_mem_size;
	struct	apic_mpfps_hdr	*fpsp;
	struct	apic_mp_cnf_hdr	*hdrp;
	int bypass_cpu_and_ioapics_in_mptables;
	int acpi_user_options;

	if (apic_forceload < 0)
		return (retval);

	/* Allow override for MADT-only mode */
	acpi_user_options = ddi_prop_get_int(DDI_DEV_T_ANY, ddi_root_node(), 0,
	    "acpi-user-options", 0);
	apic_use_acpi_madt_only = ((acpi_user_options & ACPI_OUSER_MADT) != 0);

	/* Allow apic_use_acpi to override MADT-only mode */
	if (!apic_use_acpi)
		apic_use_acpi_madt_only = 0;

	retval = acpi_probe();

	/*
	 * mapin the bios data area 40:0
	 * 40:13h - two-byte location reports the base memory size
	 * 40:0Eh - two-byte location for the exact starting address of
	 *	    the EBDA segment for EISA
	 */
	biosdatap = psm_map_phys(0x400, 0x20, PROT_READ);
	if (!biosdatap)
		return (retval);
	fpsp = (struct apic_mpfps_hdr *)NULL;
	mapsize = MPFPS_RAM_WIN_LEN;
	/*LINTED: pointer cast may result in improper alignment */
	ebda_seg = *((ushort_t *)(biosdatap+0xe));
	/* check the 1k of EBDA */
	if (ebda_seg) {
		ebda_start = ((uint32_t)ebda_seg) << 4;
		fptr = psm_map_phys(ebda_start, MPFPS_RAM_WIN_LEN, PROT_READ);
		if (fptr) {
			if (!(fpsp =
			    apic_find_fps_sig(fptr, MPFPS_RAM_WIN_LEN)))
				psm_unmap_phys(fptr, MPFPS_RAM_WIN_LEN);
		}
	}
	/* If not in EBDA, check the last k of system base memory */
	if (!fpsp) {
		/*LINTED: pointer cast may result in improper alignment */
		base_mem_size = *((ushort_t *)(biosdatap + 0x13));

		if (base_mem_size > 512)
			base_mem_end = 639 * 1024;
		else
			base_mem_end = 511 * 1024;
		/* if ebda == last k of base mem, skip to check BIOS ROM */
		if (base_mem_end != ebda_start) {

			fptr = psm_map_phys(base_mem_end, MPFPS_RAM_WIN_LEN,
			    PROT_READ);

			if (fptr) {
				if (!(fpsp = apic_find_fps_sig(fptr,
				    MPFPS_RAM_WIN_LEN)))
					psm_unmap_phys(fptr, MPFPS_RAM_WIN_LEN);
			}
		}
	}
	psm_unmap_phys(biosdatap, 0x20);

	/* If still cannot find it, check the BIOS ROM space */
	if (!fpsp) {
		mapsize = MPFPS_ROM_WIN_LEN;
		fptr = psm_map_phys(MPFPS_ROM_WIN_START,
		    MPFPS_ROM_WIN_LEN, PROT_READ);
		if (fptr) {
			if (!(fpsp =
			    apic_find_fps_sig(fptr, MPFPS_ROM_WIN_LEN))) {
				psm_unmap_phys(fptr, MPFPS_ROM_WIN_LEN);
				return (retval);
			}
		}
	}

	/* if the floating pointer structure was never found, give up */
	if (!fpsp)
		return (retval);

	if (apic_checksum((caddr_t)fpsp, fpsp->mpfps_length * 16) != 0) {
		psm_unmap_phys(fptr, mapsize);
		return (retval);
	}

	apic_spec_rev = fpsp->mpfps_spec_rev;
	if ((apic_spec_rev != 04) && (apic_spec_rev != 01)) {
		psm_unmap_phys(fptr, mapsize);
		return (retval);
	}

	/* check whether IMCR is present or not */
	apic_imcrp = fpsp->mpfps_featinfo2 & MPFPS_FEATINFO2_IMCRP;

	/* check default configuration (dual CPUs) */
	if ((apic_defconf = fpsp->mpfps_featinfo1) != 0) {
		psm_unmap_phys(fptr, mapsize);
		return (apic_handle_defconf());
	}

	/* MP Configuration Table */
	mpct_addr = (uint32_t)(fpsp->mpfps_mpct_paddr);

	psm_unmap_phys(fptr, mapsize);	/* unmap floating ptr struct */

	/*
	 * Map in enough memory for the MP Configuration Table Header.
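	 * (struct apic_mp_cnf_hdr; its mpcnf_tbl_length field gives the
	 * size of the complete table.)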
	 * Use this table to read the total length of the BIOS data and
	 * map in all the info
	 */
	/*LINTED: pointer cast may result in improper alignment */
	hdrp = (struct apic_mp_cnf_hdr *)psm_map_phys(mpct_addr,
	    sizeof (struct apic_mp_cnf_hdr), PROT_READ);
	if (!hdrp)
		return (retval);

	/* check mp configuration table signature PCMP */
	if (hdrp->mpcnf_sig != 0x504d4350) {
		psm_unmap_phys((caddr_t)hdrp, sizeof (struct apic_mp_cnf_hdr));
		return (retval);
	}
	mpct_size = (int)hdrp->mpcnf_tbl_length;

	apic_set_pwroff_method_from_mpcnfhdr(hdrp);

	psm_unmap_phys((caddr_t)hdrp, sizeof (struct apic_mp_cnf_hdr));

	if ((retval == PSM_SUCCESS) && !apic_use_acpi_madt_only) {
		/* This is an ACPI machine.  No need for further checks. */
		return (retval);
	}

	/*
	 * Map in the entries for this machine, ie. Processor
	 * Entry Tables, Bus Entry Tables, etc.
	 * They are in fixed order following one another
	 */
	mpct = psm_map_phys(mpct_addr, mpct_size, PROT_READ);
	if (!mpct)
		return (retval);

	if (apic_checksum(mpct, mpct_size) != 0)
		goto apic_fail1;


	/*LINTED: pointer cast may result in improper alignment */
	hdrp = (struct apic_mp_cnf_hdr *)mpct;
	/*LINTED: pointer cast may result in improper alignment */
	apicadr = (uint32_t *)psm_map_phys((uint32_t)hdrp->mpcnf_local_apic,
	    APIC_LOCAL_MEMLEN, PROT_READ | PROT_WRITE);
	if (!apicadr)
		goto apic_fail1;

	/* Parse all information in the tables */
	bypass_cpu_and_ioapics_in_mptables = (retval == PSM_SUCCESS);
	if (apic_parse_mpct(mpct, bypass_cpu_and_ioapics_in_mptables) ==
	    PSM_SUCCESS)
		return (PSM_SUCCESS);

	for (i = 0; i < apic_io_max; i++)
		psm_unmap_phys((caddr_t)apicioadr[i], APIC_IO_MEMLEN);
	if (apic_cpus)
		kmem_free(apic_cpus, sizeof (*apic_cpus) * apic_nproc);
	if (apicadr)
		psm_unmap_phys((caddr_t)apicadr, APIC_LOCAL_MEMLEN);
apic_fail1:
	psm_unmap_phys(mpct, mpct_size);
	return (retval);
}

static void
apic_set_pwroff_method_from_mpcnfhdr(struct apic_mp_cnf_hdr *hdrp)
{
	int	i;

	for (i = 0; i < (sizeof (apic_mps_ids) / sizeof (apic_mps_ids[0]));
	    i++) {
		if ((strncmp(hdrp->mpcnf_oem_str, apic_mps_ids[i].oem_id,
		    strlen(apic_mps_ids[i].oem_id)) == 0) &&
		    (strncmp(hdrp->mpcnf_prod_str, apic_mps_ids[i].prod_id,
		    strlen(apic_mps_ids[i].prod_id)) == 0)) {

			apic_poweroff_method = apic_mps_ids[i].poweroff_method;
			break;
		}
	}

	if (apic_debug_mps_id != 0) {
		cmn_err(CE_CONT, "pcplusmp: MPS OEM ID = '%c%c%c%c%c%c%c%c' "
		    "Product ID = '%c%c%c%c%c%c%c%c%c%c%c%c'\n",
		    hdrp->mpcnf_oem_str[0],
		    hdrp->mpcnf_oem_str[1],
		    hdrp->mpcnf_oem_str[2],
		    hdrp->mpcnf_oem_str[3],
		    hdrp->mpcnf_oem_str[4],
		    hdrp->mpcnf_oem_str[5],
		    hdrp->mpcnf_oem_str[6],
		    hdrp->mpcnf_oem_str[7],
		    hdrp->mpcnf_prod_str[0],
		    hdrp->mpcnf_prod_str[1],
		    hdrp->mpcnf_prod_str[2],
		    hdrp->mpcnf_prod_str[3],
		    hdrp->mpcnf_prod_str[4],
		    hdrp->mpcnf_prod_str[5],
		    hdrp->mpcnf_prod_str[6],
		    hdrp->mpcnf_prod_str[7],
		    hdrp->mpcnf_prod_str[8],
		    hdrp->mpcnf_prod_str[9],
		    hdrp->mpcnf_prod_str[10],
		    hdrp->mpcnf_prod_str[11]);
	}
}

static int
acpi_probe(void)
{
	int i, id, intmax, ver, index, rv;
	int acpi_verboseflags = 0;
	int madt_seen, madt_size;
	APIC_HEADER		*ap;
	MADT_PROCESSOR_APIC	*mpa;
	MADT_IO_APIC		*mia;
	MADT_IO_SAPIC		*misa;
	MADT_INTERRUPT_OVERRIDE	*mio;
	MADT_NMI_SOURCE		*mns;
	MADT_INTERRUPT_SOURCE	*mis;
	MADT_LOCAL_APIC_NMI	*mlan;
	MADT_ADDRESS_OVERRIDE	*mao;
	ACPI_OBJECT_LIST	arglist;
	ACPI_OBJECT		arg;
	int			sci;
	iflag_t			sci_flags;
	volatile int32_t	*ioapic;
	char			local_ids[NCPU];
	char			proc_ids[NCPU];
	uchar_t			hid;

	if (!apic_use_acpi)
		return (PSM_FAILURE);

	if (AcpiGetFirmwareTable(APIC_SIG, 1, ACPI_LOGICAL_ADDRESSING,
	    (ACPI_TABLE_HEADER **) &acpi_mapic_dtp) != AE_OK)
		return (PSM_FAILURE);

	apicadr = (uint32_t *)psm_map_phys(
	    (uint32_t)acpi_mapic_dtp->LocalApicAddress,
	    APIC_LOCAL_MEMLEN, PROT_READ | PROT_WRITE);
	if (!apicadr)
		return (PSM_FAILURE);

	id = apicadr[APIC_LID_REG];
	local_ids[0] = (uchar_t)(((uint_t)id) >> 24);
	apic_nproc = index = 1;
	CPUSET_ONLY(apic_cpumask, 0);
	apic_io_max = 0;

	ap = (APIC_HEADER *) (acpi_mapic_dtp + 1);
	madt_size = acpi_mapic_dtp->Length;
	madt_seen = sizeof (*acpi_mapic_dtp);

	while (madt_seen < madt_size) {
		switch (ap->Type) {
		case APIC_PROCESSOR:
			mpa = (MADT_PROCESSOR_APIC *) ap;
			if (mpa->ProcessorEnabled) {
				if (mpa->LocalApicId == local_ids[0])
					proc_ids[0] = mpa->ProcessorId;
				else if (apic_nproc < NCPU) {
					local_ids[index] = mpa->LocalApicId;
					proc_ids[index] = mpa->ProcessorId;
					CPUSET_ADD(apic_cpumask, index);
					index++;
					apic_nproc++;
				} else
					cmn_err(CE_WARN, "pcplusmp: exceeded "
					    "maximum no. of CPUs (= %d)",
					    NCPU);
			}
			break;

		case APIC_IO:
			mia = (MADT_IO_APIC *) ap;
			if (apic_io_max < MAX_IO_APIC) {
				apic_io_id[apic_io_max] = mia->IoApicId;
				apic_io_vectbase[apic_io_max] =
				    mia->Interrupt;
				ioapic = apicioadr[apic_io_max] =
				    (int32_t *)psm_map_phys(
				    (uint32_t)mia->Address,
				    APIC_IO_MEMLEN, PROT_READ | PROT_WRITE);
				if (!ioapic)
					goto cleanup;
				apic_io_max++;
			}
			break;

		case APIC_XRUPT_OVERRIDE:
			mio = (MADT_INTERRUPT_OVERRIDE *) ap;
			if (acpi_isop == NULL)
				acpi_isop = mio;
			acpi_iso_cnt++;
			break;

		case APIC_NMI:
			/* UNIMPLEMENTED */
			mns = (MADT_NMI_SOURCE *) ap;
			if (acpi_nmi_sp == NULL)
				acpi_nmi_sp = mns;
			acpi_nmi_scnt++;

			cmn_err(CE_NOTE, "!apic: nmi source: %d %d %d\n",
			    mns->Interrupt, mns->Polarity,
			    mns->TriggerMode);
			break;

		case APIC_LOCAL_NMI:
			/* UNIMPLEMENTED */
			mlan = (MADT_LOCAL_APIC_NMI *) ap;
			if (acpi_nmi_cp == NULL)
				acpi_nmi_cp = mlan;
			acpi_nmi_ccnt++;

			cmn_err(CE_NOTE, "!apic: local nmi: %d %d %d %d\n",
			    mlan->ProcessorId, mlan->Polarity,
			    mlan->TriggerMode, mlan->Lint);
			break;

		case APIC_ADDRESS_OVERRIDE:
			/* UNIMPLEMENTED */
			mao = (MADT_ADDRESS_OVERRIDE *) ap;
			cmn_err(CE_NOTE, "!apic: address override: %lx\n",
			    (long)mao->Address);
			break;

		case APIC_IO_SAPIC:
			/* UNIMPLEMENTED */
			misa = (MADT_IO_SAPIC *) ap;

			cmn_err(CE_NOTE, "!apic: io sapic: %d %d %lx\n",
			    misa->IoSapicId, misa->InterruptBase,
			    (long)misa->Address);
			break;

		case APIC_XRUPT_SOURCE:
			/* UNIMPLEMENTED */
			mis = (MADT_INTERRUPT_SOURCE *) ap;

			cmn_err(CE_NOTE,
			    "!apic: irq source: %d %d %d %d %d %d %d\n",
			    mis->ProcessorId, mis->ProcessorEid,
			    mis->Interrupt, mis->Polarity,
			    mis->TriggerMode, mis->InterruptType,
			    mis->IoSapicVector);
			break;
		case APIC_RESERVED:
		default:
			break;	/* ignore unknown items as per ACPI spec */
		}

		/* advance to next entry */
		madt_seen += ap->Length;
		ap = (APIC_HEADER *)(((char *)ap) + ap->Length);
	}

	if ((apic_cpus = kmem_zalloc(sizeof (*apic_cpus) * apic_nproc,
	    KM_NOSLEEP)) == NULL)
		goto cleanup;

	/*
	 * ACPI doesn't provide the local apic ver, get it directly from the
	 * local apic
	 */
	ver = apicadr[APIC_VERS_REG];
	for (i = 0; i < apic_nproc; i++) {
		apic_cpus[i].aci_local_id = local_ids[i];
		apic_cpus[i].aci_local_ver = (uchar_t)(ver & 0xFF);
	}
	for (i = 0; i < apic_io_max; i++) {
		ioapic = apicioadr[i];

		/*
		 * Need to check Sitka on the following ACPI problem:
		 * On the Sitka, the ioapic's apic_id field isn't reporting
		 * the actual io apic id.  We have reported this problem
		 * to Intel.  Until they fix the problem, we will get the
		 * actual id directly from the ioapic.
		 */
		ioapic[APIC_IO_REG] = APIC_ID_CMD;
		id = ioapic[APIC_IO_DATA];
		hid = (uchar_t)(((uint_t)id) >> 24);

		if (hid != apic_io_id[i]) {
			if (apic_io_id[i] == 0)
				apic_io_id[i] = hid;
			else { /* set ioapic id to whatever reported by ACPI */
				id = ((int32_t)apic_io_id[i]) << 24;
				ioapic[APIC_IO_REG] = APIC_ID_CMD;
				ioapic[APIC_IO_DATA] = id;
			}
		}
		ioapic[APIC_IO_REG] = APIC_VERS_CMD;
		ver = ioapic[APIC_IO_DATA];
		apic_io_ver[i] = (uchar_t)(ver & 0xff);
		intmax = (ver >> 16) & 0xff;
		apic_io_vectend[i] = apic_io_vectbase[i] + intmax;
		if (apic_first_avail_irq <= apic_io_vectend[i])
			apic_first_avail_irq = apic_io_vectend[i] + 1;
	}


	/*
	 * Process SCI configuration here
	 * An error may be returned here if
	 * acpi-user-options specifies legacy mode
	 * (no SCI, no ACPI mode)
	 */
	if (acpica_get_sci(&sci, &sci_flags) != AE_OK)
		sci = -1;

	/*
	 * Now call acpi_init() to generate namespaces
	 * If this fails, we don't attempt to use ACPI
	 * even if we were able to get a MADT above
	 */
	if (acpica_init() != AE_OK)
		goto cleanup;

	/*
	 * Squirrel away the SCI and flags for later on
	 * in apic_picinit() when we're ready
	 */
	apic_sci_vect = sci;
	apic_sci_flags = sci_flags;

	if (apic_verbose & APIC_VERBOSE_IRQ_FLAG)
		acpi_verboseflags |= PSM_VERBOSE_IRQ_FLAG;

	if (apic_verbose & APIC_VERBOSE_POWEROFF_FLAG)
		acpi_verboseflags |= PSM_VERBOSE_POWEROFF_FLAG;

	if (apic_verbose & APIC_VERBOSE_POWEROFF_PAUSE_FLAG)
		acpi_verboseflags |= PSM_VERBOSE_POWEROFF_PAUSE_FLAG;

	if (acpi_psm_init(apic_psm_info.p_mach_idstring, acpi_verboseflags) ==
	    ACPI_PSM_FAILURE)
		goto cleanup;

	/* Enable ACPI APIC interrupt routing */
	arglist.Count = 1;
	arglist.Pointer = &arg;
	arg.Type = ACPI_TYPE_INTEGER;
	arg.Integer.Value = ACPI_APIC_MODE;	/* 1 */
	rv = AcpiEvaluateObject(NULL, "\\_PIC", &arglist, NULL);
	if (rv == AE_OK) {
		build_reserved_irqlist((uchar_t *)apic_reserved_irqlist);
		apic_enable_acpi = 1;
		if (apic_use_acpi_madt_only) {
			cmn_err(CE_CONT,
			    "?Using ACPI for CPU/IOAPIC information ONLY\n");
		}
		return (PSM_SUCCESS);
	}
	/* if setting APIC mode failed above, we fall through to cleanup */

cleanup:
	if (apicadr != NULL) {
		psm_unmap_phys((caddr_t)apicadr, APIC_LOCAL_MEMLEN);
		apicadr = NULL;
	}
	apic_nproc = 0;
	for (i = 0; i < apic_io_max; i++) {
		psm_unmap_phys((caddr_t)apicioadr[i], APIC_IO_MEMLEN);
		apicioadr[i] = NULL;
	}
	apic_io_max = 0;
	acpi_isop = NULL;
	acpi_iso_cnt = 0;
	acpi_nmi_sp = NULL;
	acpi_nmi_scnt = 0;
	acpi_nmi_cp = NULL;
	acpi_nmi_ccnt = 0;
	return (PSM_FAILURE);
}

/*
 * Handle default configuration. Fill in reqd global variables & tables
 * Fill all details as MP table does not give any more info
 */
static int
apic_handle_defconf()
{
	uint_t	lid;

	/*LINTED: pointer cast may result in improper alignment */
	apicioadr[0] = (int32_t *)psm_map_phys(APIC_IO_ADDR,
	    APIC_IO_MEMLEN, PROT_READ | PROT_WRITE);
	/*LINTED: pointer cast may result in improper alignment */
	apicadr = (uint32_t *)psm_map_phys(APIC_LOCAL_ADDR,
	    APIC_LOCAL_MEMLEN, PROT_READ | PROT_WRITE);
	apic_cpus = (apic_cpus_info_t *)
	    kmem_zalloc(sizeof (*apic_cpus) * 2, KM_NOSLEEP);
	if ((!apicadr) || (!apicioadr[0]) || (!apic_cpus))
		goto apic_handle_defconf_fail;
	CPUSET_ONLY(apic_cpumask, 0);
	CPUSET_ADD(apic_cpumask, 1);
	apic_nproc = 2;
	lid = apicadr[APIC_LID_REG];
	apic_cpus[0].aci_local_id = (uchar_t)(lid >> APIC_ID_BIT_OFFSET);
	/*
	 * According to the PC+MP spec 1.1, the local ids
	 * for the default configuration have to be 0 or 1
	 */
	if (apic_cpus[0].aci_local_id == 1)
		apic_cpus[1].aci_local_id = 0;
	else if (apic_cpus[0].aci_local_id == 0)
		apic_cpus[1].aci_local_id = 1;
	else
		goto apic_handle_defconf_fail;

	apic_io_id[0] = 2;
	apic_io_max = 1;
	if (apic_defconf >= 5) {
		apic_cpus[0].aci_local_ver = APIC_INTEGRATED_VERS;
		apic_cpus[1].aci_local_ver = APIC_INTEGRATED_VERS;
		apic_io_ver[0] = APIC_INTEGRATED_VERS;
	} else {
		apic_cpus[0].aci_local_ver = 0;		/* 82489 DX */
		apic_cpus[1].aci_local_ver = 0;
		apic_io_ver[0] = 0;
	}
	if (apic_defconf == 2 || apic_defconf == 3 || apic_defconf == 6)
		eisa_level_intr_mask = (inb(EISA_LEVEL_CNTL + 1) << 8) |
		    inb(EISA_LEVEL_CNTL) | ((uint_t)INT32_MAX + 1);
	return (PSM_SUCCESS);

apic_handle_defconf_fail:
	if (apic_cpus)
		kmem_free(apic_cpus, sizeof (*apic_cpus) * 2);
	if (apicadr)
		psm_unmap_phys((caddr_t)apicadr, APIC_LOCAL_MEMLEN);
	if (apicioadr[0])
		psm_unmap_phys((caddr_t)apicioadr[0], APIC_IO_MEMLEN);
	return (PSM_FAILURE);
}

/* Parse the entries in MP configuration table and collect info that we need */
static int
apic_parse_mpct(caddr_t mpct, int bypass_cpus_and_ioapics)
{
	struct	apic_procent	*procp;
	struct	apic_bus	*busp;
	struct	apic_io_entry	*ioapicp;
	struct	apic_io_intr	*intrp;
	volatile int32_t	*ioapic;
	uint_t	lid;
	int	id;
	uchar_t	hid;

	/*LINTED: pointer cast may result in improper alignment */
	procp = (struct apic_procent *)(mpct + sizeof (struct apic_mp_cnf_hdr));

	/* No need to count cpu entries if we won't use them */
	if (!bypass_cpus_and_ioapics) {

		/* Find max # of CPUS and allocate structure accordingly */
		apic_nproc = 0;
		CPUSET_ZERO(apic_cpumask);
		while (procp->proc_entry == APIC_CPU_ENTRY) {
			if (procp->proc_cpuflags & CPUFLAGS_EN) {
				if (apic_nproc < NCPU)
					CPUSET_ADD(apic_cpumask, apic_nproc);
				apic_nproc++;
			}
			procp++;
		}
		if (apic_nproc > NCPU)
			cmn_err(CE_WARN, "pcplusmp: exceeded "
			    "maximum no. of CPUs (= %d)", NCPU);
		if (!apic_nproc || !(apic_cpus = (apic_cpus_info_t *)
		    kmem_zalloc(sizeof (*apic_cpus)*apic_nproc, KM_NOSLEEP)))
			return (PSM_FAILURE);
	}

	/*LINTED: pointer cast may result in improper alignment */
	procp = (struct apic_procent *)(mpct + sizeof (struct apic_mp_cnf_hdr));

	/*
	 * start with index 1 as 0 needs to be filled in with Boot CPU, but
	 * if we're bypassing this information, it has already been filled
	 * in by acpi_probe(), so don't overwrite it.
	 */
	if (!bypass_cpus_and_ioapics)
		apic_nproc = 1;

	while (procp->proc_entry == APIC_CPU_ENTRY) {
		/* check whether the cpu exists or not */
		if (!bypass_cpus_and_ioapics &&
		    procp->proc_cpuflags & CPUFLAGS_EN) {
			if (procp->proc_cpuflags & CPUFLAGS_BP) { /* Boot CPU */
				lid = apicadr[APIC_LID_REG];
				apic_cpus[0].aci_local_id = procp->proc_apicid;
				if (apic_cpus[0].aci_local_id !=
				    (uchar_t)(lid >> APIC_ID_BIT_OFFSET)) {
					return (PSM_FAILURE);
				}
				apic_cpus[0].aci_local_ver =
				    procp->proc_version;
			} else {

				apic_cpus[apic_nproc].aci_local_id =
				    procp->proc_apicid;
				apic_cpus[apic_nproc].aci_local_ver =
				    procp->proc_version;
				apic_nproc++;

			}
		}
		procp++;
	}

	/*
	 * Save start of bus entries for later use.
	 * Get EISA level cntrl if EISA bus is present.
	 * Also get the PCI bus id for the single PCI bus case
	 */
	apic_busp = busp = (struct apic_bus *)procp;
	while (busp->bus_entry == APIC_BUS_ENTRY) {
		lid = apic_find_bus_type((char *)&busp->bus_str1);
		if (lid == BUS_EISA) {
			eisa_level_intr_mask = (inb(EISA_LEVEL_CNTL + 1) << 8) |
			    inb(EISA_LEVEL_CNTL) | ((uint_t)INT32_MAX + 1);
		} else if (lid == BUS_PCI) {
			/*
			 * apic_single_pci_busid will be used only if
			 * apic_pci_bus_total is equal to 1
			 */
			apic_pci_bus_total++;
			apic_single_pci_busid = busp->bus_id;
		}
		busp++;
	}

	ioapicp = (struct apic_io_entry *)busp;

	if (!bypass_cpus_and_ioapics)
		apic_io_max = 0;
	do {
		if (!bypass_cpus_and_ioapics && apic_io_max < MAX_IO_APIC) {
			if (ioapicp->io_flags & IOAPIC_FLAGS_EN) {
				apic_io_id[apic_io_max] = ioapicp->io_apicid;
				apic_io_ver[apic_io_max] = ioapicp->io_version;
		/*LINTED: pointer cast may result in improper alignment */
				apicioadr[apic_io_max] =
				    (int32_t *)psm_map_phys(
				    (uint32_t)ioapicp->io_apic_addr,
				    APIC_IO_MEMLEN, PROT_READ | PROT_WRITE);

				if (!apicioadr[apic_io_max])
					return (PSM_FAILURE);

				ioapic = apicioadr[apic_io_max];
				ioapic[APIC_IO_REG] = APIC_ID_CMD;
				id = ioapic[APIC_IO_DATA];
				hid = (uchar_t)(((uint_t)id) >> 24);

				if (hid != apic_io_id[apic_io_max]) {
					if (apic_io_id[apic_io_max] == 0)
						apic_io_id[apic_io_max] = hid;
					else {
						/*
						 * set ioapic id to whatever
						 * reported by MPS
						 *
						 * may not need to set index
						 * again ???
						 * take it out and try
						 */

						id = ((int32_t)
						    apic_io_id[apic_io_max]) <<
						    24;

						ioapic[APIC_IO_REG] =
						    APIC_ID_CMD;

						ioapic[APIC_IO_DATA] = id;

					}
				}
				apic_io_max++;
			}
		}
		ioapicp++;
	} while (ioapicp->io_entry == APIC_IO_ENTRY);

	apic_io_intrp = (struct apic_io_intr *)ioapicp;

	intrp = apic_io_intrp;
	while (intrp->intr_entry == APIC_IO_INTR_ENTRY) {
		if ((intrp->intr_irq > APIC_MAX_ISA_IRQ) ||
		    (apic_find_bus(intrp->intr_busid) == BUS_PCI)) {
			apic_irq_translate = 1;
			break;
		}
		intrp++;
	}

	return (PSM_SUCCESS);
}

boolean_t
apic_cpu_in_range(int cpu)
{
	return ((cpu & ~IRQ_USER_BOUND) < apic_nproc);
}

static struct apic_mpfps_hdr *
apic_find_fps_sig(caddr_t cptr, int len)
{
	int	i;

	/* Look for the pattern "_MP_" */
	for (i = 0; i < len; i += 16) {
		if ((*(cptr+i) == '_') &&
		    (*(cptr+i+1) == 'M') &&
		    (*(cptr+i+2) == 'P') &&
		    (*(cptr+i+3) == '_'))
		    /*LINTED: pointer cast may result in improper alignment */
			return ((struct apic_mpfps_hdr *)(cptr + i));
	}
	return (NULL);
}

static int
apic_checksum(caddr_t bptr, int len)
{
	int	i;
	uchar_t	cksum;

	cksum = 0;
	for (i = 0; i < len; i++)
		cksum += *bptr++;
	return ((int)cksum);
}


/*
 * Initialise vector->ipl and ipl->pri arrays. level_intr and irqtable
 * are also set to NULL. vector->irq is set to a value which cannot map
 * to a real irq to show that it is free.
 */
void
apic_init()
{
	int	i;
	int	*iptr;

	int	j = 1;
	apic_ipltopri[0] = APIC_VECTOR_PER_IPL; /* leave 0 for idle */
	for (i = 0; i < (APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL); i++) {
		if ((i < ((APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL) - 1)) &&
		    (apic_vectortoipl[i + 1] == apic_vectortoipl[i]))
			/* get to highest vector at the same ipl */
			continue;
		for (; j <= apic_vectortoipl[i]; j++) {
			apic_ipltopri[j] = (i << APIC_IPL_SHIFT) +
			    APIC_BASE_VECT;
		}
	}
	for (; j < MAXIPL + 1; j++)
		/* fill up any empty ipltopri slots */
		apic_ipltopri[j] = (i << APIC_IPL_SHIFT) + APIC_BASE_VECT;

	/* cpu 0 is always up */
	apic_cpus[0].aci_status = APIC_CPU_ONLINE | APIC_CPU_INTR_ENABLE;

	iptr = (int *)&apic_irq_table[0];
	for (i = 0; i <= APIC_MAX_VECTOR; i++) {
		apic_level_intr[i] = 0;
		*iptr++ = NULL;
		apic_vector_to_irq[i] = APIC_RESV_IRQ;

		/* These *must* be initted to B_TRUE! */
		apic_reprogram_info[i].done = B_TRUE;
		apic_reprogram_info[i].irqp = NULL;
		apic_reprogram_info[i].tries = 0;
		apic_reprogram_info[i].bindcpu = 0;
	}

	/*
	 * Allocate a dummy irq table entry for the reserved entry.
	 * This takes care of the race between removing an irq and
	 * clock detecting a CPU in that irq during interrupt load
	 * sampling.
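	 * With a permanent dummy entry present, that sampling code can
	 * always dereference apic_irq_table[APIC_RESV_IRQ] safely.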
	 */
	apic_irq_table[APIC_RESV_IRQ] =
	    kmem_zalloc(sizeof (apic_irq_t), KM_NOSLEEP);

	mutex_init(&airq_mutex, NULL, MUTEX_DEFAULT, NULL);
#if defined(__amd64)
	/*
	 * Make cpu-specific interrupt info point to cr8pri vector
	 */
	for (i = 0; i <= MAXIPL; i++)
		apic_cr8pri[i] = apic_ipltopri[i] >> APIC_IPL_SHIFT;
	CPU->cpu_pri_data = apic_cr8pri;
	intpri_use_cr8 = 1;
#endif	/* __amd64 */
}

/*
 * handler for APIC Error interrupt. Just print a warning and continue
 */
static int
apic_error_intr()
{
	uint_t	error0, error1, error;
	uint_t	i;

	/*
	 * We need to write before read as per 7.4.17 of system prog manual.
	 * We do both, and OR the results, to be safe
	 */
	error0 = apicadr[APIC_ERROR_STATUS];
	apicadr[APIC_ERROR_STATUS] = 0;
	error1 = apicadr[APIC_ERROR_STATUS];
	error = error0 | error1;

	/*
	 * Clear the APIC error status (do this on all cpus that enter here)
	 * (two writes are required due to the semantics of accessing the
	 * error status register.)
	 */
	apicadr[APIC_ERROR_STATUS] = 0;
	apicadr[APIC_ERROR_STATUS] = 0;

	/*
	 * Prevent more than 1 CPU from handling error interrupt causing
	 * double printing (interleave of characters from multiple
	 * CPU's when using prom_printf)
	 */
	if (lock_try(&apic_error_lock) == 0)
		return (error ? DDI_INTR_CLAIMED : DDI_INTR_UNCLAIMED);
	if (error) {
#if	DEBUG
		if (apic_debug)
			debug_enter("pcplusmp: APIC Error interrupt received");
#endif /* DEBUG */
		if (apic_panic_on_apic_error)
			cmn_err(CE_PANIC,
			    "APIC Error interrupt on CPU %d. Status = %x\n",
			    psm_get_cpu_id(), error);
		else {
			if ((error & ~APIC_CS_ERRORS) == 0) {
				/* cksum error only */
				apic_error |= APIC_ERR_APIC_ERROR;
				apic_apic_error |= error;
				apic_num_apic_errors++;
				apic_num_cksum_errors++;
			} else {
				/*
				 * prom_printf is the best shot we have of
				 * something which is problem free from
				 * high level/NMI type of interrupts
				 */
				prom_printf("APIC Error interrupt on CPU %d. "
				    "Status 0 = %x, Status 1 = %x\n",
				    psm_get_cpu_id(), error0, error1);
				apic_error |= APIC_ERR_APIC_ERROR;
				apic_apic_error |= error;
				apic_num_apic_errors++;
				for (i = 0; i < apic_error_display_delay;
				    i++) {
					tenmicrosec();
				}
				/*
				 * provide more delay next time limited to
				 * roughly 1 clock tick time
				 */
				if (apic_error_display_delay < 500)
					apic_error_display_delay *= 2;
			}
		}
		lock_clear(&apic_error_lock);
		return (DDI_INTR_CLAIMED);
	} else {
		lock_clear(&apic_error_lock);
		return (DDI_INTR_UNCLAIMED);
	}
	/* NOTREACHED */
}

/*
 * Turn off the mask bit in the performance counter Local Vector Table entry.
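 * kcpc calls this through the kcpc_hw_enable_cpc_intr hook (set up in
 * apic_init_intr() below) when performance counter interrupts are enabled.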
 */
static void
apic_cpcovf_mask_clear(void)
{
	apicadr[APIC_PCINT_VECT] &= ~APIC_LVT_MASK;
}

static void
apic_init_intr()
{
	processorid_t	cpun = psm_get_cpu_id();

#if defined(__amd64)
	setcr8((ulong_t)(APIC_MASK_ALL >> APIC_IPL_SHIFT));
#else
	apicadr[APIC_TASK_REG] = APIC_MASK_ALL;
#endif

	if (apic_flat_model)
		apicadr[APIC_FORMAT_REG] = APIC_FLAT_MODEL;
	else
		apicadr[APIC_FORMAT_REG] = APIC_CLUSTER_MODEL;
	apicadr[APIC_DEST_REG] = AV_HIGH_ORDER >> cpun;

	/* need to enable APIC before unmasking NMI */
	apicadr[APIC_SPUR_INT_REG] = AV_UNIT_ENABLE | APIC_SPUR_INTR;

	apicadr[APIC_LOCAL_TIMER] = AV_MASK;
	apicadr[APIC_INT_VECT0] = AV_MASK;	/* local intr reg 0 */
	apicadr[APIC_INT_VECT1] = AV_NMI;	/* enable NMI */

	if (apic_cpus[cpun].aci_local_ver < APIC_INTEGRATED_VERS)
		return;

	/* Enable performance counter overflow interrupt */

	if ((x86_feature & X86_MSR) != X86_MSR)
		apic_enable_cpcovf_intr = 0;
	if (apic_enable_cpcovf_intr) {
		if (apic_cpcovf_vect == 0) {
			int ipl = APIC_PCINT_IPL;
			int irq = apic_get_ipivect(ipl, -1);

			ASSERT(irq != -1);
			apic_cpcovf_vect = apic_irq_table[irq]->airq_vector;
			ASSERT(apic_cpcovf_vect);
			(void) add_avintr(NULL, ipl,
			    (avfunc)kcpc_hw_overflow_intr,
			    "apic pcint", irq, NULL, NULL, NULL, NULL);
			kcpc_hw_overflow_intr_installed = 1;
			kcpc_hw_enable_cpc_intr = apic_cpcovf_mask_clear;
		}
		apicadr[APIC_PCINT_VECT] = apic_cpcovf_vect;
	}

	/* Enable error interrupt */

	if (apic_enable_error_intr) {
		if (apic_errvect == 0) {
			int ipl = 0xf;	/* get highest priority intr */
			int irq = apic_get_ipivect(ipl, -1);

			ASSERT(irq != -1);
			apic_errvect = apic_irq_table[irq]->airq_vector;
			ASSERT(apic_errvect);
			/*
			 * Not PSMI compliant, but we are going to merge
			 * with ON anyway
			 */
			(void) add_avintr((void *)NULL, ipl,
			    (avfunc)apic_error_intr, "apic error intr",
			    irq, NULL, NULL, NULL, NULL);
		}
		apicadr[APIC_ERR_VECT] = apic_errvect;
		apicadr[APIC_ERROR_STATUS] = 0;
		apicadr[APIC_ERROR_STATUS] = 0;
	}
}

static void
apic_disable_local_apic()
{
	apicadr[APIC_TASK_REG] = APIC_MASK_ALL;
	apicadr[APIC_LOCAL_TIMER] = AV_MASK;
	apicadr[APIC_INT_VECT0] = AV_MASK;	/* local intr reg 0 */
	apicadr[APIC_INT_VECT1] = AV_MASK;	/* disable NMI */
	apicadr[APIC_ERR_VECT] = AV_MASK;	/* and error interrupt */
	apicadr[APIC_PCINT_VECT] = AV_MASK;	/* and perf counter intr */
	apicadr[APIC_SPUR_INT_REG] = APIC_SPUR_INTR;
}

static void
apic_picinit(void)
{
	int i, j, iflag;
	uint_t isr;
	volatile int32_t *ioapic;
	apic_irq_t	*irqptr;
	struct intrspec ispec;

	/*
	 * On UniSys Model 6520, the BIOS leaves vector 0x20 isr
	 * bit on without clearing it with EOI.  Since softint
	 * uses vector 0x20 to interrupt itself, softint will
	 * not work on this machine.  In order to fix this problem
	 * a check is made to verify all the isr bits are clear.
	 * If not, EOIs are issued to clear the bits.
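	 * (The local APIC has eight 32-bit ISR registers spaced 16 bytes
	 * apart, which is why the loop below steps the int32_t-based
	 * apicadr index by i * 4.)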
	 */
	for (i = 7; i >= 1; i--) {
		if ((isr = apicadr[APIC_ISR_REG + (i * 4)]) != 0)
			for (j = 0; ((j < 32) && (isr != 0)); j++)
				if (isr & (1 << j)) {
					apicadr[APIC_EOI_REG] = 0;
					isr &= ~(1 << j);
					apic_error |= APIC_ERR_BOOT_EOI;
				}
	}

	/* set a flag so we know we have run apic_picinit() */
	apic_flag = 1;
	LOCK_INIT_CLEAR(&apic_gethrtime_lock);
	LOCK_INIT_CLEAR(&apic_ioapic_lock);
	LOCK_INIT_CLEAR(&apic_revector_lock);
	LOCK_INIT_CLEAR(&apic_defer_reprogram_lock);
	LOCK_INIT_CLEAR(&apic_error_lock);

	picsetup();	 /* initialise the 8259 */

	/* add nmi handler - least priority nmi handler */
	LOCK_INIT_CLEAR(&apic_nmi_lock);

	if (!psm_add_nmintr(0, (avfunc) apic_nmi_intr,
	    "pcplusmp NMI handler", (caddr_t)NULL))
		cmn_err(CE_WARN, "pcplusmp: Unable to add nmi handler");

	apic_init_intr();

	/* enable apic mode if imcr present */
	if (apic_imcrp) {
		outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT);
		outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_APIC);
	}

	/* mask interrupt vectors */
	for (j = 0; j < apic_io_max; j++) {
		int intin_max;
		ioapic = apicioadr[j];
		ioapic[APIC_IO_REG] = APIC_VERS_CMD;
		/* Bits 23-16 define the maximum redirection entries */
		intin_max = (ioapic[APIC_IO_DATA] >> 16) & 0xff;
		for (i = 0; i < intin_max; i++) {
			ioapic[APIC_IO_REG] = APIC_RDT_CMD + 2 * i;
			ioapic[APIC_IO_DATA] = AV_MASK;
		}
	}

	/*
	 * Hack alert: deal with ACPI SCI interrupt chicken/egg here
	 */
	if (apic_sci_vect > 0) {
		/*
		 * acpica has already done add_avintr(); we just need
		 * to finish the job by mimicking translate_irq()
		 *
		 * Fake up an intrspec and setup the tables
		 */
		ispec.intrspec_vec = apic_sci_vect;
		ispec.intrspec_pri = SCI_IPL;

		if (apic_setup_irq_table(NULL, apic_sci_vect, NULL,
		    &ispec, &apic_sci_flags, DDI_INTR_TYPE_FIXED) < 0) {
			cmn_err(CE_WARN, "!apic: SCI setup failed");
			return;
		}
		irqptr = apic_irq_table[apic_sci_vect];

		iflag = intr_clear();
		lock_set(&apic_ioapic_lock);

		/* Program I/O APIC */
		(void) apic_setup_io_intr(irqptr, apic_sci_vect, B_FALSE);

		lock_clear(&apic_ioapic_lock);
		intr_restore(iflag);

		irqptr->airq_share++;
	}
}


static void
apic_cpu_start(processorid_t cpun, caddr_t rm_code)
{
	int		loop_count;
	uint32_t	vector;
	uint_t		cpu_id, iflag;

	cpu_id = apic_cpus[cpun].aci_local_id;

	apic_cmos_ssb_set = 1;

	/*
	 * Interrupts on BSP cpu will be disabled during these startup
	 * steps in order to avoid unwanted side effects from
	 * executing interrupt handlers on a problematic BIOS.
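	 *
	 * The IPI sequence below follows the Intel MP startup protocol:
	 * an INIT IPI (asserted, then deasserted) followed, for integrated
	 * local APICs, by two STARTUP IPIs whose vector field carries the
	 * page number of the real-mode startup code.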
1864 */ 1865 1866 iflag = intr_clear(); 1867 outb(CMOS_ADDR, SSB); 1868 outb(CMOS_DATA, BIOS_SHUTDOWN); 1869 1870 while (get_apic_cmd1() & AV_PENDING) 1871 apic_ret(); 1872 1873 /* for integrated - make sure there is one INIT IPI in buffer */ 1874 /* for external - it will wake up the cpu */ 1875 apicadr[APIC_INT_CMD2] = cpu_id << APIC_ICR_ID_BIT_OFFSET; 1876 apicadr[APIC_INT_CMD1] = AV_ASSERT | AV_RESET; 1877 1878 /* If only 1 CPU is installed, PENDING bit will not go low */ 1879 for (loop_count = 0x1000; loop_count; loop_count--) 1880 if (get_apic_cmd1() & AV_PENDING) 1881 apic_ret(); 1882 else 1883 break; 1884 1885 apicadr[APIC_INT_CMD2] = cpu_id << APIC_ICR_ID_BIT_OFFSET; 1886 apicadr[APIC_INT_CMD1] = AV_DEASSERT | AV_RESET; 1887 1888 drv_usecwait(20000); /* 20 milli sec */ 1889 1890 if (apic_cpus[cpun].aci_local_ver >= APIC_INTEGRATED_VERS) { 1891 /* integrated apic */ 1892 1893 rm_code = (caddr_t)(uintptr_t)rm_platter_pa; 1894 vector = (rm_platter_pa >> MMU_PAGESHIFT) & 1895 (APIC_VECTOR_MASK | APIC_IPL_MASK); 1896 1897 /* to offset the INIT IPI queue up in the buffer */ 1898 apicadr[APIC_INT_CMD2] = cpu_id << APIC_ICR_ID_BIT_OFFSET; 1899 apicadr[APIC_INT_CMD1] = vector | AV_STARTUP; 1900 1901 drv_usecwait(200); /* 200 micro sec */ 1902 1903 apicadr[APIC_INT_CMD2] = cpu_id << APIC_ICR_ID_BIT_OFFSET; 1904 apicadr[APIC_INT_CMD1] = vector | AV_STARTUP; 1905 1906 drv_usecwait(200); /* 200 micro sec */ 1907 } 1908 intr_restore(iflag); 1909 } 1910 1911 1912 #ifdef DEBUG 1913 int apic_break_on_cpu = 9; 1914 int apic_stretch_interrupts = 0; 1915 int apic_stretch_ISR = 1 << 3; /* IPL of 3 matches nothing now */ 1916 1917 void 1918 apic_break() 1919 { 1920 } 1921 #endif /* DEBUG */ 1922 1923 /* 1924 * platform_intr_enter 1925 * 1926 * Called at the beginning of the interrupt service routine to 1927 * mask all levels equal to and below the interrupt priority 1928 * of the interrupting vector. An EOI should be given to 1929 * the interrupt controller to enable other HW interrupts. 1930 * 1931 * Return -1 for spurious interrupts 1932 * 1933 */ 1934 /*ARGSUSED*/ 1935 static int 1936 apic_intr_enter(int ipl, int *vectorp) 1937 { 1938 uchar_t vector; 1939 int nipl; 1940 int irq, iflag; 1941 apic_cpus_info_t *cpu_infop; 1942 1943 /* 1944 * The real vector programmed in the APIC is *vectorp + 0x20; 1945 * the cmnint code subtracts 0x20 before pushing it. 1946 * Hence APIC_BASE_VECT is 0x20.
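 * As a worked (hypothetical) example, assuming the usual 16-vector priority classes: a device programmed at hardware vector 0x60 arrives here with *vectorp == 0x40, and apic_vectortoipl[0x40 >> APIC_IPL_SHIFT] below yields its IPL.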
1947 */ 1948 1949 vector = (uchar_t)*vectorp; 1950 1951 /* if interrupted by the clock, increment apic_nsec_since_boot */ 1952 if (vector == apic_clkvect) { 1953 if (!apic_oneshot) { 1954 /* NOTE: this is not MT aware */ 1955 apic_hrtime_stamp++; 1956 apic_nsec_since_boot += apic_nsec_per_intr; 1957 apic_hrtime_stamp++; 1958 last_count_read = apic_hertz_count; 1959 apic_redistribute_compute(); 1960 } 1961 1962 /* We will avoid all the bookkeeping overhead for clock */ 1963 nipl = apic_vectortoipl[vector >> APIC_IPL_SHIFT]; 1964 #if defined(__amd64) 1965 setcr8((ulong_t)apic_cr8pri[nipl]); 1966 #else 1967 apicadr[APIC_TASK_REG] = apic_ipltopri[nipl]; 1968 #endif 1969 *vectorp = apic_vector_to_irq[vector + APIC_BASE_VECT]; 1970 apicadr[APIC_EOI_REG] = 0; 1971 return (nipl); 1972 } 1973 1974 cpu_infop = &apic_cpus[psm_get_cpu_id()]; 1975 1976 if (vector == (APIC_SPUR_INTR - APIC_BASE_VECT)) { 1977 cpu_infop->aci_spur_cnt++; 1978 return (APIC_INT_SPURIOUS); 1979 } 1980 1981 /* Check if the vector we got is really what we need */ 1982 if (apic_revector_pending) { 1983 /* 1984 * Disable interrupts for the duration of 1985 * the vector translation to prevent a self-race for 1986 * the apic_revector_lock. This cannot be done 1987 * in apic_xlate_vector because it is recursive and 1988 * we want the vector translation to be atomic with 1989 * respect to other (higher-priority) interrupts. 1990 */ 1991 iflag = intr_clear(); 1992 vector = apic_xlate_vector(vector + APIC_BASE_VECT) - 1993 APIC_BASE_VECT; 1994 intr_restore(iflag); 1995 } 1996 1997 nipl = apic_vectortoipl[vector >> APIC_IPL_SHIFT]; 1998 *vectorp = irq = apic_vector_to_irq[vector + APIC_BASE_VECT]; 1999 2000 #if defined(__amd64) 2001 setcr8((ulong_t)apic_cr8pri[nipl]); 2002 #else 2003 apicadr[APIC_TASK_REG] = apic_ipltopri[nipl]; 2004 #endif 2005 2006 cpu_infop->aci_current[nipl] = (uchar_t)irq; 2007 cpu_infop->aci_curipl = (uchar_t)nipl; 2008 cpu_infop->aci_ISR_in_progress |= 1 << nipl; 2009 2010 /* 2011 * apic_level_intr could have been assimilated into the irq struct, 2012 * but having it as a character array is more efficient in terms of 2013 * cache usage. So, we leave it as is.
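 * Note that for level-triggered sources apic_level_intr[irq] is nonzero, so the EOI below is skipped here and is issued later, in apic_intr_exit(), once the handler has actually run.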
2014 */ 2015 if (!apic_level_intr[irq]) 2016 apicadr[APIC_EOI_REG] = 0; 2017 2018 #ifdef DEBUG 2019 APIC_DEBUG_BUF_PUT(vector); 2020 APIC_DEBUG_BUF_PUT(irq); 2021 APIC_DEBUG_BUF_PUT(nipl); 2022 APIC_DEBUG_BUF_PUT(psm_get_cpu_id()); 2023 if ((apic_stretch_interrupts) && (apic_stretch_ISR & (1 << nipl))) 2024 drv_usecwait(apic_stretch_interrupts); 2025 2026 if (apic_break_on_cpu == psm_get_cpu_id()) 2027 apic_break(); 2028 #endif /* DEBUG */ 2029 return (nipl); 2030 } 2031 2032 static void 2033 apic_intr_exit(int prev_ipl, int irq) 2034 { 2035 apic_cpus_info_t *cpu_infop; 2036 2037 #if defined(__amd64) 2038 setcr8((ulong_t)apic_cr8pri[prev_ipl]); 2039 #else 2040 apicadr[APIC_TASK_REG] = apic_ipltopri[prev_ipl]; 2041 #endif 2042 2043 cpu_infop = &apic_cpus[psm_get_cpu_id()]; 2044 if (apic_level_intr[irq]) 2045 apicadr[APIC_EOI_REG] = 0; 2046 2047 cpu_infop->aci_curipl = (uchar_t)prev_ipl; 2048 /* ISR above current pri could not be in progress */ 2049 cpu_infop->aci_ISR_in_progress &= (2 << prev_ipl) - 1; 2050 } 2051 2052 /* 2053 * Mask all interrupts below or equal to the given IPL 2054 */ 2055 static void 2056 apic_setspl(int ipl) 2057 { 2058 2059 #if defined(__amd64) 2060 setcr8((ulong_t)apic_cr8pri[ipl]); 2061 #else 2062 apicadr[APIC_TASK_REG] = apic_ipltopri[ipl]; 2063 #endif 2064 2065 /* interrupts at ipl above this cannot be in progress */ 2066 apic_cpus[psm_get_cpu_id()].aci_ISR_in_progress &= (2 << ipl) - 1; 2067 /* 2068 * this is a patch fix for the ALR QSMP P5 machine, so that interrupts 2069 * have enough time to come in before the priority is raised again 2070 * during the idle() loop. 2071 */ 2072 if (apic_setspl_delay) 2073 (void) get_apic_pri(); 2074 } 2075 2076 /* 2077 * trigger a software interrupt at the given IPL 2078 */ 2079 static void 2080 apic_set_softintr(int ipl) 2081 { 2082 int vector; 2083 uint_t flag; 2084 2085 vector = apic_resv_vector[ipl]; 2086 2087 flag = intr_clear(); 2088 2089 while (get_apic_cmd1() & AV_PENDING) 2090 apic_ret(); 2091 2092 /* generate interrupt at vector on itself only */ 2093 apicadr[APIC_INT_CMD1] = AV_SH_SELF | vector; 2094 2095 intr_restore(flag); 2096 } 2097 2098 /* 2099 * generates an interprocessor interrupt to another CPU 2100 */ 2101 static void 2102 apic_send_ipi(int cpun, int ipl) 2103 { 2104 int vector; 2105 uint_t flag; 2106 2107 vector = apic_resv_vector[ipl]; 2108 2109 flag = intr_clear(); 2110 2111 while (get_apic_cmd1() & AV_PENDING) 2112 apic_ret(); 2113 2114 apicadr[APIC_INT_CMD2] = 2115 apic_cpus[cpun].aci_local_id << APIC_ICR_ID_BIT_OFFSET; 2116 apicadr[APIC_INT_CMD1] = vector; 2117 2118 intr_restore(flag); 2119 } 2120 2121 2122 /*ARGSUSED*/ 2123 static void 2124 apic_set_idlecpu(processorid_t cpun) 2125 { 2126 } 2127 2128 /*ARGSUSED*/ 2129 static void 2130 apic_unset_idlecpu(processorid_t cpun) 2131 { 2132 } 2133 2134 2135 static void 2136 apic_ret() 2137 { 2138 } 2139 2140 static int 2141 get_apic_cmd1() 2142 { 2143 return (apicadr[APIC_INT_CMD1]); 2144 } 2145 2146 static int 2147 get_apic_pri() 2148 { 2149 #if defined(__amd64) 2150 return ((int)getcr8()); 2151 #else 2152 return (apicadr[APIC_TASK_REG]); 2153 #endif 2154 } 2155 2156 /* 2157 * If apic_coarse_time == 1, then apic_gettime() is used instead of 2158 * apic_gethrtime(). This is used for performance instead of accuracy. 
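 * apic_hrtime_stamp acts like a sequence lock: the clock handler bumps it to an odd value before updating apic_nsec_since_boot and to an even value afterwards, so the readers below spin while the stamp is odd and retry if it changed across their read.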
2159 */ 2160 2161 static hrtime_t 2162 apic_gettime() 2163 { 2164 int old_hrtime_stamp; 2165 hrtime_t temp; 2166 2167 /* 2168 * In one-shot mode, we do not keep time, so if anyone 2169 * calls psm_gettime() directly, we vector over to 2170 * gethrtime(). 2171 * one-shot mode MUST NOT be enabled if this psm is the source of 2172 * hrtime. 2173 */ 2174 2175 if (apic_oneshot) 2176 return (gethrtime()); 2177 2178 2179 gettime_again: 2180 while ((old_hrtime_stamp = apic_hrtime_stamp) & 1) 2181 apic_ret(); 2182 2183 temp = apic_nsec_since_boot; 2184 2185 if (apic_hrtime_stamp != old_hrtime_stamp) { /* got an interrupt */ 2186 goto gettime_again; 2187 } 2188 return (temp); 2189 } 2190 2191 /* 2192 * Here we return the number of nanoseconds since booting. Note every 2193 * clock interrupt increments apic_nsec_since_boot by the appropriate 2194 * amount. 2195 */ 2196 static hrtime_t 2197 apic_gethrtime() 2198 { 2199 int curr_timeval, countval, elapsed_ticks, oflags; 2200 int old_hrtime_stamp, status; 2201 hrtime_t temp; 2202 uchar_t cpun; 2203 2204 2205 /* 2206 * In one-shot mode, we do not keep time, so if anyone 2207 * calls psm_gethrtime() directly, we vector over to 2208 * gethrtime(). 2209 * one-shot mode MUST NOT be enabled if this psm is the source of 2210 * hrtime. 2211 */ 2212 2213 if (apic_oneshot) 2214 return (gethrtime()); 2215 2216 oflags = intr_clear(); /* prevent migration */ 2217 2218 cpun = (uchar_t)((uint_t)apicadr[APIC_LID_REG] >> APIC_ID_BIT_OFFSET); 2219 2220 lock_set(&apic_gethrtime_lock); 2221 2222 gethrtime_again: 2223 while ((old_hrtime_stamp = apic_hrtime_stamp) & 1) 2224 apic_ret(); 2225 2226 /* 2227 * Check to see which CPU we are on. Note the time is kept on 2228 * the local APIC of CPU 0. If on CPU 0, simply read the current 2229 * counter. If on another CPU, issue a remote read command to CPU 0. 2230 */ 2231 if (cpun == apic_cpus[0].aci_local_id) { 2232 countval = apicadr[APIC_CURR_COUNT]; 2233 } else { 2234 while (get_apic_cmd1() & AV_PENDING) 2235 apic_ret(); 2236 2237 apicadr[APIC_INT_CMD2] = 2238 apic_cpus[0].aci_local_id << APIC_ICR_ID_BIT_OFFSET; 2239 apicadr[APIC_INT_CMD1] = APIC_CURR_ADD|AV_REMOTE; 2240 2241 while ((status = get_apic_cmd1()) & AV_READ_PENDING) 2242 apic_ret(); 2243 2244 if (status & AV_REMOTE_STATUS) /* 1 = valid */ 2245 countval = apicadr[APIC_REMOTE_READ]; 2246 else { /* 0 = invalid */ 2247 apic_remote_hrterr++; 2248 /* 2249 * return last hrtime right now, will need more 2250 * testing if change to retry 2251 */ 2252 temp = apic_last_hrtime; 2253 2254 lock_clear(&apic_gethrtime_lock); 2255 2256 intr_restore(oflags); 2257 2258 return (temp); 2259 } 2260 } 2261 if (countval > last_count_read) 2262 countval = 0; 2263 else 2264 last_count_read = countval; 2265 2266 elapsed_ticks = apic_hertz_count - countval; 2267 2268 curr_timeval = APIC_TICKS_TO_NSECS(elapsed_ticks); 2269 temp = apic_nsec_since_boot + curr_timeval; 2270 2271 if (apic_hrtime_stamp != old_hrtime_stamp) { /* got an interrupt */ 2272 /* we might have clobbered last_count_read. 
Restore it */ 2273 last_count_read = apic_hertz_count; 2274 goto gethrtime_again; 2275 } 2276 2277 if (temp < apic_last_hrtime) { 2278 /* return last hrtime if error occurs */ 2279 apic_hrtime_error++; 2280 temp = apic_last_hrtime; 2281 } 2282 else 2283 apic_last_hrtime = temp; 2284 2285 lock_clear(&apic_gethrtime_lock); 2286 intr_restore(oflags); 2287 2288 return (temp); 2289 } 2290 2291 /* apic NMI handler */ 2292 /*ARGSUSED*/ 2293 static void 2294 apic_nmi_intr(caddr_t arg) 2295 { 2296 if (apic_shutdown_processors) { 2297 apic_disable_local_apic(); 2298 return; 2299 } 2300 2301 if (lock_try(&apic_nmi_lock)) { 2302 if (apic_kmdb_on_nmi) { 2303 if (psm_debugger() == 0) { 2304 cmn_err(CE_PANIC, 2305 "NMI detected, kmdb is not available."); 2306 } else { 2307 debug_enter("\nNMI detected, entering kmdb.\n"); 2308 } 2309 } else { 2310 if (apic_panic_on_nmi) { 2311 /* Keep panic from entering kmdb. */ 2312 nopanicdebug = 1; 2313 cmn_err(CE_PANIC, "pcplusmp: NMI received"); 2314 } else { 2315 /* 2316 * prom_printf is the best shot we have 2317 * of something which is problem-free from 2318 * high-level/NMI type of interrupts 2319 */ 2320 prom_printf("pcplusmp: NMI received\n"); 2321 apic_error |= APIC_ERR_NMI; 2322 apic_num_nmis++; 2323 } 2324 } 2325 lock_clear(&apic_nmi_lock); 2326 } 2327 } 2328 2329 /* 2330 * Add mask bits to disable the interrupt vector from happening 2331 * at or above the IPL. In addition, it should remove mask bits 2332 * to enable interrupt vectors below the given IPL. 2333 * 2334 * Both add and delspl are complicated by the fact that different interrupts 2335 * may share IRQs. This can happen in two ways. 2336 * 1. The same H/W line is shared by more than 1 device 2337 * 1a. with interrupts at different IPLs 2338 * 1b. with interrupts at same IPL 2339 * 2. We ran out of vectors at a given IPL and started sharing vectors. 2340 * 1b and 2 should be handled gracefully, except for the fact that some ISRs 2341 * will get called often when no interrupt is pending for the device. 2342 * For 1a, we just hope that the machine blows up with the person who 2343 * set it up that way! In the meantime, we handle it at the higher IPL. 2344 */ 2345 /*ARGSUSED*/ 2346 static int 2347 apic_addspl(int irqno, int ipl, int min_ipl, int max_ipl) 2348 { 2349 uchar_t vector; 2350 int iflag; 2351 apic_irq_t *irqptr, *irqheadptr; 2352 int irqindex; 2353 2354 ASSERT(max_ipl <= UCHAR_MAX); 2355 irqindex = IRQINDEX(irqno); 2356 2357 if ((irqindex == -1) || (!apic_irq_table[irqindex])) 2358 return (PSM_FAILURE); 2359 2360 mutex_enter(&airq_mutex); 2361 irqptr = irqheadptr = apic_irq_table[irqindex]; 2362 2363 DDI_INTR_IMPLDBG((CE_CONT, "apic_addspl: dip=0x%p type=%d irqno=0x%x " 2364 "vector=0x%x\n", (void *)irqptr->airq_dip, 2365 irqptr->airq_mps_intr_index, irqno, irqptr->airq_vector)); 2366 2367 while (irqptr) { 2368 if (VIRTIRQ(irqindex, irqptr->airq_share_id) == irqno) 2369 break; 2370 irqptr = irqptr->airq_next; 2371 } 2372 irqptr->airq_share++; 2373 2374 mutex_exit(&airq_mutex); 2375 2376 /* return if it is not a hardware interrupt */ 2377 if (irqptr->airq_mps_intr_index == RESERVE_INDEX) 2378 return (PSM_SUCCESS); 2379 2380 /* Or if there are more interrupts at a higher IPL */ 2381 if (ipl != max_ipl) 2382 return (PSM_SUCCESS); 2383 2384 /* 2385 * if apic_picinit() has not been called yet, just return. 2386 * At the end of apic_picinit(), we will call setup_io_intr(). 2387 */ 2388 2389 if (!apic_flag) 2390 return (PSM_SUCCESS); 2391 2392 /* 2393 * Upgrade the vector if max_ipl is not the earlier ipl.
If we cannot allocate, 2394 * return failure. Not very elegant, but then we hope the 2395 * machine will blow up with ... 2396 */ 2397 if (irqptr->airq_ipl != max_ipl) { 2398 vector = apic_allocate_vector(max_ipl, irqindex, 1); 2399 if (vector == 0) { 2400 irqptr->airq_share--; 2401 return (PSM_FAILURE); 2402 } 2403 irqptr = irqheadptr; 2404 apic_mark_vector(irqptr->airq_vector, vector); 2405 while (irqptr) { 2406 irqptr->airq_vector = vector; 2407 irqptr->airq_ipl = (uchar_t)max_ipl; 2408 /* 2409 * reprogram the irq being added and every other one 2410 * that is not in the UNINIT state 2411 */ 2412 if ((VIRTIRQ(irqindex, irqptr->airq_share_id) == 2413 irqno) || (irqptr->airq_temp_cpu != IRQ_UNINIT)) { 2414 apic_record_rdt_entry(irqptr, irqindex); 2415 2416 iflag = intr_clear(); 2417 lock_set(&apic_ioapic_lock); 2418 2419 (void) apic_setup_io_intr(irqptr, irqindex, 2420 B_FALSE); 2421 2422 lock_clear(&apic_ioapic_lock); 2423 intr_restore(iflag); 2424 } 2425 irqptr = irqptr->airq_next; 2426 } 2427 return (PSM_SUCCESS); 2428 } 2429 2430 ASSERT(irqptr); 2431 2432 iflag = intr_clear(); 2433 lock_set(&apic_ioapic_lock); 2434 2435 (void) apic_setup_io_intr(irqptr, irqindex, B_FALSE); 2436 2437 lock_clear(&apic_ioapic_lock); 2438 intr_restore(iflag); 2439 2440 return (PSM_SUCCESS); 2441 } 2442 2443 /* 2444 * Recompute mask bits for the given interrupt vector. 2445 * If there is no interrupt servicing routine for this 2446 * vector, this function should disable the interrupt vector 2447 * from happening at all IPLs. If there are still 2448 * handlers using the given vector, this function should 2449 * disable the given vector from happening below the lowest 2450 * IPL of the remaining handlers. 2451 */ 2452 /*ARGSUSED*/ 2453 static int 2454 apic_delspl(int irqno, int ipl, int min_ipl, int max_ipl) 2455 { 2456 uchar_t vector, bind_cpu; 2457 int iflag, intin, irqindex; 2458 volatile int32_t *ioapic; 2459 apic_irq_t *irqptr, *irqheadptr; 2460 2461 mutex_enter(&airq_mutex); 2462 irqindex = IRQINDEX(irqno); 2463 irqptr = irqheadptr = apic_irq_table[irqindex]; 2464 2465 DDI_INTR_IMPLDBG((CE_CONT, "apic_delspl: dip=0x%p type=%d irqno=0x%x " 2466 "vector=0x%x\n", (void *)irqptr->airq_dip, 2467 irqptr->airq_mps_intr_index, irqno, irqptr->airq_vector)); 2468 2469 while (irqptr) { 2470 if (VIRTIRQ(irqindex, irqptr->airq_share_id) == irqno) 2471 break; 2472 irqptr = irqptr->airq_next; 2473 } 2474 ASSERT(irqptr); 2475 2476 irqptr->airq_share--; 2477 2478 mutex_exit(&airq_mutex); 2479 2480 if (ipl < max_ipl) 2481 return (PSM_SUCCESS); 2482 2483 /* return if it is not a hardware interrupt */ 2484 if (irqptr->airq_mps_intr_index == RESERVE_INDEX) 2485 return (PSM_SUCCESS); 2486 2487 if (!apic_flag) { 2488 /* 2489 * Clear irq_struct. If two devices shared an intpt 2490 * line & 1 unloaded before picinit, we are hosed. But, then 2491 * we hope the machine will ... 2492 */ 2493 irqptr->airq_mps_intr_index = FREE_INDEX; 2494 irqptr->airq_temp_cpu = IRQ_UNINIT; 2495 apic_free_vector(irqptr->airq_vector); 2496 return (PSM_SUCCESS); 2497 } 2498 /* 2499 * Downgrade the vector to the new max_ipl if needed. If we cannot 2500 * allocate one, use the old IPL. Not very elegant, but then we hope ...
2501 */ 2502 if ((irqptr->airq_ipl != max_ipl) && (max_ipl != PSM_INVALID_IPL)) { 2503 apic_irq_t *irqp; 2504 if (vector = apic_allocate_vector(max_ipl, irqno, 1)) { 2505 apic_mark_vector(irqheadptr->airq_vector, vector); 2506 irqp = irqheadptr; 2507 while (irqp) { 2508 irqp->airq_vector = vector; 2509 irqp->airq_ipl = (uchar_t)max_ipl; 2510 if (irqp->airq_temp_cpu != IRQ_UNINIT) { 2511 apic_record_rdt_entry(irqp, irqindex); 2512 2513 iflag = intr_clear(); 2514 lock_set(&apic_ioapic_lock); 2515 2516 (void) apic_setup_io_intr(irqp, 2517 irqindex, B_FALSE); 2518 2519 lock_clear(&apic_ioapic_lock); 2520 intr_restore(iflag); 2521 } 2522 irqp = irqp->airq_next; 2523 } 2524 } 2525 } 2526 2527 if (irqptr->airq_share) 2528 return (PSM_SUCCESS); 2529 2530 iflag = intr_clear(); 2531 lock_set(&apic_ioapic_lock); 2532 2533 /* Disable the MSI/X vector */ 2534 if (APIC_IS_MSI_OR_MSIX_INDEX(irqptr->airq_mps_intr_index)) { 2535 int type = (irqptr->airq_mps_intr_index == MSI_INDEX) ? 2536 DDI_INTR_TYPE_MSI : DDI_INTR_TYPE_MSIX; 2537 2538 /* 2539 * Make sure we only disable on the last 2540 * of the multi-MSI support 2541 */ 2542 if (i_ddi_intr_get_current_nintrs(irqptr->airq_dip) == 1) { 2543 (void) apic_pci_msi_unconfigure(irqptr->airq_dip, 2544 type, irqptr->airq_ioapicindex); 2545 (void) apic_pci_msi_disable_mode(irqptr->airq_dip, 2546 type, irqptr->airq_ioapicindex); 2547 } 2548 } else { 2549 ioapic = apicioadr[irqptr->airq_ioapicindex]; 2550 intin = irqptr->airq_intin_no; 2551 ioapic[APIC_IO_REG] = APIC_RDT_CMD + 2 * intin; 2552 ioapic[APIC_IO_DATA] = AV_MASK; 2553 } 2554 2555 if (max_ipl == PSM_INVALID_IPL) { 2556 ASSERT(irqheadptr == irqptr); 2557 bind_cpu = irqptr->airq_temp_cpu; 2558 if (((uchar_t)bind_cpu != IRQ_UNBOUND) && 2559 ((uchar_t)bind_cpu != IRQ_UNINIT)) { 2560 ASSERT((bind_cpu & ~IRQ_USER_BOUND) < apic_nproc); 2561 if (bind_cpu & IRQ_USER_BOUND) { 2562 /* If hardbound, temp_cpu == cpu */ 2563 bind_cpu &= ~IRQ_USER_BOUND; 2564 apic_cpus[bind_cpu].aci_bound--; 2565 } else 2566 apic_cpus[bind_cpu].aci_temp_bound--; 2567 } 2568 irqptr->airq_temp_cpu = IRQ_UNINIT; 2569 irqptr->airq_mps_intr_index = FREE_INDEX; 2570 lock_clear(&apic_ioapic_lock); 2571 intr_restore(iflag); 2572 apic_free_vector(irqptr->airq_vector); 2573 return (PSM_SUCCESS); 2574 } 2575 lock_clear(&apic_ioapic_lock); 2576 intr_restore(iflag); 2577 2578 mutex_enter(&airq_mutex); 2579 if ((irqptr == apic_irq_table[irqindex])) { 2580 apic_irq_t *oldirqptr; 2581 /* Move valid irq entry to the head */ 2582 irqheadptr = oldirqptr = irqptr; 2583 irqptr = irqptr->airq_next; 2584 ASSERT(irqptr); 2585 while (irqptr) { 2586 if (irqptr->airq_mps_intr_index != FREE_INDEX) 2587 break; 2588 oldirqptr = irqptr; 2589 irqptr = irqptr->airq_next; 2590 } 2591 /* remove all invalid ones from the beginning */ 2592 apic_irq_table[irqindex] = irqptr; 2593 /* 2594 * and link them back after the head. The invalid ones 2595 * begin with irqheadptr and end at oldirqptr 2596 */ 2597 oldirqptr->airq_next = irqptr->airq_next; 2598 irqptr->airq_next = irqheadptr; 2599 } 2600 mutex_exit(&airq_mutex); 2601 2602 irqptr->airq_temp_cpu = IRQ_UNINIT; 2603 irqptr->airq_mps_intr_index = FREE_INDEX; 2604 2605 return (PSM_SUCCESS); 2606 } 2607 2608 /* 2609 * Return HW interrupt number corresponding to the given IPL 2610 */ 2611 /*ARGSUSED*/ 2612 static int 2613 apic_softlvl_to_irq(int ipl) 2614 { 2615 /* 2616 * Do not use apic to trigger soft interrupt. 
2617 * It will cause the system to hang when 2 hardware interrupts 2618 * at the same priority as the softint are already accepted 2619 * by the apic, because the AV_PENDING bit will not be cleared 2620 * until one of the hardware interrupts is EOIed. If we need 2621 * to send an ipi at this time, we will end up looping forever 2622 * to wait for the AV_PENDING bit to clear. 2623 */ 2624 return (PSM_SV_SOFTWARE); 2625 } 2626 2627 static int 2628 apic_post_cpu_start() 2629 { 2630 int i, cpun, iflag; 2631 apic_irq_t *irq_ptr; 2632 2633 apic_init_intr(); 2634 2635 /* 2636 * since some systems don't enable the internal cache on the non-boot 2637 * cpus, we have to enable it here 2638 */ 2639 setcr0(getcr0() & ~(0x60000000)); 2640 2641 while (get_apic_cmd1() & AV_PENDING) 2642 apic_ret(); 2643 2644 cpun = psm_get_cpu_id(); 2645 2646 apic_cpus[cpun].aci_status = APIC_CPU_ONLINE | APIC_CPU_INTR_ENABLE; 2647 2648 for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) { 2649 irq_ptr = apic_irq_table[i]; 2650 if ((irq_ptr == NULL) || 2651 ((irq_ptr->airq_cpu & ~IRQ_USER_BOUND) != cpun)) 2652 continue; 2653 2654 while (irq_ptr) { 2655 if (irq_ptr->airq_temp_cpu != IRQ_UNINIT) { 2656 iflag = intr_clear(); 2657 lock_set(&apic_ioapic_lock); 2658 2659 (void) apic_rebind(irq_ptr, cpun, NULL); 2660 2661 lock_clear(&apic_ioapic_lock); 2662 intr_restore(iflag); 2663 } 2664 irq_ptr = irq_ptr->airq_next; 2665 } 2666 } 2667 2668 2669 apicadr[APIC_DIVIDE_REG] = apic_divide_reg_init; 2670 return (PSM_SUCCESS); 2671 } 2672 2673 processorid_t 2674 apic_get_next_processorid(processorid_t cpu_id) 2675 { 2676 2677 int i; 2678 2679 if (cpu_id == -1) 2680 return ((processorid_t)0); 2681 2682 for (i = cpu_id + 1; i < NCPU; i++) { 2683 if (CPU_IN_SET(apic_cpumask, i)) 2684 return (i); 2685 } 2686 2687 return ((processorid_t)-1); 2688 } 2689 2690 2691 /* 2692 * type == -1 indicates it is an internal request. Do not change 2693 * resv_vector for these requests 2694 */ 2695 static int 2696 apic_get_ipivect(int ipl, int type) 2697 { 2698 uchar_t vector; 2699 int irq; 2700 2701 if (irq = apic_allocate_irq(APIC_VECTOR(ipl))) { 2702 if (vector = apic_allocate_vector(ipl, irq, 1)) { 2703 apic_irq_table[irq]->airq_mps_intr_index = 2704 RESERVE_INDEX; 2705 apic_irq_table[irq]->airq_vector = vector; 2706 if (type != -1) { 2707 apic_resv_vector[ipl] = vector; 2708 } 2709 return (irq); 2710 } 2711 } 2712 apic_error |= APIC_ERR_GET_IPIVECT_FAIL; 2713 return (-1); /* shouldn't happen */ 2714 } 2715 2716 static int 2717 apic_getclkirq(int ipl) 2718 { 2719 int irq; 2720 2721 if ((irq = apic_get_ipivect(ipl, -1)) == -1) 2722 return (-1); 2723 /* 2724 * Note the vector in apic_clkvect for per clock handling. 2725 */ 2726 apic_clkvect = apic_irq_table[irq]->airq_vector - APIC_BASE_VECT; 2727 APIC_VERBOSE_IOAPIC((CE_NOTE, "get_clkirq: vector = %x\n", 2728 apic_clkvect)); 2729 return (irq); 2730 } 2731 2732 2733 /* 2734 * Return the number of APIC clock ticks elapsed for the 8254 to decrement 2735 * (APIC_TIME_COUNT + pit_ticks_adj) ticks.
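 * Put differently, both counters are sampled over the same PIT interval, so (assuming the standard PIT_HZ of 1193182) the APIC clock rate can later be recovered as roughly apic_ticks * PIT_HZ / pit_ticks, which is exactly what apic_clkinit() does with the value returned here.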
2736 */ 2737 static uint_t 2738 apic_calibrate(volatile uint32_t *addr, uint16_t *pit_ticks_adj) 2739 { 2740 uint8_t pit_tick_lo; 2741 uint16_t pit_tick, target_pit_tick; 2742 uint32_t start_apic_tick, end_apic_tick; 2743 int iflag; 2744 2745 addr += APIC_CURR_COUNT; 2746 2747 iflag = intr_clear(); 2748 2749 do { 2750 pit_tick_lo = inb(PITCTR0_PORT); 2751 pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo; 2752 } while (pit_tick < APIC_TIME_MIN || 2753 pit_tick_lo <= APIC_LB_MIN || pit_tick_lo >= APIC_LB_MAX); 2754 2755 /* 2756 * Wait for the 8254 to decrement by 5 ticks to ensure 2757 * we didn't start in the middle of a tick. 2758 * Compare with 0x10 for the wrap around case. 2759 */ 2760 target_pit_tick = pit_tick - 5; 2761 do { 2762 pit_tick_lo = inb(PITCTR0_PORT); 2763 pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo; 2764 } while (pit_tick > target_pit_tick || pit_tick_lo < 0x10); 2765 2766 start_apic_tick = *addr; 2767 2768 /* 2769 * Wait for the 8254 to decrement by 2770 * (APIC_TIME_COUNT + pit_ticks_adj) ticks 2771 */ 2772 target_pit_tick = pit_tick - APIC_TIME_COUNT; 2773 do { 2774 pit_tick_lo = inb(PITCTR0_PORT); 2775 pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo; 2776 } while (pit_tick > target_pit_tick || pit_tick_lo < 0x10); 2777 2778 end_apic_tick = *addr; 2779 2780 *pit_ticks_adj = target_pit_tick - pit_tick; 2781 2782 intr_restore(iflag); 2783 2784 return (start_apic_tick - end_apic_tick); 2785 } 2786 2787 /* 2788 * Initialise the APIC timer on the local APIC of CPU 0 to the desired 2789 * frequency. Note at this stage in the boot sequence, the boot processor 2790 * is the only active processor. 2791 * A hertz value of 0 indicates a one-shot mode request. In this case 2792 * the function returns the resolution (in nanoseconds) for the hardware 2793 * timer interrupt. If one-shot mode capability is not available, 2794 * the return value will be 0. apic_enable_oneshot is a global switch 2795 * for disabling the functionality. 2796 * A non-zero positive value for hertz indicates a periodic mode request. 2797 * In this case the hardware will be programmed to generate clock interrupts 2798 * at hertz frequency and the function returns the resolution of the 2799 * interrupts in nanoseconds.
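 * For example, a (hypothetical) hertz of 100 yields apic_nsec_per_intr = NANOSEC / 100 = 10,000,000 ns, i.e. a 10 ms period, and that same figure is returned as the resolution.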
2800 */ 2801 2802 static int 2803 apic_clkinit(int hertz) 2804 { 2805 2806 uint_t apic_ticks = 0; 2807 uint_t pit_ticks; 2808 int ret; 2809 uint16_t pit_ticks_adj; 2810 static int firsttime = 1; 2811 2812 if (firsttime) { 2813 /* first time calibrate on CPU0 only */ 2814 2815 apicadr[APIC_DIVIDE_REG] = apic_divide_reg_init; 2816 apicadr[APIC_INIT_COUNT] = APIC_MAXVAL; /* start counting */ 2817 apic_ticks = apic_calibrate(apicadr, &pit_ticks_adj); 2818 2819 /* total number of PIT ticks corresponding to apic_ticks */ 2820 pit_ticks = APIC_TIME_COUNT + pit_ticks_adj; 2821 2822 /* 2823 * Determine the number of nanoseconds per APIC clock tick 2824 * and then determine how many APIC ticks to interrupt at the 2825 * desired frequency 2826 * apic_ticks / (pitticks / PIT_HZ) = apic_ticks_per_s 2827 * (apic_ticks * PIT_HZ) / pitticks = apic_ticks_per_s 2828 * apic_ticks_per_ns = (apic_ticks * PIT_HZ) / (pitticks * 10^9) 2829 * apic_ticks_per_SFns = 2830 * (SF * apic_ticks * PIT_HZ) / (pitticks * 10^9) 2831 */ 2832 apic_ticks_per_SFnsecs = 2833 ((SF * apic_ticks * PIT_HZ) / 2834 ((uint64_t)pit_ticks * NANOSEC)); 2835 2836 /* the interval timer initial count is 32 bit max */ 2837 apic_nsec_max = APIC_TICKS_TO_NSECS(APIC_MAXVAL); 2838 firsttime = 0; 2839 } 2840 2841 if (hertz != 0) { 2842 /* periodic */ 2843 apic_nsec_per_intr = NANOSEC / hertz; 2844 apic_hertz_count = APIC_NSECS_TO_TICKS(apic_nsec_per_intr); 2845 } 2846 2847 apic_int_busy_mark = (apic_int_busy_mark * 2848 apic_sample_factor_redistribution) / 100; 2849 apic_int_free_mark = (apic_int_free_mark * 2850 apic_sample_factor_redistribution) / 100; 2851 apic_diff_for_redistribution = (apic_diff_for_redistribution * 2852 apic_sample_factor_redistribution) / 100; 2853 2854 if (hertz == 0) { 2855 /* requested one_shot */ 2856 if (!apic_oneshot_enable) 2857 return (0); 2858 apic_oneshot = 1; 2859 ret = (int)APIC_TICKS_TO_NSECS(1); 2860 } else { 2861 /* program the local APIC to interrupt at the given frequency */ 2862 apicadr[APIC_INIT_COUNT] = apic_hertz_count; 2863 apicadr[APIC_LOCAL_TIMER] = 2864 (apic_clkvect + APIC_BASE_VECT) | AV_TIME; 2865 apic_oneshot = 0; 2866 ret = NANOSEC / hertz; 2867 } 2868 2869 return (ret); 2870 2871 } 2872 2873 /* 2874 * apic_preshutdown: 2875 * Called early in shutdown whilst we can still access filesystems to do 2876 * things like loading modules which will be required to complete shutdown 2877 * after filesystems are all unmounted. 
2878 */ 2879 static void 2880 apic_preshutdown(int cmd, int fcn) 2881 { 2882 APIC_VERBOSE_POWEROFF(("apic_preshutdown(%d,%d); m=%d a=%d\n", 2883 cmd, fcn, apic_poweroff_method, apic_enable_acpi)); 2884 2885 if ((cmd != A_SHUTDOWN) || (fcn != AD_POWEROFF)) { 2886 return; 2887 } 2888 } 2889 2890 static void 2891 apic_shutdown(int cmd, int fcn) 2892 { 2893 int iflag, restarts, attempts; 2894 int i, j; 2895 volatile int32_t *ioapic; 2896 uchar_t byte; 2897 2898 /* Send NMI to all CPUs except self to do per processor shutdown */ 2899 iflag = intr_clear(); 2900 while (get_apic_cmd1() & AV_PENDING) 2901 apic_ret(); 2902 apic_shutdown_processors = 1; 2903 apicadr[APIC_INT_CMD1] = AV_NMI | AV_LEVEL | AV_SH_ALL_EXCSELF; 2904 2905 /* restore cmos shutdown byte before reboot */ 2906 if (apic_cmos_ssb_set) { 2907 outb(CMOS_ADDR, SSB); 2908 outb(CMOS_DATA, 0); 2909 } 2910 /* Disable the I/O APIC redirection entries */ 2911 for (j = 0; j < apic_io_max; j++) { 2912 int intin_max; 2913 ioapic = apicioadr[j]; 2914 ioapic[APIC_IO_REG] = APIC_VERS_CMD; 2915 /* Bits 23-16 define the maximum redirection entries */ 2916 intin_max = (ioapic[APIC_IO_DATA] >> 16) & 0xff; 2917 for (i = 0; i < intin_max; i++) { 2918 ioapic[APIC_IO_REG] = APIC_RDT_CMD + 2 * i; 2919 ioapic[APIC_IO_DATA] = AV_MASK; 2920 } 2921 } 2922 2923 /* disable apic mode if imcr present */ 2924 if (apic_imcrp) { 2925 outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT); 2926 outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_PIC); 2927 } 2928 2929 apic_disable_local_apic(); 2930 2931 intr_restore(iflag); 2932 2933 if ((cmd != A_SHUTDOWN) || (fcn != AD_POWEROFF)) { 2934 return; 2935 } 2936 2937 switch (apic_poweroff_method) { 2938 case APIC_POWEROFF_VIA_RTC: 2939 2940 /* select the extended NVRAM bank in the RTC */ 2941 outb(CMOS_ADDR, RTC_REGA); 2942 byte = inb(CMOS_DATA); 2943 outb(CMOS_DATA, (byte | EXT_BANK)); 2944 2945 outb(CMOS_ADDR, PFR_REG); 2946 2947 /* for Predator must toggle the PAB bit */ 2948 byte = inb(CMOS_DATA); 2949 2950 /* 2951 * clear power active bar, wakeup alarm and 2952 * kickstart 2953 */ 2954 byte &= ~(PAB_CBIT | WF_FLAG | KS_FLAG); 2955 outb(CMOS_DATA, byte); 2956 2957 /* delay before next write */ 2958 drv_usecwait(1000); 2959 2960 /* for S40 the following would suffice */ 2961 byte = inb(CMOS_DATA); 2962 2963 /* power active bar control bit */ 2964 byte |= PAB_CBIT; 2965 outb(CMOS_DATA, byte); 2966 2967 break; 2968 2969 case APIC_POWEROFF_VIA_ASPEN_BMC: 2970 restarts = 0; 2971 restart_aspen_bmc: 2972 if (++restarts == 3) 2973 break; 2974 attempts = 0; 2975 do { 2976 byte = inb(MISMIC_FLAG_REGISTER); 2977 byte &= MISMIC_BUSY_MASK; 2978 if (byte != 0) { 2979 drv_usecwait(1000); 2980 if (attempts >= 3) 2981 goto restart_aspen_bmc; 2982 ++attempts; 2983 } 2984 } while (byte != 0); 2985 outb(MISMIC_CNTL_REGISTER, CC_SMS_GET_STATUS); 2986 byte = inb(MISMIC_FLAG_REGISTER); 2987 byte |= 0x1; 2988 outb(MISMIC_FLAG_REGISTER, byte); 2989 i = 0; 2990 for (; i < (sizeof (aspen_bmc)/sizeof (aspen_bmc[0])); 2991 i++) { 2992 attempts = 0; 2993 do { 2994 byte = inb(MISMIC_FLAG_REGISTER); 2995 byte &= MISMIC_BUSY_MASK; 2996 if (byte != 0) { 2997 drv_usecwait(1000); 2998 if (attempts >= 3) 2999 goto restart_aspen_bmc; 3000 ++attempts; 3001 } 3002 } while (byte != 0); 3003 outb(MISMIC_CNTL_REGISTER, aspen_bmc[i].cntl); 3004 outb(MISMIC_DATA_REGISTER, aspen_bmc[i].data); 3005 byte = inb(MISMIC_FLAG_REGISTER); 3006 byte |= 0x1; 3007 outb(MISMIC_FLAG_REGISTER, byte); 3008 } 3009 break; 3010 3011 case APIC_POWEROFF_VIA_SITKA_BMC: 3012 restarts = 0; 3013 
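/* Same handshake pattern as the Aspen BMC case above: poll the status register until the interface is idle, giving up after 3 attempts per step and 3 restarts of the whole command sequence. */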
restart_sitka_bmc: 3014 if (++restarts == 3) 3015 break; 3016 attempts = 0; 3017 do { 3018 byte = inb(SMS_STATUS_REGISTER); 3019 byte &= SMS_STATE_MASK; 3020 if ((byte == SMS_READ_STATE) || 3021 (byte == SMS_WRITE_STATE)) { 3022 drv_usecwait(1000); 3023 if (attempts >= 3) 3024 goto restart_sitka_bmc; 3025 ++attempts; 3026 } 3027 } while ((byte == SMS_READ_STATE) || 3028 (byte == SMS_WRITE_STATE)); 3029 outb(SMS_COMMAND_REGISTER, SMS_GET_STATUS); 3030 i = 0; 3031 for (; i < (sizeof (sitka_bmc)/sizeof (sitka_bmc[0])); 3032 i++) { 3033 attempts = 0; 3034 do { 3035 byte = inb(SMS_STATUS_REGISTER); 3036 byte &= SMS_IBF_MASK; 3037 if (byte != 0) { 3038 drv_usecwait(1000); 3039 if (attempts >= 3) 3040 goto restart_sitka_bmc; 3041 ++attempts; 3042 } 3043 } while (byte != 0); 3044 outb(sitka_bmc[i].port, sitka_bmc[i].data); 3045 } 3046 break; 3047 3048 case APIC_POWEROFF_NONE: 3049 3050 /* If no APIC direct method, we will try using ACPI */ 3051 if (apic_enable_acpi) { 3052 if (acpi_poweroff() == 1) 3053 return; 3054 } else 3055 return; 3056 3057 break; 3058 } 3059 /* 3060 * Wait a limited time here for power to go off. 3061 * If the power does not go off, then there was a 3062 * problem and we should continue to the halt which 3063 * prints a message for the user to press a key to 3064 * reboot. 3065 */ 3066 drv_usecwait(7000000); /* wait seven seconds */ 3067 3068 } 3069 3070 /* 3071 * Try and disable all interrupts. We just assign interrupts to other 3072 * processors based on policy. If any were bound by user request, we 3073 * let them continue and return failure. We do not bother to check 3074 * for cache affinity while rebinding. 3075 */ 3076 3077 static int 3078 apic_disable_intr(processorid_t cpun) 3079 { 3080 int bind_cpu = 0, i, hardbound = 0, iflag; 3081 apic_irq_t *irq_ptr; 3082 3083 iflag = intr_clear(); 3084 lock_set(&apic_ioapic_lock); 3085 3086 for (i = 0; i <= APIC_MAX_VECTOR; i++) { 3087 if (apic_reprogram_info[i].done == B_FALSE) { 3088 if (apic_reprogram_info[i].bindcpu == cpun) { 3089 /* 3090 * CPU is busy -- it's the target of 3091 * a pending reprogramming attempt 3092 */ 3093 lock_clear(&apic_ioapic_lock); 3094 intr_restore(iflag); 3095 return (PSM_FAILURE); 3096 } 3097 } 3098 } 3099 3100 apic_cpus[cpun].aci_status &= ~APIC_CPU_INTR_ENABLE; 3101 3102 apic_cpus[cpun].aci_curipl = 0; 3103 3104 i = apic_min_device_irq; 3105 for (; i <= apic_max_device_irq; i++) { 3106 /* 3107 * If there are bound interrupts on this cpu, then 3108 * rebind them to other processors. 
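 * The rotor below advances apic_next_bind_cpu by 2 and halves it, trying candidate CPUs round-robin and wrapping back to CPU 0; a nonzero return from apic_rebind_all() sends us on to the next candidate.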
3109 */ 3110 if ((irq_ptr = apic_irq_table[i]) != NULL) { 3111 ASSERT((irq_ptr->airq_temp_cpu == IRQ_UNBOUND) || 3112 (irq_ptr->airq_temp_cpu == IRQ_UNINIT) || 3113 ((irq_ptr->airq_temp_cpu & ~IRQ_USER_BOUND) < 3114 apic_nproc)); 3115 3116 if (irq_ptr->airq_temp_cpu == (cpun | IRQ_USER_BOUND)) { 3117 hardbound = 1; 3118 continue; 3119 } 3120 3121 if (irq_ptr->airq_temp_cpu == cpun) { 3122 do { 3123 apic_next_bind_cpu += 2; 3124 bind_cpu = apic_next_bind_cpu / 2; 3125 if (bind_cpu >= apic_nproc) { 3126 apic_next_bind_cpu = 1; 3127 bind_cpu = 0; 3128 3129 } 3130 } while (apic_rebind_all(irq_ptr, bind_cpu)); 3131 } 3132 } 3133 } 3134 3135 lock_clear(&apic_ioapic_lock); 3136 intr_restore(iflag); 3137 3138 if (hardbound) { 3139 cmn_err(CE_WARN, "Could not disable interrupts on %d " 3140 "due to user bound interrupts", cpun); 3141 return (PSM_FAILURE); 3142 } 3143 else 3144 return (PSM_SUCCESS); 3145 } 3146 3147 static void 3148 apic_enable_intr(processorid_t cpun) 3149 { 3150 int i, iflag; 3151 apic_irq_t *irq_ptr; 3152 3153 iflag = intr_clear(); 3154 lock_set(&apic_ioapic_lock); 3155 3156 apic_cpus[cpun].aci_status |= APIC_CPU_INTR_ENABLE; 3157 3158 for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) { 3160 if ((irq_ptr = apic_irq_table[i]) != NULL) { 3161 if ((irq_ptr->airq_cpu & ~IRQ_USER_BOUND) == cpun) { 3162 (void) apic_rebind_all(irq_ptr, 3163 irq_ptr->airq_cpu); 3164 } 3165 } 3166 } 3167 3168 lock_clear(&apic_ioapic_lock); 3169 intr_restore(iflag); 3170 } 3171 3172 /* 3173 * apic_introp_xlate() replaces apic_translate_irq() and is 3174 * called only from apic_intr_ops(). With the new ADII framework, 3175 * the priority can no longer be retrieved through i_ddi_get_intrspec(). 3176 * It has to be passed in from the caller. 3177 */ 3178 int 3179 apic_introp_xlate(dev_info_t *dip, struct intrspec *ispec, int type) 3180 { 3181 char dev_type[16]; 3182 int dev_len, pci_irq, newirq, bustype, devid, busid, i; 3183 int irqno = ispec->intrspec_vec; 3184 ddi_acc_handle_t cfg_handle; 3185 uchar_t ipin; 3186 struct apic_io_intr *intrp; 3187 iflag_t intr_flag; 3188 APIC_HEADER *hp; 3189 MADT_INTERRUPT_OVERRIDE *isop; 3190 apic_irq_t *airqp; 3191 int parent_is_pci_or_pciex = 0; 3192 int child_is_pciex = 0; 3193 3194 DDI_INTR_IMPLDBG((CE_CONT, "apic_introp_xlate: dip=0x%p name=%s " 3195 "type=%d irqno=0x%x\n", (void *)dip, ddi_get_name(dip), type, 3196 irqno)); 3197 3198 dev_len = sizeof (dev_type); 3199 if (ddi_getlongprop_buf(DDI_DEV_T_ANY, ddi_get_parent(dip), 3200 DDI_PROP_DONTPASS, "device_type", (caddr_t)dev_type, 3201 &dev_len) == DDI_PROP_SUCCESS) { 3202 if ((strcmp(dev_type, "pci") == 0) || 3203 (strcmp(dev_type, "pciex") == 0)) 3204 parent_is_pci_or_pciex = 1; 3205 } 3206 3207 if (parent_is_pci_or_pciex && ddi_prop_get_int(DDI_DEV_T_ANY, dip, 3208 DDI_PROP_DONTPASS, "pcie-capid-pointer", PCI_CAP_NEXT_PTR_NULL) != 3209 PCI_CAP_NEXT_PTR_NULL) { 3210 child_is_pciex = 1; 3211 } 3212 3213 if (DDI_INTR_IS_MSI_OR_MSIX(type)) { 3214 if ((airqp = apic_find_irq(dip, ispec, type)) != NULL) { 3215 airqp->airq_iflag.bustype = 3216 child_is_pciex ?
BUS_PCIE : BUS_PCI; 3217 return (apic_vector_to_irq[airqp->airq_vector]); 3218 } 3219 return (apic_setup_irq_table(dip, irqno, NULL, ispec, 3220 NULL, type)); 3221 } 3222 3223 bustype = 0; 3224 3225 /* check if we have already translated this irq */ 3226 mutex_enter(&airq_mutex); 3227 newirq = apic_min_device_irq; 3228 for (; newirq <= apic_max_device_irq; newirq++) { 3229 airqp = apic_irq_table[newirq]; 3230 while (airqp) { 3231 if ((airqp->airq_dip == dip) && 3232 (airqp->airq_origirq == irqno) && 3233 (airqp->airq_mps_intr_index != FREE_INDEX)) { 3234 3235 mutex_exit(&airq_mutex); 3236 return (VIRTIRQ(newirq, airqp->airq_share_id)); 3237 } 3238 airqp = airqp->airq_next; 3239 } 3240 } 3241 mutex_exit(&airq_mutex); 3242 3243 if (apic_defconf) 3244 goto defconf; 3245 3246 if ((dip == NULL) || (!apic_irq_translate && !apic_enable_acpi)) 3247 goto nonpci; 3248 3249 if (parent_is_pci_or_pciex) { 3250 /* pci device */ 3251 if (acpica_get_bdf(dip, &busid, &devid, NULL) != 0) 3252 goto nonpci; 3253 if (busid == 0 && apic_pci_bus_total == 1) 3254 busid = (int)apic_single_pci_busid; 3255 3256 if (pci_config_setup(dip, &cfg_handle) != DDI_SUCCESS) 3257 goto nonpci; 3258 ipin = pci_config_get8(cfg_handle, PCI_CONF_IPIN) - PCI_INTA; 3259 pci_config_teardown(&cfg_handle); 3260 if (apic_enable_acpi && !apic_use_acpi_madt_only) { 3261 if (apic_acpi_translate_pci_irq(dip, busid, devid, 3262 ipin, &pci_irq, &intr_flag) != ACPI_PSM_SUCCESS) 3263 goto nonpci; 3264 3265 intr_flag.bustype = child_is_pciex ? BUS_PCIE : BUS_PCI; 3266 if ((newirq = apic_setup_irq_table(dip, pci_irq, NULL, 3267 ispec, &intr_flag, type)) == -1) 3268 goto nonpci; 3269 return (newirq); 3270 } else { 3271 pci_irq = ((devid & 0x1f) << 2) | (ipin & 0x3); 3272 if ((intrp = apic_find_io_intr_w_busid(pci_irq, busid)) 3273 == NULL) { 3274 if ((pci_irq = apic_handle_pci_pci_bridge(dip, 3275 devid, ipin, &intrp)) == -1) 3276 goto nonpci; 3277 } 3278 if ((newirq = apic_setup_irq_table(dip, pci_irq, intrp, 3279 ispec, NULL, type)) == -1) 3280 goto nonpci; 3281 return (newirq); 3282 } 3283 } else if (strcmp(dev_type, "isa") == 0) 3284 bustype = BUS_ISA; 3285 else if (strcmp(dev_type, "eisa") == 0) 3286 bustype = BUS_EISA; 3287 3288 nonpci: 3289 if (apic_enable_acpi && !apic_use_acpi_madt_only) { 3290 /* search iso entries first */ 3291 if (acpi_iso_cnt != 0) { 3292 hp = (APIC_HEADER *)acpi_isop; 3293 i = 0; 3294 while (i < acpi_iso_cnt) { 3295 if (hp->Type == APIC_XRUPT_OVERRIDE) { 3296 isop = (MADT_INTERRUPT_OVERRIDE *)hp; 3297 if (isop->Bus == 0 && 3298 isop->Source == irqno) { 3299 newirq = isop->Interrupt; 3300 intr_flag.intr_po = 3301 isop->Polarity; 3302 intr_flag.intr_el = 3303 isop->TriggerMode; 3304 intr_flag.bustype = BUS_ISA; 3305 3306 return (apic_setup_irq_table( 3307 dip, newirq, NULL, ispec, 3308 &intr_flag, type)); 3309 3310 } 3311 i++; 3312 } 3313 hp = (APIC_HEADER *)(((char *)hp) + 3314 hp->Length); 3315 } 3316 } 3317 intr_flag.intr_po = INTR_PO_ACTIVE_HIGH; 3318 intr_flag.intr_el = INTR_EL_EDGE; 3319 intr_flag.bustype = BUS_ISA; 3320 return (apic_setup_irq_table(dip, irqno, NULL, ispec, 3321 &intr_flag, type)); 3322 } else { 3323 if (bustype == 0) 3324 bustype = eisa_level_intr_mask ? 
BUS_EISA : BUS_ISA; 3325 for (i = 0; i < 2; i++) { 3326 if (((busid = apic_find_bus_id(bustype)) != -1) && 3327 ((intrp = apic_find_io_intr_w_busid(irqno, busid)) 3328 != NULL)) { 3329 if ((newirq = apic_setup_irq_table(dip, irqno, 3330 intrp, ispec, NULL, type)) != -1) { 3331 return (newirq); 3332 } 3333 goto defconf; 3334 } 3335 bustype = (bustype == BUS_EISA) ? BUS_ISA : BUS_EISA; 3336 } 3337 } 3338 3339 /* MPS default configuration */ 3340 defconf: 3341 newirq = apic_setup_irq_table(dip, irqno, NULL, ispec, NULL, type); 3342 if (newirq == -1) 3343 return (newirq); 3344 ASSERT(IRQINDEX(newirq) == irqno); 3345 ASSERT(apic_irq_table[irqno]); 3346 return (newirq); 3347 } 3348 3349 3350 3351 3352 3353 3354 /* 3355 * On machines with PCI-PCI bridges, a device behind a PCI-PCI bridge 3356 * needs special handling. We may need to chase up the device tree, 3357 * using the PCI-PCI Bridge specification's "rotating IPIN assumptions", 3358 * to find the IPIN at the root bus that relates to the IPIN on the 3359 * subsidiary bus (for ACPI or MP). We may, however, have an entry 3360 * in the MP table or the ACPI namespace for this device itself. 3361 * We handle both cases in the search below. 3362 */ 3363 /* this is the non-acpi version */ 3364 static int 3365 apic_handle_pci_pci_bridge(dev_info_t *idip, int child_devno, int child_ipin, 3366 struct apic_io_intr **intrp) 3367 { 3368 dev_info_t *dipp, *dip; 3369 int pci_irq; 3370 ddi_acc_handle_t cfg_handle; 3371 int bridge_devno, bridge_bus; 3372 int ipin; 3373 3374 dip = idip; 3375 3376 /*CONSTCOND*/ 3377 while (1) { 3378 if ((dipp = ddi_get_parent(dip)) == (dev_info_t *)NULL) 3379 return (-1); 3380 if ((pci_config_setup(dipp, &cfg_handle) == DDI_SUCCESS) && 3381 (pci_config_get8(cfg_handle, PCI_CONF_BASCLASS) == 3382 PCI_CLASS_BRIDGE) && (pci_config_get8(cfg_handle, 3383 PCI_CONF_SUBCLASS) == PCI_BRIDGE_PCI)) { 3384 pci_config_teardown(&cfg_handle); 3385 if (acpica_get_bdf(dipp, &bridge_bus, &bridge_devno, 3386 NULL) != 0) 3387 return (-1); 3388 /* 3389 * This is the rotating scheme that Compaq uses, as 3390 * documented in the pci to pci spec. Also, if 3391 * the pci to pci bridge is behind another pci to 3392 * pci bridge, then we need to keep traversing 3393 * up until an interrupt entry is found or we reach 3394 * the top of the tree 3395 */ 3396 ipin = (child_devno + child_ipin) % PCI_INTD; 3397 if (bridge_bus == 0 && apic_pci_bus_total == 1) 3398 bridge_bus = (int)apic_single_pci_busid; 3399 pci_irq = ((bridge_devno & 0x1f) << 2) | 3400 (ipin & 0x3); 3401 if ((*intrp = apic_find_io_intr_w_busid(pci_irq, 3402 bridge_bus)) != NULL) { 3403 return (pci_irq); 3404 } 3405 dip = dipp; 3406 child_devno = bridge_devno; 3407 child_ipin = ipin; 3408 } else 3409 return (-1); 3410 } 3411 /*LINTED: function will not fall off the bottom */ 3412 } 3413 3414 3415 3416 3417 static uchar_t 3418 acpi_find_ioapic(int irq) 3419 { 3420 int i; 3421 3422 for (i = 0; i < apic_io_max; i++) { 3423 if (irq >= apic_io_vectbase[i] && irq <= apic_io_vectend[i]) 3424 return (i); 3425 } 3426 return (0xFF); /* shouldn't happen */ 3427 } 3428 3429 /* 3430 * See if two irqs are compatible for sharing a vector. 3431 * Currently we only support sharing of PCI devices.
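 * Two irqs are judged compatible only when both are PCI and they resolve to the same trigger mode and polarity; e.g. two level-triggered active-low sources match, while a level/edge or high/low mix does not.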
3432 */ 3433 static int 3434 acpi_intr_compatible(iflag_t iflag1, iflag_t iflag2) 3435 { 3436 uint_t level1, po1; 3437 uint_t level2, po2; 3438 3439 /* Assume active high by default */ 3440 po1 = 0; 3441 po2 = 0; 3442 3443 if (iflag1.bustype != iflag2.bustype || iflag1.bustype != BUS_PCI) 3444 return (0); 3445 3446 if (iflag1.intr_el == INTR_EL_CONFORM) 3447 level1 = AV_LEVEL; 3448 else 3449 level1 = (iflag1.intr_el == INTR_EL_LEVEL) ? AV_LEVEL : 0; 3450 3451 if (level1 && ((iflag1.intr_po == INTR_PO_ACTIVE_LOW) || 3452 (iflag1.intr_po == INTR_PO_CONFORM))) 3453 po1 = AV_ACTIVE_LOW; 3454 3455 if (iflag2.intr_el == INTR_EL_CONFORM) 3456 level2 = AV_LEVEL; 3457 else 3458 level2 = (iflag2.intr_el == INTR_EL_LEVEL) ? AV_LEVEL : 0; 3459 3460 if (level2 && ((iflag2.intr_po == INTR_PO_ACTIVE_LOW) || 3461 (iflag2.intr_po == INTR_PO_CONFORM))) 3462 po2 = AV_ACTIVE_LOW; 3463 3464 if ((level1 == level2) && (po1 == po2)) 3465 return (1); 3466 3467 return (0); 3468 } 3469 3470 /* 3471 * Attempt to share vector with someone else 3472 */ 3473 static int 3474 apic_share_vector(int irqno, iflag_t *intr_flagp, short intr_index, int ipl, 3475 uchar_t ioapicindex, uchar_t ipin, apic_irq_t **irqptrp) 3476 { 3477 #ifdef DEBUG 3478 apic_irq_t *tmpirqp = NULL; 3479 #endif /* DEBUG */ 3480 apic_irq_t *irqptr, dummyirq; 3481 int newirq, chosen_irq = -1, share = 127; 3482 int lowest, highest, i; 3483 uchar_t share_id; 3484 3485 DDI_INTR_IMPLDBG((CE_CONT, "apic_share_vector: irqno=0x%x " 3486 "intr_index=0x%x ipl=0x%x\n", irqno, intr_index, ipl)); 3487 3488 highest = apic_ipltopri[ipl] + APIC_VECTOR_MASK; 3489 lowest = apic_ipltopri[ipl-1] + APIC_VECTOR_PER_IPL; 3490 3491 if (highest < lowest) /* Both ipl and ipl-1 map to same pri */ 3492 lowest -= APIC_VECTOR_PER_IPL; 3493 dummyirq.airq_mps_intr_index = intr_index; 3494 dummyirq.airq_ioapicindex = ioapicindex; 3495 dummyirq.airq_intin_no = ipin; 3496 if (intr_flagp) 3497 dummyirq.airq_iflag = *intr_flagp; 3498 apic_record_rdt_entry(&dummyirq, irqno); 3499 for (i = lowest; i <= highest; i++) { 3500 newirq = apic_vector_to_irq[i]; 3501 if (newirq == APIC_RESV_IRQ) 3502 continue; 3503 irqptr = apic_irq_table[newirq]; 3504 3505 if ((dummyirq.airq_rdt_entry & 0xFF00) != 3506 (irqptr->airq_rdt_entry & 0xFF00)) 3507 /* not compatible */ 3508 continue; 3509 3510 if (irqptr->airq_share < share) { 3511 share = irqptr->airq_share; 3512 chosen_irq = newirq; 3513 } 3514 } 3515 if (chosen_irq != -1) { 3516 /* 3517 * Assign a share id which is free or which is larger 3518 * than the largest one. 
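 * While walking the chain at the chosen irq, a FREE_INDEX entry donates its old share id; otherwise the new id grows past the largest one seen, so e.g. existing ids 1 and 2 yield a new id of 3.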
3519 */ 3520 share_id = 1; 3521 mutex_enter(&airq_mutex); 3522 irqptr = apic_irq_table[chosen_irq]; 3523 while (irqptr) { 3524 if (irqptr->airq_mps_intr_index == FREE_INDEX) { 3525 share_id = irqptr->airq_share_id; 3526 break; 3527 } 3528 if (share_id <= irqptr->airq_share_id) 3529 share_id = irqptr->airq_share_id + 1; 3530 #ifdef DEBUG 3531 tmpirqp = irqptr; 3532 #endif /* DEBUG */ 3533 irqptr = irqptr->airq_next; 3534 } 3535 if (!irqptr) { 3536 irqptr = kmem_zalloc(sizeof (apic_irq_t), KM_SLEEP); 3537 irqptr->airq_temp_cpu = IRQ_UNINIT; 3538 irqptr->airq_next = 3539 apic_irq_table[chosen_irq]->airq_next; 3540 apic_irq_table[chosen_irq]->airq_next = irqptr; 3541 #ifdef DEBUG 3542 tmpirqp = apic_irq_table[chosen_irq]; 3543 #endif /* DEBUG */ 3544 } 3545 irqptr->airq_mps_intr_index = intr_index; 3546 irqptr->airq_ioapicindex = ioapicindex; 3547 irqptr->airq_intin_no = ipin; 3548 if (intr_flagp) 3549 irqptr->airq_iflag = *intr_flagp; 3550 irqptr->airq_vector = apic_irq_table[chosen_irq]->airq_vector; 3551 irqptr->airq_share_id = share_id; 3552 apic_record_rdt_entry(irqptr, irqno); 3553 *irqptrp = irqptr; 3554 #ifdef DEBUG 3555 /* shuffle the pointers to test apic_delspl path */ 3556 if (tmpirqp) { 3557 tmpirqp->airq_next = irqptr->airq_next; 3558 irqptr->airq_next = apic_irq_table[chosen_irq]; 3559 apic_irq_table[chosen_irq] = irqptr; 3560 } 3561 #endif /* DEBUG */ 3562 mutex_exit(&airq_mutex); 3563 return (VIRTIRQ(chosen_irq, share_id)); 3564 } 3565 return (-1); 3566 } 3567 3568 /* 3569 * 3570 */ 3571 static int 3572 apic_setup_irq_table(dev_info_t *dip, int irqno, struct apic_io_intr *intrp, 3573 struct intrspec *ispec, iflag_t *intr_flagp, int type) 3574 { 3575 int origirq = ispec->intrspec_vec; 3576 uchar_t ipl = ispec->intrspec_pri; 3577 int newirq, intr_index; 3578 uchar_t ipin, ioapic, ioapicindex, vector; 3579 apic_irq_t *irqptr; 3580 major_t major; 3581 dev_info_t *sdip; 3582 3583 DDI_INTR_IMPLDBG((CE_CONT, "apic_setup_irq_table: dip=0x%p type=%d " 3584 "irqno=0x%x origirq=0x%x\n", (void *)dip, type, irqno, origirq)); 3585 3586 ASSERT(ispec != NULL); 3587 3588 major = (dip != NULL) ? ddi_name_to_major(ddi_get_name(dip)) : 0; 3589 3590 if (DDI_INTR_IS_MSI_OR_MSIX(type)) { 3591 /* MSI/X doesn't need to setup ioapic stuffs */ 3592 ioapicindex = 0xff; 3593 ioapic = 0xff; 3594 ipin = (uchar_t)0xff; 3595 intr_index = (type == DDI_INTR_TYPE_MSI) ? MSI_INDEX : 3596 MSIX_INDEX; 3597 mutex_enter(&airq_mutex); 3598 if ((irqno = apic_allocate_irq(apic_first_avail_irq)) == -1) { 3599 mutex_exit(&airq_mutex); 3600 /* need an irq for MSI/X to index into autovect[] */ 3601 cmn_err(CE_WARN, "No interrupt irq: %s instance %d", 3602 ddi_get_name(dip), ddi_get_instance(dip)); 3603 return (-1); 3604 } 3605 mutex_exit(&airq_mutex); 3606 3607 } else if (intrp != NULL) { 3608 intr_index = (int)(intrp - apic_io_intrp); 3609 ioapic = intrp->intr_destid; 3610 ipin = intrp->intr_destintin; 3611 /* Find ioapicindex. If destid was ALL, we will exit with 0. 
*/ 3612 for (ioapicindex = apic_io_max - 1; ioapicindex; ioapicindex--) 3613 if (apic_io_id[ioapicindex] == ioapic) 3614 break; 3615 ASSERT((ioapic == apic_io_id[ioapicindex]) || 3616 (ioapic == INTR_ALL_APIC)); 3617 3618 /* check whether this intin# has been used by another irqno */ 3619 if ((newirq = apic_find_intin(ioapicindex, ipin)) != -1) { 3620 return (newirq); 3621 } 3622 3623 } else if (intr_flagp != NULL) { 3624 /* ACPI case */ 3625 intr_index = ACPI_INDEX; 3626 ioapicindex = acpi_find_ioapic(irqno); 3627 ASSERT(ioapicindex != 0xFF); 3628 ioapic = apic_io_id[ioapicindex]; 3629 ipin = irqno - apic_io_vectbase[ioapicindex]; 3630 if (apic_irq_table[irqno] && 3631 apic_irq_table[irqno]->airq_mps_intr_index == ACPI_INDEX) { 3632 ASSERT(apic_irq_table[irqno]->airq_intin_no == ipin && 3633 apic_irq_table[irqno]->airq_ioapicindex == 3634 ioapicindex); 3635 return (irqno); 3636 } 3637 3638 } else { 3639 /* default configuration */ 3640 ioapicindex = 0; 3641 ioapic = apic_io_id[ioapicindex]; 3642 ipin = (uchar_t)irqno; 3643 intr_index = DEFAULT_INDEX; 3644 } 3645 3646 if (ispec == NULL) { 3647 APIC_VERBOSE_IOAPIC((CE_WARN, "No intrspec for irqno = %x\n", 3648 irqno)); 3649 } else if ((vector = apic_allocate_vector(ipl, irqno, 0)) == 0) { 3650 if ((newirq = apic_share_vector(irqno, intr_flagp, intr_index, 3651 ipl, ioapicindex, ipin, &irqptr)) != -1) { 3652 irqptr->airq_ipl = ipl; 3653 irqptr->airq_origirq = (uchar_t)origirq; 3654 irqptr->airq_dip = dip; 3655 irqptr->airq_major = major; 3656 sdip = apic_irq_table[IRQINDEX(newirq)]->airq_dip; 3657 /* This is OK to do really */ 3658 if (sdip == NULL) { 3659 cmn_err(CE_WARN, "Sharing vectors: %s" 3660 " instance %d and SCI", 3661 ddi_get_name(dip), ddi_get_instance(dip)); 3662 } else { 3663 cmn_err(CE_WARN, "Sharing vectors: %s" 3664 " instance %d and %s instance %d", 3665 ddi_get_name(sdip), ddi_get_instance(sdip), 3666 ddi_get_name(dip), ddi_get_instance(dip)); 3667 } 3668 return (newirq); 3669 } 3670 /* try high priority allocation now that share has failed */ 3671 if ((vector = apic_allocate_vector(ipl, irqno, 1)) == 0) { 3672 cmn_err(CE_WARN, "No interrupt vector: %s instance %d", 3673 ddi_get_name(dip), ddi_get_instance(dip)); 3674 return (-1); 3675 } 3676 } 3677 3678 mutex_enter(&airq_mutex); 3679 if (apic_irq_table[irqno] == NULL) { 3680 irqptr = kmem_zalloc(sizeof (apic_irq_t), KM_SLEEP); 3681 irqptr->airq_temp_cpu = IRQ_UNINIT; 3682 apic_irq_table[irqno] = irqptr; 3683 } else { 3684 irqptr = apic_irq_table[irqno]; 3685 if (irqptr->airq_mps_intr_index != FREE_INDEX) { 3686 /* 3687 * The slot is used by another irqno, so allocate 3688 * a free irqno for this interrupt 3689 */ 3690 newirq = apic_allocate_irq(apic_first_avail_irq); 3691 if (newirq == -1) { 3692 mutex_exit(&airq_mutex); 3693 return (-1); 3694 } 3695 irqno = newirq; 3696 irqptr = apic_irq_table[irqno]; 3697 if (irqptr == NULL) { 3698 irqptr = kmem_zalloc(sizeof (apic_irq_t), 3699 KM_SLEEP); 3700 irqptr->airq_temp_cpu = IRQ_UNINIT; 3701 apic_irq_table[irqno] = irqptr; 3702 } 3703 apic_modify_vector(vector, newirq); 3704 } 3705 } 3706 apic_max_device_irq = max(irqno, apic_max_device_irq); 3707 apic_min_device_irq = min(irqno, apic_min_device_irq); 3708 mutex_exit(&airq_mutex); 3709 irqptr->airq_ioapicindex = ioapicindex; 3710 irqptr->airq_intin_no = ipin; 3711 irqptr->airq_ipl = ipl; 3712 irqptr->airq_vector = vector; 3713 irqptr->airq_origirq = (uchar_t)origirq; 3714 irqptr->airq_share_id = 0; 3715 irqptr->airq_mps_intr_index = (short)intr_index; 3716 irqptr->airq_dip = 
dip; 3717 irqptr->airq_major = major; 3718 irqptr->airq_cpu = apic_bind_intr(dip, irqno, ioapic, ipin); 3719 if (intr_flagp) 3720 irqptr->airq_iflag = *intr_flagp; 3721 3722 if (!DDI_INTR_IS_MSI_OR_MSIX(type)) { 3723 /* setup I/O APIC entry for non-MSI/X interrupts */ 3724 apic_record_rdt_entry(irqptr, irqno); 3725 } 3726 return (irqno); 3727 } 3728 3729 /* 3730 * return the cpu to which this intr should be bound. 3731 * Check properties or any other mechanism to see if user wants it 3732 * bound to a specific CPU. If so, return the cpu id with high bit set. 3733 * If not, use the policy to choose a cpu and return the id. 3734 */ 3735 uchar_t 3736 apic_bind_intr(dev_info_t *dip, int irq, uchar_t ioapicid, uchar_t intin) 3737 { 3738 int instance, instno, prop_len, bind_cpu, count; 3739 uint_t i, rc; 3740 uchar_t cpu; 3741 major_t major; 3742 char *name, *drv_name, *prop_val, *cptr; 3743 char prop_name[32]; 3744 3745 3746 if (apic_intr_policy == INTR_LOWEST_PRIORITY) 3747 return (IRQ_UNBOUND); 3748 3749 drv_name = NULL; 3750 rc = DDI_PROP_NOT_FOUND; 3751 major = (major_t)-1; 3752 if (dip != NULL) { 3753 name = ddi_get_name(dip); 3754 major = ddi_name_to_major(name); 3755 drv_name = ddi_major_to_name(major); 3756 instance = ddi_get_instance(dip); 3757 if (apic_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) { 3758 i = apic_min_device_irq; 3759 for (; i <= apic_max_device_irq; i++) { 3760 3761 if ((i == irq) || (apic_irq_table[i] == NULL) || 3762 (apic_irq_table[i]->airq_mps_intr_index 3763 == FREE_INDEX)) 3764 continue; 3765 3766 if ((apic_irq_table[i]->airq_major == major) && 3767 (!(apic_irq_table[i]->airq_cpu & 3768 IRQ_USER_BOUND))) { 3769 3770 cpu = apic_irq_table[i]->airq_cpu; 3771 3772 cmn_err(CE_CONT, 3773 "!pcplusmp: %s (%s) instance #%d " 3774 "vector 0x%x ioapic 0x%x " 3775 "intin 0x%x is bound to cpu %d\n", 3776 name, drv_name, instance, irq, 3777 ioapicid, intin, cpu); 3778 return (cpu); 3779 } 3780 } 3781 } 3782 /* 3783 * search for "drvname"_intpt_bind_cpus property first, the 3784 * syntax of the property should be "a[,b,c,...]" where 3785 * instance 0 binds to cpu a, instance 1 binds to cpu b, 3786 * instance 3 binds to cpu c... 3787 * ddi_getlongprop() will search /option first, then / 3788 * if "drvname"_intpt_bind_cpus doesn't exist, then find 3789 * intpt_bind_cpus property. 
The syntax is the same, and 3790 * it applies to all the devices if its "drvname" specific 3791 * property doesn't exist 3792 */ 3793 (void) strcpy(prop_name, drv_name); 3794 (void) strcat(prop_name, "_intpt_bind_cpus"); 3795 rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, 0, prop_name, 3796 (caddr_t)&prop_val, &prop_len); 3797 if (rc != DDI_PROP_SUCCESS) { 3798 rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, 0, 3799 "intpt_bind_cpus", (caddr_t)&prop_val, &prop_len); 3800 } 3801 } 3802 if (rc == DDI_PROP_SUCCESS) { 3803 for (i = count = 0; i < (prop_len - 1); i++) 3804 if (prop_val[i] == ',') 3805 count++; 3806 if (prop_val[i-1] != ',') 3807 count++; 3808 /* 3809 * if somehow the binding instances defined in the 3810 * property are not enough for this instno., then 3811 * reuse the pattern for the next instance until 3812 * it reaches the requested instno 3813 */ 3814 instno = instance % count; 3815 i = 0; 3816 cptr = prop_val; 3817 while (i < instno) 3818 if (*cptr++ == ',') 3819 i++; 3820 bind_cpu = stoi(&cptr); 3821 /* if specific cpu is bogus, then default to cpu 0 */ 3822 if (bind_cpu >= apic_nproc) { 3823 cmn_err(CE_WARN, "pcplusmp: %s=%s: CPU %d not present", 3824 prop_name, prop_val, bind_cpu); 3825 bind_cpu = 0; 3826 } else { 3827 /* indicate that we are bound at user request */ 3828 bind_cpu |= IRQ_USER_BOUND; 3829 } 3830 /* free prop_val only after its last use, in the message above */ kmem_free(prop_val, prop_len); 3831 /* 3832 * no need to check apic_cpus[].aci_status, if specific cpu is 3833 * not up, then post_cpu_start will handle it. 3834 */ 3835 } else { 3836 bind_cpu = apic_next_bind_cpu++; 3837 if (bind_cpu >= apic_nproc) { 3838 apic_next_bind_cpu = 1; 3839 bind_cpu = 0; 3840 } 3841 } 3842 if (drv_name != NULL) 3843 cmn_err(CE_CONT, "!pcplusmp: %s (%s) instance %d " 3844 "vector 0x%x ioapic 0x%x intin 0x%x is bound to cpu %d\n", 3845 name, drv_name, instance, 3846 irq, ioapicid, intin, bind_cpu & ~IRQ_USER_BOUND); 3847 else 3848 cmn_err(CE_CONT, "!pcplusmp: " 3849 "vector 0x%x ioapic 0x%x intin 0x%x is bound to cpu %d\n", 3850 irq, ioapicid, intin, bind_cpu & ~IRQ_USER_BOUND); 3851 3852 return ((uchar_t)bind_cpu); 3853 } 3854 3855 static struct apic_io_intr * 3856 apic_find_io_intr_w_busid(int irqno, int busid) 3857 { 3858 struct apic_io_intr *intrp; 3859 3860 /* 3861 * It can have more than 1 entry with the same source bus IRQ, 3862 * but unique with the source bus id 3863 */ 3864 intrp = apic_io_intrp; 3865 if (intrp != NULL) { 3866 while (intrp->intr_entry == APIC_IO_INTR_ENTRY) { 3867 if (intrp->intr_irq == irqno && 3868 intrp->intr_busid == busid && 3869 intrp->intr_type == IO_INTR_INT) 3870 return (intrp); 3871 intrp++; 3872 } 3873 } 3874 APIC_VERBOSE_IOAPIC((CE_NOTE, "Did not find io intr for irqno:" 3875 "busid %x:%x\n", irqno, busid)); 3876 return ((struct apic_io_intr *)NULL); 3877 } 3878 3879 3880 struct mps_bus_info { 3881 char *bus_name; 3882 int bus_id; 3883 } bus_info_array[] = { 3884 "ISA ", BUS_ISA, 3885 "PCI ", BUS_PCI, 3886 "EISA ", BUS_EISA, 3887 "XPRESS", BUS_XPRESS, 3888 "PCMCIA", BUS_PCMCIA, 3889 "VL ", BUS_VL, 3890 "CBUS ", BUS_CBUS, 3891 "CBUSII", BUS_CBUSII, 3892 "FUTURE", BUS_FUTURE, 3893 "INTERN", BUS_INTERN, 3894 "MBI ", BUS_MBI, 3895 "MBII ", BUS_MBII, 3896 "MPI ", BUS_MPI, 3897 "MPSA ", BUS_MPSA, 3898 "NUBUS ", BUS_NUBUS, 3899 "TC ", BUS_TC, 3900 "VME ", BUS_VME, 3901 "PCI-E ", BUS_PCIE 3902 }; 3903 3904 static int 3905 apic_find_bus_type(char *bus) 3906 { 3907 int i = 0; 3908 3909 for (; i < sizeof (bus_info_array)/sizeof (struct mps_bus_info); i++) 3910 if (strncmp(bus, bus_info_array[i].bus_name, 3911

static struct apic_io_intr *
apic_find_io_intr_w_busid(int irqno, int busid)
{
	struct apic_io_intr *intrp;

	/*
	 * The table can have more than one entry with the same source bus
	 * IRQ, but each entry is unique when combined with the source bus id.
	 */
	intrp = apic_io_intrp;
	if (intrp != NULL) {
		while (intrp->intr_entry == APIC_IO_INTR_ENTRY) {
			if (intrp->intr_irq == irqno &&
			    intrp->intr_busid == busid &&
			    intrp->intr_type == IO_INTR_INT)
				return (intrp);
			intrp++;
		}
	}
	APIC_VERBOSE_IOAPIC((CE_NOTE, "Did not find io intr for irqno:"
	    "busid %x:%x\n", irqno, busid));
	return ((struct apic_io_intr *)NULL);
}


struct mps_bus_info {
	char	*bus_name;
	int	bus_id;
} bus_info_array[] = {
	{ "ISA ",	BUS_ISA },
	{ "PCI ",	BUS_PCI },
	{ "EISA ",	BUS_EISA },
	{ "XPRESS",	BUS_XPRESS },
	{ "PCMCIA",	BUS_PCMCIA },
	{ "VL ",	BUS_VL },
	{ "CBUS ",	BUS_CBUS },
	{ "CBUSII",	BUS_CBUSII },
	{ "FUTURE",	BUS_FUTURE },
	{ "INTERN",	BUS_INTERN },
	{ "MBI ",	BUS_MBI },
	{ "MBII ",	BUS_MBII },
	{ "MPI ",	BUS_MPI },
	{ "MPSA ",	BUS_MPSA },
	{ "NUBUS ",	BUS_NUBUS },
	{ "TC ",	BUS_TC },
	{ "VME ",	BUS_VME },
	{ "PCI-E ",	BUS_PCIE }
};

static int
apic_find_bus_type(char *bus)
{
	int	i = 0;

	for (; i < sizeof (bus_info_array) / sizeof (struct mps_bus_info); i++)
		if (strncmp(bus, bus_info_array[i].bus_name,
		    strlen(bus_info_array[i].bus_name)) == 0)
			return (bus_info_array[i].bus_id);
	APIC_VERBOSE_IOAPIC((CE_WARN, "Did not find bus type for bus %s", bus));
	return (0);
}
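
/*
 * Usage note (illustrative, not from the original source): MP
 * specification bus entries carry blank-padded, non-NUL-terminated type
 * strings, which is why apic_find_bus_type() compares only
 * strlen(bus_name) characters.  A bus string beginning "PCI" followed
 * by padding matches the "PCI " entry and yields BUS_PCI, while
 * "PCI-E " is still distinguished by its fourth character.
 */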

static int
apic_find_bus(int busid)
{
	struct apic_bus	*busp;

	busp = apic_busp;
	while (busp->bus_entry == APIC_BUS_ENTRY) {
		if (busp->bus_id == busid)
			return (apic_find_bus_type((char *)&busp->bus_str1));
		busp++;
	}
	APIC_VERBOSE_IOAPIC((CE_WARN, "Did not find bus for bus id %x", busid));
	return (0);
}

static int
apic_find_bus_id(int bustype)
{
	struct apic_bus	*busp;

	busp = apic_busp;
	while (busp->bus_entry == APIC_BUS_ENTRY) {
		if (apic_find_bus_type((char *)&busp->bus_str1) == bustype)
			return (busp->bus_id);
		busp++;
	}
	APIC_VERBOSE_IOAPIC((CE_WARN, "Did not find bus id for bustype %x",
	    bustype));
	return (-1);
}

/*
 * Check if a particular irq needs to be reserved for any io_intr
 */
static struct apic_io_intr *
apic_find_io_intr(int irqno)
{
	struct apic_io_intr *intrp;

	intrp = apic_io_intrp;
	if (intrp != NULL) {
		while (intrp->intr_entry == APIC_IO_INTR_ENTRY) {
			if (intrp->intr_irq == irqno &&
			    intrp->intr_type == IO_INTR_INT)
				return (intrp);
			intrp++;
		}
	}
	return ((struct apic_io_intr *)NULL);
}

/*
 * Check if the given ioapicindex/intin combination has already been
 * assigned an irq.  If so, return the irqno; otherwise return -1.
 */
static int
apic_find_intin(uchar_t ioapic, uchar_t intin)
{
	apic_irq_t *irqptr;
	int	i;

	/* find ioapic and intin in the apic_irq_table[] and return the index */
	for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) {
		irqptr = apic_irq_table[i];
		while (irqptr) {
			if ((irqptr->airq_mps_intr_index >= 0) &&
			    (irqptr->airq_intin_no == intin) &&
			    (irqptr->airq_ioapicindex == ioapic)) {
				APIC_VERBOSE_IOAPIC((CE_NOTE, "!Found irq "
				    "entry for ioapic:intin %x:%x "
				    "shared interrupts ?", ioapic, intin));
				return (i);
			}
			irqptr = irqptr->airq_next;
		}
	}
	return (-1);
}

int
apic_allocate_irq(int irq)
{
	int	freeirq, i;

	if ((freeirq = apic_find_free_irq(irq, (APIC_RESV_IRQ - 1))) == -1)
		if ((freeirq = apic_find_free_irq(APIC_FIRST_FREE_IRQ,
		    (irq - 1))) == -1) {
			/*
			 * If the BIOS really defines every single irq in the
			 * MPS table, then don't worry about conflicting with
			 * them; just use any free slot in apic_irq_table.
			 */
			for (i = APIC_FIRST_FREE_IRQ; i < APIC_RESV_IRQ; i++) {
				if ((apic_irq_table[i] == NULL) ||
				    apic_irq_table[i]->airq_mps_intr_index ==
				    FREE_INDEX) {
					freeirq = i;
					break;
				}
			}
			if (freeirq == -1) {
				/* This shouldn't happen, but just in case */
				cmn_err(CE_WARN, "pcplusmp: NO available IRQ");
				return (-1);
			}
		}
	if (apic_irq_table[freeirq] == NULL) {
		apic_irq_table[freeirq] =
		    kmem_zalloc(sizeof (apic_irq_t), KM_NOSLEEP);
		if (apic_irq_table[freeirq] == NULL) {
			cmn_err(CE_WARN, "pcplusmp: NO memory to allocate IRQ");
			return (-1);
		}
		apic_irq_table[freeirq]->airq_mps_intr_index = FREE_INDEX;
	}
	return (freeirq);
}

static int
apic_find_free_irq(int start, int end)
{
	int	i;

	for (i = start; i <= end; i++)
		/* Check if any I/O entry needs this IRQ */
		if (apic_find_io_intr(i) == NULL) {
			/* Then see if it is free */
			if ((apic_irq_table[i] == NULL) ||
			    (apic_irq_table[i]->airq_mps_intr_index ==
			    FREE_INDEX)) {
				return (i);
			}
		}
	return (-1);
}

/*
 * Allocate a free vector for irq at ipl.  Takes care of merging of multiple
 * IPLs into a single APIC level as well as stretching some IPLs onto multiple
 * levels.  APIC_HI_PRI_VECTS interrupts are reserved for high priority
 * requests and allocated only when pri is set.
 */
static uchar_t
apic_allocate_vector(int ipl, int irq, int pri)
{
	int	lowest, highest, i;

	highest = apic_ipltopri[ipl] + APIC_VECTOR_MASK;
	lowest = apic_ipltopri[ipl - 1] + APIC_VECTOR_PER_IPL;

	if (highest < lowest) /* Both ipl and ipl - 1 map to same pri */
		lowest -= APIC_VECTOR_PER_IPL;

#ifdef DEBUG
	if (apic_restrict_vector) /* for testing shared interrupt logic */
		highest = lowest + apic_restrict_vector + APIC_HI_PRI_VECTS;
#endif /* DEBUG */
	if (pri == 0)
		highest -= APIC_HI_PRI_VECTS;

	for (i = lowest; i < highest; i++) {
		if (APIC_CHECK_RESERVE_VECTORS(i))
			continue;
		if (apic_vector_to_irq[i] == APIC_RESV_IRQ) {
			apic_vector_to_irq[i] = (uchar_t)irq;
			return (i);
		}
	}

	return (0);
}
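
/*
 * A worked sketch of the range computation above (values assumed from
 * apic.h, where APIC_VECTOR_PER_IPL is 0x10 and APIC_VECTOR_MASK is
 * 0xf): if apic_ipltopri[ipl] == 0x50 and apic_ipltopri[ipl - 1] ==
 * 0x40, the search covers vectors 0x50 through 0x5f, with the top
 * APIC_HI_PRI_VECTS entries withheld unless pri is set.  If ipl and
 * ipl - 1 map to the same priority class, highest < lowest initially,
 * and the range is stretched downward by one IPL's worth of vectors.
 */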

static void
apic_modify_vector(uchar_t vector, int irq)
{
	apic_vector_to_irq[vector] = (uchar_t)irq;
}

/*
 * Mark vector as being in the process of being deleted.  Interrupts
 * may still come in on some CPU.  The moment an interrupt comes with
 * the new vector, we know we can free the old one.  Called only from
 * addspl and delspl with interrupts disabled.  Because an interrupt
 * can be shared, but no interrupt from any sharing device may come in,
 * we also use a timeout mechanism, which we arbitrarily set to
 * apic_revector_timeout microseconds.
 */
static void
apic_mark_vector(uchar_t oldvector, uchar_t newvector)
{
	int iflag = intr_clear();
	lock_set(&apic_revector_lock);
	if (!apic_oldvec_to_newvec) {
		apic_oldvec_to_newvec =
		    kmem_zalloc(sizeof (newvector) * APIC_MAX_VECTOR * 2,
		    KM_NOSLEEP);

		if (!apic_oldvec_to_newvec) {
			/*
			 * This failure is not catastrophic.
			 * But, the oldvec will never be freed.
			 */
			apic_error |= APIC_ERR_MARK_VECTOR_FAIL;
			lock_clear(&apic_revector_lock);
			intr_restore(iflag);
			return;
		}
		apic_newvec_to_oldvec = &apic_oldvec_to_newvec[APIC_MAX_VECTOR];
	}

	/* See if we already did this for drivers which do double addintrs */
	if (apic_oldvec_to_newvec[oldvector] != newvector) {
		apic_oldvec_to_newvec[oldvector] = newvector;
		apic_newvec_to_oldvec[newvector] = oldvector;
		apic_revector_pending++;
	}
	lock_clear(&apic_revector_lock);
	intr_restore(iflag);
	(void) timeout(apic_xlate_vector_free_timeout_handler,
	    (void *)(uintptr_t)oldvector, drv_usectohz(apic_revector_timeout));
}

/*
 * xlate_vector is called from intr_enter if revector_pending is set.
 * It will xlate it if needed and mark the old vector as free.
 */
static uchar_t
apic_xlate_vector(uchar_t vector)
{
	uchar_t	newvector, oldvector = 0;

	lock_set(&apic_revector_lock);
	/* Do we really need to do this? */
	if (!apic_revector_pending) {
		lock_clear(&apic_revector_lock);
		return (vector);
	}
	if ((newvector = apic_oldvec_to_newvec[vector]) != 0)
		oldvector = vector;
	else {
		/*
		 * The incoming vector is new.  See if a stale entry
		 * remains.
		 */
		if ((oldvector = apic_newvec_to_oldvec[vector]) != 0)
			newvector = vector;
	}

	if (oldvector) {
		apic_revector_pending--;
		apic_oldvec_to_newvec[oldvector] = 0;
		apic_newvec_to_oldvec[newvector] = 0;
		apic_free_vector(oldvector);
		lock_clear(&apic_revector_lock);
		/* There could have been more than one reprogramming! */
		return (apic_xlate_vector(newvector));
	}
	lock_clear(&apic_revector_lock);
	return (vector);
}

void
apic_xlate_vector_free_timeout_handler(void *arg)
{
	int iflag;
	uchar_t oldvector, newvector;

	oldvector = (uchar_t)(uintptr_t)arg;
	iflag = intr_clear();
	lock_set(&apic_revector_lock);
	if ((newvector = apic_oldvec_to_newvec[oldvector]) != 0) {
		apic_free_vector(oldvector);
		apic_oldvec_to_newvec[oldvector] = 0;
		apic_newvec_to_oldvec[newvector] = 0;
		apic_revector_pending--;
	}

	lock_clear(&apic_revector_lock);
	intr_restore(iflag);
}
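
/*
 * Illustrative walk-through of the revectoring machinery above (vector
 * numbers invented): suppose addspl moves an irq from vector 0x61 to
 * 0x83.  apic_mark_vector() records oldvec_to_newvec[0x61] = 0x83 and
 * newvec_to_oldvec[0x83] = 0x61 and bumps apic_revector_pending.  If an
 * in-flight interrupt still arrives on 0x61, apic_xlate_vector(0x61)
 * returns 0x83 and frees 0x61; if the first arrival is already on 0x83,
 * the stale 0x61 entry is found via newvec_to_oldvec and freed just the
 * same.  The timeout handler covers the case where neither vector ever
 * fires again.
 */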

/* Mark vector as not being used by any irq */
static void
apic_free_vector(uchar_t vector)
{
	apic_vector_to_irq[vector] = APIC_RESV_IRQ;
}

/*
 * Compute the polarity, trigger mode and vector for programming into
 * the I/O APIC and record them in airq_rdt_entry.
 */
static void
apic_record_rdt_entry(apic_irq_t *irqptr, int irq)
{
	int	ioapicindex, bus_type, vector;
	short	intr_index;
	uint_t	level, po, io_po;
	struct apic_io_intr *iointrp;

	intr_index = irqptr->airq_mps_intr_index;
	DDI_INTR_IMPLDBG((CE_CONT, "apic_record_rdt_entry: intr_index=%d "
	    "irq = 0x%x dip = 0x%p vector = 0x%x\n", intr_index, irq,
	    (void *)irqptr->airq_dip, irqptr->airq_vector));

	if (intr_index == RESERVE_INDEX) {
		apic_error |= APIC_ERR_INVALID_INDEX;
		return;
	} else if (APIC_IS_MSI_OR_MSIX_INDEX(intr_index)) {
		return;
	}

	vector = irqptr->airq_vector;
	ioapicindex = irqptr->airq_ioapicindex;
	/* Assume edge triggered by default */
	level = 0;
	/* Assume active high by default */
	po = 0;

	if (intr_index == DEFAULT_INDEX || intr_index == FREE_INDEX) {
		ASSERT(irq < 16);
		if (eisa_level_intr_mask & (1 << irq))
			level = AV_LEVEL;
		if (intr_index == FREE_INDEX && apic_defconf == 0)
			apic_error |= APIC_ERR_INVALID_INDEX;
	} else if (intr_index == ACPI_INDEX) {
		bus_type = irqptr->airq_iflag.bustype;
		if (irqptr->airq_iflag.intr_el == INTR_EL_CONFORM) {
			if (bus_type == BUS_PCI)
				level = AV_LEVEL;
		} else
			level = (irqptr->airq_iflag.intr_el == INTR_EL_LEVEL) ?
			    AV_LEVEL : 0;
		if (level &&
		    ((irqptr->airq_iflag.intr_po == INTR_PO_ACTIVE_LOW) ||
		    (irqptr->airq_iflag.intr_po == INTR_PO_CONFORM &&
		    bus_type == BUS_PCI)))
			po = AV_ACTIVE_LOW;
	} else {
		iointrp = apic_io_intrp + intr_index;
		bus_type = apic_find_bus(iointrp->intr_busid);
		if (iointrp->intr_el == INTR_EL_CONFORM) {
			if ((irq < 16) && (eisa_level_intr_mask & (1 << irq)))
				level = AV_LEVEL;
			else if (bus_type == BUS_PCI)
				level = AV_LEVEL;
		} else
			level = (iointrp->intr_el == INTR_EL_LEVEL) ?
			    AV_LEVEL : 0;
		if (level && ((iointrp->intr_po == INTR_PO_ACTIVE_LOW) ||
		    (iointrp->intr_po == INTR_PO_CONFORM &&
		    bus_type == BUS_PCI)))
			po = AV_ACTIVE_LOW;
	}
	if (level)
		apic_level_intr[irq] = 1;
	/*
	 * The 82489DX External APIC cannot do active low polarity interrupts.
	 */
	if (po && (apic_io_ver[ioapicindex] != IOAPIC_VER_82489DX))
		io_po = po;
	else
		io_po = 0;

	if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG)
		printf("setio: ioapic=%x intin=%x level=%x po=%x vector=%x\n",
		    ioapicindex, irqptr->airq_intin_no, level, io_po, vector);

	irqptr->airq_rdt_entry = level | io_po | vector;
}
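
/*
 * A concrete example of the recorded value (bit names are the RDT
 * trigger-mode and polarity bits from apic.h; the vector number is
 * invented): a level-triggered, active-low PCI interrupt assigned
 * vector 0x62 on a non-82489DX IOAPIC yields
 *
 *	airq_rdt_entry = AV_LEVEL | AV_ACTIVE_LOW | 0x62;
 *
 * whereas an edge-triggered, active-high ISA interrupt on the same
 * vector records just 0x62.
 */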

static processorid_t
apic_find_cpu(int flag)
{
	processorid_t acid = 0;
	int	i;

	/* Find the first CPU with the passed-in flag set */
	for (i = 0; i < apic_nproc; i++) {
		if (apic_cpus[i].aci_status & flag) {
			acid = i;
			break;
		}
	}

	ASSERT((apic_cpus[acid].aci_status & flag) != 0);
	return (acid);
}

/*
 * Call rebind to do the actual programming.
 * Must be called with interrupts disabled and apic_ioapic_lock held.
 * 'p' is polymorphic -- if this function is called to process a deferred
 * reprogramming, p is of type 'struct ioapic_reprogram_data *', from which
 * the irq pointer is retrieved.  If not doing deferred reprogramming,
 * p is of the type 'apic_irq_t *'.
 *
 * apic_ioapic_lock must be held across this call, as it protects apic_rebind
 * and it protects apic_find_cpu() from a race in which a CPU can be taken
 * offline after a CPU is selected, but before apic_rebind is called to
 * bind interrupts to it.
 */
static int
apic_setup_io_intr(void *p, int irq, boolean_t deferred)
{
	apic_irq_t *irqptr;
	struct ioapic_reprogram_data *drep = NULL;
	int rv;

	if (deferred) {
		drep = (struct ioapic_reprogram_data *)p;
		ASSERT(drep != NULL);
		irqptr = drep->irqp;
	} else
		irqptr = (apic_irq_t *)p;

	ASSERT(irqptr != NULL);

	rv = apic_rebind(irqptr, apic_irq_table[irq]->airq_cpu, drep);
	if (rv) {
		/*
		 * CPU is not up or interrupts are disabled.  Fall back to
		 * the first available CPU.
		 */
		rv = apic_rebind(irqptr, apic_find_cpu(APIC_CPU_INTR_ENABLE),
		    drep);
	}

	return (rv);
}

/*
 * Bind the interrupt corresponding to irq_ptr to bind_cpu.
 * Must be called with interrupts disabled and apic_ioapic_lock held.
 */
static int
apic_rebind(apic_irq_t *irq_ptr, int bind_cpu,
    struct ioapic_reprogram_data *drep)
{
	int	ioapicindex, intin_no;
	volatile int32_t *ioapic;
	uchar_t	airq_temp_cpu;
	apic_cpus_info_t *cpu_infop;
	uint32_t rdt_entry;
	int	which_irq;

	which_irq = apic_vector_to_irq[irq_ptr->airq_vector];

	intin_no = irq_ptr->airq_intin_no;
	ioapicindex = irq_ptr->airq_ioapicindex;
	ioapic = apicioadr[ioapicindex];
	airq_temp_cpu = irq_ptr->airq_temp_cpu;
	if (airq_temp_cpu != IRQ_UNINIT && airq_temp_cpu != IRQ_UNBOUND) {
		if (airq_temp_cpu & IRQ_USER_BOUND)
			/* Mask off high bit so it can be used as array index */
			airq_temp_cpu &= ~IRQ_USER_BOUND;

		ASSERT(airq_temp_cpu < apic_nproc);
	}

	/*
	 * Can't bind to a CPU that's not accepting interrupts:
	 */
	cpu_infop = &apic_cpus[bind_cpu & ~IRQ_USER_BOUND];
	if (!(cpu_infop->aci_status & APIC_CPU_INTR_ENABLE))
		return (1);

	/*
	 * If we are about to change the interrupt vector for this interrupt,
	 * and this interrupt is level-triggered, attached to an IOAPIC,
	 * has been delivered to a CPU and that CPU has not handled it
	 * yet, we cannot reprogram the IOAPIC now.
	 */
	if (!APIC_IS_MSI_OR_MSIX_INDEX(irq_ptr->airq_mps_intr_index)) {

		rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no);

		if ((irq_ptr->airq_vector != RDT_VECTOR(rdt_entry)) &&
		    apic_check_stuck_interrupt(irq_ptr, airq_temp_cpu,
		    bind_cpu, ioapic, intin_no, which_irq, drep) != 0) {

			return (0);
		}
	}

	/*
	 * NOTE: We do not unmask the RDT here, as an interrupt MAY still
	 * come in before we have a chance to reprogram it below.  The
	 * reprogramming below will simultaneously change and unmask the
	 * RDT entry.
	 */

	if ((uchar_t)bind_cpu == IRQ_UNBOUND) {

		rdt_entry = AV_LDEST | AV_LOPRI | irq_ptr->airq_rdt_entry;

		/* Write the RDT entry -- no specific CPU binding */
		WRITE_IOAPIC_RDT_ENTRY_HIGH_DWORD(ioapic, intin_no, AV_TOALL);

		if (airq_temp_cpu != IRQ_UNINIT && airq_temp_cpu != IRQ_UNBOUND)
			apic_cpus[airq_temp_cpu].aci_temp_bound--;

		/* Write the vector, trigger, and polarity portion of the RDT */
		WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no, rdt_entry);

		irq_ptr->airq_temp_cpu = IRQ_UNBOUND;
		return (0);
	}

	if (bind_cpu & IRQ_USER_BOUND) {
		cpu_infop->aci_bound++;
	} else {
		cpu_infop->aci_temp_bound++;
	}
	ASSERT((bind_cpu & ~IRQ_USER_BOUND) < apic_nproc);
	if (!APIC_IS_MSI_OR_MSIX_INDEX(irq_ptr->airq_mps_intr_index)) {
		/* Write the RDT entry -- bind to a specific CPU: */
		WRITE_IOAPIC_RDT_ENTRY_HIGH_DWORD(ioapic, intin_no,
		    cpu_infop->aci_local_id << APIC_ID_BIT_OFFSET);
	}
	if ((airq_temp_cpu != IRQ_UNBOUND) && (airq_temp_cpu != IRQ_UNINIT)) {
		apic_cpus[airq_temp_cpu].aci_temp_bound--;
	}
	if (!APIC_IS_MSI_OR_MSIX_INDEX(irq_ptr->airq_mps_intr_index)) {

		rdt_entry = AV_PDEST | AV_FIXED | irq_ptr->airq_rdt_entry;

		/* Write the vector, trigger, and polarity portion of the RDT */
		WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no, rdt_entry);

	} else {
		int type = (irq_ptr->airq_mps_intr_index == MSI_INDEX) ?
		    DDI_INTR_TYPE_MSI : DDI_INTR_TYPE_MSIX;
		(void) apic_pci_msi_disable_mode(irq_ptr->airq_dip, type,
		    ioapicindex);
		if (ioapicindex == irq_ptr->airq_origirq) {
			/* first one */
			DDI_INTR_IMPLDBG((CE_CONT, "apic_rebind: call "
			    "apic_pci_msi_enable_vector\n"));
			if (apic_pci_msi_enable_vector(irq_ptr->airq_dip, type,
			    which_irq, irq_ptr->airq_vector,
			    irq_ptr->airq_intin_no,
			    cpu_infop->aci_local_id) != PSM_SUCCESS) {
				cmn_err(CE_WARN, "pcplusmp: "
				    "apic_pci_msi_enable_vector "
				    "returned PSM_FAILURE");
			}
		}
		if ((ioapicindex + irq_ptr->airq_intin_no - 1) ==
		    irq_ptr->airq_origirq) { /* last one */
			DDI_INTR_IMPLDBG((CE_CONT, "apic_rebind: call "
			    "pci_msi_enable_mode\n"));
			if (apic_pci_msi_enable_mode(irq_ptr->airq_dip,
			    type, which_irq) != PSM_SUCCESS) {
				DDI_INTR_IMPLDBG((CE_CONT, "pcplusmp: "
				    "pci_msi_enable failed\n"));
				(void) apic_pci_msi_unconfigure(
				    irq_ptr->airq_dip, type, which_irq);
			}
		}
	}
	irq_ptr->airq_temp_cpu = (uchar_t)bind_cpu;
	apic_redist_cpu_skip &= ~(1 << (bind_cpu & ~IRQ_USER_BOUND));
	return (0);
}

static void
apic_last_ditch_clear_remote_irr(volatile int32_t *ioapic, int intin_no)
{
	if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no)
	    & AV_REMOTE_IRR) != 0) {
		/*
		 * Trying to clear the bit through normal
		 * channels has failed.  So as a last-ditch
		 * effort, try to set the trigger mode to
		 * edge, then to level.  This has been
		 * observed to work on many systems.
		 */
		WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic,
		    intin_no,
		    READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic,
		    intin_no) & ~AV_LEVEL);

		WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic,
		    intin_no,
		    READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic,
		    intin_no) | AV_LEVEL);

		/*
		 * If the bit's STILL set, this interrupt may
		 * be hosed.
		 */
		if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic,
		    intin_no) & AV_REMOTE_IRR) != 0) {

			prom_printf("pcplusmp: Remote IRR still "
			    "not clear for IOAPIC %p intin %d.\n"
			    "\tInterrupts to this pin may cease "
			    "functioning.\n", ioapic, intin_no);
#ifdef DEBUG
			apic_last_ditch_reprogram_failures++;
#endif
		}
	}
}

/*
 * This function is protected by apic_ioapic_lock coupled with the
 * fact that interrupts are disabled.
 */
static void
delete_defer_repro_ent(int which_irq)
{
	ASSERT(which_irq >= 0);
	ASSERT(which_irq <= 255);

	if (apic_reprogram_info[which_irq].done)
		return;

	apic_reprogram_info[which_irq].done = B_TRUE;

#ifdef DEBUG
	apic_defer_repro_total_retries +=
	    apic_reprogram_info[which_irq].tries;

	apic_defer_repro_successes++;
#endif

	if (--apic_reprogram_outstanding == 0) {

		setlvlx = apic_intr_exit;
	}
}


/*
 * Interrupts must be disabled during this function to prevent
 * self-deadlock.  Interrupts are disabled because this function
 * is called from apic_check_stuck_interrupt(), which is called
 * from apic_rebind(), which requires its caller to disable interrupts.
 */
static void
add_defer_repro_ent(apic_irq_t *irq_ptr, int which_irq, int new_bind_cpu)
{
	ASSERT(which_irq >= 0);
	ASSERT(which_irq <= 255);

	/*
	 * On the off-chance that there's already a deferred
	 * reprogramming on this irq, check, and if so, just update the
	 * CPU and irq pointer to which the interrupt is targeted, then return.
	 */
	if (!apic_reprogram_info[which_irq].done) {
		apic_reprogram_info[which_irq].bindcpu = new_bind_cpu;
		apic_reprogram_info[which_irq].irqp = irq_ptr;
		return;
	}

	apic_reprogram_info[which_irq].irqp = irq_ptr;
	apic_reprogram_info[which_irq].bindcpu = new_bind_cpu;
	apic_reprogram_info[which_irq].tries = 0;
	/*
	 * This must be the last thing set, since we're not
	 * grabbing any locks; apic_try_deferred_reprogram() will
	 * make its decision about using this entry iff done
	 * is false.
	 */
	apic_reprogram_info[which_irq].done = B_FALSE;

	/*
	 * If there were previously no deferred reprogrammings, change
	 * setlvlx to call apic_try_deferred_reprogram().
	 */
	if (++apic_reprogram_outstanding == 1) {

		setlvlx = apic_try_deferred_reprogram;
	}
}

static void
apic_try_deferred_reprogram(int prev_ipl, int irq)
{
	int reproirq, iflag;
	struct ioapic_reprogram_data *drep;

	apic_intr_exit(prev_ipl, irq);

	if (!lock_try(&apic_defer_reprogram_lock)) {
		return;
	}

	/*
	 * Acquire the apic_ioapic_lock so that any other operations that
	 * may affect the apic_reprogram_info state are serialized.
	 * It's still possible for the last deferred reprogramming to clear
	 * between the time we entered this function and the time we get to
	 * the for loop below.  In that case, *setlvlx will have been set
	 * back to apic_intr_exit and drep will be NULL.  (There's no way to
	 * stop that from happening -- we would need to grab a lock before
	 * calling *setlvlx, which is neither realistic nor prudent).
	 */
	iflag = intr_clear();
	lock_set(&apic_ioapic_lock);

	/*
	 * For each deferred RDT entry, try to reprogram it now.  Note that
	 * there is no lock acquisition to read apic_reprogram_info because
	 * '.done' is set only after the other fields in the structure are set.
	 */

	drep = NULL;
	for (reproirq = 0; reproirq <= APIC_MAX_VECTOR; reproirq++) {
		if (apic_reprogram_info[reproirq].done == B_FALSE) {
			drep = &apic_reprogram_info[reproirq];
			break;
		}
	}

	/*
	 * Either we found a deferred action to perform, or
	 * we entered this function spuriously, after *setlvlx
	 * was restored to point to apic_intr_exit.  Any other
	 * permutation is invalid.
	 */
	ASSERT(drep != NULL || *setlvlx == apic_intr_exit);

	/*
	 * Though we can't really do anything about errors
	 * at this point, keep track of them for reporting.
	 * Note that it is very possible for apic_setup_io_intr
	 * to re-add this deferred entry if the Remote IRR bit
	 * has not yet cleared.
	 */

#ifdef DEBUG
	if (drep != NULL) {
		if (apic_setup_io_intr(drep, reproirq, B_TRUE) != 0) {
			apic_deferred_setup_failures++;
		}
	} else {
		apic_deferred_spurious_enters++;
	}
#else
	if (drep != NULL)
		(void) apic_setup_io_intr(drep, reproirq, B_TRUE);
#endif

	lock_clear(&apic_ioapic_lock);
	intr_restore(iflag);

	lock_clear(&apic_defer_reprogram_lock);
}

static void
apic_ioapic_wait_pending_clear(volatile int32_t *ioapic, int intin_no)
{
	int waited;

	/*
	 * Wait for the delivery pending bit to clear.
	 */
	if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no) &
	    (AV_LEVEL | AV_PENDING)) == (AV_LEVEL | AV_PENDING)) {

		/*
		 * If we're still waiting on the delivery of this interrupt,
		 * continue to wait here until it is delivered (this should be
		 * a very small amount of time, but include a timeout just in
		 * case).
		 */
		for (waited = 0; waited < apic_max_reps_clear_pending;
		    waited++) {
			if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no)
			    & AV_PENDING) == 0) {
				break;
			}
		}
	}
}

/*
 * Checks to see if the IOAPIC interrupt entry specified has its Remote IRR
 * bit set.  Calls functions that modify the function that setlvlx points to,
 * so that the reprogramming can be retried very shortly.
 *
 * This function will mask the RDT entry if the interrupt is level-triggered.
 * (The caller is responsible for unmasking the RDT entry.)
 *
 * Returns non-zero if the caller should defer IOAPIC reprogramming.
 */
static int
apic_check_stuck_interrupt(apic_irq_t *irq_ptr, int old_bind_cpu,
    int new_bind_cpu, volatile int32_t *ioapic, int intin_no, int which_irq,
    struct ioapic_reprogram_data *drep)
{
	int32_t	rdt_entry;
	int	waited;
	int	reps = 0;

	/*
	 * Wait for the delivery pending bit to clear.
	 */
	do {
		++reps;

		apic_ioapic_wait_pending_clear(ioapic, intin_no);

		/*
		 * Mask the RDT entry, but only if it's a level-triggered
		 * interrupt.
		 */
		rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no);
		if ((rdt_entry & (AV_LEVEL | AV_MASK)) == AV_LEVEL) {

			/* Mask it */
			WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no,
			    AV_MASK | rdt_entry);
		}

		if ((rdt_entry & AV_LEVEL) == AV_LEVEL) {
			/*
			 * If there was a race and an interrupt was injected
			 * just before we masked, check for that case here.
			 * Then, unmask the RDT entry and try again.  If we're
			 * on our last try, don't unmask (because we want the
			 * RDT entry to remain masked for the rest of the
			 * function).
			 */
			rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic,
			    intin_no);
			if ((rdt_entry & AV_PENDING) &&
			    (reps < apic_max_reps_clear_pending)) {
				/* Unmask it */
				WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic,
				    intin_no, rdt_entry & ~AV_MASK);
			}
		}

	} while ((rdt_entry & AV_PENDING) &&
	    (reps < apic_max_reps_clear_pending));

#ifdef DEBUG
	if (rdt_entry & AV_PENDING)
		apic_intr_deliver_timeouts++;
#endif

	/*
	 * If the remote IRR bit is set, then the interrupt has been sent
	 * to a CPU for processing.  We have no choice but to wait for
	 * that CPU to process the interrupt, at which point the remote IRR
	 * bit will be cleared.
	 */
	if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no) &
	    (AV_LEVEL | AV_REMOTE_IRR)) == (AV_LEVEL | AV_REMOTE_IRR)) {

		/*
		 * If the CPU that this RDT is bound to is NOT the current
		 * CPU, wait until that CPU handles the interrupt and ACKs
		 * it.  If this interrupt is not bound to any CPU (that is,
		 * if it's bound to the logical destination of "anyone"), it
		 * may have been delivered to the current CPU so handle that
		 * case by deferring the reprogramming (below).
		 */
		if ((old_bind_cpu != IRQ_UNBOUND) &&
		    (old_bind_cpu != IRQ_UNINIT) &&
		    (old_bind_cpu != psm_get_cpu_id())) {
			for (waited = 0; waited < apic_max_reps_clear_pending;
			    waited++) {
				if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic,
				    intin_no) & AV_REMOTE_IRR) == 0) {

					delete_defer_repro_ent(which_irq);

					/* Remote IRR has cleared! */
					return (0);
				}
			}
		}

		/*
		 * If we waited and the Remote IRR bit is still not cleared,
		 * AND if we've retried APIC_REPROGRAM_MAX_TRIES times for
		 * this interrupt, try the last-ditch workaround:
		 */
		if (drep && drep->tries >= APIC_REPROGRAM_MAX_TRIES) {

			apic_last_ditch_clear_remote_irr(ioapic, intin_no);

			/* Mark this one as reprogrammed: */
			delete_defer_repro_ent(which_irq);

			return (0);
		} else {
#ifdef DEBUG
			apic_intr_deferrals++;
#endif

			/*
			 * If waiting for the Remote IRR bit (above) didn't
			 * allow it to clear, defer the reprogramming.
			 * Add a new deferred-programming entry if the
			 * caller passed a NULL one (and update the existing
			 * one in case anything changed).
			 */
			add_defer_repro_ent(irq_ptr, which_irq, new_bind_cpu);
			if (drep)
				drep->tries++;

			/* Inform caller to defer IOAPIC programming: */
			return (1);
		}

	}

	/* Remote IRR is clear */
	delete_defer_repro_ent(which_irq);

	return (0);
}

/*
 * Called to migrate all interrupts at an irq to another cpu.
 * Must be called with interrupts disabled and apic_ioapic_lock held.
 */
int
apic_rebind_all(apic_irq_t *irq_ptr, int bind_cpu)
{
	apic_irq_t	*irqptr = irq_ptr;
	int		retval = 0;

	while (irqptr) {
		if (irqptr->airq_temp_cpu != IRQ_UNINIT)
			retval |= apic_rebind(irqptr, bind_cpu, NULL);
		irqptr = irqptr->airq_next;
	}

	return (retval);
}

/*
 * apic_intr_redistribute does all the messy computations for identifying
 * which interrupt to move to which CPU.  Currently we do just one interrupt
 * at a time.  This reduces the time spent doing all of this within the
 * clock interrupt; when it is done in idle, we could do more than one.
 * First we find the most busy and the most free CPU (time in ISR only),
 * skipping those CPUs that have been identified as being ineligible
 * (cpu_skip).  Then we look for IRQs which are closest to the difference
 * between the most busy CPU and the average ISR load.  We try to find one
 * whose load is less than the difference; if none exists, we choose one
 * larger than the difference, provided it does not make the most idle CPU
 * worse than the most busy one.  In the end, we clear all the busy fields
 * for CPUs.  For IRQs, they are cleared as they are scanned.
 * (A worked numeric example follows this function.)
 */
static void
apic_intr_redistribute()
{
	int busiest_cpu, most_free_cpu;
	int cpu_free, cpu_busy, max_busy, min_busy;
	int min_free, diff;
	int average_busy, cpus_online;
	int i, busy, iflag;
	apic_cpus_info_t *cpu_infop;
	apic_irq_t *min_busy_irq = NULL;
	apic_irq_t *max_busy_irq = NULL;

	busiest_cpu = most_free_cpu = -1;
	cpu_free = cpu_busy = max_busy = average_busy = 0;
	min_free = apic_sample_factor_redistribution;
	cpus_online = 0;
	/*
	 * Below we will check for CPU_INTR_ENABLE, bound, temp_bound, temp_cpu
	 * without ioapic_lock.  That is OK as we are just doing statistical
	 * sampling anyway and any inaccuracy now will get corrected next time.
	 * The call to rebind which actually changes things will make sure
	 * we are consistent.
	 */
	for (i = 0; i < apic_nproc; i++) {
		if (!(apic_redist_cpu_skip & (1 << i)) &&
		    (apic_cpus[i].aci_status & APIC_CPU_INTR_ENABLE)) {

			cpu_infop = &apic_cpus[i];
			/*
			 * If no unbound interrupts or only 1 total on this
			 * CPU, skip
			 */
			if (!cpu_infop->aci_temp_bound ||
			    (cpu_infop->aci_bound + cpu_infop->aci_temp_bound)
			    == 1) {
				apic_redist_cpu_skip |= 1 << i;
				continue;
			}

			busy = cpu_infop->aci_busy;
			average_busy += busy;
			cpus_online++;
			if (max_busy < busy) {
				max_busy = busy;
				busiest_cpu = i;
			}
			if (min_free > busy) {
				min_free = busy;
				most_free_cpu = i;
			}
			if (busy > apic_int_busy_mark) {
				cpu_busy |= 1 << i;
			} else {
				if (busy < apic_int_free_mark)
					cpu_free |= 1 << i;
			}
		}
	}
	if ((cpu_busy && cpu_free) ||
	    (max_busy >= (min_free + apic_diff_for_redistribution))) {

		apic_num_imbalance++;
#ifdef DEBUG
		if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) {
			prom_printf(
			    "redistribute busy=%x free=%x max=%x min=%x",
			    cpu_busy, cpu_free, max_busy, min_free);
		}
#endif /* DEBUG */


		average_busy /= cpus_online;

		diff = max_busy - average_busy;
		min_busy = max_busy; /* start with the max possible value */
		max_busy = 0;
		min_busy_irq = max_busy_irq = NULL;
		i = apic_min_device_irq;
		for (; i < apic_max_device_irq; i++) {
			apic_irq_t *irq_ptr;
			/* Change to linked list per CPU? */
			if ((irq_ptr = apic_irq_table[i]) == NULL)
				continue;
			/* Check for irq_busy & decide which one to move */
			/* Also zero them for next round */
			if ((irq_ptr->airq_temp_cpu == busiest_cpu) &&
			    irq_ptr->airq_busy) {
				if (irq_ptr->airq_busy < diff) {
					/*
					 * Check for least busy CPU,
					 * best fit or what?
					 */
					if (max_busy < irq_ptr->airq_busy) {
						/*
						 * Most busy within the
						 * required differential
						 */
						max_busy = irq_ptr->airq_busy;
						max_busy_irq = irq_ptr;
					}
				} else {
					if (min_busy > irq_ptr->airq_busy) {
						/*
						 * least busy, but more than
						 * the reqd diff
						 */
						if (min_busy <
						    (diff + average_busy -
						    min_free)) {
							/*
							 * Making sure new cpu
							 * will not end up
							 * worse
							 */
							min_busy =
							    irq_ptr->airq_busy;

							min_busy_irq = irq_ptr;
						}
					}
				}
			}
			irq_ptr->airq_busy = 0;
		}

		if (max_busy_irq != NULL) {
#ifdef DEBUG
			if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) {
				prom_printf("rebinding %x to %x",
				    max_busy_irq->airq_vector, most_free_cpu);
			}
#endif /* DEBUG */
			iflag = intr_clear();
			if (lock_try(&apic_ioapic_lock)) {
				if (apic_rebind_all(max_busy_irq,
				    most_free_cpu) == 0) {
					/* Make the change permanent */
					max_busy_irq->airq_cpu =
					    (uchar_t)most_free_cpu;
				}
				lock_clear(&apic_ioapic_lock);
			}
			intr_restore(iflag);

		} else if (min_busy_irq != NULL) {
#ifdef DEBUG
			if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) {
				prom_printf("rebinding %x to %x",
				    min_busy_irq->airq_vector, most_free_cpu);
			}
#endif /* DEBUG */

			iflag = intr_clear();
			if (lock_try(&apic_ioapic_lock)) {
				if (apic_rebind_all(min_busy_irq,
				    most_free_cpu) == 0) {
					/* Make the change permanent */
					min_busy_irq->airq_cpu =
					    (uchar_t)most_free_cpu;
				}
				lock_clear(&apic_ioapic_lock);
			}
			intr_restore(iflag);

		} else {
			if (cpu_busy != (1 << busiest_cpu)) {
				apic_redist_cpu_skip |= 1 << busiest_cpu;
				/*
				 * We leave cpu_skip set so that next time we
				 * can choose another cpu
				 */
			}
		}
		apic_num_rebind++;
	} else {
		/*
		 * Found nothing.  Could be that we skipped over valid CPUs
		 * or we have balanced everything.  If we had a variable
		 * ticks_for_redistribution, it could be increased here.
		 * apic_int_busy, int_free etc would also need to be
		 * changed.
		 */
		if (apic_redist_cpu_skip)
			apic_redist_cpu_skip = 0;
	}
	for (i = 0; i < apic_nproc; i++) {
		apic_cpus[i].aci_busy = 0;
	}
}
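
/*
 * Worked example for the redistribution logic above (numbers invented):
 * suppose sampling finds CPU0 busy 80 and CPU1 busy 10, so
 * average_busy = 45 and diff = 80 - 45 = 35.  An IRQ on CPU0 with
 * airq_busy 30 (< 35) is a max_busy_irq candidate and is moved
 * outright.  An IRQ with airq_busy 50 exceeds diff, so it qualifies
 * only as a min_busy_irq candidate, and only because
 * 50 < diff + average_busy - min_free = 35 + 45 - 10 = 70, i.e. moving
 * it does not leave the most idle CPU busier than the busiest one was.
 */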

static void
apic_cleanup_busy()
{
	int i;
	apic_irq_t *irq_ptr;

	for (i = 0; i < apic_nproc; i++) {
		apic_cpus[i].aci_busy = 0;
	}

	for (i = apic_min_device_irq; i < apic_max_device_irq; i++) {
		if ((irq_ptr = apic_irq_table[i]) != NULL)
			irq_ptr->airq_busy = 0;
	}
	apic_skipped_redistribute = 0;
}


/*
 * This function will reprogram the timer.
 *
 * When in oneshot mode the argument is the absolute time in the future at
 * which to generate the interrupt.
 *
 * When in periodic mode, the argument is the interval at which the
 * interrupts should be generated.  There is no need to support the periodic
 * mode timer change at this time.
 */
static void
apic_timer_reprogram(hrtime_t time)
{
	hrtime_t now;
	uint_t ticks;
	int64_t delta;

	/*
	 * We should be called from high PIL context (CBE_HIGH_PIL),
	 * so kpreempt is disabled.
	 */

	if (!apic_oneshot) {
		/* time is the interval for periodic mode */
		ticks = APIC_NSECS_TO_TICKS(time);
	} else {
		/* one shot mode */

		now = gethrtime();
		delta = time - now;

		if (delta <= 0) {
			/*
			 * requested to generate an interrupt in the past;
			 * generate an interrupt as soon as possible
			 */
			ticks = apic_min_timer_ticks;
		} else if (delta > apic_nsec_max) {
			/*
			 * requested to generate an interrupt at a time
			 * further out than we are capable of.  Set to the
			 * maximum the hardware can handle.
			 */

			ticks = APIC_MAXVAL;
#ifdef DEBUG
			cmn_err(CE_CONT, "apic_timer_reprogram, request at"
			    " %lld too far in future, current time"
			    " %lld \n", time, now);
#endif
		} else
			ticks = APIC_NSECS_TO_TICKS(delta);
	}

	if (ticks < apic_min_timer_ticks)
		ticks = apic_min_timer_ticks;

	apicadr[APIC_INIT_COUNT] = ticks;

}
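
/*
 * A rough sketch of the one-shot arithmetic (illustrative numbers; the
 * actual conversion factor comes from timer calibration): a request
 * 1 ms in the future becomes roughly 100,000 ticks on a 100 MHz divided
 * timer clock.  A request already in the past is clamped up to
 * apic_min_timer_ticks, and one beyond apic_nsec_max saturates at
 * APIC_MAXVAL, the limit of the 32-bit initial-count register.
 */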

/*
 * This function will enable timer interrupts.
 */
static void
apic_timer_enable(void)
{
	/*
	 * We should be called from high PIL context (CBE_HIGH_PIL),
	 * so kpreempt is disabled.
	 */

	if (!apic_oneshot)
		apicadr[APIC_LOCAL_TIMER] =
		    (apic_clkvect + APIC_BASE_VECT) | AV_TIME;
	else {
		/* one shot */
		apicadr[APIC_LOCAL_TIMER] = (apic_clkvect + APIC_BASE_VECT);
	}
}

/*
 * This function will disable timer interrupts.
 */
static void
apic_timer_disable(void)
{
	/*
	 * We should be called from high PIL context (CBE_HIGH_PIL),
	 * so kpreempt is disabled.
	 */

	apicadr[APIC_LOCAL_TIMER] = (apic_clkvect + APIC_BASE_VECT) | AV_MASK;
}


cyclic_id_t apic_cyclic_id;

/*
 * If this module needs to be a consumer of the cyclic subsystem, the
 * cyclics can be added here, since the kernel cyclic subsystem is
 * initialized by this time.  The argument is not currently used, and is
 * reserved for the future.
 */
static void
apic_post_cyclic_setup(void *arg)
{
	_NOTE(ARGUNUSED(arg))
	cyc_handler_t hdlr;
	cyc_time_t when;

	/* cpu_lock is held */

	/* set up cyclics for intr redistribution */

	/*
	 * In periodic mode, intr redistribution processing is done in
	 * apic_intr_enter during clock interrupt processing.
	 */
	if (!apic_oneshot)
		return;

	hdlr.cyh_level = CY_LOW_LEVEL;
	hdlr.cyh_func = (cyc_func_t)apic_redistribute_compute;
	hdlr.cyh_arg = NULL;

	when.cyt_when = 0;
	when.cyt_interval = apic_redistribute_sample_interval;
	apic_cyclic_id = cyclic_add(&hdlr, &when);


}

static void
apic_redistribute_compute(void)
{
	int	i, j, max_busy;

	if (apic_enable_dynamic_migration) {
		if (++apic_nticks == apic_sample_factor_redistribution) {
			/*
			 * Time to call apic_intr_redistribute().
			 * Reset apic_nticks.  This will cause max_busy
			 * to be calculated below, and if it is more than
			 * apic_int_busy_mark, we will do the whole thing.
			 */
			apic_nticks = 0;
		}
		max_busy = 0;
		for (i = 0; i < apic_nproc; i++) {

			/*
			 * Check if curipl is non zero & if ISR is in
			 * progress
			 */
			if (((j = apic_cpus[i].aci_curipl) != 0) &&
			    (apic_cpus[i].aci_ISR_in_progress & (1 << j))) {

				int	irq;
				apic_cpus[i].aci_busy++;
				irq = apic_cpus[i].aci_current[j];
				apic_irq_table[irq]->airq_busy++;
			}

			if (!apic_nticks &&
			    (apic_cpus[i].aci_busy > max_busy))
				max_busy = apic_cpus[i].aci_busy;
		}
		if (!apic_nticks) {
			if (max_busy > apic_int_busy_mark) {
				/*
				 * We could make the following check be
				 * skipped > 1 in which case, we get a
				 * redistribution at half the busy mark (due to
				 * double interval).  Need to be able to collect
				 * more empirical data to decide if that is a
				 * good strategy.  Punt for now.
				 */
				if (apic_skipped_redistribute)
					apic_cleanup_busy();
				else
					apic_intr_redistribute();
			} else
				apic_skipped_redistribute++;
		}
	}
}


static int
apic_acpi_translate_pci_irq(dev_info_t *dip, int busid, int devid,
    int ipin, int *pci_irqp, iflag_t *intr_flagp)
{

	int status;
	acpi_psm_lnk_t acpipsmlnk;

	if ((status = acpi_get_irq_cache_ent(busid, devid, ipin, pci_irqp,
	    intr_flagp)) == ACPI_PSM_SUCCESS) {
		APIC_VERBOSE_IRQ((CE_CONT, "!pcplusmp: Found irqno %d "
		    "from cache for device %s, instance #%d\n", *pci_irqp,
		    ddi_get_name(dip), ddi_get_instance(dip)));
		return (status);
	}

	bzero(&acpipsmlnk, sizeof (acpi_psm_lnk_t));

	if ((status = acpi_translate_pci_irq(dip, ipin, pci_irqp, intr_flagp,
	    &acpipsmlnk)) == ACPI_PSM_FAILURE) {
		APIC_VERBOSE_IRQ((CE_WARN, "pcplusmp: "
		    "acpi_translate_pci_irq failed for device %s, instance"
		    " #%d", ddi_get_name(dip), ddi_get_instance(dip)));
		return (status);
	}

	if (status == ACPI_PSM_PARTIAL && acpipsmlnk.lnkobj != NULL) {
		status = apic_acpi_irq_configure(&acpipsmlnk, dip, pci_irqp,
		    intr_flagp);
		if (status != ACPI_PSM_SUCCESS) {
			status = acpi_get_current_irq_resource(&acpipsmlnk,
			    pci_irqp, intr_flagp);
		}
	}

	if (status == ACPI_PSM_SUCCESS) {
		acpi_new_irq_cache_ent(busid, devid, ipin, *pci_irqp,
		    intr_flagp, &acpipsmlnk);

		APIC_VERBOSE_IRQ((CE_CONT, "pcplusmp: [ACPI] "
		    "new irq %d for device %s, instance #%d\n",
		    *pci_irqp, ddi_get_name(dip), ddi_get_instance(dip)));
	}

	return (status);
}

/*
 * Adds an entry to the irq list passed in, and returns the new list.
 * Entries are added in priority order (lower numerical priorities are
 * placed closer to the head of the list).
 */
static prs_irq_list_t *
acpi_insert_prs_irq_ent(prs_irq_list_t *listp, int priority, int irq,
    iflag_t *iflagp, acpi_prs_private_t *prsprvp)
{
	struct prs_irq_list_ent *newent, *prevp = NULL, *origlistp;

	newent = kmem_zalloc(sizeof (struct prs_irq_list_ent), KM_SLEEP);

	newent->list_prio = priority;
	newent->irq = irq;
	newent->intrflags = *iflagp;
	newent->prsprv = *prsprvp;
	/* ->next is NULL from kmem_zalloc */

	/*
	 * New list -- return the new entry as the list.
	 */
	if (listp == NULL)
		return (newent);

	/*
	 * Save original list pointer for return (since we're not modifying
	 * the head)
	 */
	origlistp = listp;

	/*
	 * Insertion sort, with entries with identical keys stored AFTER
	 * existing entries (the less-than-or-equal test of priority does
	 * this for us).
	 */
	while (listp != NULL && listp->list_prio <= priority) {
		prevp = listp;
		listp = listp->next;
	}

	newent->next = listp;

	if (prevp == NULL) { /* Add at head of list (newent is the new head) */
		return (newent);
	} else {
		prevp->next = newent;
		return (origlistp);
	}
}
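
/*
 * A small illustration of the insertion order above (priorities
 * invented): inserting entries with priorities 2, 0, 2 in that order
 * yields the list 0, 2, 2, and the second priority-2 entry lands after
 * the first because the less-than-or-equal comparison keeps insertion
 * stable for equal keys.
 */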

/*
 * Frees the list passed in, deallocating all memory and leaving *listpp
 * set to NULL.
 */
static void
acpi_destroy_prs_irq_list(prs_irq_list_t **listpp)
{
	struct prs_irq_list_ent *nextp;

	ASSERT(listpp != NULL);

	while (*listpp != NULL) {
		nextp = (*listpp)->next;
		kmem_free(*listpp, sizeof (struct prs_irq_list_ent));
		*listpp = nextp;
	}
}

/*
 * apic_choose_irqs_from_prs returns a list of irqs selected from the list of
 * irqs returned by the link device's _PRS method.  The irqs are chosen
 * to minimize contention in situations where the interrupt link device
 * can be programmed to steer interrupts to different interrupt controller
 * inputs (some of which may already be in use).  The list is sorted in order
 * of irqs to use, with the highest priority given to interrupt controller
 * inputs that are not shared.  When an interrupt controller input
 * must be shared, apic_choose_irqs_from_prs adds the possible irqs to the
 * returned list in the order that minimizes sharing (thereby ensuring lowest
 * possible latency from interrupt trigger time to ISR execution time).
 */
static prs_irq_list_t *
apic_choose_irqs_from_prs(acpi_irqlist_t *irqlistent, dev_info_t *dip,
    int crs_irq)
{
	int32_t irq;
	int i;
	prs_irq_list_t *prsirqlistp = NULL;
	iflag_t iflags;

	while (irqlistent != NULL) {
		irqlistent->intr_flags.bustype = BUS_PCI;

		for (i = 0; i < irqlistent->num_irqs; i++) {

			irq = irqlistent->irqs[i];

			if (irq <= 0) {
				/* invalid irq number */
				continue;
			}

			if ((irq < 16) && (apic_reserved_irqlist[irq]))
				continue;

			if ((apic_irq_table[irq] == NULL) ||
			    (apic_irq_table[irq]->airq_dip == dip)) {

				prsirqlistp = acpi_insert_prs_irq_ent(
				    prsirqlistp, 0 /* Highest priority */, irq,
				    &irqlistent->intr_flags,
				    &irqlistent->acpi_prs_prv);

				/*
				 * If we do not prefer the current irq from _CRS
				 * or if we do and this irq is the same as the
				 * current irq from _CRS, this is the one
				 * to pick.
				 */
				if (!(apic_prefer_crs) || (irq == crs_irq)) {
					return (prsirqlistp);
				}
				continue;
			}

			/*
			 * Edge-triggered interrupts cannot be shared
			 */
			if (irqlistent->intr_flags.intr_el == INTR_EL_EDGE)
				continue;

			/*
			 * To work around BIOSes that contain incorrect
			 * interrupt polarity information in interrupt
			 * descriptors returned by _PRS, we assume that
			 * the polarity of the other device sharing this
			 * interrupt controller input is compatible.
			 * If it's not, the caller will catch it when
			 * it invokes the link device's _CRS method
			 * (after invoking its _SRS method).
			 */
			iflags = irqlistent->intr_flags;
			iflags.intr_po =
			    apic_irq_table[irq]->airq_iflag.intr_po;

			if (!acpi_intr_compatible(iflags,
			    apic_irq_table[irq]->airq_iflag))
				continue;

			/*
			 * If we prefer the irq from _CRS, no need
			 * to search any further (and make sure
			 * to add this irq with the highest priority
			 * so it's tried first).
			 */
			if (crs_irq == irq && apic_prefer_crs) {

				return (acpi_insert_prs_irq_ent(
				    prsirqlistp,
				    0 /* Highest priority */,
				    irq, &iflags,
				    &irqlistent->acpi_prs_prv));
			}

			/*
			 * Priority is equal to the share count (lower
			 * share count is higher priority).  Note that
			 * the intr flags passed in here are the ones we
			 * changed above -- if incorrect, it will be
			 * caught by the caller's _CRS flags comparison.
			 */
			prsirqlistp = acpi_insert_prs_irq_ent(
			    prsirqlistp,
			    apic_irq_table[irq]->airq_share, irq,
			    &iflags, &irqlistent->acpi_prs_prv);
		}

		/* Go to the next irqlist entry */
		irqlistent = irqlistent->next;
	}

	return (prsirqlistp);
}
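
/*
 * Priority sketch for the selection above (share counts invented): an
 * unused interrupt controller input is inserted with priority 0 and
 * tried first; an input already shared by three devices is inserted
 * with priority 3 (its airq_share count).  The resulting list is thus
 * ordered from least-shared to most-shared, which is the order in which
 * apic_acpi_irq_configure() attempts _SRS below.
 */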

/*
 * Configures the irq for the interrupt link device identified by
 * acpipsmlnkp.
 *
 * Gets the current and the list of possible irq settings for the
 * device.  If apic_unconditional_srs is not set, and the current
 * resource setting is in the list of possible irq settings,
 * the current irq resource setting is passed to the caller.
 *
 * Otherwise, picks an irq number from the list of possible irq
 * settings, and sets the irq of the device to this value.
 * If prefer_crs is set, and the current irq resource setting is a member
 * of the set of irq numbers in the list that have the fewest devices
 * sharing the interrupt, we pick the current setting.
 *
 * Passes the irq number in the value pointed to by pci_irqp, and
 * polarity and sensitivity in the structure pointed to by dipintrflagp
 * to the caller.
 *
 * Note that if setting the irq resource failed, but we successfully
 * obtained the current irq resource settings, we pass the current irq
 * resources back and consider it a success.
 *
 * Returns:
 * ACPI_PSM_SUCCESS on success.
 *
 * ACPI_PSM_FAILURE if an error occurred during the configuration or
 * if a suitable irq was not found for this device, or if setting the
 * irq resource and obtaining the current resource fails.
 *
 */
static int
apic_acpi_irq_configure(acpi_psm_lnk_t *acpipsmlnkp, dev_info_t *dip,
    int *pci_irqp, iflag_t *dipintr_flagp)
{

	int32_t irq;
	int cur_irq = -1;
	acpi_irqlist_t *irqlistp;
	prs_irq_list_t *prs_irq_listp, *prs_irq_entp;
	boolean_t found_irq = B_FALSE;

	dipintr_flagp->bustype = BUS_PCI;

	if ((acpi_get_possible_irq_resources(acpipsmlnkp, &irqlistp))
	    == ACPI_PSM_FAILURE) {
		APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: Unable to determine "
		    "or assign IRQ for device %s, instance #%d: The system was "
		    "unable to get the list of potential IRQs from ACPI.",
		    ddi_get_name(dip), ddi_get_instance(dip)));

		return (ACPI_PSM_FAILURE);
	}

	if ((acpi_get_current_irq_resource(acpipsmlnkp, &cur_irq,
	    dipintr_flagp) == ACPI_PSM_SUCCESS) && (!apic_unconditional_srs) &&
	    (cur_irq > 0)) {
		/*
		 * If an IRQ is set in CRS and that IRQ exists in the set
		 * returned from _PRS, return that IRQ, otherwise print
		 * a warning.
		 */

		if (acpi_irqlist_find_irq(irqlistp, cur_irq, NULL)
		    == ACPI_PSM_SUCCESS) {

			ASSERT(pci_irqp != NULL);
			*pci_irqp = cur_irq;
			acpi_free_irqlist(irqlistp);
			return (ACPI_PSM_SUCCESS);
		}

		APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: Could not find the "
		    "current irq %d for device %s, instance #%d in ACPI's "
		    "list of possible irqs for this device. Picking one from "
		    "the latter list.", cur_irq, ddi_get_name(dip),
		    ddi_get_instance(dip)));
	}

	if ((prs_irq_listp = apic_choose_irqs_from_prs(irqlistp, dip,
	    cur_irq)) == NULL) {

		APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: Could not find a "
		    "suitable irq from the list of possible irqs for device "
		    "%s, instance #%d in ACPI's list of possible irqs",
		    ddi_get_name(dip), ddi_get_instance(dip)));

		acpi_free_irqlist(irqlistp);
		return (ACPI_PSM_FAILURE);
	}

	acpi_free_irqlist(irqlistp);

	for (prs_irq_entp = prs_irq_listp;
	    prs_irq_entp != NULL && found_irq == B_FALSE;
	    prs_irq_entp = prs_irq_entp->next) {

		acpipsmlnkp->acpi_prs_prv = prs_irq_entp->prsprv;
		irq = prs_irq_entp->irq;

		APIC_VERBOSE_IRQ((CE_CONT, "!pcplusmp: Setting irq %d for "
		    "device %s instance #%d\n", irq, ddi_get_name(dip),
		    ddi_get_instance(dip)));

		if ((acpi_set_irq_resource(acpipsmlnkp, irq))
		    == ACPI_PSM_SUCCESS) {
			/*
			 * Setting the irq was successful; check to make sure
			 * _CRS reflects that.  If _CRS does not agree with
			 * what we set, return the irq that was set.
			 */

			if (acpi_get_current_irq_resource(acpipsmlnkp, &cur_irq,
			    dipintr_flagp) == ACPI_PSM_SUCCESS) {

				if (cur_irq != irq)
					APIC_VERBOSE_IRQ((CE_WARN,
					    "!pcplusmp: IRQ resource set "
					    "(irqno %d) for device %s "
					    "instance #%d, differs from "
					    "current setting irqno %d",
					    irq, ddi_get_name(dip),
					    ddi_get_instance(dip), cur_irq));
			} else {
				/*
				 * On at least one system, there was a bug in
				 * a DSDT method called by _STA, causing _STA to
				 * indicate that the link device was disabled
				 * (when, in fact, it was enabled).  Since _SRS
				 * succeeded, assume that _CRS is lying and use
				 * the iflags from this _PRS interrupt choice.
				 * If we're wrong about the flags, the polarity
				 * will be incorrect and we may get an interrupt
				 * storm, but there's not much else we can do
				 * at this point.
				 */
				*dipintr_flagp = prs_irq_entp->intrflags;
			}

			/*
			 * Return the irq that was set, and not what _CRS
			 * reports, since _CRS has been seen to return
			 * different IRQs than what was passed to _SRS on some
			 * systems (and just not return successfully on
			 * others).
			 */
			cur_irq = irq;
			found_irq = B_TRUE;
		} else {
			APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: set resource "
			    "irq %d failed for device %s instance #%d",
			    irq, ddi_get_name(dip), ddi_get_instance(dip)));

			if (cur_irq == -1) {
				acpi_destroy_prs_irq_list(&prs_irq_listp);
				return (ACPI_PSM_FAILURE);
			}
		}
	}

	acpi_destroy_prs_irq_list(&prs_irq_listp);

	if (!found_irq)
		return (ACPI_PSM_FAILURE);

	ASSERT(pci_irqp != NULL);
	*pci_irqp = cur_irq;
	return (ACPI_PSM_SUCCESS);
}