/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * PSMI 1.1 extensions are supported only in 2.6 and later versions.
 * PSMI 1.2 extensions are supported only in 2.7 and later versions.
 * PSMI 1.3 and 1.4 extensions are supported in Solaris 10.
 * PSMI 1.5 extensions are supported in Solaris Nevada.
 * PSMI 1.6 extensions are supported in Solaris Nevada.
 */
#define PSMI_1_6

#include <sys/processor.h>
#include <sys/time.h>
#include <sys/psm.h>
#include <sys/smp_impldefs.h>
#include <sys/cram.h>
#include <sys/acpi/acpi.h>
#include <sys/acpica.h>
#include <sys/psm_common.h>
#include <sys/apic.h>
#include <sys/pit.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ddi_impldefs.h>
#include <sys/pci.h>
#include <sys/promif.h>
#include <sys/x86_archext.h>
#include <sys/cpc_impl.h>
#include <sys/uadmin.h>
#include <sys/panic.h>
#include <sys/debug.h>
#include <sys/archsystm.h>
#include <sys/trap.h>
#include <sys/machsystm.h>
#include <sys/cpuvar.h>
#include <sys/rm_platter.h>
#include <sys/privregs.h>
#include <sys/cyclic.h>
#include <sys/note.h>
#include <sys/pci_intr_lib.h>
#include <sys/sunndi.h>
#if !defined(__xpv)
#include <sys/hpet.h>
#include <sys/clock.h>
#endif

/*
 * Local Function Prototypes
 */
static int apic_handle_defconf();
static int apic_parse_mpct(caddr_t mpct, int bypass);
static struct apic_mpfps_hdr *apic_find_fps_sig(caddr_t fptr, int size);
static int apic_checksum(caddr_t bptr, int len);
static int apic_find_bus_type(char *bus);
static int apic_find_bus(int busid);
static int apic_find_bus_id(int bustype);
static struct apic_io_intr *apic_find_io_intr(int irqno);
static int apic_find_free_irq(int start, int end);
static void apic_mark_vector(uchar_t oldvector, uchar_t newvector);
static void apic_xlate_vector_free_timeout_handler(void *arg);
static int apic_check_stuck_interrupt(apic_irq_t *irq_ptr, int old_bind_cpu,
    int new_bind_cpu, int apicindex, int intin_no, int which_irq,
    struct ioapic_reprogram_data *drep);
static void apic_record_rdt_entry(apic_irq_t *irqptr, int irq);
static struct apic_io_intr *apic_find_io_intr_w_busid(int irqno, int busid);
static int apic_find_intin(uchar_t ioapic, uchar_t intin);
static int apic_handle_pci_pci_bridge(dev_info_t *idip, int child_devno,
    int child_ipin, struct apic_io_intr **intrp);
static int apic_setup_irq_table(dev_info_t *dip, int irqno,
    struct apic_io_intr *intrp, struct intrspec *ispec, iflag_t *intr_flagp,
    int type);
static void apic_set_pwroff_method_from_mpcnfhdr(struct apic_mp_cnf_hdr *hdrp);
static void apic_try_deferred_reprogram(int ipl, int vect);
static void delete_defer_repro_ent(int which_irq);
static void apic_ioapic_wait_pending_clear(int ioapicindex,
    int intin_no);
static boolean_t apic_is_ioapic_AMD_813x(uint32_t physaddr);
static int apic_acpi_enter_apicmode(void);

int apic_debug_mps_id = 0;    /* 1 - print MPS ID strings */

/* ACPI SCI interrupt configuration; -1 if SCI not used */
int apic_sci_vect = -1;
iflag_t apic_sci_flags;

#if !defined(__xpv)
/* ACPI HPET interrupt configuration; -1 if HPET not used */
int apic_hpet_vect = -1;
iflag_t apic_hpet_flags;
#endif

/*
 * psm name pointer
 */
static char *psm_name;

/* ACPI support routines */
static int acpi_probe(char *);
static int apic_acpi_irq_configure(acpi_psm_lnk_t *acpipsmlnkp, dev_info_t *dip,
    int *pci_irqp, iflag_t *intr_flagp);

static int apic_acpi_translate_pci_irq(dev_info_t *dip, int busid, int devid,
    int ipin, int *pci_irqp, iflag_t *intr_flagp);
static uchar_t acpi_find_ioapic(int irq);
static int acpi_intr_compatible(iflag_t iflag1, iflag_t iflag2);

/*
 * Maximum value of an unsigned byte; NBBY (the number of bits per byte)
 * comes from <sys/param.h>.
 */
#define UCHAR_MAX    ((1 << NBBY) - 1)

/* Max wait time (in repetitions) for flags to clear in an RDT entry. */
int apic_max_reps_clear_pending = 1000;

/* The irq # is implicit in the array index: */
struct ioapic_reprogram_data apic_reprogram_info[APIC_MAX_VECTOR+1];
/*
 * APIC_MAX_VECTOR + 1 is the maximum # of IRQs as well. ioapic_reprogram_info
 * is indexed by IRQ number, NOT by vector number.
 */

int apic_intr_policy = INTR_ROUND_ROBIN;

int apic_next_bind_cpu = 1;    /* For round robin assignment */
                               /* start with cpu 1 */

/*
 * If enabled, the distribution works as follows:
 * On every interrupt entry, the current ipl for the CPU is set in cpu_info
 * and the irq corresponding to the ipl is also set in the aci_current array.
 * Interrupt exit and setspl (due to soft interrupts) will cause the current
 * ipl to be changed. This is cache friendly as these frequently used
 * paths write into a per cpu structure.
 *
 * Sampling is done by checking the structures for all CPUs and incrementing
 * the busy field of the irq (if any) executing on each CPU and the busy field
 * of the corresponding CPU.
 * In periodic mode this is done on every clock interrupt.
 * In one-shot mode, this is done through a cyclic with an interval of
 * apic_redistribute_sample_interval (default 10 millisec).
 *
 * Every apic_sample_factor_redistribution times we sample, we do computations
 * to decide which interrupt needs to be migrated (see comments
 * before apic_intr_redistribute()).
 */
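/*
 * For illustration, with the defaults below: in one-shot mode a sampling
 * pass runs every 10 milliseconds and a redistribution decision is made
 * every 101 samples, i.e. roughly once a second. Because the marks are
 * percentages scaled to the sample count, an interrupt busy in more than
 * about 60% of the samples counts as busy, and one busy in fewer than
 * about 20% counts as free.
 */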
/*
 * The following 3 variables start as percentages and can be patched or set
 * using an API to be defined in the future. They will be scaled to
 * sample_factor_redistribution, which is in turn set to hertz+1 (in periodic
 * mode), or 101 in one-shot mode to stagger it away from one sec processing.
 */

int apic_int_busy_mark = 60;
int apic_int_free_mark = 20;
int apic_diff_for_redistribution = 10;

/* sampling interval for interrupt redistribution for dynamic migration */
int apic_redistribute_sample_interval = NANOSEC / 100;    /* 10 millisec */

/*
 * number of times we sample before deciding to redistribute interrupts
 * for dynamic migration
 */
int apic_sample_factor_redistribution = 101;

/* timeout for xlate_vector, mark_vector */
int apic_revector_timeout = 16 * 10000;    /* 160 millisec */

int apic_redist_cpu_skip = 0;
int apic_num_imbalance = 0;
int apic_num_rebind = 0;

int apic_nproc = 0;
size_t apic_cpus_size = 0;
int apic_defconf = 0;
int apic_irq_translate = 0;
int apic_spec_rev = 0;
int apic_imcrp = 0;

int apic_use_acpi = 1;    /* 1 = use ACPI, 0 = don't use ACPI */
int apic_use_acpi_madt_only = 0;    /* 1=ONLY use MADT from ACPI */

/*
 * For interrupt link devices, if apic_unconditional_srs is set, an irq
 * resource will be assigned (via _SRS). If it is not set, use the current
 * irq setting (via _CRS), but only if that irq is in the set of possible
 * irqs (returned by _PRS) for the device.
 */
int apic_unconditional_srs = 1;

/*
 * For interrupt link devices, if apic_prefer_crs is set when we are
 * assigning an IRQ resource to a device, prefer the current IRQ setting
 * over other possible irq settings under the same conditions.
 */

int apic_prefer_crs = 1;

uchar_t apic_io_id[MAX_IO_APIC];
volatile uint32_t *apicioadr[MAX_IO_APIC];
static uchar_t apic_io_ver[MAX_IO_APIC];
static uchar_t apic_io_vectbase[MAX_IO_APIC];
static uchar_t apic_io_vectend[MAX_IO_APIC];
uchar_t apic_reserved_irqlist[MAX_ISA_IRQ + 1];
uint32_t apic_physaddr[MAX_IO_APIC];

static boolean_t ioapic_mask_workaround[MAX_IO_APIC];

/*
 * First available slot to be used as IRQ index into the apic_irq_table
 * for those interrupts (like MSI/X) that don't have a physical IRQ.
 */
int apic_first_avail_irq = APIC_FIRST_FREE_IRQ;

/*
 * apic_ioapic_lock protects the ioapics (reg select), the status, temp_bound
 * and bound elements of cpus_info and the temp_cpu element of irq_struct
 */
lock_t apic_ioapic_lock;

/*
 * apic_defer_reprogram_lock ensures that only one processor is handling
 * deferred interrupt programming at *_intr_exit time.
 */
static lock_t apic_defer_reprogram_lock;

/*
 * The current number of deferred reprogrammings outstanding
 */
uint_t apic_reprogram_outstanding = 0;

#ifdef DEBUG
/*
 * Counters that keep track of deferred reprogramming stats
 */
uint_t apic_intr_deferrals = 0;
uint_t apic_intr_deliver_timeouts = 0;
uint_t apic_last_ditch_reprogram_failures = 0;
uint_t apic_deferred_setup_failures = 0;
uint_t apic_defer_repro_total_retries = 0;
uint_t apic_defer_repro_successes = 0;
uint_t apic_deferred_spurious_enters = 0;
#endif

static int apic_io_max = 0;    /* no. of i/o apics enabled */
static struct apic_io_intr *apic_io_intrp = 0;
static struct apic_bus *apic_busp;

uchar_t apic_vector_to_irq[APIC_MAX_VECTOR+1];
uchar_t apic_resv_vector[MAXIPL+1];

char apic_level_intr[APIC_MAX_VECTOR+1];

static uint32_t eisa_level_intr_mask = 0;
    /* At least MSB will be set if EISA bus */

static int apic_pci_bus_total = 0;
static uchar_t apic_single_pci_busid = 0;

/*
 * airq_mutex protects additions to the apic_irq_table - the first
 * pointer and any airq_nexts off of that one. It also protects
 * apic_max_device_irq & apic_min_device_irq. It also guarantees
 * that share_id is unique as new ids are generated only when new
 * irq_t structs are linked in. Once linked in the structs are never
 * deleted. The temp_cpu & mps_intr_index fields indicate if it is programmed
 * or allocated. Note that there is a slight gap between allocating in
 * apic_introp_xlate and programming in addspl.
 */
kmutex_t airq_mutex;
apic_irq_t *apic_irq_table[APIC_MAX_VECTOR+1];
int apic_max_device_irq = 0;
int apic_min_device_irq = APIC_MAX_VECTOR;

/*
 * Following declarations are for revectoring; used when ISRs at different
 * IPLs share an irq.
 */
static lock_t apic_revector_lock;
int apic_revector_pending = 0;
static uchar_t *apic_oldvec_to_newvec;
static uchar_t *apic_newvec_to_oldvec;

typedef struct prs_irq_list_ent {
    int list_prio;
    int32_t irq;
    iflag_t intrflags;
    acpi_prs_private_t prsprv;
    struct prs_irq_list_ent *next;
} prs_irq_list_t;


/*
 * ACPI variables
 */
/* 1 = acpi is enabled & working, 0 = acpi is not enabled or not there */
int apic_enable_acpi = 0;

/* ACPI Multiple APIC Description Table ptr */
static ACPI_TABLE_MADT *acpi_mapic_dtp = NULL;

/* ACPI Interrupt Source Override Structure ptr */
static ACPI_MADT_INTERRUPT_OVERRIDE *acpi_isop = NULL;
static int acpi_iso_cnt = 0;

/* ACPI Non-maskable Interrupt Sources ptr */
static ACPI_MADT_NMI_SOURCE *acpi_nmi_sp = NULL;
static int acpi_nmi_scnt = 0;
static ACPI_MADT_LOCAL_APIC_NMI *acpi_nmi_cp = NULL;
static int acpi_nmi_ccnt = 0;

/*
 * The following added to identify a software poweroff method if available.
 */

static struct {
    int poweroff_method;
    char oem_id[APIC_MPS_OEM_ID_LEN + 1];    /* MAX + 1 for NULL */
    char prod_id[APIC_MPS_PROD_ID_LEN + 1];  /* MAX + 1 for NULL */
} apic_mps_ids[] = {
    { APIC_POWEROFF_VIA_RTC,       "INTEL", "ALDER" },   /* 4300 */
    { APIC_POWEROFF_VIA_RTC,       "NCR",   "AMC" },     /* 4300 */
    { APIC_POWEROFF_VIA_ASPEN_BMC, "INTEL", "A450NX" },  /* 4400? */
    { APIC_POWEROFF_VIA_ASPEN_BMC, "INTEL", "AD450NX" }, /* 4400 */
    { APIC_POWEROFF_VIA_ASPEN_BMC, "INTEL", "AC450NX" }, /* 4400R */
    { APIC_POWEROFF_VIA_SITKA_BMC, "INTEL", "S450NX" },  /* S50 */
    { APIC_POWEROFF_VIA_SITKA_BMC, "INTEL", "SC450NX" }  /* S50? */
};

int apic_poweroff_method = APIC_POWEROFF_NONE;

/*
 * Auto-configuration routines
 */
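/*
 * For orientation: the MP floating pointer structure searched for below
 * is identified by the ASCII signature "_MP_" on a 16-byte boundary
 * (see apic_find_fps_sig()), and is accepted only if its bytes sum to
 * zero modulo 256 (see apic_checksum()).
 */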
/*
 * Look at MPSpec 1.4 (Intel Order # 242016-005) for details of what we do
 * here. May work with 1.1 - but not guaranteed.
 * According to the MP Spec, the MP floating pointer structure
 * will be searched in the order described below:
 * 1. In the first kilobyte of Extended BIOS Data Area (EBDA)
 * 2. Within the last kilobyte of system base memory
 * 3. In the BIOS ROM address space between 0F0000h and 0FFFFFh
 * Once we find the right signature with proper checksum, we call
 * either handle_defconf or parse_mpct to get all info necessary for
 * subsequent operations.
 */
int
apic_probe_common(char *modname)
{
    uint32_t mpct_addr, ebda_start = 0, base_mem_end;
    caddr_t biosdatap;
    caddr_t mpct;
    caddr_t fptr;
    int i, mpct_size, mapsize, retval = PSM_FAILURE;
    ushort_t ebda_seg, base_mem_size;
    struct apic_mpfps_hdr *fpsp;
    struct apic_mp_cnf_hdr *hdrp;
    int bypass_cpu_and_ioapics_in_mptables;
    int acpi_user_options;

    if (apic_forceload < 0)
        return (retval);

    /*
     * Remember who we are
     */
    psm_name = modname;

    /* Allow override for MADT-only mode */
    acpi_user_options = ddi_prop_get_int(DDI_DEV_T_ANY, ddi_root_node(), 0,
        "acpi-user-options", 0);
    apic_use_acpi_madt_only = ((acpi_user_options & ACPI_OUSER_MADT) != 0);

    /* Allow apic_use_acpi to override MADT-only mode */
    if (!apic_use_acpi)
        apic_use_acpi_madt_only = 0;

    retval = acpi_probe(modname);

    /*
     * mapin the bios data area 40:0
     * 40:13h - two-byte location reports the base memory size
     * 40:0Eh - two-byte location for the exact starting address of
     *          the EBDA segment for EISA
     */
    biosdatap = psm_map_phys(0x400, 0x20, PROT_READ);
    if (!biosdatap)
        return (retval);
    fpsp = (struct apic_mpfps_hdr *)NULL;
    mapsize = MPFPS_RAM_WIN_LEN;
    /*LINTED: pointer cast may result in improper alignment */
    ebda_seg = *((ushort_t *)(biosdatap+0xe));
    /* check the 1k of EBDA */
    if (ebda_seg) {
        ebda_start = ((uint32_t)ebda_seg) << 4;
        fptr = psm_map_phys(ebda_start, MPFPS_RAM_WIN_LEN, PROT_READ);
        if (fptr) {
            if (!(fpsp =
                apic_find_fps_sig(fptr, MPFPS_RAM_WIN_LEN)))
                psm_unmap_phys(fptr, MPFPS_RAM_WIN_LEN);
        }
    }
    /* If not in EBDA, check the last k of system base memory */
    if (!fpsp) {
        /*LINTED: pointer cast may result in improper alignment */
        base_mem_size = *((ushort_t *)(biosdatap + 0x13));

        if (base_mem_size > 512)
            base_mem_end = 639 * 1024;
        else
            base_mem_end = 511 * 1024;
        /* if ebda == last k of base mem, skip to check BIOS ROM */
        if (base_mem_end != ebda_start) {

            fptr = psm_map_phys(base_mem_end, MPFPS_RAM_WIN_LEN,
                PROT_READ);

            if (fptr) {
                if (!(fpsp = apic_find_fps_sig(fptr,
                    MPFPS_RAM_WIN_LEN)))
                    psm_unmap_phys(fptr, MPFPS_RAM_WIN_LEN);
            }
        }
    }
    psm_unmap_phys(biosdatap, 0x20);

    /* If still cannot find it, check the BIOS ROM space */
    if (!fpsp) {
        mapsize = MPFPS_ROM_WIN_LEN;
        fptr = psm_map_phys(MPFPS_ROM_WIN_START,
            MPFPS_ROM_WIN_LEN, PROT_READ);
        if (fptr) {
            if (!(fpsp =
                apic_find_fps_sig(fptr, MPFPS_ROM_WIN_LEN))) {
                psm_unmap_phys(fptr, MPFPS_ROM_WIN_LEN);
                return (retval);
            }
        } else {
            /* cannot map the ROM window; nowhere left to look */
            return (retval);
        }
    }

    if (apic_checksum((caddr_t)fpsp, fpsp->mpfps_length * 16) != 0) {
        psm_unmap_phys(fptr, MPFPS_ROM_WIN_LEN);
        return (retval);
    }

    apic_spec_rev = fpsp->mpfps_spec_rev;
    if ((apic_spec_rev != 04) && (apic_spec_rev != 01)) {
        psm_unmap_phys(fptr, MPFPS_ROM_WIN_LEN);
        return (retval);
    }

    /* check whether the IMCR is present */
    apic_imcrp = fpsp->mpfps_featinfo2 & MPFPS_FEATINFO2_IMCRP;

    /* check default configuration (dual CPUs) */
    if ((apic_defconf = fpsp->mpfps_featinfo1) != 0) {
        psm_unmap_phys(fptr, mapsize);
        return (apic_handle_defconf());
    }
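    /*
     * For reference: the signature test below compares against
     * 0x504d4350, which is the ASCII string "PCMP" read as a
     * little-endian 32-bit value ('P' = 0x50, 'C' = 0x43, 'M' = 0x4d).
     */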
    /* MP Configuration Table */
    mpct_addr = (uint32_t)(fpsp->mpfps_mpct_paddr);

    psm_unmap_phys(fptr, mapsize);    /* unmap floating ptr struct */

    /*
     * Map in enough memory for the MP Configuration Table Header.
     * Use this table to read the total length of the BIOS data and
     * map in all the info
     */
    /*LINTED: pointer cast may result in improper alignment */
    hdrp = (struct apic_mp_cnf_hdr *)psm_map_phys(mpct_addr,
        sizeof (struct apic_mp_cnf_hdr), PROT_READ);
    if (!hdrp)
        return (retval);

    /* check mp configuration table signature PCMP */
    if (hdrp->mpcnf_sig != 0x504d4350) {
        psm_unmap_phys((caddr_t)hdrp, sizeof (struct apic_mp_cnf_hdr));
        return (retval);
    }
    mpct_size = (int)hdrp->mpcnf_tbl_length;

    apic_set_pwroff_method_from_mpcnfhdr(hdrp);

    psm_unmap_phys((caddr_t)hdrp, sizeof (struct apic_mp_cnf_hdr));

    if ((retval == PSM_SUCCESS) && !apic_use_acpi_madt_only) {
        /* This is an ACPI machine. No need for further checks. */
        return (retval);
    }

    /*
     * Map in the entries for this machine, i.e. Processor
     * Entry Tables, Bus Entry Tables, etc.
     * They are in fixed order following one another
     */
    mpct = psm_map_phys(mpct_addr, mpct_size, PROT_READ);
    if (!mpct)
        return (retval);

    if (apic_checksum(mpct, mpct_size) != 0)
        goto apic_fail1;


    /*LINTED: pointer cast may result in improper alignment */
    hdrp = (struct apic_mp_cnf_hdr *)mpct;
    apicadr = (uint32_t *)mapin_apic((uint32_t)hdrp->mpcnf_local_apic,
        APIC_LOCAL_MEMLEN, PROT_READ | PROT_WRITE);
    if (!apicadr)
        goto apic_fail1;

    /* Parse all information in the tables */
    bypass_cpu_and_ioapics_in_mptables = (retval == PSM_SUCCESS);
    if (apic_parse_mpct(mpct, bypass_cpu_and_ioapics_in_mptables) ==
        PSM_SUCCESS)
        return (PSM_SUCCESS);

    for (i = 0; i < apic_io_max; i++)
        mapout_ioapic((caddr_t)apicioadr[i], APIC_IO_MEMLEN);
    if (apic_cpus)
        kmem_free(apic_cpus, apic_cpus_size);
    if (apicadr)
        mapout_apic((caddr_t)apicadr, APIC_LOCAL_MEMLEN);
apic_fail1:
    psm_unmap_phys(mpct, mpct_size);
    return (retval);
}

static void
apic_set_pwroff_method_from_mpcnfhdr(struct apic_mp_cnf_hdr *hdrp)
{
    int i;

    for (i = 0; i < (sizeof (apic_mps_ids) / sizeof (apic_mps_ids[0]));
        i++) {
        if ((strncmp(hdrp->mpcnf_oem_str, apic_mps_ids[i].oem_id,
            strlen(apic_mps_ids[i].oem_id)) == 0) &&
            (strncmp(hdrp->mpcnf_prod_str, apic_mps_ids[i].prod_id,
            strlen(apic_mps_ids[i].prod_id)) == 0)) {

            apic_poweroff_method = apic_mps_ids[i].poweroff_method;
            break;
        }
    }

    if (apic_debug_mps_id != 0) {
        cmn_err(CE_CONT, "%s: MPS OEM ID = '%c%c%c%c%c%c%c%c' "
            "Product ID = '%c%c%c%c%c%c%c%c%c%c%c%c'\n",
            psm_name,
            hdrp->mpcnf_oem_str[0],
            hdrp->mpcnf_oem_str[1],
            hdrp->mpcnf_oem_str[2],
            hdrp->mpcnf_oem_str[3],
            hdrp->mpcnf_oem_str[4],
            hdrp->mpcnf_oem_str[5],
            hdrp->mpcnf_oem_str[6],
            hdrp->mpcnf_oem_str[7],
            hdrp->mpcnf_prod_str[0],
            hdrp->mpcnf_prod_str[1],
            hdrp->mpcnf_prod_str[2],
            hdrp->mpcnf_prod_str[3],
            hdrp->mpcnf_prod_str[4],
            hdrp->mpcnf_prod_str[5],
            hdrp->mpcnf_prod_str[6],
            hdrp->mpcnf_prod_str[7],
            hdrp->mpcnf_prod_str[8],
            hdrp->mpcnf_prod_str[9],
            hdrp->mpcnf_prod_str[10],
            hdrp->mpcnf_prod_str[11]);
    }
}

static int
acpi_probe(char *modname)
{
    int i, intmax, index;
    uint32_t id, ver;
    int acpi_verboseflags = 0;
    int madt_seen, madt_size;
    ACPI_SUBTABLE_HEADER *ap;
    ACPI_MADT_LOCAL_APIC *mpa;
    ACPI_MADT_LOCAL_X2APIC *mpx2a;
    ACPI_MADT_IO_APIC *mia;
    ACPI_MADT_IO_SAPIC *misa;
    ACPI_MADT_INTERRUPT_OVERRIDE *mio;
    ACPI_MADT_NMI_SOURCE *mns;
    ACPI_MADT_INTERRUPT_SOURCE *mis;
    ACPI_MADT_LOCAL_APIC_NMI *mlan;
    ACPI_MADT_LOCAL_X2APIC_NMI *mx2alan;
    ACPI_MADT_LOCAL_APIC_OVERRIDE *mao;
    int sci;
    iflag_t sci_flags;
    volatile uint32_t *ioapic;
    int ioapic_ix;
    uint32_t local_ids[NCPU];
    uint32_t proc_ids[NCPU];
    uchar_t hid;
    int warned = 0;

    if (!apic_use_acpi)
        return (PSM_FAILURE);

    if (AcpiGetTable(ACPI_SIG_MADT, 1,
        (ACPI_TABLE_HEADER **) &acpi_mapic_dtp) != AE_OK)
        return (PSM_FAILURE);

    apicadr = mapin_apic((uint32_t)acpi_mapic_dtp->Address,
        APIC_LOCAL_MEMLEN, PROT_READ | PROT_WRITE);
    if (!apicadr)
        return (PSM_FAILURE);

    id = apic_reg_ops->apic_read(APIC_LID_REG);
    local_ids[0] = (uchar_t)(id >> 24);
    apic_nproc = index = 1;
    CPUSET_ONLY(apic_cpumask, 0);
    apic_io_max = 0;

    ap = (ACPI_SUBTABLE_HEADER *) (acpi_mapic_dtp + 1);
    madt_size = acpi_mapic_dtp->Header.Length;
    madt_seen = sizeof (*acpi_mapic_dtp);

    while (madt_seen < madt_size) {
        switch (ap->Type) {
        case ACPI_MADT_TYPE_LOCAL_APIC:
            mpa = (ACPI_MADT_LOCAL_APIC *) ap;
            if (mpa->LapicFlags & ACPI_MADT_ENABLED) {
                if (mpa->Id == local_ids[0]) {
                    proc_ids[0] = mpa->ProcessorId;
                    acpica_map_cpu(0, mpa->ProcessorId);
                } else if (apic_nproc < NCPU && use_mp &&
                    apic_nproc < boot_ncpus) {
                    local_ids[index] = mpa->Id;
                    proc_ids[index] = mpa->ProcessorId;
                    CPUSET_ADD(apic_cpumask, index);
                    acpica_map_cpu(index, mpa->ProcessorId);
                    index++;
                    apic_nproc++;
                } else if (apic_nproc == NCPU && !warned) {
                    cmn_err(CE_WARN, "%s: CPU limit "
                        "exceeded"
#if !defined(__amd64)
                        " for 32-bit mode"
#endif
                        "; Solaris will use %d CPUs.",
                        psm_name, NCPU);
                    warned = 1;
                }
            }
            break;

        case ACPI_MADT_TYPE_IO_APIC:
            mia = (ACPI_MADT_IO_APIC *) ap;
            if (apic_io_max < MAX_IO_APIC) {
                ioapic_ix = apic_io_max;
                apic_io_id[apic_io_max] = mia->Id;
                apic_io_vectbase[apic_io_max] =
                    mia->GlobalIrqBase;
                apic_physaddr[apic_io_max] =
                    (uint32_t)mia->Address;
                ioapic = apicioadr[apic_io_max] =
                    mapin_ioapic((uint32_t)mia->Address,
                    APIC_IO_MEMLEN, PROT_READ | PROT_WRITE);
                if (!ioapic)
                    goto cleanup;
                ioapic_mask_workaround[apic_io_max] =
                    apic_is_ioapic_AMD_813x(mia->Address);
                apic_io_max++;
            }
            break;

        case ACPI_MADT_TYPE_INTERRUPT_OVERRIDE:
            mio = (ACPI_MADT_INTERRUPT_OVERRIDE *) ap;
            if (acpi_isop == NULL)
                acpi_isop = mio;
            acpi_iso_cnt++;
            break;

        case ACPI_MADT_TYPE_NMI_SOURCE:
            /* UNIMPLEMENTED */
            mns = (ACPI_MADT_NMI_SOURCE *) ap;
            if (acpi_nmi_sp == NULL)
                acpi_nmi_sp = mns;
            acpi_nmi_scnt++;

            cmn_err(CE_NOTE, "!apic: nmi source: %d 0x%x\n",
                mns->GlobalIrq, mns->IntiFlags);
            break;

        case ACPI_MADT_TYPE_LOCAL_APIC_NMI:
            /* UNIMPLEMENTED */
            mlan = (ACPI_MADT_LOCAL_APIC_NMI *) ap;
            if (acpi_nmi_cp == NULL)
                acpi_nmi_cp = mlan;
            acpi_nmi_ccnt++;

            cmn_err(CE_NOTE, "!apic: local nmi: %d 0x%x %d\n",
                mlan->ProcessorId, mlan->IntiFlags,
                mlan->Lint);
            break;
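        /*
         * The remaining MADT entry types are parsed below mostly for
         * logging. Every MADT subtable starts with the common
         * ACPI_SUBTABLE_HEADER (Type and Length), which is what lets
         * the walk at the bottom of this loop advance by ap->Length
         * without knowing each entry's layout.
         */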
        case ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE:
            /* UNIMPLEMENTED */
            mao = (ACPI_MADT_LOCAL_APIC_OVERRIDE *) ap;
            cmn_err(CE_NOTE, "!apic: address override: %lx\n",
                (long)mao->Address);
            break;

        case ACPI_MADT_TYPE_IO_SAPIC:
            /* UNIMPLEMENTED */
            misa = (ACPI_MADT_IO_SAPIC *) ap;

            cmn_err(CE_NOTE, "!apic: io sapic: %d %d %lx\n",
                misa->Id, misa->GlobalIrqBase,
                (long)misa->Address);
            break;

        case ACPI_MADT_TYPE_INTERRUPT_SOURCE:
            /* UNIMPLEMENTED */
            mis = (ACPI_MADT_INTERRUPT_SOURCE *) ap;

            cmn_err(CE_NOTE,
                "!apic: irq source: %d %d %d 0x%x %d %d\n",
                mis->Id, mis->Eid, mis->GlobalIrq,
                mis->IntiFlags, mis->Type,
                mis->IoSapicVector);
            break;

        case ACPI_MADT_TYPE_LOCAL_X2APIC:
            mpx2a = (ACPI_MADT_LOCAL_X2APIC *) ap;

            /*
             * All logical processors with APIC ID values of
             * 255 and greater have their APIC reported through
             * the Processor X2APIC structure; all logical
             * processors with APIC ID values less than 255
             * have theirs reported through the Processor
             * Local APIC structure.
             */
            if ((mpx2a->LapicFlags & ACPI_MADT_ENABLED) &&
                (mpx2a->LocalApicId >> 8)) {
                if (apic_nproc < NCPU && use_mp &&
                    apic_nproc < boot_ncpus) {
                    local_ids[index] = mpx2a->LocalApicId;
                    CPUSET_ADD(apic_cpumask, index);
                    acpica_map_cpu(index, mpx2a->Uid);
                    index++;
                    apic_nproc++;
                } else if (apic_nproc == NCPU && !warned) {
                    cmn_err(CE_WARN, "%s: CPU limit "
                        "exceeded"
#if !defined(__amd64)
                        " for 32-bit mode"
#endif
                        "; Solaris will use %d CPUs.",
                        psm_name, NCPU);
                    warned = 1;
                }
            }

            break;

        case ACPI_MADT_TYPE_LOCAL_X2APIC_NMI:
            /* UNIMPLEMENTED */
            mx2alan = (ACPI_MADT_LOCAL_X2APIC_NMI *) ap;
            if (mx2alan->Uid >> 8)
                acpi_nmi_ccnt++;

#ifdef DEBUG
            cmn_err(CE_NOTE,
                "!apic: local x2apic nmi: %d 0x%x %d\n",
                mx2alan->Uid, mx2alan->IntiFlags, mx2alan->Lint);
#endif

            break;

        case ACPI_MADT_TYPE_RESERVED:
        default:
            break;
        }

        /* advance to next entry */
        madt_seen += ap->Length;
        ap = (ACPI_SUBTABLE_HEADER *)(((char *)ap) + ap->Length);
    }

    apic_cpus_size = apic_nproc * sizeof (*apic_cpus);
    if ((apic_cpus = kmem_zalloc(apic_cpus_size, KM_NOSLEEP)) == NULL)
        goto cleanup;

    /*
     * ACPI doesn't provide the local apic ver, get it directly from the
     * local apic
     */
    ver = apic_reg_ops->apic_read(APIC_VERS_REG);
    for (i = 0; i < apic_nproc; i++) {
        apic_cpus[i].aci_local_id = local_ids[i];
        apic_cpus[i].aci_local_ver = (uchar_t)(ver & 0xFF);
    }

    for (i = 0; i < apic_io_max; i++) {
        ioapic_ix = i;

        /*
         * Need to check Sitka on the following ACPI problem.
         * On the Sitka, the ioapic's apic_id field isn't reporting
         * the actual io apic id. We have reported this problem
         * to Intel. Until they fix the problem, we will get the
         * actual id directly from the ioapic.
         */
        id = ioapic_read(ioapic_ix, APIC_ID_CMD);
        hid = (uchar_t)(id >> 24);

        if (hid != apic_io_id[i]) {
            if (apic_io_id[i] == 0)
                apic_io_id[i] = hid;
            else { /* set ioapic id to whatever ACPI reported */
                id = ((uint32_t)apic_io_id[i]) << 24;
                ioapic_write(ioapic_ix, APIC_ID_CMD, id);
            }
        }
        ver = ioapic_read(ioapic_ix, APIC_VERS_CMD);
        apic_io_ver[i] = (uchar_t)(ver & 0xff);
        intmax = (ver >> 16) & 0xff;
        apic_io_vectend[i] = apic_io_vectbase[i] + intmax;
        if (apic_first_avail_irq <= apic_io_vectend[i])
            apic_first_avail_irq = apic_io_vectend[i] + 1;
    }


    /*
     * Process SCI configuration here.
     * An error may be returned here if
     * acpi-user-options specifies legacy mode
     * (no SCI, no ACPI mode)
     */
    if (acpica_get_sci(&sci, &sci_flags) != AE_OK)
        sci = -1;

    /*
     * Now call acpica_init() to generate namespaces.
     * If this fails, we don't attempt to use ACPI
     * even if we were able to get a MADT above
     */
    if (acpica_init() != AE_OK)
        goto cleanup;

    /*
     * Call acpica_build_processor_map() now that we have
     * ACPI namespace access
     */
    acpica_build_processor_map();

    /*
     * Squirrel away the SCI and flags for later on
     * in apic_picinit() when we're ready
     */
    apic_sci_vect = sci;
    apic_sci_flags = sci_flags;

    if (apic_verbose & APIC_VERBOSE_IRQ_FLAG)
        acpi_verboseflags |= PSM_VERBOSE_IRQ_FLAG;

    if (apic_verbose & APIC_VERBOSE_POWEROFF_FLAG)
        acpi_verboseflags |= PSM_VERBOSE_POWEROFF_FLAG;

    if (apic_verbose & APIC_VERBOSE_POWEROFF_PAUSE_FLAG)
        acpi_verboseflags |= PSM_VERBOSE_POWEROFF_PAUSE_FLAG;

    if (acpi_psm_init(modname, acpi_verboseflags) == ACPI_PSM_FAILURE)
        goto cleanup;

    /* Enable ACPI APIC interrupt routing */
    if (apic_acpi_enter_apicmode() != PSM_FAILURE) {
        build_reserved_irqlist((uchar_t *)apic_reserved_irqlist);
        apic_enable_acpi = 1;
        if (apic_sci_vect > 0) {
            acpica_set_core_feature(ACPI_FEATURE_SCI_EVENT);
        }
        if (apic_use_acpi_madt_only) {
            cmn_err(CE_CONT,
                "?Using ACPI for CPU/IOAPIC information ONLY\n");
        }

#if !defined(__xpv)
        /*
         * probe ACPI for hpet information here which is used later
         * in apic_picinit().
         */
        if (hpet_acpi_init(&apic_hpet_vect, &apic_hpet_flags) < 0) {
            cmn_err(CE_NOTE, "!ACPI HPET table query failed\n");
        }
#endif

        return (PSM_SUCCESS);
    }
    /* if setting APIC mode failed above, we fall through to cleanup */

cleanup:
    if (apicadr != NULL) {
        mapout_apic((caddr_t)apicadr, APIC_LOCAL_MEMLEN);
        apicadr = NULL;
    }
    apic_nproc = 0;
    for (i = 0; i < apic_io_max; i++) {
        mapout_ioapic((caddr_t)apicioadr[i], APIC_IO_MEMLEN);
        apicioadr[i] = NULL;
    }
    apic_io_max = 0;
    acpi_isop = NULL;
    acpi_iso_cnt = 0;
    acpi_nmi_sp = NULL;
    acpi_nmi_scnt = 0;
    acpi_nmi_cp = NULL;
    acpi_nmi_ccnt = 0;
    return (PSM_FAILURE);
}
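/*
 * Background for the function below: an MP spec "default configuration"
 * (apic_defconf codes 1 through 7) describes a fixed two-CPU system whose
 * local APIC IDs are 0 and 1 and whose single I/O APIC has ID 2, so there
 * are no tables to parse; everything can simply be filled in.
 */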
/*
 * Handle default configuration. Fill in the required global variables and
 * tables. Fill in all details, as the MP table does not give any more info.
 */
static int
apic_handle_defconf()
{
    uint_t lid;

    /*LINTED: pointer cast may result in improper alignment */
    apicioadr[0] = mapin_ioapic(APIC_IO_ADDR,
        APIC_IO_MEMLEN, PROT_READ | PROT_WRITE);
    /*LINTED: pointer cast may result in improper alignment */
    apicadr = (uint32_t *)psm_map_phys(APIC_LOCAL_ADDR,
        APIC_LOCAL_MEMLEN, PROT_READ);
    apic_cpus_size = 2 * sizeof (*apic_cpus);
    apic_cpus = (apic_cpus_info_t *)
        kmem_zalloc(apic_cpus_size, KM_NOSLEEP);
    if ((!apicadr) || (!apicioadr[0]) || (!apic_cpus))
        goto apic_handle_defconf_fail;
    CPUSET_ONLY(apic_cpumask, 0);
    CPUSET_ADD(apic_cpumask, 1);
    apic_nproc = 2;
    lid = apic_reg_ops->apic_read(APIC_LID_REG);
    apic_cpus[0].aci_local_id = (uchar_t)(lid >> APIC_ID_BIT_OFFSET);
    /*
     * According to the PC+MP spec 1.1, the local ids
     * for the default configuration have to be 0 or 1
     */
    if (apic_cpus[0].aci_local_id == 1)
        apic_cpus[1].aci_local_id = 0;
    else if (apic_cpus[0].aci_local_id == 0)
        apic_cpus[1].aci_local_id = 1;
    else
        goto apic_handle_defconf_fail;

    apic_io_id[0] = 2;
    apic_io_max = 1;
    if (apic_defconf >= 5) {
        apic_cpus[0].aci_local_ver = APIC_INTEGRATED_VERS;
        apic_cpus[1].aci_local_ver = APIC_INTEGRATED_VERS;
        apic_io_ver[0] = APIC_INTEGRATED_VERS;
    } else {
        apic_cpus[0].aci_local_ver = 0;    /* 82489 DX */
        apic_cpus[1].aci_local_ver = 0;
        apic_io_ver[0] = 0;
    }
    if (apic_defconf == 2 || apic_defconf == 3 || apic_defconf == 6)
        eisa_level_intr_mask = (inb(EISA_LEVEL_CNTL + 1) << 8) |
            inb(EISA_LEVEL_CNTL) | ((uint_t)INT32_MAX + 1);
    return (PSM_SUCCESS);

apic_handle_defconf_fail:
    if (apic_cpus)
        kmem_free(apic_cpus, apic_cpus_size);
    if (apicadr)
        mapout_apic((caddr_t)apicadr, APIC_LOCAL_MEMLEN);
    if (apicioadr[0])
        mapout_ioapic((caddr_t)apicioadr[0], APIC_IO_MEMLEN);
    return (PSM_FAILURE);
}

/* Parse the entries in MP configuration table and collect info that we need */
static int
apic_parse_mpct(caddr_t mpct, int bypass_cpus_and_ioapics)
{
    struct apic_procent *procp;
    struct apic_bus *busp;
    struct apic_io_entry *ioapicp;
    struct apic_io_intr *intrp;
    int ioapic_ix;
    uint_t lid;
    uint32_t id;
    uchar_t hid;
    int warned = 0;

    /*LINTED: pointer cast may result in improper alignment */
    procp = (struct apic_procent *)(mpct + sizeof (struct apic_mp_cnf_hdr));

    /* No need to count cpu entries if we won't use them */
    if (!bypass_cpus_and_ioapics) {

        /* Find max # of CPUS and allocate structure accordingly */
        apic_nproc = 0;
        CPUSET_ZERO(apic_cpumask);
        while (procp->proc_entry == APIC_CPU_ENTRY) {
            if (procp->proc_cpuflags & CPUFLAGS_EN) {
                if (apic_nproc < NCPU && use_mp &&
                    apic_nproc < boot_ncpus) {
                    CPUSET_ADD(apic_cpumask, apic_nproc);
                    apic_nproc++;
                } else if (apic_nproc == NCPU && !warned) {
                    cmn_err(CE_WARN, "%s: CPU limit "
                        "exceeded"
#if !defined(__amd64)
                        " for 32-bit mode"
#endif
                        "; Solaris will use %d CPUs.",
                        psm_name, NCPU);
                    warned = 1;
                }

            }
            procp++;
        }
        apic_cpus_size = apic_nproc * sizeof (*apic_cpus);
        if (!apic_nproc || !(apic_cpus = (apic_cpus_info_t *)
            kmem_zalloc(apic_cpus_size, KM_NOSLEEP)))
            return (PSM_FAILURE);
    }
    /*LINTED: pointer cast may result in improper alignment */
    procp = (struct apic_procent *)(mpct + sizeof (struct apic_mp_cnf_hdr));

    /*
     * start with index 1 as 0 needs to be filled in with Boot CPU, but
     * if we're bypassing this information, it has already been filled
     * in by acpi_probe(), so don't overwrite it.
     */
    if (!bypass_cpus_and_ioapics)
        apic_nproc = 1;

    while (procp->proc_entry == APIC_CPU_ENTRY) {
        /* check whether the cpu exists or not */
        if (!bypass_cpus_and_ioapics &&
            procp->proc_cpuflags & CPUFLAGS_EN) {
            if (procp->proc_cpuflags & CPUFLAGS_BP) { /* Boot CPU */
                lid = apic_reg_ops->apic_read(APIC_LID_REG);
                apic_cpus[0].aci_local_id = procp->proc_apicid;
                if (apic_cpus[0].aci_local_id !=
                    (uchar_t)(lid >> APIC_ID_BIT_OFFSET)) {
                    return (PSM_FAILURE);
                }
                apic_cpus[0].aci_local_ver =
                    procp->proc_version;
            } else if (apic_nproc < NCPU && use_mp &&
                apic_nproc < boot_ncpus) {
                apic_cpus[apic_nproc].aci_local_id =
                    procp->proc_apicid;

                apic_cpus[apic_nproc].aci_local_ver =
                    procp->proc_version;
                apic_nproc++;

            }
        }
        procp++;
    }

    /*
     * Save start of bus entries for later use.
     * Get EISA level cntrl if EISA bus is present.
     * Also get the PCI bus id for the single PCI bus case.
     */
    apic_busp = busp = (struct apic_bus *)procp;
    while (busp->bus_entry == APIC_BUS_ENTRY) {
        lid = apic_find_bus_type((char *)&busp->bus_str1);
        if (lid == BUS_EISA) {
            eisa_level_intr_mask = (inb(EISA_LEVEL_CNTL + 1) << 8) |
                inb(EISA_LEVEL_CNTL) | ((uint_t)INT32_MAX + 1);
        } else if (lid == BUS_PCI) {
            /*
             * apic_single_pci_busid will be used only if
             * apic_pci_bus_total is equal to 1
             */
            apic_pci_bus_total++;
            apic_single_pci_busid = busp->bus_id;
        }
        busp++;
    }
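    /*
     * A note on eisa_level_intr_mask as set above: the two inb() reads
     * of the EISA_LEVEL_CNTL register pair gather one edge/level control
     * bit per ISA IRQ into the low 16 bits, and INT32_MAX + 1
     * (0x80000000) forces the MSB on, so a nonzero mask reliably records
     * "EISA bus present" (see the declaration's comment).
     */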
    ioapicp = (struct apic_io_entry *)busp;

    if (!bypass_cpus_and_ioapics)
        apic_io_max = 0;
    do {
        if (!bypass_cpus_and_ioapics && apic_io_max < MAX_IO_APIC) {
            if (ioapicp->io_flags & IOAPIC_FLAGS_EN) {
                apic_io_id[apic_io_max] = ioapicp->io_apicid;
                apic_io_ver[apic_io_max] = ioapicp->io_version;
                /*LINTED: pointer cast may result in improper alignment */
                apicioadr[apic_io_max] =
                    mapin_ioapic(
                    (uint32_t)ioapicp->io_apic_addr,
                    APIC_IO_MEMLEN, PROT_READ | PROT_WRITE);

                if (!apicioadr[apic_io_max])
                    return (PSM_FAILURE);

                ioapic_mask_workaround[apic_io_max] =
                    apic_is_ioapic_AMD_813x(
                    ioapicp->io_apic_addr);

                ioapic_ix = apic_io_max;
                id = ioapic_read(ioapic_ix, APIC_ID_CMD);
                hid = (uchar_t)(id >> 24);

                if (hid != apic_io_id[apic_io_max]) {
                    if (apic_io_id[apic_io_max] == 0)
                        apic_io_id[apic_io_max] = hid;
                    else {
                        /*
                         * set ioapic id to whatever
                         * is reported by MPS
                         *
                         * may not need to set index
                         * again ???
                         * take it out and try
                         */

                        id = ((uint32_t)
                            apic_io_id[apic_io_max]) <<
                            24;

                        ioapic_write(ioapic_ix,
                            APIC_ID_CMD, id);
                    }
                }
                apic_io_max++;
            }
        }
        ioapicp++;
    } while (ioapicp->io_entry == APIC_IO_ENTRY);

    apic_io_intrp = (struct apic_io_intr *)ioapicp;

    intrp = apic_io_intrp;
    while (intrp->intr_entry == APIC_IO_INTR_ENTRY) {
        if ((intrp->intr_irq > APIC_MAX_ISA_IRQ) ||
            (apic_find_bus(intrp->intr_busid) == BUS_PCI)) {
            apic_irq_translate = 1;
            break;
        }
        intrp++;
    }

    return (PSM_SUCCESS);
}

boolean_t
apic_cpu_in_range(int cpu)
{
    return ((cpu & ~IRQ_USER_BOUND) < apic_nproc);
}

uint16_t
apic_get_apic_version()
{
    int i;
    uchar_t min_io_apic_ver = 0;
    static uint16_t version;          /* Cache as value is constant */
    static boolean_t found = B_FALSE; /* Accommodate zero version */

    if (found == B_FALSE) {
        found = B_TRUE;

        /*
         * Don't assume all IO APICs in the system are the same.
         *
         * Set to the minimum version.
         */
        for (i = 0; i < apic_io_max; i++) {
            if ((apic_io_ver[i] != 0) &&
                ((min_io_apic_ver == 0) ||
                (min_io_apic_ver >= apic_io_ver[i])))
                min_io_apic_ver = apic_io_ver[i];
        }

        /* Assume all local APICs are of the same version. */
        version = (min_io_apic_ver << 8) | apic_cpus[0].aci_local_ver;
    }
    return (version);
}

static struct apic_mpfps_hdr *
apic_find_fps_sig(caddr_t cptr, int len)
{
    int i;

    /* Look for the pattern "_MP_" */
    for (i = 0; i < len; i += 16) {
        if ((*(cptr+i) == '_') &&
            (*(cptr+i+1) == 'M') &&
            (*(cptr+i+2) == 'P') &&
            (*(cptr+i+3) == '_'))
            /*LINTED: pointer cast may result in improper alignment */
            return ((struct apic_mpfps_hdr *)(cptr + i));
    }
    return (NULL);
}

static int
apic_checksum(caddr_t bptr, int len)
{
    int i;
    uchar_t cksum;

    cksum = 0;
    for (i = 0; i < len; i++)
        cksum += *bptr++;
    return ((int)cksum);
}


/*
 * Initialise vector->ipl and ipl->pri arrays. level_intr and irqtable
 * are also set to NULL. vector->irq is set to a value which cannot map
 * to a real irq to show that it is free.
 */
void
apic_init_common()
{
    int i, j, indx;
    int *iptr;

    /*
     * Initialize apic_ipls from apic_vectortoipl. This array is
     * used in apic_intr_enter to determine the IPL to use for the
     * corresponding vector. On some systems, due to hardware errata
     * and interrupt sharing, the IPL may not correspond to the IPL listed
     * in apic_vectortoipl (see apic_addspl and apic_delspl).
     */
    for (i = 0; i < (APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL); i++) {
        indx = i * APIC_VECTOR_PER_IPL;

        for (j = 0; j < APIC_VECTOR_PER_IPL; j++, indx++)
            apic_ipls[indx] = apic_vectortoipl[i];
    }
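    /*
     * For example, assuming APIC_VECTOR_PER_IPL is 16, the loop above
     * gives all sixteen vectors of priority group i the IPL stored in
     * apic_vectortoipl[i]; apic_addspl/apic_delspl may later override
     * individual apic_ipls[] entries when vectors are shared (see the
     * comment above).
     */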
    /* cpu 0 is always up (for now) */
    apic_cpus[0].aci_status = APIC_CPU_ONLINE | APIC_CPU_INTR_ENABLE;

    iptr = (int *)&apic_irq_table[0];
    for (i = 0; i <= APIC_MAX_VECTOR; i++) {
        apic_level_intr[i] = 0;
        *iptr++ = NULL;
        apic_vector_to_irq[i] = APIC_RESV_IRQ;

        /* These *must* be initted to B_TRUE! */
        apic_reprogram_info[i].done = B_TRUE;
        apic_reprogram_info[i].irqp = NULL;
        apic_reprogram_info[i].tries = 0;
        apic_reprogram_info[i].bindcpu = 0;
    }

    /*
     * Allocate a dummy irq table entry for the reserved entry.
     * This takes care of the race between removing an irq and
     * clock detecting a CPU in that irq during interrupt load
     * sampling.
     */
    apic_irq_table[APIC_RESV_IRQ] =
        kmem_zalloc(sizeof (apic_irq_t), KM_NOSLEEP);

    mutex_init(&airq_mutex, NULL, MUTEX_DEFAULT, NULL);
}

void
ioapic_init_intr(int mask_apic)
{
    int ioapic_ix;
    struct intrspec ispec;
    apic_irq_t *irqptr;
    int i, j;
    ulong_t iflag;

    LOCK_INIT_CLEAR(&apic_revector_lock);
    LOCK_INIT_CLEAR(&apic_defer_reprogram_lock);

    /* mask interrupt vectors */
    for (j = 0; j < apic_io_max && mask_apic; j++) {
        int intin_max;

        ioapic_ix = j;
        /* Bits 23-16 define the maximum redirection entries */
        intin_max = (ioapic_read(ioapic_ix, APIC_VERS_CMD) >> 16)
            & 0xff;
        for (i = 0; i <= intin_max; i++)
            ioapic_write(ioapic_ix, APIC_RDT_CMD + 2 * i, AV_MASK);
    }

    /*
     * Hack alert: deal with ACPI SCI interrupt chicken/egg here
     */
    if (apic_sci_vect > 0) {
        /*
         * acpica has already done add_avintr(); we just need
         * to finish the job by mimicking translate_irq().
         *
         * Fake up an intrspec and setup the tables
         */
        ispec.intrspec_vec = apic_sci_vect;
        ispec.intrspec_pri = SCI_IPL;

        if (apic_setup_irq_table(NULL, apic_sci_vect, NULL,
            &ispec, &apic_sci_flags, DDI_INTR_TYPE_FIXED) < 0) {
            cmn_err(CE_WARN, "!apic: SCI setup failed");
            return;
        }
        irqptr = apic_irq_table[apic_sci_vect];

        iflag = intr_clear();
        lock_set(&apic_ioapic_lock);

        /* Program I/O APIC */
        (void) apic_setup_io_intr(irqptr, apic_sci_vect, B_FALSE);

        lock_clear(&apic_ioapic_lock);
        intr_restore(iflag);

        irqptr->airq_share++;
    }

#if !defined(__xpv)
    /*
     * Hack alert: deal with ACPI HPET interrupt chicken/egg here.
     */
    if (apic_hpet_vect > 0) {
        /*
         * hpet has already done add_avintr(); we just need
         * to finish the job by mimicking translate_irq().
         *
         * Fake up an intrspec and setup the tables
         */
        ispec.intrspec_vec = apic_hpet_vect;
        ispec.intrspec_pri = CBE_HIGH_PIL;

        if (apic_setup_irq_table(NULL, apic_hpet_vect, NULL,
            &ispec, &apic_hpet_flags, DDI_INTR_TYPE_FIXED) < 0) {
            cmn_err(CE_WARN, "!apic: HPET setup failed");
            return;
        }
        irqptr = apic_irq_table[apic_hpet_vect];

        iflag = intr_clear();
        lock_set(&apic_ioapic_lock);

        /* Program I/O APIC */
        (void) apic_setup_io_intr(irqptr, apic_hpet_vect, B_FALSE);

        lock_clear(&apic_ioapic_lock);
        intr_restore(iflag);

        irqptr->airq_share++;
    }
#endif /* !defined(__xpv) */
}
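/*
 * A note on the irqno values handled below: when ISRs share an IRQ, each
 * sharer is identified by a "virtual" IRQ that VIRTIRQ() composes from the
 * apic_irq_table index and the entry's airq_share_id, and IRQINDEX()
 * recovers the plain table index; this is why both routines walk the
 * airq_next chain comparing VIRTIRQ(irqindex, airq_share_id) to irqno.
 */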
/*
 * Add mask bits to disable interrupt vector from happening
 * at or above IPL. In addition, it should remove mask bits
 * to enable interrupt vectors below the given IPL.
 *
 * Both add and delspl are complicated by the fact that different interrupts
 * may share IRQs. This can happen in two ways.
 * 1. The same H/W line is shared by more than 1 device
 *    1a. with interrupts at different IPLs
 *    1b. with interrupts at same IPL
 * 2. We ran out of vectors at a given IPL and started sharing vectors.
 * 1b and 2 should be handled gracefully, except for the fact that some ISRs
 * will get called often when no interrupt is pending for the device.
 * For 1a, we just hope that the machine blows up with the person who
 * set it up that way! In the meantime, we handle it at the higher IPL.
 */
/*ARGSUSED*/
int
apic_addspl_common(int irqno, int ipl, int min_ipl, int max_ipl)
{
    uchar_t vector;
    ulong_t iflag;
    apic_irq_t *irqptr, *irqheadptr;
    int irqindex;

    ASSERT(max_ipl <= UCHAR_MAX);
    irqindex = IRQINDEX(irqno);

    if ((irqindex == -1) || (!apic_irq_table[irqindex]))
        return (PSM_FAILURE);

    mutex_enter(&airq_mutex);
    irqptr = irqheadptr = apic_irq_table[irqindex];

    DDI_INTR_IMPLDBG((CE_CONT, "apic_addspl: dip=0x%p type=%d irqno=0x%x "
        "vector=0x%x\n", (void *)irqptr->airq_dip,
        irqptr->airq_mps_intr_index, irqno, irqptr->airq_vector));

    while (irqptr) {
        if (VIRTIRQ(irqindex, irqptr->airq_share_id) == irqno)
            break;
        irqptr = irqptr->airq_next;
    }
    irqptr->airq_share++;

    mutex_exit(&airq_mutex);

    /* return if it is not hardware interrupt */
    if (irqptr->airq_mps_intr_index == RESERVE_INDEX)
        return (PSM_SUCCESS);

    /* Or if there are more interrupts at a higher IPL */
    if (ipl != max_ipl)
        return (PSM_SUCCESS);

    /*
     * if apic_picinit() has not been called yet, just return.
     * At the end of apic_picinit(), we will call setup_io_intr().
     */

    if (!apic_picinit_called)
        return (PSM_SUCCESS);

    /*
     * Upgrade vector if max_ipl is not earlier ipl. If we cannot allocate,
     * return failure. Not very elegant, but then we hope the
     * machine will blow up with ...
     */
    if (irqptr->airq_ipl != max_ipl &&
        !ioapic_mask_workaround[irqptr->airq_ioapicindex]) {

        vector = apic_allocate_vector(max_ipl, irqindex, 1);
        if (vector == 0) {
            irqptr->airq_share--;
            return (PSM_FAILURE);
        }
        irqptr = irqheadptr;
        apic_mark_vector(irqptr->airq_vector, vector);
        while (irqptr) {
            irqptr->airq_vector = vector;
            irqptr->airq_ipl = (uchar_t)max_ipl;
            /*
             * reprogram irq being added and everyone else
             * who is not in the UNINIT state
             */
            if ((VIRTIRQ(irqindex, irqptr->airq_share_id) ==
                irqno) || (irqptr->airq_temp_cpu != IRQ_UNINIT)) {
                apic_record_rdt_entry(irqptr, irqindex);

                iflag = intr_clear();
                lock_set(&apic_ioapic_lock);

                (void) apic_setup_io_intr(irqptr, irqindex,
                    B_FALSE);

                lock_clear(&apic_ioapic_lock);
                intr_restore(iflag);
            }
            irqptr = irqptr->airq_next;
        }
        return (PSM_SUCCESS);

    } else if (irqptr->airq_ipl != max_ipl &&
        ioapic_mask_workaround[irqptr->airq_ioapicindex]) {
        /*
         * We cannot upgrade the vector, but we can change
         * the IPL that this vector induces.
         *
         * Note that we subtract APIC_BASE_VECT from the vector
         * here because this array is used in apic_intr_enter
         * (no need to add APIC_BASE_VECT in that hot code
         * path since we can do it in the rarely-executed path
         * here).
         */
        apic_ipls[irqptr->airq_vector - APIC_BASE_VECT] =
            (uchar_t)max_ipl;

        irqptr = irqheadptr;
        while (irqptr) {
            irqptr->airq_ipl = (uchar_t)max_ipl;
            irqptr = irqptr->airq_next;
        }

        return (PSM_SUCCESS);
    }

    ASSERT(irqptr);

    iflag = intr_clear();
    lock_set(&apic_ioapic_lock);

    (void) apic_setup_io_intr(irqptr, irqindex, B_FALSE);

    lock_clear(&apic_ioapic_lock);
    intr_restore(iflag);

    return (PSM_SUCCESS);
}

/*
 * Recompute mask bits for the given interrupt vector.
 * If there is no interrupt servicing routine for this
 * vector, this function should disable interrupt vector
 * from happening at all IPLs. If there are still
 * handlers using the given vector, this function should
 * disable the given vector from happening below the lowest
 * IPL of the remaining handlers.
 */
/*ARGSUSED*/
int
apic_delspl_common(int irqno, int ipl, int min_ipl, int max_ipl)
{
    uchar_t vector;
    uint32_t bind_cpu;
    int intin, irqindex;
    int ioapic_ix;
    apic_irq_t *irqptr, *irqheadptr, *irqp;
    ulong_t iflag;

    mutex_enter(&airq_mutex);
    irqindex = IRQINDEX(irqno);
    irqptr = irqheadptr = apic_irq_table[irqindex];

    DDI_INTR_IMPLDBG((CE_CONT, "apic_delspl: dip=0x%p type=%d irqno=0x%x "
        "vector=0x%x\n", (void *)irqptr->airq_dip,
        irqptr->airq_mps_intr_index, irqno, irqptr->airq_vector));

    while (irqptr) {
        if (VIRTIRQ(irqindex, irqptr->airq_share_id) == irqno)
            break;
        irqptr = irqptr->airq_next;
    }
    ASSERT(irqptr);

    irqptr->airq_share--;

    mutex_exit(&airq_mutex);

    if (ipl < max_ipl)
        return (PSM_SUCCESS);

    /* return if it is not hardware interrupt */
    if (irqptr->airq_mps_intr_index == RESERVE_INDEX)
        return (PSM_SUCCESS);

    if (!apic_picinit_called) {
        /*
         * Clear irq_struct. If two devices shared an interrupt
         * line & 1 unloaded before picinit, we are hosed. But, then
         * we hope the machine will ...
         */
        irqptr->airq_mps_intr_index = FREE_INDEX;
        irqptr->airq_temp_cpu = IRQ_UNINIT;
        apic_free_vector(irqptr->airq_vector);
        return (PSM_SUCCESS);
    }
    /*
     * Downgrade vector to new max_ipl if needed. If we cannot allocate,
     * use old IPL. Not very elegant, but then we hope ...
     */
    if ((irqptr->airq_ipl != max_ipl) && (max_ipl != PSM_INVALID_IPL) &&
        !ioapic_mask_workaround[irqptr->airq_ioapicindex]) {
        apic_irq_t *irqp;
        if (vector = apic_allocate_vector(max_ipl, irqno, 1)) {
            apic_mark_vector(irqheadptr->airq_vector, vector);
            irqp = irqheadptr;
            while (irqp) {
                irqp->airq_vector = vector;
                irqp->airq_ipl = (uchar_t)max_ipl;
                if (irqp->airq_temp_cpu != IRQ_UNINIT) {
                    apic_record_rdt_entry(irqp, irqindex);

                    iflag = intr_clear();
                    lock_set(&apic_ioapic_lock);

                    (void) apic_setup_io_intr(irqp,
                        irqindex, B_FALSE);

                    lock_clear(&apic_ioapic_lock);
                    intr_restore(iflag);
                }
                irqp = irqp->airq_next;
            }
        }

    } else if (irqptr->airq_ipl != max_ipl &&
        max_ipl != PSM_INVALID_IPL &&
        ioapic_mask_workaround[irqptr->airq_ioapicindex]) {

        /*
         * We cannot downgrade the IPL of the vector below the
         * vector's hardware priority. If we did, it would be
         * possible for a higher-priority hardware vector to
         * interrupt a CPU running at an IPL lower than the hardware
         * priority of the interrupting vector (but higher than the
         * soft IPL of this IRQ). When this happens, we would then
         * try to drop the IPL BELOW what it was (effectively
         * dropping below base_spl), which would be potentially
         * catastrophic.
         *
         * (e.g. Suppose the hardware vector associated with this
         * IRQ is 0x40 (hardware IPL of 4). Further assume that the
         * old IPL of this IRQ was 4, but the new IPL is 1. If we
         * forced vector 0x40 to result in an IPL of 1, it would be
         * possible for the processor to be executing at IPL 3 and
         * for an interrupt to come in on vector 0x40, interrupting
         * the currently-executing ISR. When apic_intr_enter
         * consults apic_ipls[], it will return 1, bringing the IPL
         * of the CPU down to 1, so even though the processor was
         * running at IPL 3, an IPL 1 interrupt will have
         * interrupted it, which must not happen.)
         *
         * Effectively, this means that the hardware priority
         * corresponding to the IRQ's IPL (in apic_ipls[]) cannot be
         * lower than the vector's hardware priority.
         *
         * (In the above example, then, after removal of the IPL 4
         * device's interrupt handler, the new IPL will continue to
         * be 4 because the hardware priority that IPL 1 implies is
         * lower than the hardware priority of the vector used.)
         */
        /* apic_ipls is indexed by vector, starting at APIC_BASE_VECT */
        const int apic_ipls_index = irqptr->airq_vector -
            APIC_BASE_VECT;
        const int vect_inherent_hwpri = irqptr->airq_vector >>
            APIC_IPL_SHIFT;

        /*
         * If there are still devices using this IRQ, determine the
         * new ipl to use.
         */
        if (irqptr->airq_share) {
            int vect_desired_hwpri, hwpri;

            ASSERT(max_ipl < MAXIPL);
            vect_desired_hwpri = apic_ipltopri[max_ipl] >>
                APIC_IPL_SHIFT;

            /*
             * If the desired IPL's hardware priority is lower
             * than that of the vector, use the hardware priority
             * of the vector to determine the new IPL.
             */
            hwpri = (vect_desired_hwpri < vect_inherent_hwpri) ?
                vect_inherent_hwpri : vect_desired_hwpri;

            /*
             * Now, to get the right index for apic_vectortoipl,
             * we need to subtract APIC_BASE_VECT from the
             * hardware-vector-equivalent (in hwpri). Since hwpri
             * is already shifted, we shift APIC_BASE_VECT before
             * doing the subtraction.
             */
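            /*
             * Worked example, assuming APIC_BASE_VECT is 0x20 and
             * APIC_IPL_SHIFT is 4: vector 0x40 has inherent
             * hardware priority 0x40 >> 4 = 4. If the desired
             * priority is lower, hwpri stays 4, and subtracting
             * 0x20 >> 4 = 2 below yields index 2 into
             * apic_vectortoipl.
             */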
            hwpri -= (APIC_BASE_VECT >> APIC_IPL_SHIFT);

            ASSERT(hwpri >= 0);
            ASSERT(hwpri < MAXIPL);
            max_ipl = apic_vectortoipl[hwpri];
            apic_ipls[apic_ipls_index] = max_ipl;

            irqp = irqheadptr;
            while (irqp) {
                irqp->airq_ipl = (uchar_t)max_ipl;
                irqp = irqp->airq_next;
            }
        } else {
            /*
             * No more devices on this IRQ, so reset this vector's
             * element in apic_ipls to the original IPL for this
             * vector
             */
            apic_ipls[apic_ipls_index] =
                apic_vectortoipl[vect_inherent_hwpri];
        }
    }

    if (irqptr->airq_share)
        return (PSM_SUCCESS);

    iflag = intr_clear();
    lock_set(&apic_ioapic_lock);

    if (irqptr->airq_mps_intr_index == MSI_INDEX) {
        /*
         * Disable the MSI vector.
         * Make sure we only disable on the last
         * of the multi-MSI support
         */
        if (i_ddi_intr_get_current_nenables(irqptr->airq_dip) == 1) {
            apic_pci_msi_disable_mode(irqptr->airq_dip,
                DDI_INTR_TYPE_MSI);
        }
    } else if (irqptr->airq_mps_intr_index == MSIX_INDEX) {
        /*
         * Disable the MSI-X vector.
         * Needs to clear its mask and addr/data for each MSI-X
         */
        apic_pci_msi_unconfigure(irqptr->airq_dip, DDI_INTR_TYPE_MSIX,
            irqptr->airq_origirq);
        /*
         * Make sure we only disable on the last MSI-X
         */
        if (i_ddi_intr_get_current_nenables(irqptr->airq_dip) == 1) {
            apic_pci_msi_disable_mode(irqptr->airq_dip,
                DDI_INTR_TYPE_MSIX);
        }
    } else {
        /*
         * The assumption here is that this is safe, even for
         * systems with IOAPICs that suffer from the hardware
         * erratum because all devices have been quiesced before
         * they unregister their interrupt handlers. If that
         * assumption turns out to be false, this mask operation
         * can induce the same erratum result we're trying to
         * avoid.
         */
        ioapic_ix = irqptr->airq_ioapicindex;
        intin = irqptr->airq_intin_no;
        ioapic_write(ioapic_ix, APIC_RDT_CMD + 2 * intin, AV_MASK);
    }

#if !defined(__xpv)
    apic_vt_ops->apic_intrr_free_entry(irqptr);
#endif

    if (max_ipl == PSM_INVALID_IPL) {
        ASSERT(irqheadptr == irqptr);
        bind_cpu = irqptr->airq_temp_cpu;
        if (((uint32_t)bind_cpu != IRQ_UNBOUND) &&
            ((uint32_t)bind_cpu != IRQ_UNINIT)) {
            ASSERT((bind_cpu & ~IRQ_USER_BOUND) < apic_nproc);
            if (bind_cpu & IRQ_USER_BOUND) {
                /* If hardbound, temp_cpu == cpu */
                bind_cpu &= ~IRQ_USER_BOUND;
                apic_cpus[bind_cpu].aci_bound--;
            } else
                apic_cpus[bind_cpu].aci_temp_bound--;
        }
        irqptr->airq_temp_cpu = IRQ_UNINIT;
        irqptr->airq_mps_intr_index = FREE_INDEX;
        lock_clear(&apic_ioapic_lock);
        intr_restore(iflag);
        apic_free_vector(irqptr->airq_vector);
        return (PSM_SUCCESS);
    }
    lock_clear(&apic_ioapic_lock);
    intr_restore(iflag);

    mutex_enter(&airq_mutex);
    if ((irqptr == apic_irq_table[irqindex])) {
        apic_irq_t *oldirqptr;
        /* Move valid irq entry to the head */
        irqheadptr = oldirqptr = irqptr;
        irqptr = irqptr->airq_next;
        ASSERT(irqptr);
        while (irqptr) {
            if (irqptr->airq_mps_intr_index != FREE_INDEX)
                break;
            oldirqptr = irqptr;
            irqptr = irqptr->airq_next;
        }
        /* remove all invalid ones from the beginning */
        apic_irq_table[irqindex] = irqptr;
        /*
         * and link them back after the head. The invalid ones
         * begin with irqheadptr and end at oldirqptr
         */
        oldirqptr->airq_next = irqptr->airq_next;
        irqptr->airq_next = irqheadptr;
    }
    mutex_exit(&airq_mutex);

    irqptr->airq_temp_cpu = IRQ_UNINIT;
    irqptr->airq_mps_intr_index = FREE_INDEX;

    return (PSM_SUCCESS);
}

/*
 * apic_introp_xlate() replaces apic_translate_irq() and is
 * called only from apic_intr_ops(). With the new ADII framework,
 * the priority can no longer be retrieved through i_ddi_get_intrspec().
 * It has to be passed in from the caller.
 */
int
apic_introp_xlate(dev_info_t *dip, struct intrspec *ispec, int type)
{
    char dev_type[16];
    int dev_len, pci_irq, newirq, bustype, devid, busid, i;
    int irqno = ispec->intrspec_vec;
    ddi_acc_handle_t cfg_handle;
    uchar_t ipin;
    struct apic_io_intr *intrp;
    iflag_t intr_flag;
    ACPI_SUBTABLE_HEADER *hp;
    ACPI_MADT_INTERRUPT_OVERRIDE *isop;
    apic_irq_t *airqp;
    int parent_is_pci_or_pciex = 0;
    int child_is_pciex = 0;

    DDI_INTR_IMPLDBG((CE_CONT, "apic_introp_xlate: dip=0x%p name=%s "
        "type=%d irqno=0x%x\n", (void *)dip, ddi_get_name(dip), type,
        irqno));

    dev_len = sizeof (dev_type);
    if (ddi_getlongprop_buf(DDI_DEV_T_ANY, ddi_get_parent(dip),
        DDI_PROP_DONTPASS, "device_type", (caddr_t)dev_type,
        &dev_len) == DDI_PROP_SUCCESS) {
        if ((strcmp(dev_type, "pci") == 0) ||
            (strcmp(dev_type, "pciex") == 0))
            parent_is_pci_or_pciex = 1;
    }

    if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
        DDI_PROP_DONTPASS, "compatible", (caddr_t)dev_type,
        &dev_len) == DDI_PROP_SUCCESS) {
        if (strstr(dev_type, "pciex"))
            child_is_pciex = 1;
    }


    if (DDI_INTR_IS_MSI_OR_MSIX(type)) {
        if ((airqp = apic_find_irq(dip, ispec, type)) != NULL) {
            airqp->airq_iflag.bustype =
                child_is_pciex ? BUS_PCIE : BUS_PCI;
            return (apic_vector_to_irq[airqp->airq_vector]);
        }
        return (apic_setup_irq_table(dip, irqno, NULL, ispec,
            NULL, type));
    }

    bustype = 0;

    /* check if we have already translated this irq */
    mutex_enter(&airq_mutex);
    newirq = apic_min_device_irq;
    for (; newirq <= apic_max_device_irq; newirq++) {
        airqp = apic_irq_table[newirq];
        while (airqp) {
            if ((airqp->airq_dip == dip) &&
                (airqp->airq_origirq == irqno) &&
                (airqp->airq_mps_intr_index != FREE_INDEX)) {

                mutex_exit(&airq_mutex);
                return (VIRTIRQ(newirq, airqp->airq_share_id));
            }
            airqp = airqp->airq_next;
        }
    }
    mutex_exit(&airq_mutex);
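    /*
     * No cached translation was found above, so derive one. Three paths
     * follow: the MPS default configuration (nothing to look up), PCI
     * devices (translated through ACPI routing information or through
     * MP table entries keyed by bus, device and interrupt pin), and
     * ISA/EISA devices (translated through ACPI interrupt source
     * overrides or MP table bus entries).
     */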
BUS_PCIE : BUS_PCI; 1903 if ((newirq = apic_setup_irq_table(dip, pci_irq, NULL, 1904 ispec, &intr_flag, type)) == -1) 1905 goto nonpci; 1906 return (newirq); 1907 } else { 1908 pci_irq = ((devid & 0x1f) << 2) | (ipin & 0x3); 1909 if ((intrp = apic_find_io_intr_w_busid(pci_irq, busid)) 1910 == NULL) { 1911 if ((pci_irq = apic_handle_pci_pci_bridge(dip, 1912 devid, ipin, &intrp)) == -1) 1913 goto nonpci; 1914 } 1915 if ((newirq = apic_setup_irq_table(dip, pci_irq, intrp, 1916 ispec, NULL, type)) == -1) 1917 goto nonpci; 1918 return (newirq); 1919 } 1920 } else if (strcmp(dev_type, "isa") == 0) 1921 bustype = BUS_ISA; 1922 else if (strcmp(dev_type, "eisa") == 0) 1923 bustype = BUS_EISA; 1924 1925 nonpci: 1926 if (apic_enable_acpi && !apic_use_acpi_madt_only) { 1927 /* search iso entries first */ 1928 if (acpi_iso_cnt != 0) { 1929 hp = (ACPI_SUBTABLE_HEADER *)acpi_isop; 1930 i = 0; 1931 while (i < acpi_iso_cnt) { 1932 if (hp->Type == 1933 ACPI_MADT_TYPE_INTERRUPT_OVERRIDE) { 1934 isop = 1935 (ACPI_MADT_INTERRUPT_OVERRIDE *) hp; 1936 if (isop->Bus == 0 && 1937 isop->SourceIrq == irqno) { 1938 newirq = isop->GlobalIrq; 1939 intr_flag.intr_po = 1940 isop->IntiFlags & 1941 ACPI_MADT_POLARITY_MASK; 1942 intr_flag.intr_el = 1943 (isop->IntiFlags & 1944 ACPI_MADT_TRIGGER_MASK) 1945 >> 2; 1946 intr_flag.bustype = BUS_ISA; 1947 1948 return (apic_setup_irq_table( 1949 dip, newirq, NULL, ispec, 1950 &intr_flag, type)); 1951 1952 } 1953 i++; 1954 } 1955 hp = (ACPI_SUBTABLE_HEADER *)(((char *)hp) + 1956 hp->Length); 1957 } 1958 } 1959 intr_flag.intr_po = INTR_PO_ACTIVE_HIGH; 1960 intr_flag.intr_el = INTR_EL_EDGE; 1961 intr_flag.bustype = BUS_ISA; 1962 return (apic_setup_irq_table(dip, irqno, NULL, ispec, 1963 &intr_flag, type)); 1964 } else { 1965 if (bustype == 0) 1966 bustype = eisa_level_intr_mask ? BUS_EISA : BUS_ISA; 1967 for (i = 0; i < 2; i++) { 1968 if (((busid = apic_find_bus_id(bustype)) != -1) && 1969 ((intrp = apic_find_io_intr_w_busid(irqno, busid)) 1970 != NULL)) { 1971 if ((newirq = apic_setup_irq_table(dip, irqno, 1972 intrp, ispec, NULL, type)) != -1) { 1973 return (newirq); 1974 } 1975 goto defconf; 1976 } 1977 bustype = (bustype == BUS_EISA) ? BUS_ISA : BUS_EISA; 1978 } 1979 } 1980 1981 /* MPS default configuration */ 1982 defconf: 1983 newirq = apic_setup_irq_table(dip, irqno, NULL, ispec, NULL, type); 1984 if (newirq == -1) 1985 return (newirq); 1986 ASSERT(IRQINDEX(newirq) == irqno); 1987 ASSERT(apic_irq_table[irqno]); 1988 return (newirq); 1989 } 1990 1991 1992 1993 1994 1995 1996 /* 1997 * On machines with PCI-PCI bridges, a device behind a PCI-PCI bridge 1998 * needs special handling. We may need to chase up the device tree, 1999 * using the PCI-PCI Bridge specification's "rotating IPIN assumptions", 2000 * to find the IPIN at the root bus that relates to the IPIN on the 2001 * subsidiary bus (for ACPI or MP). We may, however, have an entry 2002 * in the MP table or the ACPI namespace for this device itself. 2003 * We handle both cases in the search below. 
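 *
 * For example (values purely illustrative): a device at devno 3
 * asserting INTA# (ipin 0) behind a bridge swizzles to
 * ipin = (3 + 0) % PCI_INTD = 3, i.e. INTD# at the bridge's own slot,
 * and the lookup key becomes
 * ((bridge_devno & 0x1f) << 2) | (ipin & 0x3), as computed below.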
2004 */ 2005 /* this is the non-acpi version */ 2006 static int 2007 apic_handle_pci_pci_bridge(dev_info_t *idip, int child_devno, int child_ipin, 2008 struct apic_io_intr **intrp) 2009 { 2010 dev_info_t *dipp, *dip; 2011 int pci_irq; 2012 ddi_acc_handle_t cfg_handle; 2013 int bridge_devno, bridge_bus; 2014 int ipin; 2015 2016 dip = idip; 2017 2018 /*CONSTCOND*/ 2019 while (1) { 2020 if (((dipp = ddi_get_parent(dip)) == (dev_info_t *)NULL) || 2021 (pci_config_setup(dipp, &cfg_handle) != DDI_SUCCESS)) 2022 return (-1); 2023 if ((pci_config_get8(cfg_handle, PCI_CONF_BASCLASS) == 2024 PCI_CLASS_BRIDGE) && (pci_config_get8(cfg_handle, 2025 PCI_CONF_SUBCLASS) == PCI_BRIDGE_PCI)) { 2026 pci_config_teardown(&cfg_handle); 2027 if (acpica_get_bdf(dipp, &bridge_bus, &bridge_devno, 2028 NULL) != 0) 2029 return (-1); 2030 /* 2031 * This is the rotating scheme documented in the 2032 * PCI-to-PCI spec. If the PCI-to-PCI bridge is 2033 * behind another PCI-to-PCI bridge, then it needs 2034 * to keep ascending until an interrupt entry is 2035 * found or the root is reached. 2036 */ 2037 ipin = (child_devno + child_ipin) % PCI_INTD; 2038 if (bridge_bus == 0 && apic_pci_bus_total == 1) 2039 bridge_bus = (int)apic_single_pci_busid; 2040 pci_irq = ((bridge_devno & 0x1f) << 2) | 2041 (ipin & 0x3); 2042 if ((*intrp = apic_find_io_intr_w_busid(pci_irq, 2043 bridge_bus)) != NULL) { 2044 return (pci_irq); 2045 } 2046 dip = dipp; 2047 child_devno = bridge_devno; 2048 child_ipin = ipin; 2049 } else { 2050 pci_config_teardown(&cfg_handle); 2051 return (-1); 2052 } 2053 } 2054 /*LINTED: function will not fall off the bottom */ 2055 } 2056 2057 2058 2059 2060 static uchar_t 2061 acpi_find_ioapic(int irq) 2062 { 2063 int i; 2064 2065 for (i = 0; i < apic_io_max; i++) { 2066 if (irq >= apic_io_vectbase[i] && irq <= apic_io_vectend[i]) 2067 return (i); 2068 } 2069 return (0xFF); /* shouldn't happen */ 2070 } 2071 2072 /* 2073 * See if two irqs are compatible for sharing a vector. 2074 * Currently we only support sharing of PCI devices. 2075 */ 2076 static int 2077 acpi_intr_compatible(iflag_t iflag1, iflag_t iflag2) 2078 { 2079 uint_t level1, po1; 2080 uint_t level2, po2; 2081 2082 /* Assume active high by default */ 2083 po1 = 0; 2084 po2 = 0; 2085 2086 if (iflag1.bustype != iflag2.bustype || iflag1.bustype != BUS_PCI) 2087 return (0); 2088 2089 if (iflag1.intr_el == INTR_EL_CONFORM) 2090 level1 = AV_LEVEL; 2091 else 2092 level1 = (iflag1.intr_el == INTR_EL_LEVEL) ? AV_LEVEL : 0; 2093 2094 if (level1 && ((iflag1.intr_po == INTR_PO_ACTIVE_LOW) || 2095 (iflag1.intr_po == INTR_PO_CONFORM))) 2096 po1 = AV_ACTIVE_LOW; 2097 2098 if (iflag2.intr_el == INTR_EL_CONFORM) 2099 level2 = AV_LEVEL; 2100 else 2101 level2 = (iflag2.intr_el == INTR_EL_LEVEL) ? 
AV_LEVEL : 0; 2102 2103 if (level2 && ((iflag2.intr_po == INTR_PO_ACTIVE_LOW) || 2104 (iflag2.intr_po == INTR_PO_CONFORM))) 2105 po2 = AV_ACTIVE_LOW; 2106 2107 if ((level1 == level2) && (po1 == po2)) 2108 return (1); 2109 2110 return (0); 2111 } 2112 2113 /* 2114 * Attempt to share vector with someone else 2115 */ 2116 static int 2117 apic_share_vector(int irqno, iflag_t *intr_flagp, short intr_index, int ipl, 2118 uchar_t ioapicindex, uchar_t ipin, apic_irq_t **irqptrp) 2119 { 2120 #ifdef DEBUG 2121 apic_irq_t *tmpirqp = NULL; 2122 #endif /* DEBUG */ 2123 apic_irq_t *irqptr, dummyirq; 2124 int newirq, chosen_irq = -1, share = 127; 2125 int lowest, highest, i; 2126 uchar_t share_id; 2127 2128 DDI_INTR_IMPLDBG((CE_CONT, "apic_share_vector: irqno=0x%x " 2129 "intr_index=0x%x ipl=0x%x\n", irqno, intr_index, ipl)); 2130 2131 highest = apic_ipltopri[ipl] + APIC_VECTOR_MASK; 2132 lowest = apic_ipltopri[ipl-1] + APIC_VECTOR_PER_IPL; 2133 2134 if (highest < lowest) /* Both ipl and ipl-1 map to same pri */ 2135 lowest -= APIC_VECTOR_PER_IPL; 2136 dummyirq.airq_mps_intr_index = intr_index; 2137 dummyirq.airq_ioapicindex = ioapicindex; 2138 dummyirq.airq_intin_no = ipin; 2139 if (intr_flagp) 2140 dummyirq.airq_iflag = *intr_flagp; 2141 apic_record_rdt_entry(&dummyirq, irqno); 2142 for (i = lowest; i <= highest; i++) { 2143 newirq = apic_vector_to_irq[i]; 2144 if (newirq == APIC_RESV_IRQ) 2145 continue; 2146 irqptr = apic_irq_table[newirq]; 2147 2148 if ((dummyirq.airq_rdt_entry & 0xFF00) != 2149 (irqptr->airq_rdt_entry & 0xFF00)) 2150 /* not compatible */ 2151 continue; 2152 2153 if (irqptr->airq_share < share) { 2154 share = irqptr->airq_share; 2155 chosen_irq = newirq; 2156 } 2157 } 2158 if (chosen_irq != -1) { 2159 /* 2160 * Assign a share id which is free or which is larger 2161 * than the largest one. 
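		 * For illustration: if the chain for the chosen irq already
		 * carries share ids 1 and 2, the new entry gets id 3; if an
		 * entry marked FREE_INDEX is found first, its old share id
		 * is recycled instead.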
2162 */ 2163 share_id = 1; 2164 mutex_enter(&airq_mutex); 2165 irqptr = apic_irq_table[chosen_irq]; 2166 while (irqptr) { 2167 if (irqptr->airq_mps_intr_index == FREE_INDEX) { 2168 share_id = irqptr->airq_share_id; 2169 break; 2170 } 2171 if (share_id <= irqptr->airq_share_id) 2172 share_id = irqptr->airq_share_id + 1; 2173 #ifdef DEBUG 2174 tmpirqp = irqptr; 2175 #endif /* DEBUG */ 2176 irqptr = irqptr->airq_next; 2177 } 2178 if (!irqptr) { 2179 irqptr = kmem_zalloc(sizeof (apic_irq_t), KM_SLEEP); 2180 irqptr->airq_temp_cpu = IRQ_UNINIT; 2181 irqptr->airq_next = 2182 apic_irq_table[chosen_irq]->airq_next; 2183 apic_irq_table[chosen_irq]->airq_next = irqptr; 2184 #ifdef DEBUG 2185 tmpirqp = apic_irq_table[chosen_irq]; 2186 #endif /* DEBUG */ 2187 } 2188 irqptr->airq_mps_intr_index = intr_index; 2189 irqptr->airq_ioapicindex = ioapicindex; 2190 irqptr->airq_intin_no = ipin; 2191 if (intr_flagp) 2192 irqptr->airq_iflag = *intr_flagp; 2193 irqptr->airq_vector = apic_irq_table[chosen_irq]->airq_vector; 2194 irqptr->airq_share_id = share_id; 2195 apic_record_rdt_entry(irqptr, irqno); 2196 *irqptrp = irqptr; 2197 #ifdef DEBUG 2198 /* shuffle the pointers to test apic_delspl path */ 2199 if (tmpirqp) { 2200 tmpirqp->airq_next = irqptr->airq_next; 2201 irqptr->airq_next = apic_irq_table[chosen_irq]; 2202 apic_irq_table[chosen_irq] = irqptr; 2203 } 2204 #endif /* DEBUG */ 2205 mutex_exit(&airq_mutex); 2206 return (VIRTIRQ(chosen_irq, share_id)); 2207 } 2208 return (-1); 2209 } 2210 2211 /* 2212 * 2213 */ 2214 static int 2215 apic_setup_irq_table(dev_info_t *dip, int irqno, struct apic_io_intr *intrp, 2216 struct intrspec *ispec, iflag_t *intr_flagp, int type) 2217 { 2218 int origirq = ispec->intrspec_vec; 2219 uchar_t ipl = ispec->intrspec_pri; 2220 int newirq, intr_index; 2221 uchar_t ipin, ioapic, ioapicindex, vector; 2222 apic_irq_t *irqptr; 2223 major_t major; 2224 dev_info_t *sdip; 2225 2226 DDI_INTR_IMPLDBG((CE_CONT, "apic_setup_irq_table: dip=0x%p type=%d " 2227 "irqno=0x%x origirq=0x%x\n", (void *)dip, type, irqno, origirq)); 2228 2229 ASSERT(ispec != NULL); 2230 2231 major = (dip != NULL) ? ddi_driver_major(dip) : 0; 2232 2233 if (DDI_INTR_IS_MSI_OR_MSIX(type)) { 2234 /* MSI/X doesn't need to setup ioapic stuffs */ 2235 ioapicindex = 0xff; 2236 ioapic = 0xff; 2237 ipin = (uchar_t)0xff; 2238 intr_index = (type == DDI_INTR_TYPE_MSI) ? MSI_INDEX : 2239 MSIX_INDEX; 2240 mutex_enter(&airq_mutex); 2241 if ((irqno = apic_allocate_irq(apic_first_avail_irq)) == -1) { 2242 mutex_exit(&airq_mutex); 2243 /* need an irq for MSI/X to index into autovect[] */ 2244 cmn_err(CE_WARN, "No interrupt irq: %s instance %d", 2245 ddi_get_name(dip), ddi_get_instance(dip)); 2246 return (-1); 2247 } 2248 mutex_exit(&airq_mutex); 2249 2250 } else if (intrp != NULL) { 2251 intr_index = (int)(intrp - apic_io_intrp); 2252 ioapic = intrp->intr_destid; 2253 ipin = intrp->intr_destintin; 2254 /* Find ioapicindex. If destid was ALL, we will exit with 0. 
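		 * (INTR_ALL_APIC means the entry may be routed through any
		 * I/O APIC, so falling through to index 0 is acceptable; the
		 * ASSERT below allows that case.)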
*/ 2255 for (ioapicindex = apic_io_max - 1; ioapicindex; ioapicindex--) 2256 if (apic_io_id[ioapicindex] == ioapic) 2257 break; 2258 ASSERT((ioapic == apic_io_id[ioapicindex]) || 2259 (ioapic == INTR_ALL_APIC)); 2260 2261 /* check whether this intin# has been used by another irqno */ 2262 if ((newirq = apic_find_intin(ioapicindex, ipin)) != -1) { 2263 return (newirq); 2264 } 2265 2266 } else if (intr_flagp != NULL) { 2267 /* ACPI case */ 2268 intr_index = ACPI_INDEX; 2269 ioapicindex = acpi_find_ioapic(irqno); 2270 ASSERT(ioapicindex != 0xFF); 2271 ioapic = apic_io_id[ioapicindex]; 2272 ipin = irqno - apic_io_vectbase[ioapicindex]; 2273 if (apic_irq_table[irqno] && 2274 apic_irq_table[irqno]->airq_mps_intr_index == ACPI_INDEX) { 2275 ASSERT(apic_irq_table[irqno]->airq_intin_no == ipin && 2276 apic_irq_table[irqno]->airq_ioapicindex == 2277 ioapicindex); 2278 return (irqno); 2279 } 2280 2281 } else { 2282 /* default configuration */ 2283 ioapicindex = 0; 2284 ioapic = apic_io_id[ioapicindex]; 2285 ipin = (uchar_t)irqno; 2286 intr_index = DEFAULT_INDEX; 2287 } 2288 2289 if (ispec == NULL) { 2290 APIC_VERBOSE_IOAPIC((CE_WARN, "No intrspec for irqno = %x\n", 2291 irqno)); 2292 } else if ((vector = apic_allocate_vector(ipl, irqno, 0)) == 0) { 2293 if ((newirq = apic_share_vector(irqno, intr_flagp, intr_index, 2294 ipl, ioapicindex, ipin, &irqptr)) != -1) { 2295 irqptr->airq_ipl = ipl; 2296 irqptr->airq_origirq = (uchar_t)origirq; 2297 irqptr->airq_dip = dip; 2298 irqptr->airq_major = major; 2299 sdip = apic_irq_table[IRQINDEX(newirq)]->airq_dip; 2300 /* This is OK to do really */ 2301 if (sdip == NULL) { 2302 cmn_err(CE_WARN, "Sharing vectors: %s" 2303 " instance %d and SCI", 2304 ddi_get_name(dip), ddi_get_instance(dip)); 2305 } else { 2306 cmn_err(CE_WARN, "Sharing vectors: %s" 2307 " instance %d and %s instance %d", 2308 ddi_get_name(sdip), ddi_get_instance(sdip), 2309 ddi_get_name(dip), ddi_get_instance(dip)); 2310 } 2311 return (newirq); 2312 } 2313 /* try high priority allocation now that share has failed */ 2314 if ((vector = apic_allocate_vector(ipl, irqno, 1)) == 0) { 2315 cmn_err(CE_WARN, "No interrupt vector: %s instance %d", 2316 ddi_get_name(dip), ddi_get_instance(dip)); 2317 return (-1); 2318 } 2319 } 2320 2321 mutex_enter(&airq_mutex); 2322 if (apic_irq_table[irqno] == NULL) { 2323 irqptr = kmem_zalloc(sizeof (apic_irq_t), KM_SLEEP); 2324 irqptr->airq_temp_cpu = IRQ_UNINIT; 2325 apic_irq_table[irqno] = irqptr; 2326 } else { 2327 irqptr = apic_irq_table[irqno]; 2328 if (irqptr->airq_mps_intr_index != FREE_INDEX) { 2329 /* 2330 * The slot is used by another irqno, so allocate 2331 * a free irqno for this interrupt 2332 */ 2333 newirq = apic_allocate_irq(apic_first_avail_irq); 2334 if (newirq == -1) { 2335 mutex_exit(&airq_mutex); 2336 return (-1); 2337 } 2338 irqno = newirq; 2339 irqptr = apic_irq_table[irqno]; 2340 if (irqptr == NULL) { 2341 irqptr = kmem_zalloc(sizeof (apic_irq_t), 2342 KM_SLEEP); 2343 irqptr->airq_temp_cpu = IRQ_UNINIT; 2344 apic_irq_table[irqno] = irqptr; 2345 } 2346 vector = apic_modify_vector(vector, newirq); 2347 } 2348 } 2349 apic_max_device_irq = max(irqno, apic_max_device_irq); 2350 apic_min_device_irq = min(irqno, apic_min_device_irq); 2351 mutex_exit(&airq_mutex); 2352 irqptr->airq_ioapicindex = ioapicindex; 2353 irqptr->airq_intin_no = ipin; 2354 irqptr->airq_ipl = ipl; 2355 irqptr->airq_vector = vector; 2356 irqptr->airq_origirq = (uchar_t)origirq; 2357 irqptr->airq_share_id = 0; 2358 irqptr->airq_mps_intr_index = (short)intr_index; 2359 
irqptr->airq_dip = dip;
	irqptr->airq_major = major;
	irqptr->airq_cpu = apic_bind_intr(dip, irqno, ioapic, ipin);
	if (intr_flagp)
		irqptr->airq_iflag = *intr_flagp;

	if (!DDI_INTR_IS_MSI_OR_MSIX(type)) {
		/* setup I/O APIC entry for non-MSI/X interrupts */
		apic_record_rdt_entry(irqptr, irqno);
	}
	return (irqno);
}

/*
 * Return the cpu to which this intr should be bound.
 * Check properties or any other mechanism to see if the user wants it
 * bound to a specific CPU. If so, return the cpu id with the high bit set.
 * If not, use the policy to choose a cpu and return the id.
 */
uint32_t
apic_bind_intr(dev_info_t *dip, int irq, uchar_t ioapicid, uchar_t intin)
{
	int instance, instno, prop_len, bind_cpu, count;
	uint_t i, rc;
	uint32_t cpu;
	major_t major;
	char *name, *drv_name, *prop_val, *cptr;
	char prop_name[32];

	if (apic_intr_policy == INTR_LOWEST_PRIORITY)
		return (IRQ_UNBOUND);

	if (apic_nproc == 1)
		return (0);

	drv_name = NULL;
	rc = DDI_PROP_NOT_FOUND;
	major = (major_t)-1;
	if (dip != NULL) {
		name = ddi_get_name(dip);
		major = ddi_name_to_major(name);
		drv_name = ddi_major_to_name(major);
		instance = ddi_get_instance(dip);
		if (apic_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) {
			i = apic_min_device_irq;
			for (; i <= apic_max_device_irq; i++) {

				if ((i == irq) || (apic_irq_table[i] == NULL) ||
				    (apic_irq_table[i]->airq_mps_intr_index
				    == FREE_INDEX))
					continue;

				if ((apic_irq_table[i]->airq_major == major) &&
				    (!(apic_irq_table[i]->airq_cpu &
				    IRQ_USER_BOUND))) {

					cpu = apic_irq_table[i]->airq_cpu;

					cmn_err(CE_CONT,
					    "!%s: %s (%s) instance #%d "
					    "irq 0x%x vector 0x%x ioapic 0x%x "
					    "intin 0x%x is bound to cpu %d\n",
					    psm_name,
					    name, drv_name, instance, irq,
					    apic_irq_table[irq]->airq_vector,
					    ioapicid, intin, cpu);
					return (cpu);
				}
			}
		}
		/*
		 * Search for the "drvname"_intpt_bind_cpus property first.
		 * The syntax of the property is "a[,b,c,...]", where
		 * instance 0 binds to cpu a, instance 1 binds to cpu b,
		 * instance 2 binds to cpu c, and so on.
		 * ddi_getlongprop() will search /option first, then /.
		 * If "drvname"_intpt_bind_cpus doesn't exist, look for the
		 * plain intpt_bind_cpus property; the syntax is the same,
		 * and it applies to all devices whose "drvname"-specific
		 * property doesn't exist.
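		 *
		 * For example (hypothetical driver name, for illustration):
		 * if driver "mydrv" has mydrv_intpt_bind_cpus set to
		 * "1,3,5", instance 0 of mydrv binds to cpu 1, instance 1
		 * to cpu 3 and instance 2 to cpu 5.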
		 */
		(void) strcpy(prop_name, drv_name);
		(void) strcat(prop_name, "_intpt_bind_cpus");
		rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, 0, prop_name,
		    (caddr_t)&prop_val, &prop_len);
		if (rc != DDI_PROP_SUCCESS) {
			rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, 0,
			    "intpt_bind_cpus", (caddr_t)&prop_val, &prop_len);
		}
	}
	if (rc == DDI_PROP_SUCCESS) {
		for (i = count = 0; i < (prop_len - 1); i++)
			if (prop_val[i] == ',')
				count++;
		if (prop_val[i-1] != ',')
			count++;
		/*
		 * If the binding instances defined in the property are not
		 * enough for this instno, then reuse the pattern for the
		 * next instance until it reaches the requested instno.
		 */
		instno = instance % count;
		i = 0;
		cptr = prop_val;
		while (i < instno)
			if (*cptr++ == ',')
				i++;
		bind_cpu = stoi(&cptr);
		/* if the specified cpu is bogus, then default to cpu 0 */
		if (bind_cpu >= apic_nproc) {
			cmn_err(CE_WARN, "%s: %s=%s: CPU %d not present",
			    psm_name, prop_name, prop_val, bind_cpu);
			bind_cpu = 0;
		} else {
			/* indicate that we are bound at user request */
			bind_cpu |= IRQ_USER_BOUND;
		}
		/*
		 * Free prop_val only after the cmn_err() above may have
		 * referenced it; freeing it any earlier would leave the
		 * warning reading freed memory.
		 */
		kmem_free(prop_val, prop_len);
		/*
		 * No need to check apic_cpus[].aci_status; if the specified
		 * cpu is not up, then post_cpu_start will handle it.
		 */
	} else {
		bind_cpu = apic_next_bind_cpu++;
		if (bind_cpu >= apic_nproc) {
			apic_next_bind_cpu = 1;
			bind_cpu = 0;
		}
	}
	if (drv_name != NULL)
		cmn_err(CE_CONT, "!%s: %s (%s) instance %d irq 0x%x "
		    "vector 0x%x ioapic 0x%x intin 0x%x is bound to cpu %d\n",
		    psm_name, name, drv_name, instance, irq,
		    apic_irq_table[irq]->airq_vector, ioapicid, intin,
		    bind_cpu & ~IRQ_USER_BOUND);
	else
		cmn_err(CE_CONT, "!%s: irq 0x%x "
		    "vector 0x%x ioapic 0x%x intin 0x%x is bound to cpu %d\n",
		    psm_name, irq, apic_irq_table[irq]->airq_vector, ioapicid,
		    intin, bind_cpu & ~IRQ_USER_BOUND);

	return ((uint32_t)bind_cpu);
}

static struct apic_io_intr *
apic_find_io_intr_w_busid(int irqno, int busid)
{
	struct apic_io_intr *intrp;

	/*
	 * There can be more than one entry with the same source bus IRQ,
	 * but each is unique in its source bus id.
	 */
	intrp = apic_io_intrp;
	if (intrp != NULL) {
		while (intrp->intr_entry == APIC_IO_INTR_ENTRY) {
			if (intrp->intr_irq == irqno &&
			    intrp->intr_busid == busid &&
			    intrp->intr_type == IO_INTR_INT)
				return (intrp);
			intrp++;
		}
	}
	APIC_VERBOSE_IOAPIC((CE_NOTE, "Did not find io intr for irqno:"
	    "busid %x:%x\n", irqno, busid));
	return ((struct apic_io_intr *)NULL);
}


struct mps_bus_info {
	char	*bus_name;
	int	bus_id;
} bus_info_array[] = {
	"ISA ", BUS_ISA,
	"PCI ", BUS_PCI,
	"EISA ", BUS_EISA,
	"XPRESS", BUS_XPRESS,
	"PCMCIA", BUS_PCMCIA,
	"VL ", BUS_VL,
	"CBUS ", BUS_CBUS,
	"CBUSII", BUS_CBUSII,
	"FUTURE", BUS_FUTURE,
	"INTERN", BUS_INTERN,
	"MBI ", BUS_MBI,
	"MBII ", BUS_MBII,
	"MPI ", BUS_MPI,
	"MPSA ", BUS_MPSA,
	"NUBUS ", BUS_NUBUS,
	"TC ", BUS_TC,
	"VME ", BUS_VME,
	"PCI-E ", BUS_PCIE
};

static int
apic_find_bus_type(char *bus)
{
	int	i = 0;

	for (; i < sizeof
(bus_info_array)/sizeof (struct mps_bus_info); i++) 2560 if (strncmp(bus, bus_info_array[i].bus_name, 2561 strlen(bus_info_array[i].bus_name)) == 0) 2562 return (bus_info_array[i].bus_id); 2563 APIC_VERBOSE_IOAPIC((CE_WARN, "Did not find bus type for bus %s", bus)); 2564 return (0); 2565 } 2566 2567 static int 2568 apic_find_bus(int busid) 2569 { 2570 struct apic_bus *busp; 2571 2572 busp = apic_busp; 2573 while (busp->bus_entry == APIC_BUS_ENTRY) { 2574 if (busp->bus_id == busid) 2575 return (apic_find_bus_type((char *)&busp->bus_str1)); 2576 busp++; 2577 } 2578 APIC_VERBOSE_IOAPIC((CE_WARN, "Did not find bus for bus id %x", busid)); 2579 return (0); 2580 } 2581 2582 static int 2583 apic_find_bus_id(int bustype) 2584 { 2585 struct apic_bus *busp; 2586 2587 busp = apic_busp; 2588 while (busp->bus_entry == APIC_BUS_ENTRY) { 2589 if (apic_find_bus_type((char *)&busp->bus_str1) == bustype) 2590 return (busp->bus_id); 2591 busp++; 2592 } 2593 APIC_VERBOSE_IOAPIC((CE_WARN, "Did not find bus id for bustype %x", 2594 bustype)); 2595 return (-1); 2596 } 2597 2598 /* 2599 * Check if a particular irq need to be reserved for any io_intr 2600 */ 2601 static struct apic_io_intr * 2602 apic_find_io_intr(int irqno) 2603 { 2604 struct apic_io_intr *intrp; 2605 2606 intrp = apic_io_intrp; 2607 if (intrp != NULL) { 2608 while (intrp->intr_entry == APIC_IO_INTR_ENTRY) { 2609 if (intrp->intr_irq == irqno && 2610 intrp->intr_type == IO_INTR_INT) 2611 return (intrp); 2612 intrp++; 2613 } 2614 } 2615 return ((struct apic_io_intr *)NULL); 2616 } 2617 2618 /* 2619 * Check if the given ioapicindex intin combination has already been assigned 2620 * an irq. If so return irqno. Else -1 2621 */ 2622 static int 2623 apic_find_intin(uchar_t ioapic, uchar_t intin) 2624 { 2625 apic_irq_t *irqptr; 2626 int i; 2627 2628 /* find ioapic and intin in the apic_irq_table[] and return the index */ 2629 for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) { 2630 irqptr = apic_irq_table[i]; 2631 while (irqptr) { 2632 if ((irqptr->airq_mps_intr_index >= 0) && 2633 (irqptr->airq_intin_no == intin) && 2634 (irqptr->airq_ioapicindex == ioapic)) { 2635 APIC_VERBOSE_IOAPIC((CE_NOTE, "!Found irq " 2636 "entry for ioapic:intin %x:%x " 2637 "shared interrupts ?", ioapic, intin)); 2638 return (i); 2639 } 2640 irqptr = irqptr->airq_next; 2641 } 2642 } 2643 return (-1); 2644 } 2645 2646 int 2647 apic_allocate_irq(int irq) 2648 { 2649 int freeirq, i; 2650 2651 if ((freeirq = apic_find_free_irq(irq, (APIC_RESV_IRQ - 1))) == -1) 2652 if ((freeirq = apic_find_free_irq(APIC_FIRST_FREE_IRQ, 2653 (irq - 1))) == -1) { 2654 /* 2655 * if BIOS really defines every single irq in the mps 2656 * table, then don't worry about conflicting with 2657 * them, just use any free slot in apic_irq_table 2658 */ 2659 for (i = APIC_FIRST_FREE_IRQ; i < APIC_RESV_IRQ; i++) { 2660 if ((apic_irq_table[i] == NULL) || 2661 apic_irq_table[i]->airq_mps_intr_index == 2662 FREE_INDEX) { 2663 freeirq = i; 2664 break; 2665 } 2666 } 2667 if (freeirq == -1) { 2668 /* This shouldn't happen, but just in case */ 2669 cmn_err(CE_WARN, "%s: NO available IRQ", psm_name); 2670 return (-1); 2671 } 2672 } 2673 if (apic_irq_table[freeirq] == NULL) { 2674 apic_irq_table[freeirq] = 2675 kmem_zalloc(sizeof (apic_irq_t), KM_NOSLEEP); 2676 if (apic_irq_table[freeirq] == NULL) { 2677 cmn_err(CE_WARN, "%s: NO memory to allocate IRQ", 2678 psm_name); 2679 return (-1); 2680 } 2681 apic_irq_table[freeirq]->airq_mps_intr_index = FREE_INDEX; 2682 } 2683 return (freeirq); 2684 } 2685 2686 static 
int 2687 apic_find_free_irq(int start, int end) 2688 { 2689 int i; 2690 2691 for (i = start; i <= end; i++) 2692 /* Check if any I/O entry needs this IRQ */ 2693 if (apic_find_io_intr(i) == NULL) { 2694 /* Then see if it is free */ 2695 if ((apic_irq_table[i] == NULL) || 2696 (apic_irq_table[i]->airq_mps_intr_index == 2697 FREE_INDEX)) { 2698 return (i); 2699 } 2700 } 2701 return (-1); 2702 } 2703 2704 2705 /* 2706 * Mark vector as being in the process of being deleted. Interrupts 2707 * may still come in on some CPU. The moment an interrupt comes with 2708 * the new vector, we know we can free the old one. Called only from 2709 * addspl and delspl with interrupts disabled. Because an interrupt 2710 * can be shared, but no interrupt from either device may come in, 2711 * we also use a timeout mechanism, which we arbitrarily set to 2712 * apic_revector_timeout microseconds. 2713 */ 2714 static void 2715 apic_mark_vector(uchar_t oldvector, uchar_t newvector) 2716 { 2717 ulong_t iflag; 2718 2719 iflag = intr_clear(); 2720 lock_set(&apic_revector_lock); 2721 if (!apic_oldvec_to_newvec) { 2722 apic_oldvec_to_newvec = 2723 kmem_zalloc(sizeof (newvector) * APIC_MAX_VECTOR * 2, 2724 KM_NOSLEEP); 2725 2726 if (!apic_oldvec_to_newvec) { 2727 /* 2728 * This failure is not catastrophic. 2729 * But, the oldvec will never be freed. 2730 */ 2731 apic_error |= APIC_ERR_MARK_VECTOR_FAIL; 2732 lock_clear(&apic_revector_lock); 2733 intr_restore(iflag); 2734 return; 2735 } 2736 apic_newvec_to_oldvec = &apic_oldvec_to_newvec[APIC_MAX_VECTOR]; 2737 } 2738 2739 /* See if we already did this for drivers which do double addintrs */ 2740 if (apic_oldvec_to_newvec[oldvector] != newvector) { 2741 apic_oldvec_to_newvec[oldvector] = newvector; 2742 apic_newvec_to_oldvec[newvector] = oldvector; 2743 apic_revector_pending++; 2744 } 2745 lock_clear(&apic_revector_lock); 2746 intr_restore(iflag); 2747 (void) timeout(apic_xlate_vector_free_timeout_handler, 2748 (void *)(uintptr_t)oldvector, drv_usectohz(apic_revector_timeout)); 2749 } 2750 2751 /* 2752 * xlate_vector is called from intr_enter if revector_pending is set. 2753 * It will xlate it if needed and mark the old vector as free. 2754 */ 2755 uchar_t 2756 apic_xlate_vector(uchar_t vector) 2757 { 2758 uchar_t newvector, oldvector = 0; 2759 2760 lock_set(&apic_revector_lock); 2761 /* Do we really need to do this ? */ 2762 if (!apic_revector_pending) { 2763 lock_clear(&apic_revector_lock); 2764 return (vector); 2765 } 2766 if ((newvector = apic_oldvec_to_newvec[vector]) != 0) 2767 oldvector = vector; 2768 else { 2769 /* 2770 * The incoming vector is new . See if a stale entry is 2771 * remaining 2772 */ 2773 if ((oldvector = apic_newvec_to_oldvec[vector]) != 0) 2774 newvector = vector; 2775 } 2776 2777 if (oldvector) { 2778 apic_revector_pending--; 2779 apic_oldvec_to_newvec[oldvector] = 0; 2780 apic_newvec_to_oldvec[newvector] = 0; 2781 apic_free_vector(oldvector); 2782 lock_clear(&apic_revector_lock); 2783 /* There could have been more than one reprogramming! 
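		 * For example, if vector A was revectored to B, and B was
		 * later revectored to C, the recursive call below maps an
		 * incoming A all the way to C.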
*/ 2784 return (apic_xlate_vector(newvector)); 2785 } 2786 lock_clear(&apic_revector_lock); 2787 return (vector); 2788 } 2789 2790 void 2791 apic_xlate_vector_free_timeout_handler(void *arg) 2792 { 2793 ulong_t iflag; 2794 uchar_t oldvector, newvector; 2795 2796 oldvector = (uchar_t)(uintptr_t)arg; 2797 iflag = intr_clear(); 2798 lock_set(&apic_revector_lock); 2799 if ((newvector = apic_oldvec_to_newvec[oldvector]) != 0) { 2800 apic_free_vector(oldvector); 2801 apic_oldvec_to_newvec[oldvector] = 0; 2802 apic_newvec_to_oldvec[newvector] = 0; 2803 apic_revector_pending--; 2804 } 2805 2806 lock_clear(&apic_revector_lock); 2807 intr_restore(iflag); 2808 } 2809 2810 2811 /* 2812 * compute the polarity, trigger mode and vector for programming into 2813 * the I/O apic and record in airq_rdt_entry. 2814 */ 2815 static void 2816 apic_record_rdt_entry(apic_irq_t *irqptr, int irq) 2817 { 2818 int ioapicindex, bus_type, vector; 2819 short intr_index; 2820 uint_t level, po, io_po; 2821 struct apic_io_intr *iointrp; 2822 2823 intr_index = irqptr->airq_mps_intr_index; 2824 DDI_INTR_IMPLDBG((CE_CONT, "apic_record_rdt_entry: intr_index=%d " 2825 "irq = 0x%x dip = 0x%p vector = 0x%x\n", intr_index, irq, 2826 (void *)irqptr->airq_dip, irqptr->airq_vector)); 2827 2828 if (intr_index == RESERVE_INDEX) { 2829 apic_error |= APIC_ERR_INVALID_INDEX; 2830 return; 2831 } else if (APIC_IS_MSI_OR_MSIX_INDEX(intr_index)) { 2832 return; 2833 } 2834 2835 vector = irqptr->airq_vector; 2836 ioapicindex = irqptr->airq_ioapicindex; 2837 /* Assume edge triggered by default */ 2838 level = 0; 2839 /* Assume active high by default */ 2840 po = 0; 2841 2842 if (intr_index == DEFAULT_INDEX || intr_index == FREE_INDEX) { 2843 ASSERT(irq < 16); 2844 if (eisa_level_intr_mask & (1 << irq)) 2845 level = AV_LEVEL; 2846 if (intr_index == FREE_INDEX && apic_defconf == 0) 2847 apic_error |= APIC_ERR_INVALID_INDEX; 2848 } else if (intr_index == ACPI_INDEX) { 2849 bus_type = irqptr->airq_iflag.bustype; 2850 if (irqptr->airq_iflag.intr_el == INTR_EL_CONFORM) { 2851 if (bus_type == BUS_PCI) 2852 level = AV_LEVEL; 2853 } else 2854 level = (irqptr->airq_iflag.intr_el == INTR_EL_LEVEL) ? 2855 AV_LEVEL : 0; 2856 if (level && 2857 ((irqptr->airq_iflag.intr_po == INTR_PO_ACTIVE_LOW) || 2858 (irqptr->airq_iflag.intr_po == INTR_PO_CONFORM && 2859 bus_type == BUS_PCI))) 2860 po = AV_ACTIVE_LOW; 2861 } else { 2862 iointrp = apic_io_intrp + intr_index; 2863 bus_type = apic_find_bus(iointrp->intr_busid); 2864 if (iointrp->intr_el == INTR_EL_CONFORM) { 2865 if ((irq < 16) && (eisa_level_intr_mask & (1 << irq))) 2866 level = AV_LEVEL; 2867 else if (bus_type == BUS_PCI) 2868 level = AV_LEVEL; 2869 } else 2870 level = (iointrp->intr_el == INTR_EL_LEVEL) ? 2871 AV_LEVEL : 0; 2872 if (level && ((iointrp->intr_po == INTR_PO_ACTIVE_LOW) || 2873 (iointrp->intr_po == INTR_PO_CONFORM && 2874 bus_type == BUS_PCI))) 2875 po = AV_ACTIVE_LOW; 2876 } 2877 if (level) 2878 apic_level_intr[irq] = 1; 2879 /* 2880 * The 82489DX External APIC cannot do active low polarity interrupts. 2881 */ 2882 if (po && (apic_io_ver[ioapicindex] != IOAPIC_VER_82489DX)) 2883 io_po = po; 2884 else 2885 io_po = 0; 2886 2887 if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) 2888 printf("setio: ioapic=%x intin=%x level=%x po=%x vector=%x\n", 2889 ioapicindex, irqptr->airq_intin_no, level, io_po, vector); 2890 2891 irqptr->airq_rdt_entry = level|io_po|vector; 2892 } 2893 2894 /* 2895 * Bind interrupt corresponding to irq_ptr to bind_cpu. 
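 * A bind_cpu of IRQ_UNBOUND programs logical destination mode so that any
 * CPU may take the interrupt; the IRQ_USER_BOUND bit in bind_cpu marks a
 * binding made at the user's request.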
2896 * Must be called with interrupts disabled and apic_ioapic_lock held 2897 */ 2898 int 2899 apic_rebind(apic_irq_t *irq_ptr, int bind_cpu, 2900 struct ioapic_reprogram_data *drep) 2901 { 2902 int ioapicindex, intin_no; 2903 uint32_t airq_temp_cpu; 2904 apic_cpus_info_t *cpu_infop; 2905 uint32_t rdt_entry; 2906 int which_irq; 2907 ioapic_rdt_t irdt; 2908 2909 which_irq = apic_vector_to_irq[irq_ptr->airq_vector]; 2910 2911 intin_no = irq_ptr->airq_intin_no; 2912 ioapicindex = irq_ptr->airq_ioapicindex; 2913 airq_temp_cpu = irq_ptr->airq_temp_cpu; 2914 if (airq_temp_cpu != IRQ_UNINIT && airq_temp_cpu != IRQ_UNBOUND) { 2915 if (airq_temp_cpu & IRQ_USER_BOUND) 2916 /* Mask off high bit so it can be used as array index */ 2917 airq_temp_cpu &= ~IRQ_USER_BOUND; 2918 2919 ASSERT(airq_temp_cpu < apic_nproc); 2920 } 2921 2922 /* 2923 * Can't bind to a CPU that's not accepting interrupts: 2924 */ 2925 cpu_infop = &apic_cpus[bind_cpu & ~IRQ_USER_BOUND]; 2926 if (!(cpu_infop->aci_status & APIC_CPU_INTR_ENABLE)) 2927 return (1); 2928 2929 /* 2930 * If we are about to change the interrupt vector for this interrupt, 2931 * and this interrupt is level-triggered, attached to an IOAPIC, 2932 * has been delivered to a CPU and that CPU has not handled it 2933 * yet, we cannot reprogram the IOAPIC now. 2934 */ 2935 if (!APIC_IS_MSI_OR_MSIX_INDEX(irq_ptr->airq_mps_intr_index)) { 2936 2937 rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapicindex, 2938 intin_no); 2939 2940 if ((irq_ptr->airq_vector != RDT_VECTOR(rdt_entry)) && 2941 apic_check_stuck_interrupt(irq_ptr, airq_temp_cpu, 2942 bind_cpu, ioapicindex, intin_no, which_irq, drep) != 0) { 2943 2944 return (0); 2945 } 2946 2947 /* 2948 * NOTE: We do not unmask the RDT here, as an interrupt MAY 2949 * still come in before we have a chance to reprogram it below. 2950 * The reprogramming below will simultaneously change and 2951 * unmask the RDT entry. 
2952 */ 2953 2954 if ((uint32_t)bind_cpu == IRQ_UNBOUND) { 2955 irdt.ir_lo = AV_LDEST | AV_LOPRI | 2956 irq_ptr->airq_rdt_entry; 2957 #if !defined(__xpv) 2958 irdt.ir_hi = AV_TOALL >> APIC_ID_BIT_OFFSET; 2959 2960 apic_vt_ops->apic_intrr_alloc_entry(irq_ptr); 2961 apic_vt_ops->apic_intrr_map_entry( 2962 irq_ptr, (void *)&irdt); 2963 apic_vt_ops->apic_intrr_record_rdt(irq_ptr, &irdt); 2964 2965 /* Write the RDT entry -- no specific CPU binding */ 2966 WRITE_IOAPIC_RDT_ENTRY_HIGH_DWORD(ioapicindex, intin_no, 2967 irdt.ir_hi | AV_TOALL); 2968 #else 2969 WRITE_IOAPIC_RDT_ENTRY_HIGH_DWORD(ioapicindex, intin_no, 2970 AV_TOALL); 2971 #endif 2972 if (airq_temp_cpu != IRQ_UNINIT && airq_temp_cpu != 2973 IRQ_UNBOUND) 2974 apic_cpus[airq_temp_cpu].aci_temp_bound--; 2975 2976 /* 2977 * Write the vector, trigger, and polarity portion of 2978 * the RDT 2979 */ 2980 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapicindex, intin_no, 2981 irdt.ir_lo); 2982 2983 irq_ptr->airq_temp_cpu = IRQ_UNBOUND; 2984 return (0); 2985 } 2986 } 2987 2988 if (bind_cpu & IRQ_USER_BOUND) { 2989 cpu_infop->aci_bound++; 2990 } else { 2991 cpu_infop->aci_temp_bound++; 2992 } 2993 ASSERT((bind_cpu & ~IRQ_USER_BOUND) < apic_nproc); 2994 2995 if ((airq_temp_cpu != IRQ_UNBOUND) && (airq_temp_cpu != IRQ_UNINIT)) { 2996 apic_cpus[airq_temp_cpu].aci_temp_bound--; 2997 } 2998 if (!APIC_IS_MSI_OR_MSIX_INDEX(irq_ptr->airq_mps_intr_index)) { 2999 3000 irdt.ir_lo = AV_PDEST | AV_FIXED | irq_ptr->airq_rdt_entry; 3001 irdt.ir_hi = cpu_infop->aci_local_id; 3002 3003 #if !defined(__xpv) 3004 apic_vt_ops->apic_intrr_alloc_entry(irq_ptr); 3005 apic_vt_ops->apic_intrr_map_entry(irq_ptr, (void *)&irdt); 3006 apic_vt_ops->apic_intrr_record_rdt(irq_ptr, &irdt); 3007 3008 /* Write the RDT entry -- bind to a specific CPU: */ 3009 WRITE_IOAPIC_RDT_ENTRY_HIGH_DWORD(ioapicindex, intin_no, 3010 irdt.ir_hi); 3011 #else 3012 /* Write the RDT entry -- bind to a specific CPU: */ 3013 WRITE_IOAPIC_RDT_ENTRY_HIGH_DWORD(ioapicindex, intin_no, 3014 irdt.ir_hi << APIC_ID_BIT_OFFSET); 3015 #endif 3016 /* Write the vector, trigger, and polarity portion of the RDT */ 3017 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapicindex, intin_no, 3018 irdt.ir_lo); 3019 3020 } else { 3021 int type = (irq_ptr->airq_mps_intr_index == MSI_INDEX) ? 
3022 DDI_INTR_TYPE_MSI : DDI_INTR_TYPE_MSIX; 3023 if (type == DDI_INTR_TYPE_MSI) { 3024 if (irq_ptr->airq_ioapicindex == 3025 irq_ptr->airq_origirq) { 3026 /* first one */ 3027 DDI_INTR_IMPLDBG((CE_CONT, "apic_rebind: call " 3028 "apic_pci_msi_enable_vector\n")); 3029 apic_pci_msi_enable_vector(irq_ptr, 3030 type, which_irq, irq_ptr->airq_vector, 3031 irq_ptr->airq_intin_no, 3032 cpu_infop->aci_local_id); 3033 } 3034 if ((irq_ptr->airq_ioapicindex + 3035 irq_ptr->airq_intin_no - 1) == 3036 irq_ptr->airq_origirq) { /* last one */ 3037 DDI_INTR_IMPLDBG((CE_CONT, "apic_rebind: call " 3038 "apic_pci_msi_enable_mode\n")); 3039 apic_pci_msi_enable_mode(irq_ptr->airq_dip, 3040 type, which_irq); 3041 } 3042 } else { /* MSI-X */ 3043 apic_pci_msi_enable_vector(irq_ptr, type, 3044 irq_ptr->airq_origirq, irq_ptr->airq_vector, 1, 3045 cpu_infop->aci_local_id); 3046 apic_pci_msi_enable_mode(irq_ptr->airq_dip, type, 3047 irq_ptr->airq_origirq); 3048 } 3049 } 3050 irq_ptr->airq_temp_cpu = (uint32_t)bind_cpu; 3051 apic_redist_cpu_skip &= ~(1 << (bind_cpu & ~IRQ_USER_BOUND)); 3052 return (0); 3053 } 3054 3055 static void 3056 apic_last_ditch_clear_remote_irr(int ioapic_ix, int intin_no) 3057 { 3058 if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix, intin_no) 3059 & AV_REMOTE_IRR) != 0) { 3060 /* 3061 * Trying to clear the bit through normal 3062 * channels has failed. So as a last-ditch 3063 * effort, try to set the trigger mode to 3064 * edge, then to level. This has been 3065 * observed to work on many systems. 3066 */ 3067 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix, 3068 intin_no, 3069 READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix, 3070 intin_no) & ~AV_LEVEL); 3071 3072 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix, 3073 intin_no, 3074 READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix, 3075 intin_no) | AV_LEVEL); 3076 3077 /* 3078 * If the bit's STILL set, this interrupt may 3079 * be hosed. 3080 */ 3081 if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix, 3082 intin_no) & AV_REMOTE_IRR) != 0) { 3083 3084 prom_printf("%s: Remote IRR still " 3085 "not clear for IOAPIC %d intin %d.\n" 3086 "\tInterrupts to this pin may cease " 3087 "functioning.\n", psm_name, ioapic_ix, 3088 intin_no); 3089 #ifdef DEBUG 3090 apic_last_ditch_reprogram_failures++; 3091 #endif 3092 } 3093 } 3094 } 3095 3096 /* 3097 * This function is protected by apic_ioapic_lock coupled with the 3098 * fact that interrupts are disabled. 3099 */ 3100 static void 3101 delete_defer_repro_ent(int which_irq) 3102 { 3103 ASSERT(which_irq >= 0); 3104 ASSERT(which_irq <= 255); 3105 3106 if (apic_reprogram_info[which_irq].done) 3107 return; 3108 3109 apic_reprogram_info[which_irq].done = B_TRUE; 3110 3111 #ifdef DEBUG 3112 apic_defer_repro_total_retries += 3113 apic_reprogram_info[which_irq].tries; 3114 3115 apic_defer_repro_successes++; 3116 #endif 3117 3118 if (--apic_reprogram_outstanding == 0) { 3119 3120 setlvlx = psm_intr_exit_fn(); 3121 } 3122 } 3123 3124 3125 /* 3126 * Interrupts must be disabled during this function to prevent 3127 * self-deadlock. Interrupts are disabled because this function 3128 * is called from apic_check_stuck_interrupt(), which is called 3129 * from apic_rebind(), which requires its caller to disable interrupts. 
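 *
 * A sketch of the expected call path (illustrative only):
 *
 *	iflag = intr_clear();
 *	lock_set(&apic_ioapic_lock);
 *	(void) apic_rebind(irq_ptr, bind_cpu, drep);
 *		-> apic_check_stuck_interrupt()
 *			-> add_defer_repro_ent()
 *	lock_clear(&apic_ioapic_lock);
 *	intr_restore(iflag);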
3130 */ 3131 static void 3132 add_defer_repro_ent(apic_irq_t *irq_ptr, int which_irq, int new_bind_cpu) 3133 { 3134 ASSERT(which_irq >= 0); 3135 ASSERT(which_irq <= 255); 3136 3137 /* 3138 * On the off-chance that there's already a deferred 3139 * reprogramming on this irq, check, and if so, just update the 3140 * CPU and irq pointer to which the interrupt is targeted, then return. 3141 */ 3142 if (!apic_reprogram_info[which_irq].done) { 3143 apic_reprogram_info[which_irq].bindcpu = new_bind_cpu; 3144 apic_reprogram_info[which_irq].irqp = irq_ptr; 3145 return; 3146 } 3147 3148 apic_reprogram_info[which_irq].irqp = irq_ptr; 3149 apic_reprogram_info[which_irq].bindcpu = new_bind_cpu; 3150 apic_reprogram_info[which_irq].tries = 0; 3151 /* 3152 * This must be the last thing set, since we're not 3153 * grabbing any locks, apic_try_deferred_reprogram() will 3154 * make its decision about using this entry iff done 3155 * is false. 3156 */ 3157 apic_reprogram_info[which_irq].done = B_FALSE; 3158 3159 /* 3160 * If there were previously no deferred reprogrammings, change 3161 * setlvlx to call apic_try_deferred_reprogram() 3162 */ 3163 if (++apic_reprogram_outstanding == 1) { 3164 3165 setlvlx = apic_try_deferred_reprogram; 3166 } 3167 } 3168 3169 static void 3170 apic_try_deferred_reprogram(int prev_ipl, int irq) 3171 { 3172 int reproirq; 3173 ulong_t iflag; 3174 struct ioapic_reprogram_data *drep; 3175 3176 (*psm_intr_exit_fn())(prev_ipl, irq); 3177 3178 if (!lock_try(&apic_defer_reprogram_lock)) { 3179 return; 3180 } 3181 3182 /* 3183 * Acquire the apic_ioapic_lock so that any other operations that 3184 * may affect the apic_reprogram_info state are serialized. 3185 * It's still possible for the last deferred reprogramming to clear 3186 * between the time we entered this function and the time we get to 3187 * the for loop below. In that case, *setlvlx will have been set 3188 * back to *_intr_exit and drep will be NULL. (There's no way to 3189 * stop that from happening -- we would need to grab a lock before 3190 * calling *setlvlx, which is neither realistic nor prudent). 3191 */ 3192 iflag = intr_clear(); 3193 lock_set(&apic_ioapic_lock); 3194 3195 /* 3196 * For each deferred RDT entry, try to reprogram it now. Note that 3197 * there is no lock acquisition to read apic_reprogram_info because 3198 * '.done' is set only after the other fields in the structure are set. 3199 */ 3200 3201 drep = NULL; 3202 for (reproirq = 0; reproirq <= APIC_MAX_VECTOR; reproirq++) { 3203 if (apic_reprogram_info[reproirq].done == B_FALSE) { 3204 drep = &apic_reprogram_info[reproirq]; 3205 break; 3206 } 3207 } 3208 3209 /* 3210 * Either we found a deferred action to perform, or 3211 * we entered this function spuriously, after *setlvlx 3212 * was restored to point to *_intr_exit. Any other 3213 * permutation is invalid. 3214 */ 3215 ASSERT(drep != NULL || *setlvlx == psm_intr_exit_fn()); 3216 3217 /* 3218 * Though we can't really do anything about errors 3219 * at this point, keep track of them for reporting. 3220 * Note that it is very possible for apic_setup_io_intr 3221 * to re-register this very timeout if the Remote IRR bit 3222 * has not yet cleared. 
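 * (apic_setup_io_intr() goes through apic_rebind(), which re-adds the
 * deferred entry via apic_check_stuck_interrupt() while the bit is set.)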
3223 */ 3224 3225 #ifdef DEBUG 3226 if (drep != NULL) { 3227 if (apic_setup_io_intr(drep, reproirq, B_TRUE) != 0) { 3228 apic_deferred_setup_failures++; 3229 } 3230 } else { 3231 apic_deferred_spurious_enters++; 3232 } 3233 #else 3234 if (drep != NULL) 3235 (void) apic_setup_io_intr(drep, reproirq, B_TRUE); 3236 #endif 3237 3238 lock_clear(&apic_ioapic_lock); 3239 intr_restore(iflag); 3240 3241 lock_clear(&apic_defer_reprogram_lock); 3242 } 3243 3244 static void 3245 apic_ioapic_wait_pending_clear(int ioapic_ix, int intin_no) 3246 { 3247 int waited; 3248 3249 /* 3250 * Wait for the delivery pending bit to clear. 3251 */ 3252 if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix, intin_no) & 3253 (AV_LEVEL|AV_PENDING)) == (AV_LEVEL|AV_PENDING)) { 3254 3255 /* 3256 * If we're still waiting on the delivery of this interrupt, 3257 * continue to wait here until it is delivered (this should be 3258 * a very small amount of time, but include a timeout just in 3259 * case). 3260 */ 3261 for (waited = 0; waited < apic_max_reps_clear_pending; 3262 waited++) { 3263 if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix, 3264 intin_no) & AV_PENDING) == 0) { 3265 break; 3266 } 3267 } 3268 } 3269 } 3270 3271 3272 /* 3273 * Checks to see if the IOAPIC interrupt entry specified has its Remote IRR 3274 * bit set. Calls functions that modify the function that setlvlx points to, 3275 * so that the reprogramming can be retried very shortly. 3276 * 3277 * This function will mask the RDT entry if the interrupt is level-triggered. 3278 * (The caller is responsible for unmasking the RDT entry.) 3279 * 3280 * Returns non-zero if the caller should defer IOAPIC reprogramming. 3281 */ 3282 static int 3283 apic_check_stuck_interrupt(apic_irq_t *irq_ptr, int old_bind_cpu, 3284 int new_bind_cpu, int ioapic_ix, int intin_no, int which_irq, 3285 struct ioapic_reprogram_data *drep) 3286 { 3287 int32_t rdt_entry; 3288 int waited; 3289 int reps = 0; 3290 3291 /* 3292 * Wait for the delivery pending bit to clear. 3293 */ 3294 do { 3295 ++reps; 3296 3297 apic_ioapic_wait_pending_clear(ioapic_ix, intin_no); 3298 3299 /* 3300 * Mask the RDT entry, but only if it's a level-triggered 3301 * interrupt 3302 */ 3303 rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix, 3304 intin_no); 3305 if ((rdt_entry & (AV_LEVEL|AV_MASK)) == AV_LEVEL) { 3306 3307 /* Mask it */ 3308 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix, intin_no, 3309 AV_MASK | rdt_entry); 3310 } 3311 3312 if ((rdt_entry & AV_LEVEL) == AV_LEVEL) { 3313 /* 3314 * If there was a race and an interrupt was injected 3315 * just before we masked, check for that case here. 3316 * Then, unmask the RDT entry and try again. If we're 3317 * on our last try, don't unmask (because we want the 3318 * RDT entry to remain masked for the rest of the 3319 * function). 3320 */ 3321 rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix, 3322 intin_no); 3323 if ((rdt_entry & AV_PENDING) && 3324 (reps < apic_max_reps_clear_pending)) { 3325 /* Unmask it */ 3326 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix, 3327 intin_no, rdt_entry & ~AV_MASK); 3328 } 3329 } 3330 3331 } while ((rdt_entry & AV_PENDING) && 3332 (reps < apic_max_reps_clear_pending)); 3333 3334 #ifdef DEBUG 3335 if (rdt_entry & AV_PENDING) 3336 apic_intr_deliver_timeouts++; 3337 #endif 3338 3339 /* 3340 * If the remote IRR bit is set, then the interrupt has been sent 3341 * to a CPU for processing. We have no choice but to wait for 3342 * that CPU to process the interrupt, at which point the remote IRR 3343 * bit will be cleared. 
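 * (For level-triggered interrupts the I/O APIC sets Remote IRR on
 * delivery and clears it when the local APIC signals EOI.)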
	 */
	if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix, intin_no) &
	    (AV_LEVEL|AV_REMOTE_IRR)) == (AV_LEVEL|AV_REMOTE_IRR)) {

		/*
		 * If the CPU that this RDT is bound to is NOT the current
		 * CPU, wait until that CPU handles the interrupt and ACKs
		 * it. If this interrupt is not bound to any CPU (that is,
		 * if it's bound to the logical destination of "anyone"), it
		 * may have been delivered to the current CPU so handle that
		 * case by deferring the reprogramming (below).
		 */
		if ((old_bind_cpu != IRQ_UNBOUND) &&
		    (old_bind_cpu != IRQ_UNINIT) &&
		    (old_bind_cpu != psm_get_cpu_id())) {
			for (waited = 0; waited < apic_max_reps_clear_pending;
			    waited++) {
				if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
				    intin_no) & AV_REMOTE_IRR) == 0) {

					delete_defer_repro_ent(which_irq);

					/* Remote IRR has cleared! */
					return (0);
				}
			}
		}

		/*
		 * If we waited and the Remote IRR bit is still not cleared,
		 * AND if we've retried the reprogramming
		 * APIC_REPROGRAM_MAX_TRIES times for this interrupt, try the
		 * last-ditch workaround:
		 */
		if (drep && drep->tries >= APIC_REPROGRAM_MAX_TRIES) {

			apic_last_ditch_clear_remote_irr(ioapic_ix, intin_no);

			/* Mark this one as reprogrammed: */
			delete_defer_repro_ent(which_irq);

			return (0);
		} else {
#ifdef DEBUG
			apic_intr_deferrals++;
#endif

			/*
			 * If waiting for the Remote IRR bit (above) didn't
			 * allow it to clear, defer the reprogramming.
			 * Add a new deferred-programming entry if the
			 * caller passed a NULL one (and update the existing
			 * one in case anything changed).
			 */
			add_defer_repro_ent(irq_ptr, which_irq, new_bind_cpu);
			if (drep)
				drep->tries++;

			/* Inform caller to defer IOAPIC programming: */
			return (1);
		}

	}

	/* Remote IRR is clear */
	delete_defer_repro_ent(which_irq);

	return (0);
}

/*
 * Called to migrate all interrupts at an irq to another cpu.
 * Must be called with interrupts disabled and apic_ioapic_lock held.
 */
int
apic_rebind_all(apic_irq_t *irq_ptr, int bind_cpu)
{
	apic_irq_t	*irqptr = irq_ptr;
	int		retval = 0;

	while (irqptr) {
		if (irqptr->airq_temp_cpu != IRQ_UNINIT)
			retval |= apic_rebind(irqptr, bind_cpu, NULL);
		irqptr = irqptr->airq_next;
	}

	return (retval);
}

/*
 * apic_intr_redistribute does all the messy computations for identifying
 * which interrupt to move to which CPU. Currently we do just one interrupt
 * at a time. This reduces the time we spend doing all this within the
 * clock interrupt. When it is done in idle, we could do more than one.
 * First we find the most busy and the most free CPU (time in ISR only),
 * skipping those CPUs that have been identified as ineligible (cpu_skip).
 * Then we look for IRQs which are closest to the difference between the
 * most busy CPU and the average ISR load. We try to find one whose load
 * is less than the difference. If none exists, then we choose one larger
 * than the difference, provided it does not make the most idle CPU worse
 * than the most busy one. In the end, we clear all the busy fields for
 * CPUs. For IRQs, they are cleared as they are scanned.
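 *
 * For illustration (numbers hypothetical): with busiest-CPU load 40,
 * average load 25 and most-free load 5, diff is 15. An IRQ on the
 * busiest CPU with busy 12 (< diff) is preferred; failing that, an IRQ
 * with busy 18 still qualifies because 18 < diff + average - min_free
 * = 35, so the most free CPU cannot end up busier than the busiest
 * one was.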
 */
void
apic_intr_redistribute()
{
	int busiest_cpu, most_free_cpu;
	int cpu_free, cpu_busy, max_busy, min_busy;
	int min_free, diff;
	int average_busy, cpus_online;
	int i, busy;
	ulong_t iflag;
	apic_cpus_info_t *cpu_infop;
	apic_irq_t *min_busy_irq = NULL;
	apic_irq_t *max_busy_irq = NULL;

	busiest_cpu = most_free_cpu = -1;
	cpu_free = cpu_busy = max_busy = average_busy = 0;
	min_free = apic_sample_factor_redistribution;
	cpus_online = 0;
	/*
	 * Below we will check for CPU_INTR_ENABLE, bound, temp_bound and
	 * temp_cpu without holding ioapic_lock. That is OK as we are just
	 * doing statistical sampling anyway, and any inaccuracy now will
	 * get corrected next time. The call to rebind, which actually
	 * changes things, will make sure we are consistent.
	 */
	for (i = 0; i < apic_nproc; i++) {
		if (!(apic_redist_cpu_skip & (1 << i)) &&
		    (apic_cpus[i].aci_status & APIC_CPU_INTR_ENABLE)) {

			cpu_infop = &apic_cpus[i];
			/*
			 * If no unbound interrupts or only 1 total on this
			 * CPU, skip
			 */
			if (!cpu_infop->aci_temp_bound ||
			    (cpu_infop->aci_bound + cpu_infop->aci_temp_bound)
			    == 1) {
				apic_redist_cpu_skip |= 1 << i;
				continue;
			}

			busy = cpu_infop->aci_busy;
			average_busy += busy;
			cpus_online++;
			if (max_busy < busy) {
				max_busy = busy;
				busiest_cpu = i;
			}
			if (min_free > busy) {
				min_free = busy;
				most_free_cpu = i;
			}
			if (busy > apic_int_busy_mark) {
				cpu_busy |= 1 << i;
			} else {
				if (busy < apic_int_free_mark)
					cpu_free |= 1 << i;
			}
		}
	}
	if ((cpu_busy && cpu_free) ||
	    (max_busy >= (min_free + apic_diff_for_redistribution))) {

		apic_num_imbalance++;
#ifdef DEBUG
		if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) {
			prom_printf(
			    "redistribute busy=%x free=%x max=%x min=%x",
			    cpu_busy, cpu_free, max_busy, min_free);
		}
#endif /* DEBUG */

		average_busy /= cpus_online;

		diff = max_busy - average_busy;
		min_busy = max_busy; /* start with the max possible value */
		max_busy = 0;
		min_busy_irq = max_busy_irq = NULL;
		i = apic_min_device_irq;
		for (; i <= apic_max_device_irq; i++) {
			apic_irq_t *irq_ptr;
			/* Change to a linked list per CPU? */
			if ((irq_ptr = apic_irq_table[i]) == NULL)
				continue;
			/*
			 * Check irq_busy to decide which one to move,
			 * and zero it for the next round.
			 */
			if ((irq_ptr->airq_temp_cpu == busiest_cpu) &&
			    irq_ptr->airq_busy) {
				if (irq_ptr->airq_busy < diff) {
					/*
					 * Check for the least busy CPU,
					 * best fit or what?
					 */
					if (max_busy < irq_ptr->airq_busy) {
						/*
						 * Most busy within the
						 * required differential
						 */
						max_busy = irq_ptr->airq_busy;
						max_busy_irq = irq_ptr;
					}
				} else {
					if (min_busy > irq_ptr->airq_busy) {
						/*
						 * Least busy, but more than
						 * the required diff
						 */
						if (min_busy <
						    (diff + average_busy -
						    min_free)) {
							/*
							 * Make sure the new
							 * cpu will not end
							 * up worse off.
							 */
							min_busy =
							    irq_ptr->airq_busy;

							min_busy_irq = irq_ptr;
						}
					}
				}
			}
			irq_ptr->airq_busy = 0;
		}

		if (max_busy_irq != NULL) {
#ifdef DEBUG
			if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) {
				prom_printf("rebinding %x to %x",
				    max_busy_irq->airq_vector, most_free_cpu);
			}
#endif /* DEBUG */
			iflag = intr_clear();
			if (lock_try(&apic_ioapic_lock)) {
				if (apic_rebind_all(max_busy_irq,
				    most_free_cpu) == 0) {
					/* Make the change permanent */
					max_busy_irq->airq_cpu =
					    (uint32_t)most_free_cpu;
				}
				lock_clear(&apic_ioapic_lock);
			}
			intr_restore(iflag);

		} else if (min_busy_irq != NULL) {
#ifdef DEBUG
			if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) {
				prom_printf("rebinding %x to %x",
				    min_busy_irq->airq_vector, most_free_cpu);
			}
#endif /* DEBUG */

			iflag = intr_clear();
			if (lock_try(&apic_ioapic_lock)) {
				if (apic_rebind_all(min_busy_irq,
				    most_free_cpu) == 0) {
					/* Make the change permanent */
					min_busy_irq->airq_cpu =
					    (uint32_t)most_free_cpu;
				}
				lock_clear(&apic_ioapic_lock);
			}
			intr_restore(iflag);

		} else {
			if (cpu_busy != (1 << busiest_cpu)) {
				apic_redist_cpu_skip |= 1 << busiest_cpu;
				/*
				 * We leave cpu_skip set so that next time we
				 * can choose another cpu.
				 */
			}
		}
		apic_num_rebind++;
	} else {
		/*
		 * Found nothing. Could be that we skipped over valid CPUs
		 * or we have balanced everything. If we had a variable
		 * ticks_for_redistribution, it could be increased here.
		 * apic_int_busy, int_free etc. would also need to be
		 * changed.
3628 */ 3629 if (apic_redist_cpu_skip) 3630 apic_redist_cpu_skip = 0; 3631 } 3632 for (i = 0; i < apic_nproc; i++) { 3633 apic_cpus[i].aci_busy = 0; 3634 } 3635 } 3636 3637 void 3638 apic_cleanup_busy() 3639 { 3640 int i; 3641 apic_irq_t *irq_ptr; 3642 3643 for (i = 0; i < apic_nproc; i++) { 3644 apic_cpus[i].aci_busy = 0; 3645 } 3646 3647 for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) { 3648 if ((irq_ptr = apic_irq_table[i]) != NULL) 3649 irq_ptr->airq_busy = 0; 3650 } 3651 } 3652 3653 3654 static int 3655 apic_acpi_translate_pci_irq(dev_info_t *dip, int busid, int devid, 3656 int ipin, int *pci_irqp, iflag_t *intr_flagp) 3657 { 3658 3659 int status; 3660 acpi_psm_lnk_t acpipsmlnk; 3661 3662 if ((status = acpi_get_irq_cache_ent(busid, devid, ipin, pci_irqp, 3663 intr_flagp)) == ACPI_PSM_SUCCESS) { 3664 APIC_VERBOSE_IRQ((CE_CONT, "!%s: Found irqno %d " 3665 "from cache for device %s, instance #%d\n", psm_name, 3666 *pci_irqp, ddi_get_name(dip), ddi_get_instance(dip))); 3667 return (status); 3668 } 3669 3670 bzero(&acpipsmlnk, sizeof (acpi_psm_lnk_t)); 3671 3672 if ((status = acpi_translate_pci_irq(dip, ipin, pci_irqp, intr_flagp, 3673 &acpipsmlnk)) == ACPI_PSM_FAILURE) { 3674 APIC_VERBOSE_IRQ((CE_WARN, "%s: " 3675 " acpi_translate_pci_irq failed for device %s, instance" 3676 " #%d", psm_name, ddi_get_name(dip), 3677 ddi_get_instance(dip))); 3678 return (status); 3679 } 3680 3681 if (status == ACPI_PSM_PARTIAL && acpipsmlnk.lnkobj != NULL) { 3682 status = apic_acpi_irq_configure(&acpipsmlnk, dip, pci_irqp, 3683 intr_flagp); 3684 if (status != ACPI_PSM_SUCCESS) { 3685 status = acpi_get_current_irq_resource(&acpipsmlnk, 3686 pci_irqp, intr_flagp); 3687 } 3688 } 3689 3690 if (status == ACPI_PSM_SUCCESS) { 3691 acpi_new_irq_cache_ent(busid, devid, ipin, *pci_irqp, 3692 intr_flagp, &acpipsmlnk); 3693 3694 APIC_VERBOSE_IRQ((CE_CONT, "%s: [ACPI] " 3695 "new irq %d for device %s, instance #%d\n", psm_name, 3696 *pci_irqp, ddi_get_name(dip), ddi_get_instance(dip))); 3697 } 3698 3699 return (status); 3700 } 3701 3702 /* 3703 * Adds an entry to the irq list passed in, and returns the new list. 3704 * Entries are added in priority order (lower numerical priorities are 3705 * placed closer to the head of the list) 3706 */ 3707 static prs_irq_list_t * 3708 acpi_insert_prs_irq_ent(prs_irq_list_t *listp, int priority, int irq, 3709 iflag_t *iflagp, acpi_prs_private_t *prsprvp) 3710 { 3711 struct prs_irq_list_ent *newent, *prevp = NULL, *origlistp; 3712 3713 newent = kmem_zalloc(sizeof (struct prs_irq_list_ent), KM_SLEEP); 3714 3715 newent->list_prio = priority; 3716 newent->irq = irq; 3717 newent->intrflags = *iflagp; 3718 newent->prsprv = *prsprvp; 3719 /* ->next is NULL from kmem_zalloc */ 3720 3721 /* 3722 * New list -- return the new entry as the list. 3723 */ 3724 if (listp == NULL) 3725 return (newent); 3726 3727 /* 3728 * Save original list pointer for return (since we're not modifying 3729 * the head) 3730 */ 3731 origlistp = listp; 3732 3733 /* 3734 * Insertion sort, with entries with identical keys stored AFTER 3735 * existing entries (the less-than-or-equal test of priority does 3736 * this for us). 
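 * For example, inserting priority 2 into a list with priorities
 * (0, 2, 5) yields (0, 2, 2', 5), with the new entry placed after the
 * existing priority-2 entry.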
3737 */ 3738 while (listp != NULL && listp->list_prio <= priority) { 3739 prevp = listp; 3740 listp = listp->next; 3741 } 3742 3743 newent->next = listp; 3744 3745 if (prevp == NULL) { /* Add at head of list (newent is the new head) */ 3746 return (newent); 3747 } else { 3748 prevp->next = newent; 3749 return (origlistp); 3750 } 3751 } 3752 3753 /* 3754 * Frees the list passed in, deallocating all memory and leaving *listpp 3755 * set to NULL. 3756 */ 3757 static void 3758 acpi_destroy_prs_irq_list(prs_irq_list_t **listpp) 3759 { 3760 struct prs_irq_list_ent *nextp; 3761 3762 ASSERT(listpp != NULL); 3763 3764 while (*listpp != NULL) { 3765 nextp = (*listpp)->next; 3766 kmem_free(*listpp, sizeof (struct prs_irq_list_ent)); 3767 *listpp = nextp; 3768 } 3769 } 3770 3771 /* 3772 * apic_choose_irqs_from_prs returns a list of irqs selected from the list of 3773 * irqs returned by the link device's _PRS method. The irqs are chosen 3774 * to minimize contention in situations where the interrupt link device 3775 * can be programmed to steer interrupts to different interrupt controller 3776 * inputs (some of which may already be in use). The list is sorted in order 3777 * of irqs to use, with the highest priority given to interrupt controller 3778 * inputs that are not shared. When an interrupt controller input 3779 * must be shared, apic_choose_irqs_from_prs adds the possible irqs to the 3780 * returned list in the order that minimizes sharing (thereby ensuring lowest 3781 * possible latency from interrupt trigger time to ISR execution time). 3782 */ 3783 static prs_irq_list_t * 3784 apic_choose_irqs_from_prs(acpi_irqlist_t *irqlistent, dev_info_t *dip, 3785 int crs_irq) 3786 { 3787 int32_t irq; 3788 int i; 3789 prs_irq_list_t *prsirqlistp = NULL; 3790 iflag_t iflags; 3791 3792 while (irqlistent != NULL) { 3793 irqlistent->intr_flags.bustype = BUS_PCI; 3794 3795 for (i = 0; i < irqlistent->num_irqs; i++) { 3796 3797 irq = irqlistent->irqs[i]; 3798 3799 if (irq <= 0) { 3800 /* invalid irq number */ 3801 continue; 3802 } 3803 3804 if ((irq < 16) && (apic_reserved_irqlist[irq])) 3805 continue; 3806 3807 if ((apic_irq_table[irq] == NULL) || 3808 (apic_irq_table[irq]->airq_dip == dip)) { 3809 3810 prsirqlistp = acpi_insert_prs_irq_ent( 3811 prsirqlistp, 0 /* Highest priority */, irq, 3812 &irqlistent->intr_flags, 3813 &irqlistent->acpi_prs_prv); 3814 3815 /* 3816 * If we do not prefer the current irq from _CRS 3817 * or if we do and this irq is the same as the 3818 * current irq from _CRS, this is the one 3819 * to pick. 3820 */ 3821 if (!(apic_prefer_crs) || (irq == crs_irq)) { 3822 return (prsirqlistp); 3823 } 3824 continue; 3825 } 3826 3827 /* 3828 * Edge-triggered interrupts cannot be shared 3829 */ 3830 if (irqlistent->intr_flags.intr_el == INTR_EL_EDGE) 3831 continue; 3832 3833 /* 3834 * To work around BIOSes that contain incorrect 3835 * interrupt polarity information in interrupt 3836 * descriptors returned by _PRS, we assume that 3837 * the polarity of the other device sharing this 3838 * interrupt controller input is compatible. 3839 * If it's not, the caller will catch it when 3840 * the caller invokes the link device's _CRS method 3841 * (after invoking its _SRS method). 
			 */
			iflags = irqlistent->intr_flags;
			iflags.intr_po =
			    apic_irq_table[irq]->airq_iflag.intr_po;

			if (!acpi_intr_compatible(iflags,
			    apic_irq_table[irq]->airq_iflag)) {
				APIC_VERBOSE_IRQ((CE_CONT, "!%s: irq %d "
				    "not compatible [%x:%x:%x !~ %x:%x:%x]",
				    psm_name, irq,
				    iflags.intr_po,
				    iflags.intr_el,
				    iflags.bustype,
				    apic_irq_table[irq]->airq_iflag.intr_po,
				    apic_irq_table[irq]->airq_iflag.intr_el,
				    apic_irq_table[irq]->airq_iflag.bustype));
				continue;
			}

			/*
			 * If we prefer the irq from _CRS, no need
			 * to search any further (and make sure
			 * to add this irq with the highest priority
			 * so it's tried first).
			 */
			if (crs_irq == irq && apic_prefer_crs) {

				return (acpi_insert_prs_irq_ent(
				    prsirqlistp,
				    0 /* Highest priority */,
				    irq, &iflags,
				    &irqlistent->acpi_prs_prv));
			}

			/*
			 * Priority is equal to the share count (lower
			 * share count is higher priority). Note that
			 * the intr flags passed in here are the ones we
			 * changed above -- if incorrect, it will be
			 * caught by the caller's _CRS flags comparison.
			 */
			prsirqlistp = acpi_insert_prs_irq_ent(
			    prsirqlistp,
			    apic_irq_table[irq]->airq_share, irq,
			    &iflags, &irqlistent->acpi_prs_prv);
		}

		/* Go to the next irqlist entry */
		irqlistent = irqlistent->next;
	}

	return (prsirqlistp);
}

/*
 * Configures the irq for the interrupt link device identified by
 * acpipsmlnkp.
 *
 * Gets the current and the list of possible irq settings for the
 * device. If apic_unconditional_srs is not set, and the current
 * resource setting is in the list of possible irq settings, the
 * current irq resource setting is passed to the caller.
 *
 * Otherwise, picks an irq number from the list of possible irq
 * settings, and sets the irq of the device to this value.
 * If apic_prefer_crs is set, and the current irq resource setting is a
 * member of the set of possible irqs with the least number of devices
 * sharing the interrupt, the current setting is picked.
 *
 * Passes the irq number in the value pointed to by pci_irqp, and
 * polarity and sensitivity in the structure pointed to by dipintr_flagp
 * to the caller.
 *
 * Note that if setting the irq resource fails, but the current irq
 * resource settings are successfully obtained, the current settings are
 * passed to the caller and the operation is considered a success.
 *
 * Returns:
 * ACPI_PSM_SUCCESS on success.
 *
 * ACPI_PSM_FAILURE if an error occurred during the configuration or
 * if a suitable irq was not found for this device, or if setting the
 * irq resource and obtaining the current resource fails.
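 *
 * In outline: _PRS supplies the candidate irqs,
 * apic_choose_irqs_from_prs() orders them, and each candidate is
 * programmed with _SRS and then verified against _CRS until one sticks.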
 *
 */
static int
apic_acpi_irq_configure(acpi_psm_lnk_t *acpipsmlnkp, dev_info_t *dip,
    int *pci_irqp, iflag_t *dipintr_flagp)
{
	int32_t irq;
	int cur_irq = -1;
	acpi_irqlist_t *irqlistp;
	prs_irq_list_t *prs_irq_listp, *prs_irq_entp;
	boolean_t found_irq = B_FALSE;

	dipintr_flagp->bustype = BUS_PCI;

	if ((acpi_get_possible_irq_resources(acpipsmlnkp, &irqlistp))
	    == ACPI_PSM_FAILURE) {
		APIC_VERBOSE_IRQ((CE_WARN, "!%s: Unable to determine "
		    "or assign IRQ for device %s, instance #%d: The system was "
		    "unable to get the list of potential IRQs from ACPI.",
		    psm_name, ddi_get_name(dip), ddi_get_instance(dip)));

		return (ACPI_PSM_FAILURE);
	}

	if ((acpi_get_current_irq_resource(acpipsmlnkp, &cur_irq,
	    dipintr_flagp) == ACPI_PSM_SUCCESS) && (!apic_unconditional_srs) &&
	    (cur_irq > 0)) {
		/*
		 * If an IRQ is set in _CRS and that IRQ exists in the set
		 * returned from _PRS, return that IRQ, otherwise print
		 * a warning
		 */

		if (acpi_irqlist_find_irq(irqlistp, cur_irq, NULL)
		    == ACPI_PSM_SUCCESS) {

			ASSERT(pci_irqp != NULL);
			*pci_irqp = cur_irq;
			acpi_free_irqlist(irqlistp);
			return (ACPI_PSM_SUCCESS);
		}

		APIC_VERBOSE_IRQ((CE_WARN, "!%s: Could not find the "
		    "current irq %d for device %s, instance #%d in ACPI's "
		    "list of possible irqs for this device. Picking one from "
		    "the latter list.", psm_name, cur_irq, ddi_get_name(dip),
		    ddi_get_instance(dip)));
	}

	if ((prs_irq_listp = apic_choose_irqs_from_prs(irqlistp, dip,
	    cur_irq)) == NULL) {

		APIC_VERBOSE_IRQ((CE_WARN, "!%s: Could not find a "
		    "suitable irq from the list of possible irqs for device "
		    "%s, instance #%d in ACPI's list of possible irqs",
		    psm_name, ddi_get_name(dip), ddi_get_instance(dip)));

		acpi_free_irqlist(irqlistp);
		return (ACPI_PSM_FAILURE);
	}

	acpi_free_irqlist(irqlistp);

	for (prs_irq_entp = prs_irq_listp;
	    prs_irq_entp != NULL && found_irq == B_FALSE;
	    prs_irq_entp = prs_irq_entp->next) {

		acpipsmlnkp->acpi_prs_prv = prs_irq_entp->prsprv;
		irq = prs_irq_entp->irq;

		APIC_VERBOSE_IRQ((CE_CONT, "!%s: Setting irq %d for "
		    "device %s instance #%d\n", psm_name, irq,
		    ddi_get_name(dip), ddi_get_instance(dip)));

		if ((acpi_set_irq_resource(acpipsmlnkp, irq))
		    == ACPI_PSM_SUCCESS) {
			/*
			 * Setting the irq was successful; check to make sure
			 * _CRS reflects that. If _CRS does not agree with
			 * what we set, return the irq that was set.
			 */

			if (acpi_get_current_irq_resource(acpipsmlnkp, &cur_irq,
			    dipintr_flagp) == ACPI_PSM_SUCCESS) {

				if (cur_irq != irq)
					APIC_VERBOSE_IRQ((CE_WARN,
					    "!%s: IRQ resource set "
					    "(irqno %d) for device %s "
					    "instance #%d, differs from "
					    "current setting irqno %d",
					    psm_name, irq, ddi_get_name(dip),
					    ddi_get_instance(dip), cur_irq));
			} else {
				/*
				 * On at least one system, there was a bug in
				 * a DSDT method called by _STA, causing _STA to
				 * indicate that the link device was disabled
				 * (when, in fact, it was enabled). Since _SRS
				 * succeeded, assume that _CRS is lying and use
				 * the iflags from this _PRS interrupt choice.
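				 * (That is, trust the flags that _PRS
				 * advertised over whatever _CRS reports now.)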
				 * If we're wrong about the flags, the polarity
				 * will be incorrect and we may get an interrupt
				 * storm, but there's not much else we can do
				 * at this point.
				 */
				*dipintr_flagp = prs_irq_entp->intrflags;
			}

			/*
			 * Return the irq that was set, and not what _CRS
			 * reports, since _CRS has been seen to return
			 * different IRQs than what was passed to _SRS on some
			 * systems (and just not return successfully on
			 * others).
			 */
			cur_irq = irq;
			found_irq = B_TRUE;
		} else {
			APIC_VERBOSE_IRQ((CE_WARN, "!%s: set resource "
			    "irq %d failed for device %s instance #%d",
			    psm_name, irq, ddi_get_name(dip),
			    ddi_get_instance(dip)));

			if (cur_irq == -1) {
				acpi_destroy_prs_irq_list(&prs_irq_listp);
				return (ACPI_PSM_FAILURE);
			}
		}
	}

	acpi_destroy_prs_irq_list(&prs_irq_listp);

	if (!found_irq)
		return (ACPI_PSM_FAILURE);

	ASSERT(pci_irqp != NULL);
	*pci_irqp = cur_irq;
	return (ACPI_PSM_SUCCESS);
}

void
ioapic_disable_redirection()
{
	int ioapic_ix;
	int intin_max;
	int intin_ix;

	/* Disable the I/O APIC redirection entries */
	for (ioapic_ix = 0; ioapic_ix < apic_io_max; ioapic_ix++) {

		/* Bits 23-16 define the maximum redirection entries */
		intin_max = (ioapic_read(ioapic_ix, APIC_VERS_CMD) >> 16)
		    & 0xff;

		for (intin_ix = 0; intin_ix <= intin_max; intin_ix++) {
			/*
			 * The assumption here is that this is safe, even for
			 * systems with IOAPICs that suffer from the hardware
			 * erratum because all devices have been quiesced before
			 * this function is called from apic_shutdown()
			 * (or equivalent). If that assumption turns out to be
			 * false, this mask operation can induce the same
			 * erratum result we're trying to avoid.
			 */
			ioapic_write(ioapic_ix, APIC_RDT_CMD + 2 * intin_ix,
			    AV_MASK);
		}
	}
}

/*
 * Looks for an IOAPIC with the specified physical address in the /ioapics
 * node in the device tree (created by the PCI enumerator).
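 * Returns B_TRUE only if that IOAPIC is an AMD 8131 or 8132, the parts
 * whose mask/unmask erratum (referred to above) must be worked around.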
 */
static boolean_t
apic_is_ioapic_AMD_813x(uint32_t physaddr)
{
	/*
	 * Look in /ioapics for the ioapic with the given physical address.
	 */
	dev_info_t *ioapicsnode = ddi_find_devinfo(IOAPICS_NODE_NAME, -1, 0);
	dev_info_t *ioapic_child;
	boolean_t rv = B_FALSE;
	int vid, did;
	uint64_t ioapic_paddr;
	boolean_t done = B_FALSE;

	if (ioapicsnode == NULL)
		return (B_FALSE);

	/* Load first child: */
	ioapic_child = ddi_get_child(ioapicsnode);
	while (!done && ioapic_child != 0) { /* Iterate over children */

		if ((ioapic_paddr = (uint64_t)ddi_prop_get_int64(DDI_DEV_T_ANY,
		    ioapic_child, DDI_PROP_DONTPASS, "reg", 0))
		    != 0 && physaddr == ioapic_paddr) {

			vid = ddi_prop_get_int(DDI_DEV_T_ANY, ioapic_child,
			    DDI_PROP_DONTPASS, IOAPICS_PROP_VENID, 0);

			if (vid == VENID_AMD) {

				did = ddi_prop_get_int(DDI_DEV_T_ANY,
				    ioapic_child, DDI_PROP_DONTPASS,
				    IOAPICS_PROP_DEVID, 0);

				if (did == DEVID_8131_IOAPIC ||
				    did == DEVID_8132_IOAPIC) {

					rv = B_TRUE;
					done = B_TRUE;
				}
			}
		}

		if (!done)
			ioapic_child = ddi_get_next_sibling(ioapic_child);
	}

	/* The ioapics node was held by ddi_find_devinfo, so release it */
	ndi_rele_devi(ioapicsnode);
	return (rv);
}

struct apic_state {
	int32_t		as_task_reg;
	int32_t		as_dest_reg;
	int32_t		as_format_reg;
	int32_t		as_local_timer;
	int32_t		as_pcint_vect;
	int32_t		as_int_vect0;
	int32_t		as_int_vect1;
	int32_t		as_err_vect;
	int32_t		as_init_count;
	int32_t		as_divide_reg;
	int32_t		as_spur_int_reg;
	uint32_t	as_ioapic_ids[MAX_IO_APIC];
};


static int
apic_acpi_enter_apicmode(void)
{
	ACPI_OBJECT_LIST	arglist;
	ACPI_OBJECT		arg;
	ACPI_STATUS		status;

	/* Setup parameter object */
	arglist.Count = 1;
	arglist.Pointer = &arg;
	arg.Type = ACPI_TYPE_INTEGER;
	arg.Integer.Value = ACPI_APIC_MODE;

	status = AcpiEvaluateObject(NULL, "\\_PIC", &arglist, NULL);
	if (ACPI_FAILURE(status))
		return (PSM_FAILURE);
	else
		return (PSM_SUCCESS);
}


static void
apic_save_state(struct apic_state *sp)
{
	int	i;
	ulong_t	iflag;

	PMD(PMD_SX, ("apic_save_state %p\n", (void *)sp))
	/*
	 * First the local APIC.
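	 * Snapshot every register that apic_restore_state() below rewrites:
	 * the task priority, destination and format registers, the LVT
	 * entries, the timer's count and divide configuration, and the
	 * spurious-interrupt vector register.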
	 */
	sp->as_task_reg = apic_reg_ops->apic_get_pri();
	sp->as_dest_reg = apic_reg_ops->apic_read(APIC_DEST_REG);
	if (apic_mode == LOCAL_APIC)
		sp->as_format_reg = apic_reg_ops->apic_read(APIC_FORMAT_REG);
	sp->as_local_timer = apic_reg_ops->apic_read(APIC_LOCAL_TIMER);
	sp->as_pcint_vect = apic_reg_ops->apic_read(APIC_PCINT_VECT);
	sp->as_int_vect0 = apic_reg_ops->apic_read(APIC_INT_VECT0);
	sp->as_int_vect1 = apic_reg_ops->apic_read(APIC_INT_VECT1);
	sp->as_err_vect = apic_reg_ops->apic_read(APIC_ERR_VECT);
	sp->as_init_count = apic_reg_ops->apic_read(APIC_INIT_COUNT);
	sp->as_divide_reg = apic_reg_ops->apic_read(APIC_DIVIDE_REG);
	sp->as_spur_int_reg = apic_reg_ops->apic_read(APIC_SPUR_INT_REG);

	/*
	 * If on the boot processor then save the IOAPICs' IDs
	 */
	if (psm_get_cpu_id() == 0) {

		iflag = intr_clear();
		lock_set(&apic_ioapic_lock);

		for (i = 0; i < apic_io_max; i++)
			sp->as_ioapic_ids[i] = ioapic_read(i, APIC_ID_CMD);

		lock_clear(&apic_ioapic_lock);
		intr_restore(iflag);
	}
}

static void
apic_restore_state(struct apic_state *sp)
{
	int	i;
	ulong_t	iflag;

	/*
	 * First the local APIC.
	 */
	apic_reg_ops->apic_write_task_reg(sp->as_task_reg);
	if (apic_mode == LOCAL_APIC) {
		apic_reg_ops->apic_write(APIC_DEST_REG, sp->as_dest_reg);
		apic_reg_ops->apic_write(APIC_FORMAT_REG, sp->as_format_reg);
	}
	apic_reg_ops->apic_write(APIC_LOCAL_TIMER, sp->as_local_timer);
	apic_reg_ops->apic_write(APIC_PCINT_VECT, sp->as_pcint_vect);
	apic_reg_ops->apic_write(APIC_INT_VECT0, sp->as_int_vect0);
	apic_reg_ops->apic_write(APIC_INT_VECT1, sp->as_int_vect1);
	apic_reg_ops->apic_write(APIC_ERR_VECT, sp->as_err_vect);
	apic_reg_ops->apic_write(APIC_INIT_COUNT, sp->as_init_count);
	apic_reg_ops->apic_write(APIC_DIVIDE_REG, sp->as_divide_reg);
	apic_reg_ops->apic_write(APIC_SPUR_INT_REG, sp->as_spur_int_reg);

	/*
	 * The following only needs to be done once, so we do it on the
	 * boot processor, since we know that we only have one of those.
	 */
	if (psm_get_cpu_id() == 0) {

		iflag = intr_clear();
		lock_set(&apic_ioapic_lock);

		/* Restore IOAPICs' APIC IDs */
		for (i = 0; i < apic_io_max; i++) {
			ioapic_write(i, APIC_ID_CMD, sp->as_ioapic_ids[i]);
		}

		lock_clear(&apic_ioapic_lock);
		intr_restore(iflag);

		/*
		 * Reenter APIC mode before restoring LNK devices
		 */
		(void) apic_acpi_enter_apicmode();

		/*
		 * Restore ACPI link device mappings
		 */
		acpi_restore_link_devices();
	}
}

/*
 * Returns 0 on success
 */
int
apic_state(psm_state_request_t *rp)
{
	PMD(PMD_SX, ("apic_state "))
	switch (rp->psr_cmd) {
	case PSM_STATE_ALLOC:
		rp->req.psm_state_req.psr_state =
		    kmem_zalloc(sizeof (struct apic_state), KM_NOSLEEP);
		if (rp->req.psm_state_req.psr_state == NULL)
			return (ENOMEM);
		rp->req.psm_state_req.psr_state_size =
		    sizeof (struct apic_state);
		PMD(PMD_SX, (":STATE_ALLOC: state %p, size %lx\n",
		    rp->req.psm_state_req.psr_state,
		    rp->req.psm_state_req.psr_state_size))
		return (0);

	case PSM_STATE_FREE:
		kmem_free(rp->req.psm_state_req.psr_state,
		    rp->req.psm_state_req.psr_state_size);
		PMD(PMD_SX, (" STATE_FREE: state %p, size %lx\n",
		    rp->req.psm_state_req.psr_state,
		    rp->req.psm_state_req.psr_state_size))
		return (0);

	case PSM_STATE_SAVE:
		PMD(PMD_SX, (" STATE_SAVE: state %p, size %lx\n",
		    rp->req.psm_state_req.psr_state,
		    rp->req.psm_state_req.psr_state_size))
		apic_save_state(rp->req.psm_state_req.psr_state);
		return (0);

	case PSM_STATE_RESTORE:
		apic_restore_state(rp->req.psm_state_req.psr_state);
		PMD(PMD_SX, (" STATE_RESTORE: state %p, size %lx\n",
		    rp->req.psm_state_req.psr_state,
		    rp->req.psm_state_req.psr_state_size))
		return (0);

	default:
		return (EINVAL);
	}
}
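/*
 * Illustrative sketch (hypothetical, not part of the original source):
 * how a suspend/resume client might drive the four apic_state() commands
 * above. The function name and the APIC_STATE_EXAMPLE guard are invented
 * for illustration only, and error handling is elided; the guard keeps
 * the sketch out of any real build.
 */
#ifdef APIC_STATE_EXAMPLE
static int
apic_state_example_cycle(void)
{
	psm_state_request_t req;

	req.psr_cmd = PSM_STATE_ALLOC;		/* allocate a snapshot buffer */
	if (apic_state(&req) != 0)
		return (ENOMEM);

	req.psr_cmd = PSM_STATE_SAVE;		/* capture APIC/IOAPIC state */
	(void) apic_state(&req);

	/* ... the system would suspend and resume here ... */

	req.psr_cmd = PSM_STATE_RESTORE;	/* reprogram from the snapshot */
	(void) apic_state(&req);

	req.psr_cmd = PSM_STATE_FREE;		/* release the snapshot */
	(void) apic_state(&req);

	return (0);
}
#endif	/* APIC_STATE_EXAMPLE */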