1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * PSMI 1.1 extensions are supported only in 2.6 and later versions. 31 * PSMI 1.2 extensions are supported only in 2.7 and later versions. 32 * PSMI 1.3 and 1.4 extensions are supported in Solaris 10. 33 * PSMI 1.5 extensions are supported in Solaris Nevada. 
34 */ 35 #define PSMI_1_5 36 37 #include <sys/processor.h> 38 #include <sys/time.h> 39 #include <sys/psm.h> 40 #include <sys/smp_impldefs.h> 41 #include <sys/cram.h> 42 #include <sys/acpi/acpi.h> 43 #include <sys/acpica.h> 44 #include <sys/psm_common.h> 45 #include "apic.h" 46 #include <sys/pit.h> 47 #include <sys/ddi.h> 48 #include <sys/sunddi.h> 49 #include <sys/ddi_impldefs.h> 50 #include <sys/pci.h> 51 #include <sys/promif.h> 52 #include <sys/x86_archext.h> 53 #include <sys/cpc_impl.h> 54 #include <sys/uadmin.h> 55 #include <sys/panic.h> 56 #include <sys/debug.h> 57 #include <sys/archsystm.h> 58 #include <sys/trap.h> 59 #include <sys/machsystm.h> 60 #include <sys/cpuvar.h> 61 #include <sys/rm_platter.h> 62 #include <sys/privregs.h> 63 #include <sys/cyclic.h> 64 #include <sys/note.h> 65 #include <sys/pci_intr_lib.h> 66 67 /* 68 * Local Function Prototypes 69 */ 70 static void apic_init_intr(); 71 static void apic_ret(); 72 static int apic_handle_defconf(); 73 static int apic_parse_mpct(caddr_t mpct, int bypass); 74 static struct apic_mpfps_hdr *apic_find_fps_sig(caddr_t fptr, int size); 75 static int apic_checksum(caddr_t bptr, int len); 76 static int get_apic_cmd1(); 77 static int get_apic_pri(); 78 static int apic_find_bus_type(char *bus); 79 static int apic_find_bus(int busid); 80 static int apic_find_bus_id(int bustype); 81 static struct apic_io_intr *apic_find_io_intr(int irqno); 82 int apic_allocate_irq(int irq); 83 static int apic_find_free_irq(int start, int end); 84 static uchar_t apic_allocate_vector(int ipl, int irq, int pri); 85 static void apic_modify_vector(uchar_t vector, int irq); 86 static void apic_mark_vector(uchar_t oldvector, uchar_t newvector); 87 static uchar_t apic_xlate_vector(uchar_t oldvector); 88 static void apic_xlate_vector_free_timeout_handler(void *arg); 89 static void apic_free_vector(uchar_t vector); 90 static void apic_reprogram_timeout_handler(void *arg); 91 static int apic_check_stuck_interrupt(apic_irq_t *irq_ptr, int 
old_bind_cpu, 92 int new_bind_cpu, volatile int32_t *ioapic, int intin_no, int which_irq); 93 static int apic_setup_io_intr(apic_irq_t *irqptr, int irq); 94 static int apic_setup_io_intr_deferred(apic_irq_t *irqptr, int irq); 95 static void apic_record_rdt_entry(apic_irq_t *irqptr, int irq); 96 static struct apic_io_intr *apic_find_io_intr_w_busid(int irqno, int busid); 97 static int apic_find_intin(uchar_t ioapic, uchar_t intin); 98 static int apic_handle_pci_pci_bridge(dev_info_t *idip, int child_devno, 99 int child_ipin, struct apic_io_intr **intrp); 100 static int apic_setup_irq_table(dev_info_t *dip, int irqno, 101 struct apic_io_intr *intrp, struct intrspec *ispec, iflag_t *intr_flagp, 102 int type); 103 static int apic_setup_sci_irq_table(int irqno, uchar_t ipl, 104 iflag_t *intr_flagp); 105 static void apic_nmi_intr(caddr_t arg); 106 uchar_t apic_bind_intr(dev_info_t *dip, int irq, uchar_t ioapicid, 107 uchar_t intin); 108 static int apic_rebind(apic_irq_t *irq_ptr, int bind_cpu, int acquire_lock, 109 int when); 110 int apic_rebind_all(apic_irq_t *irq_ptr, int bind_cpu, int safe); 111 static void apic_intr_redistribute(); 112 static void apic_cleanup_busy(); 113 static void apic_set_pwroff_method_from_mpcnfhdr(struct apic_mp_cnf_hdr *hdrp); 114 int apic_introp_xlate(dev_info_t *dip, struct intrspec *ispec, int type); 115 116 /* ACPI support routines */ 117 static int acpi_probe(void); 118 static int apic_acpi_irq_configure(acpi_psm_lnk_t *acpipsmlnkp, dev_info_t *dip, 119 int *pci_irqp, iflag_t *intr_flagp); 120 121 static int apic_acpi_translate_pci_irq(dev_info_t *dip, int busid, int devid, 122 int ipin, int *pci_irqp, iflag_t *intr_flagp); 123 static uchar_t acpi_find_ioapic(int irq); 124 static int acpi_intr_compatible(iflag_t iflag1, iflag_t iflag2); 125 126 /* 127 * standard MP entries 128 */ 129 static int apic_probe(); 130 static int apic_clkinit(); 131 static int apic_getclkirq(int ipl); 132 static uint_t apic_calibrate(volatile uint32_t *addr, 133 
uint16_t *pit_ticks_adj); 134 static hrtime_t apic_gettime(); 135 static hrtime_t apic_gethrtime(); 136 static void apic_init(); 137 static void apic_picinit(void); 138 static void apic_cpu_start(processorid_t cpun, caddr_t rm_code); 139 static int apic_post_cpu_start(void); 140 static void apic_send_ipi(int cpun, int ipl); 141 static void apic_set_softintr(int softintr); 142 static void apic_set_idlecpu(processorid_t cpun); 143 static void apic_unset_idlecpu(processorid_t cpun); 144 static int apic_softlvl_to_irq(int ipl); 145 static int apic_intr_enter(int ipl, int *vect); 146 static void apic_intr_exit(int ipl, int vect); 147 static void apic_setspl(int ipl); 148 static int apic_addspl(int ipl, int vector, int min_ipl, int max_ipl); 149 static int apic_delspl(int ipl, int vector, int min_ipl, int max_ipl); 150 static void apic_shutdown(int cmd, int fcn); 151 static void apic_preshutdown(int cmd, int fcn); 152 static int apic_disable_intr(processorid_t cpun); 153 static void apic_enable_intr(processorid_t cpun); 154 static processorid_t apic_get_next_processorid(processorid_t cpun); 155 static int apic_get_ipivect(int ipl, int type); 156 static void apic_timer_reprogram(hrtime_t time); 157 static void apic_timer_enable(void); 158 static void apic_timer_disable(void); 159 static void apic_post_cyclic_setup(void *arg); 160 extern int apic_intr_ops(dev_info_t *, ddi_intr_handle_impl_t *, 161 psm_intr_op_t, int *); 162 163 static int apic_oneshot = 0; 164 int apic_oneshot_enable = 1; /* to allow disabling one-shot capability */ 165 166 /* 167 * These variables are frequently accessed in apic_intr_enter(), 168 * apic_intr_exit and apic_setspl, so group them together 169 */ 170 volatile uint32_t *apicadr = NULL; /* virtual addr of local APIC */ 171 int apic_setspl_delay = 1; /* apic_setspl - delay enable */ 172 int apic_clkvect; 173 174 /* ACPI SCI interrupt configuration; -1 if SCI not used */ 175 int apic_sci_vect = -1; 176 iflag_t apic_sci_flags; 177 178 /* vector 
at which error interrupts come in */ 179 int apic_errvect; 180 int apic_enable_error_intr = 1; 181 int apic_error_display_delay = 100; 182 183 /* vector at which performance counter overflow interrupts come in */ 184 int apic_cpcovf_vect; 185 int apic_enable_cpcovf_intr = 1; 186 187 /* Max wait time (in microsecs) for flags to clear in an RDT entry. */ 188 static int apic_max_usecs_clear_pending = 1000; 189 190 /* Amt of usecs to wait before checking if RDT flags have reset. */ 191 #define APIC_USECS_PER_WAIT_INTERVAL 100 192 193 /* Maximum number of times to retry reprogramming via the timeout */ 194 #define APIC_REPROGRAM_MAX_TIMEOUTS 10 195 196 /* timeout delay for IOAPIC delayed reprogramming */ 197 #define APIC_REPROGRAM_TIMEOUT_DELAY 5 /* microseconds */ 198 199 /* Parameter to apic_rebind(): Should reprogramming be done now or later? */ 200 #define DEFERRED 1 201 #define IMMEDIATE 0 202 203 /* 204 * Largest value with NBBY bits set, i.e. the maximum an unsigned char can hold. NBBY is the number of bits per byte, from <sys/param.h> 205 */ 206 #define UCHAR_MAX ((1 << NBBY) - 1) 207 208 uchar_t apic_reserved_irqlist[MAX_ISA_IRQ]; 209 210 /* 211 * The following vector assignments influence the value of ipltopri and 212 * vectortoipl. Note that vectors 0 - 0x1f are not used. We can program 213 * idle to 0 and IPL 0 to 0x10 to differentiate idle in case 214 * we care to do so in future. Note some IPLs which are rarely used 215 * will share the vector ranges and heavily used IPLs (5 and 6) have 216 * a wide range. 217 * IPL Vector range. as passed to intr_enter 218 * 0 none. 219 * 1,2,3 0x20-0x2f 0x0-0xf 220 * 4 0x30-0x3f 0x10-0x1f 221 * 5 0x40-0x5f 0x20-0x3f 222 * 6 0x60-0x7f 0x40-0x5f 223 * 7,8,9 0x80-0x8f 0x60-0x6f 224 * 10 0x90-0x9f 0x70-0x7f 225 * 11 0xa0-0xaf 0x80-0x8f 226 * ... ... 
227 * 16 0xf0-0xff 0xd0-0xdf 228 */ 229 uchar_t apic_vectortoipl[APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL] = { 230 3, 4, 5, 5, 6, 6, 9, 10, 11, 12, 13, 14, 15, 16 231 }; 232 /* 233 * The ipl of an ISR at vector X is apic_vectortoipl[X>>4] (one table entry per 16 vectors) 234 * NOTE that this is vector as passed into intr_enter which is 235 * programmed vector - 0x20 (APIC_BASE_VECT) 236 */ 237 238 uchar_t apic_ipltopri[MAXIPL + 1]; /* unix ipl to apic pri */ 239 /* The taskpri to be programmed into apic to mask given ipl */ 240 241 #if defined(__amd64) 242 uchar_t apic_cr8pri[MAXIPL + 1]; /* unix ipl to cr8 pri */ 243 #endif 244 245 /* 246 * Patchable global variables. 247 */ 248 int apic_forceload = 0; 249 250 #define INTR_ROUND_ROBIN_WITH_AFFINITY 0 251 #define INTR_ROUND_ROBIN 1 252 #define INTR_LOWEST_PRIORITY 2 253 254 int apic_intr_policy = INTR_ROUND_ROBIN_WITH_AFFINITY; 255 256 static int apic_next_bind_cpu = 2; /* For round robin assignment */ 257 /* start with cpu 1 -- NOTE(review): the initializer is 2; confirm which one is intended */ 258 259 int apic_coarse_hrtime = 1; /* 0 - use accurate slow gethrtime() */ 260 /* 1 - use gettime() for performance */ 261 int apic_flat_model = 0; /* 0 - clustered. 
1 - flat */ 262 int apic_enable_hwsoftint = 0; /* 0 - disable, 1 - enable */ 263 int apic_enable_bind_log = 1; /* 1 - display interrupt binding log */ 264 int apic_panic_on_nmi = 0; 265 int apic_panic_on_apic_error = 0; 266 267 int apic_verbose = 0; 268 269 /* Flag definitions for apic_verbose */ 270 #define APIC_VERBOSE_IOAPIC_FLAG 0x00000001 271 #define APIC_VERBOSE_IRQ_FLAG 0x00000002 272 #define APIC_VERBOSE_POWEROFF_FLAG 0x00000004 273 #define APIC_VERBOSE_POWEROFF_PAUSE_FLAG 0x00000008 274 275 /* The APIC_VERBOSE_* macros below take a fully parenthesized argument list (it is pasted directly after the function name) and expand to a bare "if" statement that already ends in a semicolon, so do not use them as the body of an unbraced if/else. */ 276 #define APIC_VERBOSE_IOAPIC(fmt) \ 277 if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) \ 278 cmn_err fmt; 279 280 #define APIC_VERBOSE_IRQ(fmt) \ 281 if (apic_verbose & APIC_VERBOSE_IRQ_FLAG) \ 282 cmn_err fmt; 283 284 #define APIC_VERBOSE_POWEROFF(fmt) \ 285 if (apic_verbose & APIC_VERBOSE_POWEROFF_FLAG) \ 286 prom_printf fmt; 287 288 289 /* Now the ones for Dynamic Interrupt distribution */ 290 int apic_enable_dynamic_migration = 0; 291 292 /* 293 * If enabled, the distribution works as follows: 294 * On every interrupt entry, the current ipl for the CPU is set in cpu_info 295 * and the irq corresponding to the ipl is also set in the aci_current array. 296 * interrupt exit and setspl (due to soft interrupts) will cause the current 297 * ipl to be changed. This is cache friendly as these frequently used 298 * paths write into a per cpu structure. 299 * 300 * Sampling is done by checking the structures for all CPUs and incrementing 301 * the busy field of the irq (if any) executing on each CPU and the busy field 302 * of the corresponding CPU. 303 * In periodic mode this is done on every clock interrupt. 304 * In one-shot mode, this is done thru a cyclic with an interval of 305 * apic_redistribute_sample_interval (default 10 milli sec). 306 * 307 * Every apic_sample_factor_redistribution times we sample, we do computations 308 * to decide which interrupt needs to be migrated (see comments 309 * before apic_intr_redistribute()). 
310 */ 311 312 /* 313 * Following 3 variables start as % and can be patched or set using an 314 * API to be defined in future. They will be scaled to 315 * sample_factor_redistribution which is in turn set to hertz+1 (in periodic 316 * mode), or 101 in one-shot mode to stagger it away from one sec processing 317 */ 318 319 int apic_int_busy_mark = 60; 320 int apic_int_free_mark = 20; 321 int apic_diff_for_redistribution = 10; 322 323 /* sampling interval for interrupt redistribution for dynamic migration */ 324 int apic_redistribute_sample_interval = NANOSEC / 100; /* 10 millisec */ 325 326 /* 327 * number of times we sample before deciding to redistribute interrupts 328 * for dynamic migration 329 */ 330 int apic_sample_factor_redistribution = 101; 331 332 /* timeout for xlate_vector, mark_vector */ 333 int apic_revector_timeout = 16 * 10000; /* 160 millisec */ 334 335 int apic_redist_cpu_skip = 0; 336 int apic_num_imbalance = 0; 337 int apic_num_rebind = 0; 338 339 int apic_nproc = 0; 340 int apic_defconf = 0; 341 int apic_irq_translate = 0; 342 int apic_spec_rev = 0; 343 int apic_imcrp = 0; 344 345 int apic_use_acpi = 1; /* 1 = use ACPI, 0 = don't use ACPI */ 346 int apic_use_acpi_madt_only = 0; /* 1=ONLY use MADT from ACPI */ 347 348 /* 349 * For interrupt link devices, if apic_unconditional_srs is set, an irq resource 350 * will be assigned (via _SRS). If it is not set, use the current 351 * irq setting (via _CRS), but only if that irq is in the set of possible 352 * irqs (returned by _PRS) for the device. 353 */ 354 int apic_unconditional_srs = 1; 355 356 /* 357 * For interrupt link devices, if apic_prefer_crs is set when we are 358 * assigning an IRQ resource to a device, prefer the current IRQ setting 359 * over other possible irq settings under same conditions. 
360 */ 361 362 int apic_prefer_crs = 1; 363 364 365 /* minimum number of timer ticks to program to */ 366 int apic_min_timer_ticks = 1; 367 /* 368 * Local static data 369 */ 370 static struct psm_ops apic_ops = { 371 apic_probe, 372 373 apic_init, 374 apic_picinit, 375 apic_intr_enter, 376 apic_intr_exit, 377 apic_setspl, 378 apic_addspl, 379 apic_delspl, 380 apic_disable_intr, 381 apic_enable_intr, 382 apic_softlvl_to_irq, 383 apic_set_softintr, 384 385 apic_set_idlecpu, 386 apic_unset_idlecpu, 387 388 apic_clkinit, 389 apic_getclkirq, 390 (void (*)(void))NULL, /* psm_hrtimeinit */ 391 apic_gethrtime, 392 393 apic_get_next_processorid, 394 apic_cpu_start, 395 apic_post_cpu_start, 396 apic_shutdown, 397 apic_get_ipivect, 398 apic_send_ipi, 399 400 (int (*)(dev_info_t *, int))NULL, /* psm_translate_irq */ 401 (int (*)(todinfo_t *))NULL, /* psm_tod_get */ 402 (int (*)(todinfo_t *))NULL, /* psm_tod_set */ 403 (void (*)(int, char *))NULL, /* psm_notify_error */ 404 (void (*)(int))NULL, /* psm_notify_func */ 405 apic_timer_reprogram, 406 apic_timer_enable, 407 apic_timer_disable, 408 apic_post_cyclic_setup, 409 apic_preshutdown, 410 apic_intr_ops /* Advanced DDI Interrupt framework */ 411 }; 412 413 414 static struct psm_info apic_psm_info = { 415 PSM_INFO_VER01_5, /* version */ 416 PSM_OWN_EXCLUSIVE, /* ownership */ 417 (struct psm_ops *)&apic_ops, /* operation */ 418 "pcplusmp", /* machine name */ 419 "pcplusmp v1.4 compatible %I%", 420 }; 421 422 static void *apic_hdlp; 423 424 #ifdef DEBUG 425 #define DENT 0x0001 426 int apic_debug = 0; 427 /* 428 * set apic_restrict_vector to the # of vectors we want to allow per range 429 * useful in testing shared interrupt logic by setting it to 2 or 3 430 */ 431 int apic_restrict_vector = 0; 432 433 #define APIC_DEBUG_MSGBUFSIZE 2048 434 int apic_debug_msgbuf[APIC_DEBUG_MSGBUFSIZE]; 435 int apic_debug_msgbufindex = 0; 436 437 /* 438 * Put "int" info into debug buffer. No MP consistency, but light weight. 
439 * Good enough for most debugging. 440 */ /* NOTE: APIC_DEBUG_BUF_PUT expands to two statements (a store followed by an if), so it must not be used as the body of an unbraced if/else. The index is reset to 0 once it reaches APIC_DEBUG_MSGBUFSIZE - NCPU, i.e. NCPU slots before the end of the buffer. */ 441 #define APIC_DEBUG_BUF_PUT(x) \ 442 apic_debug_msgbuf[apic_debug_msgbufindex++] = x; \ 443 if (apic_debug_msgbufindex >= (APIC_DEBUG_MSGBUFSIZE - NCPU)) \ 444 apic_debug_msgbufindex = 0; 445 446 #endif /* DEBUG */ 447 448 apic_cpus_info_t *apic_cpus; 449 450 static uint_t apic_cpumask = 0; 451 static uint_t apic_flag; 452 453 /* Flag to indicate that we need to shut down all processors */ 454 static uint_t apic_shutdown_processors; 455 456 uint_t apic_nsec_per_intr = 0; 457 458 /* 459 * apic_let_idle_redistribute can have the following values: 460 * 0 - If clock decremented it from 1 to 0, clock has to call redistribute. 461 * apic_redistribute_lock prevents multiple idle cpus from redistributing 462 */ 463 int apic_num_idle_redistributions = 0; 464 static int apic_let_idle_redistribute = 0; 465 static uint_t apic_nticks = 0; 466 static uint_t apic_skipped_redistribute = 0; 467 468 /* to gather intr data and redistribute */ 469 static void apic_redistribute_compute(void); 470 471 static uint_t last_count_read = 0; 472 static lock_t apic_gethrtime_lock; 473 volatile int apic_hrtime_stamp = 0; 474 volatile hrtime_t apic_nsec_since_boot = 0; 475 static uint_t apic_hertz_count, apic_nsec_per_tick; 476 static hrtime_t apic_nsec_max; 477 478 static hrtime_t apic_last_hrtime = 0; 479 int apic_hrtime_error = 0; 480 int apic_remote_hrterr = 0; 481 int apic_num_nmis = 0; 482 int apic_apic_error = 0; 483 int apic_num_apic_errors = 0; 484 int apic_num_cksum_errors = 0; 485 486 static uchar_t apic_io_id[MAX_IO_APIC]; 487 static uchar_t apic_io_ver[MAX_IO_APIC]; 488 static uchar_t apic_io_vectbase[MAX_IO_APIC]; 489 static uchar_t apic_io_vectend[MAX_IO_APIC]; 490 volatile int32_t *apicioadr[MAX_IO_APIC]; 491 492 /* 493 * First available slot to be used as IRQ index into the apic_irq_table 494 * for those interrupts (like MSI/X) that don't have a physical IRQ. 
495 */ 496 int apic_first_avail_irq = APIC_FIRST_FREE_IRQ; 497 498 /* 499 * apic_ioapic_lock protects the ioapics (reg select), the status, temp_bound 500 * and bound elements of cpus_info and the temp_cpu element of irq_struct 501 */ 502 lock_t apic_ioapic_lock; 503 504 /* 505 * apic_ioapic_reprogram_lock prevents a CPU from exiting 506 * apic_intr_exit before IOAPIC reprogramming information 507 * is collected. 508 */ 509 static lock_t apic_ioapic_reprogram_lock; 510 static int apic_io_max = 0; /* no. of i/o apics enabled */ 511 512 static struct apic_io_intr *apic_io_intrp = 0; 513 static struct apic_bus *apic_busp; 514 515 uchar_t apic_vector_to_irq[APIC_MAX_VECTOR+1]; 516 static uchar_t apic_resv_vector[MAXIPL+1]; 517 518 static char apic_level_intr[APIC_MAX_VECTOR+1]; 519 static int apic_error = 0; 520 /* values which apic_error can take. Not catastrophic, but may help debug */ 521 #define APIC_ERR_BOOT_EOI 0x1 522 #define APIC_ERR_GET_IPIVECT_FAIL 0x2 523 #define APIC_ERR_INVALID_INDEX 0x4 524 #define APIC_ERR_MARK_VECTOR_FAIL 0x8 525 #define APIC_ERR_APIC_ERROR 0x40000000 526 #define APIC_ERR_NMI 0x80000000 527 528 static int apic_cmos_ssb_set = 0; 529 530 static uint32_t eisa_level_intr_mask = 0; 531 /* At least MSB will be set if EISA bus */ 532 533 static int apic_pci_bus_total = 0; 534 static uchar_t apic_single_pci_busid = 0; 535 536 537 /* 538 * airq_mutex protects additions to the apic_irq_table - the first 539 * pointer and any airq_nexts off of that one. It also protects 540 * apic_max_device_irq & apic_min_device_irq. It also guarantees 541 * that share_id is unique as new ids are generated only when new 542 * irq_t structs are linked in. Once linked in the structs are never 543 * deleted. temp_cpu & mps_intr_index field indicate if it is programmed 544 * or allocated. Note that there is a slight gap between allocating in 545 * apic_introp_xlate and programming in addspl. 
546 */ 547 kmutex_t airq_mutex; 548 apic_irq_t *apic_irq_table[APIC_MAX_VECTOR+1]; 549 int apic_max_device_irq = 0; 550 int apic_min_device_irq = APIC_MAX_VECTOR; 551 552 /* use to make sure only one cpu handles the nmi */ 553 static lock_t apic_nmi_lock; 554 /* use to make sure only one cpu handles the error interrupt */ 555 static lock_t apic_error_lock; 556 557 /* 558 * Following declarations are for revectoring; used when ISRs at different 559 * IPLs share an irq. 560 */ 561 static lock_t apic_revector_lock; 562 static int apic_revector_pending = 0; 563 static uchar_t *apic_oldvec_to_newvec; 564 static uchar_t *apic_newvec_to_oldvec; 565 566 /* Ensures that the IOAPIC-reprogramming timeout is not reentrant */ 567 static kmutex_t apic_reprogram_timeout_mutex; 568 569 static struct ioapic_reprogram_data { 570 int valid; /* This entry is valid */ 571 int bindcpu; /* The CPU to which the int will be bound */ 572 unsigned timeouts; /* # times the reprogram timeout was called */ 573 } apic_reprogram_info[APIC_MAX_VECTOR+1]; 574 /* 575 * APIC_MAX_VECTOR + 1 is the maximum # of IRQs as well. apic_reprogram_info 576 * is indexed by IRQ number, NOT by vector number. 577 */ 578 579 580 /* 581 * The following added to identify a software poweroff method if available. 582 */ 583 584 static struct { 585 int poweroff_method; 586 char oem_id[APIC_MPS_OEM_ID_LEN + 1]; /* MAX + 1 for NULL */ 587 char prod_id[APIC_MPS_PROD_ID_LEN + 1]; /* MAX + 1 for NULL */ 588 } apic_mps_ids[] = { 589 { APIC_POWEROFF_VIA_RTC, "INTEL", "ALDER" }, /* 4300 */ 590 { APIC_POWEROFF_VIA_RTC, "NCR", "AMC" }, /* 4300 */ 591 { APIC_POWEROFF_VIA_ASPEN_BMC, "INTEL", "A450NX" }, /* 4400? */ 592 { APIC_POWEROFF_VIA_ASPEN_BMC, "INTEL", "AD450NX" }, /* 4400 */ 593 { APIC_POWEROFF_VIA_ASPEN_BMC, "INTEL", "AC450NX" }, /* 4400R */ 594 { APIC_POWEROFF_VIA_SITKA_BMC, "INTEL", "S450NX" }, /* S50 */ 595 { APIC_POWEROFF_VIA_SITKA_BMC, "INTEL", "SC450NX" } /* S50? 
*/ 596 }; 597 598 int apic_poweroff_method = APIC_POWEROFF_NONE; 599 600 static struct { 601 uchar_t cntl; 602 uchar_t data; 603 } aspen_bmc[] = { 604 { CC_SMS_WR_START, 0x18 }, /* NetFn/LUN */ 605 { CC_SMS_WR_NEXT, 0x24 }, /* Cmd SET_WATCHDOG_TIMER */ 606 { CC_SMS_WR_NEXT, 0x84 }, /* DataByte 1: SMS/OS no log */ 607 { CC_SMS_WR_NEXT, 0x2 }, /* DataByte 2: Power Down */ 608 { CC_SMS_WR_NEXT, 0x0 }, /* DataByte 3: no pre-timeout */ 609 { CC_SMS_WR_NEXT, 0x0 }, /* DataByte 4: timer expir. */ 610 { CC_SMS_WR_NEXT, 0xa }, /* DataByte 5: init countdown */ 611 { CC_SMS_WR_END, 0x0 }, /* DataByte 6: init countdown */ 612 613 { CC_SMS_WR_START, 0x18 }, /* NetFn/LUN */ 614 { CC_SMS_WR_END, 0x22 } /* Cmd RESET_WATCHDOG_TIMER */ 615 }; 616 617 static struct { 618 int port; 619 uchar_t data; 620 } sitka_bmc[] = { 621 { SMS_COMMAND_REGISTER, SMS_WRITE_START }, 622 { SMS_DATA_REGISTER, 0x18 }, /* NetFn/LUN */ 623 { SMS_DATA_REGISTER, 0x24 }, /* Cmd SET_WATCHDOG_TIMER */ 624 { SMS_DATA_REGISTER, 0x84 }, /* DataByte 1: SMS/OS no log */ 625 { SMS_DATA_REGISTER, 0x2 }, /* DataByte 2: Power Down */ 626 { SMS_DATA_REGISTER, 0x0 }, /* DataByte 3: no pre-timeout */ 627 { SMS_DATA_REGISTER, 0x0 }, /* DataByte 4: timer expir. */ 628 { SMS_DATA_REGISTER, 0xa }, /* DataByte 5: init countdown */ 629 { SMS_COMMAND_REGISTER, SMS_WRITE_END }, 630 { SMS_DATA_REGISTER, 0x0 }, /* DataByte 6: init countdown */ 631 632 { SMS_COMMAND_REGISTER, SMS_WRITE_START }, 633 { SMS_DATA_REGISTER, 0x18 }, /* NetFn/LUN */ 634 { SMS_COMMAND_REGISTER, SMS_WRITE_END }, 635 { SMS_DATA_REGISTER, 0x22 } /* Cmd RESET_WATCHDOG_TIMER */ 636 }; 637 638 639 /* Patchable global variables. 
*/ 640 int apic_kmdb_on_nmi = 0; /* 0 - no, 1 - yes enter kmdb */ 641 int apic_debug_mps_id = 0; /* 1 - print MPS ID strings */ 642 643 /* 644 * ACPI definitions 645 */ 646 /* _PIC method arguments */ 647 #define ACPI_PIC_MODE 0 648 #define ACPI_APIC_MODE 1 649 650 /* APIC error flags we care about */ 651 #define APIC_SEND_CS_ERROR 0x01 652 #define APIC_RECV_CS_ERROR 0x02 653 #define APIC_CS_ERRORS (APIC_SEND_CS_ERROR|APIC_RECV_CS_ERROR) 654 655 /* 656 * ACPI variables 657 */ 658 /* 1 = acpi is enabled & working, 0 = acpi is not enabled or not there */ 659 static int apic_enable_acpi = 0; 660 661 /* ACPI Multiple APIC Description Table ptr */ 662 static MULTIPLE_APIC_TABLE *acpi_mapic_dtp = NULL; 663 664 /* ACPI Interrupt Source Override Structure ptr */ 665 static MADT_INTERRUPT_OVERRIDE *acpi_isop = NULL; 666 static int acpi_iso_cnt = 0; 667 668 /* ACPI Non-maskable Interrupt Sources ptr */ 669 static MADT_NMI_SOURCE *acpi_nmi_sp = NULL; 670 static int acpi_nmi_scnt = 0; 671 static MADT_LOCAL_APIC_NMI *acpi_nmi_cp = NULL; 672 static int acpi_nmi_ccnt = 0; 673 674 /* 675 * extern declarations 676 */ 677 extern int intr_clear(void); 678 extern void intr_restore(uint_t); 679 #if defined(__amd64) 680 extern int intpri_use_cr8; 681 #endif /* __amd64 */ 682 683 extern int apic_pci_msi_enable_vector(dev_info_t *, int, int, 684 int, int, int); 685 extern apic_irq_t *apic_find_irq(dev_info_t *, struct intrspec *, int); 686 687 /* 688 * This is the loadable module wrapper 689 */ 690 691 int 692 _init(void) 693 { 694 if (apic_coarse_hrtime) 695 apic_ops.psm_gethrtime = &apic_gettime; 696 return (psm_mod_init(&apic_hdlp, &apic_psm_info)); 697 } 698 699 int 700 _fini(void) 701 { 702 return (psm_mod_fini(&apic_hdlp, &apic_psm_info)); 703 } 704 705 int 706 _info(struct modinfo *modinfop) 707 { 708 return (psm_mod_info(&apic_hdlp, &apic_psm_info, modinfop)); 709 } 710 711 /* 712 * Auto-configuration routines 713 */ 714 715 /* 716 * Look at MPSpec 1.4 (Intel Order # 242016-005) 
for details of what we do here 717 * May work with 1.1 - but not guaranteed. 718 * According to the MP Spec, the MP floating pointer structure 719 * will be searched in the order described below: 720 * 1. In the first kilobyte of Extended BIOS Data Area (EBDA) 721 * 2. Within the last kilobyte of system base memory 722 * 3. In the BIOS ROM address space between 0F0000h and 0FFFFh 723 * Once we find the right signature with proper checksum, we call 724 * either handle_defconf or parse_mpct to get all info necessary for 725 * subsequent operations. 726 */ 727 static int 728 apic_probe() 729 { 730 uint32_t mpct_addr, ebda_start = 0, base_mem_end; 731 caddr_t biosdatap; 732 caddr_t mpct; 733 caddr_t fptr; 734 int i, mpct_size, mapsize, retval = PSM_FAILURE; 735 ushort_t ebda_seg, base_mem_size; 736 struct apic_mpfps_hdr *fpsp; 737 struct apic_mp_cnf_hdr *hdrp; 738 int bypass_cpu_and_ioapics_in_mptables; 739 int acpi_user_options; 740 741 if (apic_forceload < 0) 742 return (retval); 743 744 /* Allow override for MADT-only mode */ 745 acpi_user_options = ddi_prop_get_int(DDI_DEV_T_ANY, ddi_root_node(), 0, 746 "acpi-user-options", 0); 747 apic_use_acpi_madt_only = ((acpi_user_options & ACPI_OUSER_MADT) != 0); 748 749 /* Allow apic_use_acpi to override MADT-only mode */ 750 if (!apic_use_acpi) 751 apic_use_acpi_madt_only = 0; 752 753 retval = acpi_probe(); 754 755 /* 756 * mapin the bios data area 40:0 757 * 40:13h - two-byte location reports the base memory size 758 * 40:0Eh - two-byte location for the exact starting address of 759 * the EBDA segment for EISA 760 */ 761 biosdatap = psm_map_phys(0x400, 0x20, PROT_READ); 762 if (!biosdatap) 763 return (retval); 764 fpsp = (struct apic_mpfps_hdr *)NULL; 765 mapsize = MPFPS_RAM_WIN_LEN; 766 /*LINTED: pointer cast may result in improper alignment */ 767 ebda_seg = *((ushort_t *)(biosdatap+0xe)); 768 /* check the 1k of EBDA */ 769 if (ebda_seg) { 770 ebda_start = ((uint32_t)ebda_seg) << 4; 771 fptr = psm_map_phys(ebda_start, 
MPFPS_RAM_WIN_LEN, PROT_READ); 772 if (fptr) { 773 if (!(fpsp = 774 apic_find_fps_sig(fptr, MPFPS_RAM_WIN_LEN))) 775 psm_unmap_phys(fptr, MPFPS_RAM_WIN_LEN); 776 } 777 } 778 /* If not in EBDA, check the last k of system base memory */ 779 if (!fpsp) { 780 /*LINTED: pointer cast may result in improper alignment */ 781 base_mem_size = *((ushort_t *)(biosdatap + 0x13)); 782 783 if (base_mem_size > 512) 784 base_mem_end = 639 * 1024; 785 else 786 base_mem_end = 511 * 1024; 787 /* if ebda == last k of base mem, skip to check BIOS ROM */ 788 if (base_mem_end != ebda_start) { 789 790 fptr = psm_map_phys(base_mem_end, MPFPS_RAM_WIN_LEN, 791 PROT_READ); 792 793 if (fptr) { 794 if (!(fpsp = apic_find_fps_sig(fptr, 795 MPFPS_RAM_WIN_LEN))) 796 psm_unmap_phys(fptr, MPFPS_RAM_WIN_LEN); 797 } 798 } 799 } 800 psm_unmap_phys(biosdatap, 0x20); 801 802 /* If still cannot find it, check the BIOS ROM space */ 803 if (!fpsp) { 804 mapsize = MPFPS_ROM_WIN_LEN; 805 fptr = psm_map_phys(MPFPS_ROM_WIN_START, 806 MPFPS_ROM_WIN_LEN, PROT_READ); 807 if (fptr) { 808 if (!(fpsp = 809 apic_find_fps_sig(fptr, MPFPS_ROM_WIN_LEN))) { 810 psm_unmap_phys(fptr, MPFPS_ROM_WIN_LEN); 811 return (retval); 812 } 813 } 814 } 815 816 if (apic_checksum((caddr_t)fpsp, fpsp->mpfps_length * 16) != 0) { 817 psm_unmap_phys(fptr, MPFPS_ROM_WIN_LEN); 818 return (retval); 819 } 820 821 apic_spec_rev = fpsp->mpfps_spec_rev; 822 if ((apic_spec_rev != 04) && (apic_spec_rev != 01)) { 823 psm_unmap_phys(fptr, MPFPS_ROM_WIN_LEN); 824 return (retval); 825 } 826 827 /* check IMCR is present or not */ 828 apic_imcrp = fpsp->mpfps_featinfo2 & MPFPS_FEATINFO2_IMCRP; 829 830 /* check default configuration (dual CPUs) */ 831 if ((apic_defconf = fpsp->mpfps_featinfo1) != 0) { 832 psm_unmap_phys(fptr, mapsize); 833 return (apic_handle_defconf()); 834 } 835 836 /* MP Configuration Table */ 837 mpct_addr = (uint32_t)(fpsp->mpfps_mpct_paddr); 838 839 psm_unmap_phys(fptr, mapsize); /* unmap floating ptr struct */ 840 841 /* 842 * Map in 
enough memory for the MP Configuration Table Header. 843 * Use this table to read the total length of the BIOS data and 844 * map in all the info 845 */ 846 /*LINTED: pointer cast may result in improper alignment */ 847 hdrp = (struct apic_mp_cnf_hdr *)psm_map_phys(mpct_addr, 848 sizeof (struct apic_mp_cnf_hdr), PROT_READ); 849 if (!hdrp) 850 return (retval); 851 852 /* check mp configuration table signature PCMP */ 853 if (hdrp->mpcnf_sig != 0x504d4350) { 854 psm_unmap_phys((caddr_t)hdrp, sizeof (struct apic_mp_cnf_hdr)); 855 return (retval); 856 } 857 mpct_size = (int)hdrp->mpcnf_tbl_length; 858 859 apic_set_pwroff_method_from_mpcnfhdr(hdrp); 860 861 psm_unmap_phys((caddr_t)hdrp, sizeof (struct apic_mp_cnf_hdr)); 862 863 if ((retval == PSM_SUCCESS) && !apic_use_acpi_madt_only) { 864 /* This is an ACPI machine No need for further checks */ 865 return (retval); 866 } 867 868 /* 869 * Map in the entries for this machine, ie. Processor 870 * Entry Tables, Bus Entry Tables, etc. 871 * They are in fixed order following one another 872 */ 873 mpct = psm_map_phys(mpct_addr, mpct_size, PROT_READ); 874 if (!mpct) 875 return (retval); 876 877 if (apic_checksum(mpct, mpct_size) != 0) 878 goto apic_fail1; 879 880 881 /*LINTED: pointer cast may result in improper alignment */ 882 hdrp = (struct apic_mp_cnf_hdr *)mpct; 883 /*LINTED: pointer cast may result in improper alignment */ 884 apicadr = (uint32_t *)psm_map_phys((uint32_t)hdrp->mpcnf_local_apic, 885 APIC_LOCAL_MEMLEN, PROT_READ | PROT_WRITE); 886 if (!apicadr) 887 goto apic_fail1; 888 889 /* Parse all information in the tables */ 890 bypass_cpu_and_ioapics_in_mptables = (retval == PSM_SUCCESS); 891 if (apic_parse_mpct(mpct, bypass_cpu_and_ioapics_in_mptables) == 892 PSM_SUCCESS) 893 return (PSM_SUCCESS); 894 895 for (i = 0; i < apic_io_max; i++) 896 psm_unmap_phys((caddr_t)apicioadr[i], APIC_IO_MEMLEN); 897 if (apic_cpus) 898 kmem_free(apic_cpus, sizeof (*apic_cpus) * apic_nproc); 899 if (apicadr) 900 
psm_unmap_phys((caddr_t)apicadr, APIC_LOCAL_MEMLEN); 901 apic_fail1: 902 psm_unmap_phys(mpct, mpct_size); 903 return (retval); 904 } 905 906 static void 907 apic_set_pwroff_method_from_mpcnfhdr(struct apic_mp_cnf_hdr *hdrp) 908 { 909 int i; 910 911 for (i = 0; i < (sizeof (apic_mps_ids) / sizeof (apic_mps_ids[0])); 912 i++) { 913 if ((strncmp(hdrp->mpcnf_oem_str, apic_mps_ids[i].oem_id, 914 strlen(apic_mps_ids[i].oem_id)) == 0) && 915 (strncmp(hdrp->mpcnf_prod_str, apic_mps_ids[i].prod_id, 916 strlen(apic_mps_ids[i].prod_id)) == 0)) { 917 918 apic_poweroff_method = apic_mps_ids[i].poweroff_method; 919 break; 920 } 921 } 922 923 if (apic_debug_mps_id != 0) { 924 cmn_err(CE_CONT, "pcplusmp: MPS OEM ID = '%c%c%c%c%c%c%c%c'" 925 "Product ID = '%c%c%c%c%c%c%c%c%c%c%c%c'\n", 926 hdrp->mpcnf_oem_str[0], 927 hdrp->mpcnf_oem_str[1], 928 hdrp->mpcnf_oem_str[2], 929 hdrp->mpcnf_oem_str[3], 930 hdrp->mpcnf_oem_str[4], 931 hdrp->mpcnf_oem_str[5], 932 hdrp->mpcnf_oem_str[6], 933 hdrp->mpcnf_oem_str[7], 934 hdrp->mpcnf_prod_str[0], 935 hdrp->mpcnf_prod_str[1], 936 hdrp->mpcnf_prod_str[2], 937 hdrp->mpcnf_prod_str[3], 938 hdrp->mpcnf_prod_str[4], 939 hdrp->mpcnf_prod_str[5], 940 hdrp->mpcnf_prod_str[6], 941 hdrp->mpcnf_prod_str[7], 942 hdrp->mpcnf_prod_str[8], 943 hdrp->mpcnf_prod_str[9], 944 hdrp->mpcnf_prod_str[10], 945 hdrp->mpcnf_prod_str[11]); 946 } 947 } 948 949 static int 950 acpi_probe(void) 951 { 952 int i, id, intmax, ver, index, rv; 953 int acpi_verboseflags = 0; 954 int madt_seen, madt_size; 955 APIC_HEADER *ap; 956 MADT_PROCESSOR_APIC *mpa; 957 MADT_IO_APIC *mia; 958 MADT_IO_SAPIC *misa; 959 MADT_INTERRUPT_OVERRIDE *mio; 960 MADT_NMI_SOURCE *mns; 961 MADT_INTERRUPT_SOURCE *mis; 962 MADT_LOCAL_APIC_NMI *mlan; 963 MADT_ADDRESS_OVERRIDE *mao; 964 ACPI_OBJECT_LIST arglist; 965 ACPI_OBJECT arg; 966 int sci; 967 iflag_t sci_flags; 968 volatile int32_t *ioapic; 969 char local_ids[NCPU]; 970 char proc_ids[NCPU]; 971 uchar_t hid; 972 973 if (!apic_use_acpi) 974 return 
(PSM_FAILURE); 975 976 if (AcpiGetFirmwareTable(APIC_SIG, 1, ACPI_LOGICAL_ADDRESSING, 977 (ACPI_TABLE_HEADER **) &acpi_mapic_dtp) != AE_OK) 978 return (PSM_FAILURE); 979 980 apicadr = (uint32_t *)psm_map_phys( 981 (uint32_t)acpi_mapic_dtp->LocalApicAddress, 982 APIC_LOCAL_MEMLEN, PROT_READ | PROT_WRITE); 983 if (!apicadr) 984 return (PSM_FAILURE); 985 986 id = apicadr[APIC_LID_REG]; 987 local_ids[0] = (uchar_t)(((uint_t)id) >> 24); 988 apic_nproc = index = 1; 989 apic_io_max = 0; 990 991 ap = (APIC_HEADER *) (acpi_mapic_dtp + 1); 992 madt_size = acpi_mapic_dtp->Length; 993 madt_seen = sizeof (*acpi_mapic_dtp); 994 995 while (madt_seen < madt_size) { 996 switch (ap->Type) { 997 case APIC_PROCESSOR: 998 mpa = (MADT_PROCESSOR_APIC *) ap; 999 if (mpa->ProcessorEnabled) { 1000 if (mpa->LocalApicId == local_ids[0]) 1001 proc_ids[0] = mpa->ProcessorId; 1002 else if (apic_nproc < NCPU) { 1003 local_ids[index] = mpa->LocalApicId; 1004 proc_ids[index] = mpa->ProcessorId; 1005 index++; 1006 apic_nproc++; 1007 } else 1008 cmn_err(CE_WARN, "pcplusmp: exceeded " 1009 "maximum no. 
of CPUs (= %d)", NCPU); 1010 } 1011 break; 1012 1013 case APIC_IO: 1014 mia = (MADT_IO_APIC *) ap; 1015 if (apic_io_max < MAX_IO_APIC) { 1016 apic_io_id[apic_io_max] = mia->IoApicId; 1017 apic_io_vectbase[apic_io_max] = 1018 mia->Interrupt; 1019 ioapic = apicioadr[apic_io_max] = 1020 (int32_t *)psm_map_phys( 1021 (uint32_t)mia->Address, 1022 APIC_IO_MEMLEN, PROT_READ | PROT_WRITE); 1023 if (!ioapic) 1024 goto cleanup; 1025 apic_io_max++; 1026 } 1027 break; 1028 1029 case APIC_XRUPT_OVERRIDE: 1030 mio = (MADT_INTERRUPT_OVERRIDE *) ap; 1031 if (acpi_isop == NULL) 1032 acpi_isop = mio; 1033 acpi_iso_cnt++; 1034 break; 1035 1036 case APIC_NMI: 1037 /* UNIMPLEMENTED */ 1038 mns = (MADT_NMI_SOURCE *) ap; 1039 if (acpi_nmi_sp == NULL) 1040 acpi_nmi_sp = mns; 1041 acpi_nmi_scnt++; 1042 1043 cmn_err(CE_NOTE, "!apic: nmi source: %d %d %d\n", 1044 mns->Interrupt, mns->Polarity, 1045 mns->TriggerMode); 1046 break; 1047 1048 case APIC_LOCAL_NMI: 1049 /* UNIMPLEMENTED */ 1050 mlan = (MADT_LOCAL_APIC_NMI *) ap; 1051 if (acpi_nmi_cp == NULL) 1052 acpi_nmi_cp = mlan; 1053 acpi_nmi_ccnt++; 1054 1055 cmn_err(CE_NOTE, "!apic: local nmi: %d %d %d %d\n", 1056 mlan->ProcessorId, mlan->Polarity, 1057 mlan->TriggerMode, mlan->Lint); 1058 break; 1059 1060 case APIC_ADDRESS_OVERRIDE: 1061 /* UNIMPLEMENTED */ 1062 mao = (MADT_ADDRESS_OVERRIDE *) ap; 1063 cmn_err(CE_NOTE, "!apic: address override: %lx\n", 1064 (long)mao->Address); 1065 break; 1066 1067 case APIC_IO_SAPIC: 1068 /* UNIMPLEMENTED */ 1069 misa = (MADT_IO_SAPIC *) ap; 1070 1071 cmn_err(CE_NOTE, "!apic: io sapic: %d %d %lx\n", 1072 misa->IoSapicId, misa->InterruptBase, 1073 (long)misa->Address); 1074 break; 1075 1076 case APIC_XRUPT_SOURCE: 1077 /* UNIMPLEMENTED */ 1078 mis = (MADT_INTERRUPT_SOURCE *) ap; 1079 1080 cmn_err(CE_NOTE, 1081 "!apic: irq source: %d %d %d %d %d %d %d\n", 1082 mis->ProcessorId, mis->ProcessorEid, 1083 mis->Interrupt, mis->Polarity, 1084 mis->TriggerMode, mis->InterruptType, 1085 mis->IoSapicVector); 1086 
break; 1087 case APIC_RESERVED: 1088 default: 1089 goto cleanup; 1090 } 1091 1092 /* advance to next entry */ 1093 madt_seen += ap->Length; 1094 ap = (APIC_HEADER *)(((char *)ap) + ap->Length); 1095 } 1096 1097 if ((apic_cpus = kmem_zalloc(sizeof (*apic_cpus) * apic_nproc, 1098 KM_NOSLEEP)) == NULL) 1099 goto cleanup; 1100 1101 apic_cpumask = (1 << apic_nproc) - 1; 1102 1103 /* 1104 * ACPI doesn't provide the local apic ver, get it directly from the 1105 * local apic 1106 */ 1107 ver = apicadr[APIC_VERS_REG]; 1108 for (i = 0; i < apic_nproc; i++) { 1109 apic_cpus[i].aci_local_id = local_ids[i]; 1110 apic_cpus[i].aci_local_ver = (uchar_t)(ver & 0xFF); 1111 } 1112 for (i = 0; i < apic_io_max; i++) { 1113 ioapic = apicioadr[i]; 1114 1115 /* 1116 * need to check Sitka on the following acpi problem 1117 * On the Sitka, the ioapic's apic_id field isn't reporting 1118 * the actual io apic id. We have reported this problem 1119 * to Intel. Until they fix the problem, we will get the 1120 * actual id directly from the ioapic. 
1121 */ 1122 ioapic[APIC_IO_REG] = APIC_ID_CMD; 1123 id = ioapic[APIC_IO_DATA]; 1124 hid = (uchar_t)(((uint_t)id) >> 24); 1125 1126 if (hid != apic_io_id[i]) { 1127 if (apic_io_id[i] == 0) 1128 apic_io_id[i] = hid; 1129 else { /* set ioapic id to whatever reported by ACPI */ 1130 id = ((int32_t)apic_io_id[i]) << 24; 1131 ioapic[APIC_IO_REG] = APIC_ID_CMD; 1132 ioapic[APIC_IO_DATA] = id; 1133 } 1134 } 1135 ioapic[APIC_IO_REG] = APIC_VERS_CMD; 1136 ver = ioapic[APIC_IO_DATA]; 1137 apic_io_ver[i] = (uchar_t)(ver & 0xff); 1138 intmax = (ver >> 16) & 0xff; 1139 apic_io_vectend[i] = apic_io_vectbase[i] + intmax; 1140 if (apic_first_avail_irq <= apic_io_vectend[i]) 1141 apic_first_avail_irq = apic_io_vectend[i] + 1; 1142 } 1143 1144 1145 /* 1146 * Process SCI configuration here 1147 * An error may be returned here if 1148 * acpi-user-options specifies legacy mode 1149 * (no SCI, no ACPI mode) 1150 */ 1151 if (acpica_get_sci(&sci, &sci_flags) != AE_OK) 1152 sci = -1; 1153 1154 /* 1155 * Now call acpi_init() to generate namespaces 1156 * If this fails, we don't attempt to use ACPI 1157 * even if we were able to get a MADT above 1158 */ 1159 if (acpica_init() != AE_OK) 1160 goto cleanup; 1161 1162 /* 1163 * Squirrel away the SCI and flags for later on 1164 * in apic_picinit() when we're ready 1165 */ 1166 apic_sci_vect = sci; 1167 apic_sci_flags = sci_flags; 1168 1169 if (apic_verbose & APIC_VERBOSE_IRQ_FLAG) 1170 acpi_verboseflags |= PSM_VERBOSE_IRQ_FLAG; 1171 1172 if (apic_verbose & APIC_VERBOSE_POWEROFF_FLAG) 1173 acpi_verboseflags |= PSM_VERBOSE_POWEROFF_FLAG; 1174 1175 if (apic_verbose & APIC_VERBOSE_POWEROFF_PAUSE_FLAG) 1176 acpi_verboseflags |= PSM_VERBOSE_POWEROFF_PAUSE_FLAG; 1177 1178 if (acpi_psm_init(apic_psm_info.p_mach_idstring, acpi_verboseflags) == 1179 ACPI_PSM_FAILURE) 1180 goto cleanup; 1181 1182 /* Enable ACPI APIC interrupt routing */ 1183 arglist.Count = 1; 1184 arglist.Pointer = &arg; 1185 arg.Type = ACPI_TYPE_INTEGER; 1186 arg.Integer.Value = 
ACPI_APIC_MODE; /* 1 */ 1187 rv = AcpiEvaluateObject(NULL, "\\_PIC", &arglist, NULL); 1188 if (rv == AE_OK) { 1189 build_reserved_irqlist((uchar_t *)apic_reserved_irqlist); 1190 apic_enable_acpi = 1; 1191 if (apic_use_acpi_madt_only) { 1192 cmn_err(CE_CONT, 1193 "?Using ACPI for CPU/IOAPIC information ONLY\n"); 1194 } 1195 return (PSM_SUCCESS); 1196 } 1197 /* if setting APIC mode failed above, we fall through to cleanup */ 1198 1199 cleanup: 1200 if (apicadr != NULL) { 1201 psm_unmap_phys((caddr_t)apicadr, APIC_LOCAL_MEMLEN); 1202 apicadr = NULL; 1203 } 1204 apic_nproc = 0; 1205 for (i = 0; i < apic_io_max; i++) { 1206 psm_unmap_phys((caddr_t)apicioadr[i], APIC_IO_MEMLEN); 1207 apicioadr[i] = NULL; 1208 } 1209 apic_io_max = 0; 1210 acpi_isop = NULL; 1211 acpi_iso_cnt = 0; 1212 acpi_nmi_sp = NULL; 1213 acpi_nmi_scnt = 0; 1214 acpi_nmi_cp = NULL; 1215 acpi_nmi_ccnt = 0; 1216 return (PSM_FAILURE); 1217 } 1218 1219 /* 1220 * Handle default configuration. Fill in reqd global variables & tables 1221 * Fill all details as MP table does not give any more info 1222 */ 1223 static int 1224 apic_handle_defconf() 1225 { 1226 uint_t lid; 1227 1228 /*LINTED: pointer cast may result in improper alignment */ 1229 apicioadr[0] = (int32_t *)psm_map_phys(APIC_IO_ADDR, 1230 APIC_IO_MEMLEN, PROT_READ | PROT_WRITE); 1231 /*LINTED: pointer cast may result in improper alignment */ 1232 apicadr = (uint32_t *)psm_map_phys(APIC_LOCAL_ADDR, 1233 APIC_LOCAL_MEMLEN, PROT_READ | PROT_WRITE); 1234 apic_cpus = (apic_cpus_info_t *) 1235 kmem_zalloc(sizeof (*apic_cpus) * 2, KM_NOSLEEP); 1236 if ((!apicadr) || (!apicioadr[0]) || (!apic_cpus)) 1237 goto apic_handle_defconf_fail; 1238 apic_cpumask = 3; 1239 apic_nproc = 2; 1240 lid = apicadr[APIC_LID_REG]; 1241 apic_cpus[0].aci_local_id = (uchar_t)(lid >> APIC_ID_BIT_OFFSET); 1242 /* 1243 * According to the PC+MP spec 1.1, the local ids 1244 * for the default configuration has to be 0 or 1 1245 */ 1246 if (apic_cpus[0].aci_local_id == 1) 1247 
apic_cpus[1].aci_local_id = 0; 1248 else if (apic_cpus[0].aci_local_id == 0) 1249 apic_cpus[1].aci_local_id = 1; 1250 else 1251 goto apic_handle_defconf_fail; 1252 1253 apic_io_id[0] = 2; 1254 apic_io_max = 1; 1255 if (apic_defconf >= 5) { 1256 apic_cpus[0].aci_local_ver = APIC_INTEGRATED_VERS; 1257 apic_cpus[1].aci_local_ver = APIC_INTEGRATED_VERS; 1258 apic_io_ver[0] = APIC_INTEGRATED_VERS; 1259 } else { 1260 apic_cpus[0].aci_local_ver = 0; /* 82489 DX */ 1261 apic_cpus[1].aci_local_ver = 0; 1262 apic_io_ver[0] = 0; 1263 } 1264 if (apic_defconf == 2 || apic_defconf == 3 || apic_defconf == 6) 1265 eisa_level_intr_mask = (inb(EISA_LEVEL_CNTL + 1) << 8) | 1266 inb(EISA_LEVEL_CNTL) | ((uint_t)INT32_MAX + 1); 1267 return (PSM_SUCCESS); 1268 1269 apic_handle_defconf_fail: 1270 if (apic_cpus) 1271 kmem_free(apic_cpus, sizeof (*apic_cpus) * 2); 1272 if (apicadr) 1273 psm_unmap_phys((caddr_t)apicadr, APIC_LOCAL_MEMLEN); 1274 if (apicioadr[0]) 1275 psm_unmap_phys((caddr_t)apicioadr[0], APIC_IO_MEMLEN); 1276 return (PSM_FAILURE); 1277 } 1278 1279 /* Parse the entries in MP configuration table and collect info that we need */ 1280 static int 1281 apic_parse_mpct(caddr_t mpct, int bypass_cpus_and_ioapics) 1282 { 1283 struct apic_procent *procp; 1284 struct apic_bus *busp; 1285 struct apic_io_entry *ioapicp; 1286 struct apic_io_intr *intrp; 1287 volatile int32_t *ioapic; 1288 uint_t lid; 1289 int id; 1290 uchar_t hid; 1291 1292 /*LINTED: pointer cast may result in improper alignment */ 1293 procp = (struct apic_procent *)(mpct + sizeof (struct apic_mp_cnf_hdr)); 1294 1295 /* No need to count cpu entries if we won't use them */ 1296 if (!bypass_cpus_and_ioapics) { 1297 1298 /* Find max # of CPUS and allocate structure accordingly */ 1299 apic_nproc = 0; 1300 while (procp->proc_entry == APIC_CPU_ENTRY) { 1301 if (procp->proc_cpuflags & CPUFLAGS_EN) { 1302 apic_nproc++; 1303 } 1304 procp++; 1305 } 1306 if (apic_nproc > NCPU) 1307 cmn_err(CE_WARN, "pcplusmp: exceeded " 1308 
"maximum no. of CPUs (= %d)", NCPU); 1309 if (!apic_nproc || !(apic_cpus = (apic_cpus_info_t *) 1310 kmem_zalloc(sizeof (*apic_cpus)*apic_nproc, KM_NOSLEEP))) 1311 return (PSM_FAILURE); 1312 } 1313 1314 /*LINTED: pointer cast may result in improper alignment */ 1315 procp = (struct apic_procent *)(mpct + sizeof (struct apic_mp_cnf_hdr)); 1316 1317 /* 1318 * start with index 1 as 0 needs to be filled in with Boot CPU, but 1319 * if we're bypassing this information, it has already been filled 1320 * in by acpi_probe(), so don't overwrite it. 1321 */ 1322 if (!bypass_cpus_and_ioapics) 1323 apic_nproc = 1; 1324 1325 while (procp->proc_entry == APIC_CPU_ENTRY) { 1326 /* check whether the cpu exists or not */ 1327 if (!bypass_cpus_and_ioapics && 1328 procp->proc_cpuflags & CPUFLAGS_EN) { 1329 if (procp->proc_cpuflags & CPUFLAGS_BP) { /* Boot CPU */ 1330 lid = apicadr[APIC_LID_REG]; 1331 apic_cpus[0].aci_local_id = procp->proc_apicid; 1332 if (apic_cpus[0].aci_local_id != 1333 (uchar_t)(lid >> APIC_ID_BIT_OFFSET)) { 1334 return (PSM_FAILURE); 1335 } 1336 apic_cpus[0].aci_local_ver = 1337 procp->proc_version; 1338 } else { 1339 1340 apic_cpus[apic_nproc].aci_local_id = 1341 procp->proc_apicid; 1342 apic_cpus[apic_nproc].aci_local_ver = 1343 procp->proc_version; 1344 apic_nproc++; 1345 1346 } 1347 } 1348 procp++; 1349 } 1350 1351 if (!bypass_cpus_and_ioapics) { 1352 /* convert the number of processors into a cpumask */ 1353 apic_cpumask = (1 << apic_nproc) - 1; 1354 } 1355 1356 /* 1357 * Save start of bus entries for later use. 1358 * Get EISA level cntrl if EISA bus is present. 
1359 * Also get the CPI bus id for single CPI bus case 1360 */ 1361 apic_busp = busp = (struct apic_bus *)procp; 1362 while (busp->bus_entry == APIC_BUS_ENTRY) { 1363 lid = apic_find_bus_type((char *)&busp->bus_str1); 1364 if (lid == BUS_EISA) { 1365 eisa_level_intr_mask = (inb(EISA_LEVEL_CNTL + 1) << 8) | 1366 inb(EISA_LEVEL_CNTL) | ((uint_t)INT32_MAX + 1); 1367 } else if (lid == BUS_PCI) { 1368 /* 1369 * apic_single_pci_busid will be used only if 1370 * apic_pic_bus_total is equal to 1 1371 */ 1372 apic_pci_bus_total++; 1373 apic_single_pci_busid = busp->bus_id; 1374 } 1375 busp++; 1376 } 1377 1378 ioapicp = (struct apic_io_entry *)busp; 1379 1380 if (!bypass_cpus_and_ioapics) 1381 apic_io_max = 0; 1382 do { 1383 if (!bypass_cpus_and_ioapics && apic_io_max < MAX_IO_APIC) { 1384 if (ioapicp->io_flags & IOAPIC_FLAGS_EN) { 1385 apic_io_id[apic_io_max] = ioapicp->io_apicid; 1386 apic_io_ver[apic_io_max] = ioapicp->io_version; 1387 /*LINTED: pointer cast may result in improper alignment */ 1388 apicioadr[apic_io_max] = 1389 (int32_t *)psm_map_phys( 1390 (uint32_t)ioapicp->io_apic_addr, 1391 APIC_IO_MEMLEN, PROT_READ | PROT_WRITE); 1392 1393 if (!apicioadr[apic_io_max]) 1394 return (PSM_FAILURE); 1395 1396 ioapic = apicioadr[apic_io_max]; 1397 ioapic[APIC_IO_REG] = APIC_ID_CMD; 1398 id = ioapic[APIC_IO_DATA]; 1399 hid = (uchar_t)(((uint_t)id) >> 24); 1400 1401 if (hid != apic_io_id[apic_io_max]) { 1402 if (apic_io_id[apic_io_max] == 0) 1403 apic_io_id[apic_io_max] = hid; 1404 else { 1405 /* 1406 * set ioapic id to whatever 1407 * reported by MPS 1408 * 1409 * may not need to set index 1410 * again ??? 
1411 * take it out and try 1412 */ 1413 1414 id = ((int32_t) 1415 apic_io_id[apic_io_max]) << 1416 24; 1417 1418 ioapic[APIC_IO_REG] = 1419 APIC_ID_CMD; 1420 1421 ioapic[APIC_IO_DATA] = id; 1422 1423 } 1424 } 1425 apic_io_max++; 1426 } 1427 } 1428 ioapicp++; 1429 } while (ioapicp->io_entry == APIC_IO_ENTRY); 1430 1431 apic_io_intrp = (struct apic_io_intr *)ioapicp; 1432 1433 intrp = apic_io_intrp; 1434 while (intrp->intr_entry == APIC_IO_INTR_ENTRY) { 1435 if ((intrp->intr_irq > APIC_MAX_ISA_IRQ) || 1436 (apic_find_bus(intrp->intr_busid) == BUS_PCI)) { 1437 apic_irq_translate = 1; 1438 break; 1439 } 1440 intrp++; 1441 } 1442 1443 return (PSM_SUCCESS); 1444 } 1445 1446 boolean_t 1447 apic_cpu_in_range(int cpu) 1448 { 1449 return ((cpu & ~IRQ_USER_BOUND) < apic_nproc); 1450 } 1451 1452 static struct apic_mpfps_hdr * 1453 apic_find_fps_sig(caddr_t cptr, int len) 1454 { 1455 int i; 1456 1457 /* Look for the pattern "_MP_" */ 1458 for (i = 0; i < len; i += 16) { 1459 if ((*(cptr+i) == '_') && 1460 (*(cptr+i+1) == 'M') && 1461 (*(cptr+i+2) == 'P') && 1462 (*(cptr+i+3) == '_')) 1463 /*LINTED: pointer cast may result in improper alignment */ 1464 return ((struct apic_mpfps_hdr *)(cptr + i)); 1465 } 1466 return (NULL); 1467 } 1468 1469 static int 1470 apic_checksum(caddr_t bptr, int len) 1471 { 1472 int i; 1473 uchar_t cksum; 1474 1475 cksum = 0; 1476 for (i = 0; i < len; i++) 1477 cksum += *bptr++; 1478 return ((int)cksum); 1479 } 1480 1481 1482 /* 1483 * Initialise vector->ipl and ipl->pri arrays. level_intr and irqtable 1484 * are also set to NULL. vector->irq is set to a value which cannot map 1485 * to a real irq to show that it is free. 
1486 */ 1487 void 1488 apic_init() 1489 { 1490 int i; 1491 int *iptr; 1492 1493 int j = 1; 1494 apic_ipltopri[0] = APIC_VECTOR_PER_IPL; /* leave 0 for idle */ 1495 for (i = 0; i < (APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL); i++) { 1496 if ((i < ((APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL) - 1)) && 1497 (apic_vectortoipl[i + 1] == apic_vectortoipl[i])) 1498 /* get to highest vector at the same ipl */ 1499 continue; 1500 for (; j <= apic_vectortoipl[i]; j++) { 1501 apic_ipltopri[j] = (i << APIC_IPL_SHIFT) + 1502 APIC_BASE_VECT; 1503 } 1504 } 1505 for (; j < MAXIPL + 1; j++) 1506 /* fill up any empty ipltopri slots */ 1507 apic_ipltopri[j] = (i << APIC_IPL_SHIFT) + APIC_BASE_VECT; 1508 1509 /* cpu 0 is always up */ 1510 apic_cpus[0].aci_status = APIC_CPU_ONLINE | APIC_CPU_INTR_ENABLE; 1511 1512 iptr = (int *)&apic_irq_table[0]; 1513 for (i = 0; i <= APIC_MAX_VECTOR; i++) { 1514 apic_level_intr[i] = 0; 1515 *iptr++ = NULL; 1516 apic_vector_to_irq[i] = APIC_RESV_IRQ; 1517 apic_reprogram_info[i].valid = 0; 1518 apic_reprogram_info[i].bindcpu = 0; 1519 apic_reprogram_info[i].timeouts = 0; 1520 } 1521 1522 /* 1523 * Allocate a dummy irq table entry for the reserved entry. 1524 * This takes care of the race between removing an irq and 1525 * clock detecting a CPU in that irq during interrupt load 1526 * sampling. 1527 */ 1528 apic_irq_table[APIC_RESV_IRQ] = 1529 kmem_zalloc(sizeof (apic_irq_t), KM_NOSLEEP); 1530 1531 mutex_init(&airq_mutex, NULL, MUTEX_DEFAULT, NULL); 1532 mutex_init(&apic_reprogram_timeout_mutex, NULL, MUTEX_DEFAULT, NULL); 1533 #if defined(__amd64) 1534 /* 1535 * Make cpu-specific interrupt info point to cr8pri vector 1536 */ 1537 for (i = 0; i <= MAXIPL; i++) 1538 apic_cr8pri[i] = apic_ipltopri[i] >> APIC_IPL_SHIFT; 1539 CPU->cpu_pri_data = apic_cr8pri; 1540 intpri_use_cr8 = 1; 1541 #endif /* __amd64 */ 1542 } 1543 1544 /* 1545 * handler for APIC Error interrupt. 
Just print a warning and continue 1546 */ 1547 static int 1548 apic_error_intr() 1549 { 1550 uint_t error0, error1, error; 1551 uint_t i; 1552 1553 /* 1554 * We need to write before read as per 7.4.17 of system prog manual. 1555 * We do both and or the results to be safe 1556 */ 1557 error0 = apicadr[APIC_ERROR_STATUS]; 1558 apicadr[APIC_ERROR_STATUS] = 0; 1559 error1 = apicadr[APIC_ERROR_STATUS]; 1560 error = error0 | error1; 1561 1562 /* 1563 * Clear the APIC error status (do this on all cpus that enter here) 1564 * (two writes are required due to the semantics of accessing the 1565 * error status register.) 1566 */ 1567 apicadr[APIC_ERROR_STATUS] = 0; 1568 apicadr[APIC_ERROR_STATUS] = 0; 1569 1570 /* 1571 * Prevent more than 1 CPU from handling error interrupt causing 1572 * double printing (interleave of characters from multiple 1573 * CPU's when using prom_printf) 1574 */ 1575 if (lock_try(&apic_error_lock) == 0) 1576 return (error ? DDI_INTR_CLAIMED : DDI_INTR_UNCLAIMED); 1577 if (error) { 1578 #if DEBUG 1579 if (apic_debug) 1580 debug_enter("pcplusmp: APIC Error interrupt received"); 1581 #endif /* DEBUG */ 1582 if (apic_panic_on_apic_error) 1583 cmn_err(CE_PANIC, 1584 "APIC Error interrupt on CPU %d. Status = %x\n", 1585 psm_get_cpu_id(), error); 1586 else { 1587 if ((error & ~APIC_CS_ERRORS) == 0) { 1588 /* cksum error only */ 1589 apic_error |= APIC_ERR_APIC_ERROR; 1590 apic_apic_error |= error; 1591 apic_num_apic_errors++; 1592 apic_num_cksum_errors++; 1593 } else { 1594 /* 1595 * prom_printf is the best shot we have of 1596 * something which is problem free from 1597 * high level/NMI type of interrupts 1598 */ 1599 prom_printf("APIC Error interrupt on CPU %d. 
" 1600 "Status 0 = %x, Status 1 = %x\n", 1601 psm_get_cpu_id(), error0, error1); 1602 apic_error |= APIC_ERR_APIC_ERROR; 1603 apic_apic_error |= error; 1604 apic_num_apic_errors++; 1605 for (i = 0; i < apic_error_display_delay; i++) { 1606 tenmicrosec(); 1607 } 1608 /* 1609 * provide more delay next time limited to 1610 * roughly 1 clock tick time 1611 */ 1612 if (apic_error_display_delay < 500) 1613 apic_error_display_delay *= 2; 1614 } 1615 } 1616 lock_clear(&apic_error_lock); 1617 return (DDI_INTR_CLAIMED); 1618 } else { 1619 lock_clear(&apic_error_lock); 1620 return (DDI_INTR_UNCLAIMED); 1621 } 1622 /* NOTREACHED */ 1623 } 1624 1625 /* 1626 * Turn off the mask bit in the performance counter Local Vector Table entry. 1627 */ 1628 static void 1629 apic_cpcovf_mask_clear(void) 1630 { 1631 apicadr[APIC_PCINT_VECT] &= ~APIC_LVT_MASK; 1632 } 1633 1634 static void 1635 apic_init_intr() 1636 { 1637 processorid_t cpun = psm_get_cpu_id(); 1638 1639 #if defined(__amd64) 1640 setcr8((ulong_t)(APIC_MASK_ALL >> APIC_IPL_SHIFT)); 1641 #else 1642 apicadr[APIC_TASK_REG] = APIC_MASK_ALL; 1643 #endif 1644 1645 if (apic_flat_model) 1646 apicadr[APIC_FORMAT_REG] = APIC_FLAT_MODEL; 1647 else 1648 apicadr[APIC_FORMAT_REG] = APIC_CLUSTER_MODEL; 1649 apicadr[APIC_DEST_REG] = AV_HIGH_ORDER >> cpun; 1650 1651 /* need to enable APIC before unmasking NMI */ 1652 apicadr[APIC_SPUR_INT_REG] = AV_UNIT_ENABLE | APIC_SPUR_INTR; 1653 1654 apicadr[APIC_LOCAL_TIMER] = AV_MASK; 1655 apicadr[APIC_INT_VECT0] = AV_MASK; /* local intr reg 0 */ 1656 apicadr[APIC_INT_VECT1] = AV_NMI; /* enable NMI */ 1657 1658 if (apic_cpus[cpun].aci_local_ver < APIC_INTEGRATED_VERS) 1659 return; 1660 1661 /* Enable performance counter overflow interrupt */ 1662 1663 if ((x86_feature & X86_MSR) != X86_MSR) 1664 apic_enable_cpcovf_intr = 0; 1665 if (apic_enable_cpcovf_intr) { 1666 if (apic_cpcovf_vect == 0) { 1667 int ipl = APIC_PCINT_IPL; 1668 int irq = apic_get_ipivect(ipl, -1); 1669 1670 ASSERT(irq != -1); 1671 
apic_cpcovf_vect = apic_irq_table[irq]->airq_vector; 1672 ASSERT(apic_cpcovf_vect); 1673 (void) add_avintr(NULL, ipl, 1674 (avfunc)kcpc_hw_overflow_intr, 1675 "apic pcint", irq, NULL, NULL, NULL, NULL); 1676 kcpc_hw_overflow_intr_installed = 1; 1677 kcpc_hw_enable_cpc_intr = apic_cpcovf_mask_clear; 1678 } 1679 apicadr[APIC_PCINT_VECT] = apic_cpcovf_vect; 1680 } 1681 1682 /* Enable error interrupt */ 1683 1684 if (apic_enable_error_intr) { 1685 if (apic_errvect == 0) { 1686 int ipl = 0xf; /* get highest priority intr */ 1687 int irq = apic_get_ipivect(ipl, -1); 1688 1689 ASSERT(irq != -1); 1690 apic_errvect = apic_irq_table[irq]->airq_vector; 1691 ASSERT(apic_errvect); 1692 /* 1693 * Not PSMI compliant, but we are going to merge 1694 * with ON anyway 1695 */ 1696 (void) add_avintr((void *)NULL, ipl, 1697 (avfunc)apic_error_intr, "apic error intr", 1698 irq, NULL, NULL, NULL, NULL); 1699 } 1700 apicadr[APIC_ERR_VECT] = apic_errvect; 1701 apicadr[APIC_ERROR_STATUS] = 0; 1702 apicadr[APIC_ERROR_STATUS] = 0; 1703 } 1704 } 1705 1706 static void 1707 apic_disable_local_apic() 1708 { 1709 apicadr[APIC_TASK_REG] = APIC_MASK_ALL; 1710 apicadr[APIC_LOCAL_TIMER] = AV_MASK; 1711 apicadr[APIC_INT_VECT0] = AV_MASK; /* local intr reg 0 */ 1712 apicadr[APIC_INT_VECT1] = AV_MASK; /* disable NMI */ 1713 apicadr[APIC_ERR_VECT] = AV_MASK; /* and error interrupt */ 1714 apicadr[APIC_PCINT_VECT] = AV_MASK; /* and perf counter intr */ 1715 apicadr[APIC_SPUR_INT_REG] = APIC_SPUR_INTR; 1716 } 1717 1718 static void 1719 apic_picinit(void) 1720 { 1721 int i, j; 1722 uint_t isr; 1723 volatile int32_t *ioapic; 1724 apic_irq_t *irqptr; 1725 struct intrspec ispec; 1726 1727 /* 1728 * On UniSys Model 6520, the BIOS leaves vector 0x20 isr 1729 * bit on without clearing it with EOI. Since softint 1730 * uses vector 0x20 to interrupt itself, so softint will 1731 * not work on this machine. In order to fix this problem 1732 * a check is made to verify all the isr bits are clear. 
1733 * If not, EOIs are issued to clear the bits. 1734 */ 1735 for (i = 7; i >= 1; i--) { 1736 if ((isr = apicadr[APIC_ISR_REG + (i * 4)]) != 0) 1737 for (j = 0; ((j < 32) && (isr != 0)); j++) 1738 if (isr & (1 << j)) { 1739 apicadr[APIC_EOI_REG] = 0; 1740 isr &= ~(1 << j); 1741 apic_error |= APIC_ERR_BOOT_EOI; 1742 } 1743 } 1744 1745 /* set a flag so we know we have run apic_picinit() */ 1746 apic_flag = 1; 1747 LOCK_INIT_CLEAR(&apic_gethrtime_lock); 1748 LOCK_INIT_CLEAR(&apic_ioapic_lock); 1749 LOCK_INIT_CLEAR(&apic_revector_lock); 1750 LOCK_INIT_CLEAR(&apic_ioapic_reprogram_lock); 1751 LOCK_INIT_CLEAR(&apic_error_lock); 1752 1753 picsetup(); /* initialise the 8259 */ 1754 1755 /* add nmi handler - least priority nmi handler */ 1756 LOCK_INIT_CLEAR(&apic_nmi_lock); 1757 1758 if (!psm_add_nmintr(0, (avfunc) apic_nmi_intr, 1759 "pcplusmp NMI handler", (caddr_t)NULL)) 1760 cmn_err(CE_WARN, "pcplusmp: Unable to add nmi handler"); 1761 1762 apic_init_intr(); 1763 1764 /* enable apic mode if imcr present */ 1765 if (apic_imcrp) { 1766 outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT); 1767 outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_APIC); 1768 } 1769 1770 /* mask interrupt vectors */ 1771 for (j = 0; j < apic_io_max; j++) { 1772 int intin_max; 1773 ioapic = apicioadr[j]; 1774 ioapic[APIC_IO_REG] = APIC_VERS_CMD; 1775 /* Bits 23-16 define the maximum redirection entries */ 1776 intin_max = (ioapic[APIC_IO_DATA] >> 16) & 0xff; 1777 for (i = 0; i < intin_max; i++) { 1778 ioapic[APIC_IO_REG] = APIC_RDT_CMD + 2 * i; 1779 ioapic[APIC_IO_DATA] = AV_MASK; 1780 } 1781 } 1782 1783 /* 1784 * Hack alert: deal with ACPI SCI interrupt chicken/egg here 1785 */ 1786 if (apic_sci_vect > 0) { 1787 /* 1788 * acpica has already done add_avintr(); we just 1789 * to finish the job by mimicing translate_irq() 1790 * 1791 * Fake up an intrspec and setup the tables 1792 */ 1793 ispec.intrspec_vec = apic_sci_vect; 1794 ispec.intrspec_pri = SCI_IPL; 1795 1796 if (apic_setup_irq_table(NULL, apic_sci_vect, 
NULL, 1797 &ispec, &apic_sci_flags, DDI_INTR_TYPE_FIXED) < 0) { 1798 cmn_err(CE_WARN, "!apic: SCI setup failed"); 1799 return; 1800 } 1801 irqptr = apic_irq_table[apic_sci_vect]; 1802 1803 /* Program I/O APIC */ 1804 (void) apic_setup_io_intr(irqptr, apic_sci_vect); 1805 1806 irqptr->airq_share++; 1807 } 1808 } 1809 1810 1811 static void 1812 apic_cpu_start(processorid_t cpun, caddr_t rm_code) 1813 { 1814 int loop_count; 1815 uint32_t vector; 1816 uint_t cpu_id, iflag; 1817 1818 cpu_id = apic_cpus[cpun].aci_local_id; 1819 1820 apic_cmos_ssb_set = 1; 1821 1822 /* 1823 * Interrupts on BSP cpu will be disabled during these startup 1824 * steps in order to avoid unwanted side effects from 1825 * executing interrupt handlers on a problematic BIOS. 1826 */ 1827 1828 iflag = intr_clear(); 1829 outb(CMOS_ADDR, SSB); 1830 outb(CMOS_DATA, BIOS_SHUTDOWN); 1831 1832 while (get_apic_cmd1() & AV_PENDING) 1833 apic_ret(); 1834 1835 /* for integrated - make sure there is one INIT IPI in buffer */ 1836 /* for external - it will wake up the cpu */ 1837 apicadr[APIC_INT_CMD2] = cpu_id << APIC_ICR_ID_BIT_OFFSET; 1838 apicadr[APIC_INT_CMD1] = AV_ASSERT | AV_RESET; 1839 1840 /* If only 1 CPU is installed, PENDING bit will not go low */ 1841 for (loop_count = 0x1000; loop_count; loop_count--) 1842 if (get_apic_cmd1() & AV_PENDING) 1843 apic_ret(); 1844 else 1845 break; 1846 1847 apicadr[APIC_INT_CMD2] = cpu_id << APIC_ICR_ID_BIT_OFFSET; 1848 apicadr[APIC_INT_CMD1] = AV_DEASSERT | AV_RESET; 1849 1850 drv_usecwait(20000); /* 20 milli sec */ 1851 1852 if (apic_cpus[cpun].aci_local_ver >= APIC_INTEGRATED_VERS) { 1853 /* integrated apic */ 1854 1855 rm_code = (caddr_t)(uintptr_t)rm_platter_pa; 1856 vector = (rm_platter_pa >> MMU_PAGESHIFT) & 1857 (APIC_VECTOR_MASK | APIC_IPL_MASK); 1858 1859 /* to offset the INIT IPI queue up in the buffer */ 1860 apicadr[APIC_INT_CMD2] = cpu_id << APIC_ICR_ID_BIT_OFFSET; 1861 apicadr[APIC_INT_CMD1] = vector | AV_STARTUP; 1862 1863 drv_usecwait(200); /* 20 
micro sec */ 1864 1865 apicadr[APIC_INT_CMD2] = cpu_id << APIC_ICR_ID_BIT_OFFSET; 1866 apicadr[APIC_INT_CMD1] = vector | AV_STARTUP; 1867 1868 drv_usecwait(200); /* 20 micro sec */ 1869 } 1870 intr_restore(iflag); 1871 } 1872 1873 1874 #ifdef DEBUG 1875 int apic_break_on_cpu = 9; 1876 int apic_stretch_interrupts = 0; 1877 int apic_stretch_ISR = 1 << 3; /* IPL of 3 matches nothing now */ 1878 1879 void 1880 apic_break() 1881 { 1882 } 1883 #endif /* DEBUG */ 1884 1885 /* 1886 * platform_intr_enter 1887 * 1888 * Called at the beginning of the interrupt service routine to 1889 * mask all level equal to and below the interrupt priority 1890 * of the interrupting vector. An EOI should be given to 1891 * the interrupt controller to enable other HW interrupts. 1892 * 1893 * Return -1 for spurious interrupts 1894 * 1895 */ 1896 /*ARGSUSED*/ 1897 static int 1898 apic_intr_enter(int ipl, int *vectorp) 1899 { 1900 uchar_t vector; 1901 int nipl; 1902 int irq, iflag; 1903 apic_cpus_info_t *cpu_infop; 1904 1905 /* 1906 * The real vector programmed in APIC is *vectorp + 0x20 1907 * But, cmnint code subtracts 0x20 before pushing it. 1908 * Hence APIC_BASE_VECT is 0x20. 
1909 */ 1910 1911 vector = (uchar_t)*vectorp; 1912 1913 /* if interrupted by the clock, increment apic_nsec_since_boot */ 1914 if (vector == apic_clkvect) { 1915 if (!apic_oneshot) { 1916 /* NOTE: this is not MT aware */ 1917 apic_hrtime_stamp++; 1918 apic_nsec_since_boot += apic_nsec_per_intr; 1919 apic_hrtime_stamp++; 1920 last_count_read = apic_hertz_count; 1921 apic_redistribute_compute(); 1922 } 1923 1924 /* We will avoid all the book keeping overhead for clock */ 1925 nipl = apic_vectortoipl[vector >> APIC_IPL_SHIFT]; 1926 #if defined(__amd64) 1927 setcr8((ulong_t)apic_cr8pri[nipl]); 1928 #else 1929 apicadr[APIC_TASK_REG] = apic_ipltopri[nipl]; 1930 #endif 1931 *vectorp = apic_vector_to_irq[vector + APIC_BASE_VECT]; 1932 apicadr[APIC_EOI_REG] = 0; 1933 return (nipl); 1934 } 1935 1936 cpu_infop = &apic_cpus[psm_get_cpu_id()]; 1937 1938 if (vector == (APIC_SPUR_INTR - APIC_BASE_VECT)) { 1939 cpu_infop->aci_spur_cnt++; 1940 return (APIC_INT_SPURIOUS); 1941 } 1942 1943 /* Check if the vector we got is really what we need */ 1944 if (apic_revector_pending) { 1945 /* 1946 * Disable interrupts for the duration of 1947 * the vector translation to prevent a self-race for 1948 * the apic_revector_lock. This cannot be done 1949 * in apic_xlate_vector because it is recursive and 1950 * we want the vector translation to be atomic with 1951 * respect to other (higher-priority) interrupts. 
1952 */ 1953 iflag = intr_clear(); 1954 vector = apic_xlate_vector(vector + APIC_BASE_VECT) - 1955 APIC_BASE_VECT; 1956 intr_restore(iflag); 1957 } 1958 1959 nipl = apic_vectortoipl[vector >> APIC_IPL_SHIFT]; 1960 *vectorp = irq = apic_vector_to_irq[vector + APIC_BASE_VECT]; 1961 1962 #if defined(__amd64) 1963 setcr8((ulong_t)apic_cr8pri[nipl]); 1964 #else 1965 apicadr[APIC_TASK_REG] = apic_ipltopri[nipl]; 1966 #endif 1967 1968 cpu_infop->aci_current[nipl] = (uchar_t)irq; 1969 cpu_infop->aci_curipl = (uchar_t)nipl; 1970 cpu_infop->aci_ISR_in_progress |= 1 << nipl; 1971 1972 /* 1973 * apic_level_intr could have been assimilated into the irq struct. 1974 * but, having it as a character array is more efficient in terms of 1975 * cache usage. So, we leave it as is. 1976 */ 1977 if (!apic_level_intr[irq]) 1978 apicadr[APIC_EOI_REG] = 0; 1979 1980 #ifdef DEBUG 1981 APIC_DEBUG_BUF_PUT(vector); 1982 APIC_DEBUG_BUF_PUT(irq); 1983 APIC_DEBUG_BUF_PUT(nipl); 1984 APIC_DEBUG_BUF_PUT(psm_get_cpu_id()); 1985 if ((apic_stretch_interrupts) && (apic_stretch_ISR & (1 << nipl))) 1986 drv_usecwait(apic_stretch_interrupts); 1987 1988 if (apic_break_on_cpu == psm_get_cpu_id()) 1989 apic_break(); 1990 #endif /* DEBUG */ 1991 return (nipl); 1992 } 1993 1994 static void 1995 apic_intr_exit(int prev_ipl, int irq) 1996 { 1997 apic_cpus_info_t *cpu_infop; 1998 1999 #if defined(__amd64) 2000 setcr8((ulong_t)apic_cr8pri[prev_ipl]); 2001 #else 2002 apicadr[APIC_TASK_REG] = apic_ipltopri[prev_ipl]; 2003 #endif 2004 2005 cpu_infop = &apic_cpus[psm_get_cpu_id()]; 2006 if (apic_level_intr[irq]) 2007 apicadr[APIC_EOI_REG] = 0; 2008 2009 cpu_infop->aci_curipl = (uchar_t)prev_ipl; 2010 /* ISR above current pri could not be in progress */ 2011 cpu_infop->aci_ISR_in_progress &= (2 << prev_ipl) - 1; 2012 } 2013 2014 /* 2015 * Mask all interrupts below or equal to the given IPL 2016 */ 2017 static void 2018 apic_setspl(int ipl) 2019 { 2020 2021 #if defined(__amd64) 2022 setcr8((ulong_t)apic_cr8pri[ipl]); 
2023 #else 2024 apicadr[APIC_TASK_REG] = apic_ipltopri[ipl]; 2025 #endif 2026 2027 /* interrupts at ipl above this cannot be in progress */ 2028 apic_cpus[psm_get_cpu_id()].aci_ISR_in_progress &= (2 << ipl) - 1; 2029 /* 2030 * this is a patch fix for the ALR QSMP P5 machine, so that interrupts 2031 * have enough time to come in before the priority is raised again 2032 * during the idle() loop. 2033 */ 2034 if (apic_setspl_delay) 2035 (void) get_apic_pri(); 2036 } 2037 2038 /* 2039 * trigger a software interrupt at the given IPL 2040 */ 2041 static void 2042 apic_set_softintr(int ipl) 2043 { 2044 int vector; 2045 uint_t flag; 2046 2047 vector = apic_resv_vector[ipl]; 2048 2049 flag = intr_clear(); 2050 2051 while (get_apic_cmd1() & AV_PENDING) 2052 apic_ret(); 2053 2054 /* generate interrupt at vector on itself only */ 2055 apicadr[APIC_INT_CMD1] = AV_SH_SELF | vector; 2056 2057 intr_restore(flag); 2058 } 2059 2060 /* 2061 * generates an interprocessor interrupt to another CPU 2062 */ 2063 static void 2064 apic_send_ipi(int cpun, int ipl) 2065 { 2066 int vector; 2067 uint_t flag; 2068 2069 vector = apic_resv_vector[ipl]; 2070 2071 flag = intr_clear(); 2072 2073 while (get_apic_cmd1() & AV_PENDING) 2074 apic_ret(); 2075 2076 apicadr[APIC_INT_CMD2] = 2077 apic_cpus[cpun].aci_local_id << APIC_ICR_ID_BIT_OFFSET; 2078 apicadr[APIC_INT_CMD1] = vector; 2079 2080 intr_restore(flag); 2081 } 2082 2083 2084 /*ARGSUSED*/ 2085 static void 2086 apic_set_idlecpu(processorid_t cpun) 2087 { 2088 } 2089 2090 /*ARGSUSED*/ 2091 static void 2092 apic_unset_idlecpu(processorid_t cpun) 2093 { 2094 } 2095 2096 2097 static void 2098 apic_ret() 2099 { 2100 } 2101 2102 static int 2103 get_apic_cmd1() 2104 { 2105 return (apicadr[APIC_INT_CMD1]); 2106 } 2107 2108 static int 2109 get_apic_pri() 2110 { 2111 #if defined(__amd64) 2112 return ((int)getcr8()); 2113 #else 2114 return (apicadr[APIC_TASK_REG]); 2115 #endif 2116 } 2117 2118 /* 2119 * If apic_coarse_time == 1, then apic_gettime() is used 
instead of 2120 * apic_gethrtime(). This is used for performance instead of accuracy. 2121 */ 2122 2123 static hrtime_t 2124 apic_gettime() 2125 { 2126 int old_hrtime_stamp; 2127 hrtime_t temp; 2128 2129 /* 2130 * In one-shot mode, we do not keep time, so if anyone 2131 * calls psm_gettime() directly, we vector over to 2132 * gethrtime(). 2133 * one-shot mode MUST NOT be enabled if this psm is the source of 2134 * hrtime. 2135 */ 2136 2137 if (apic_oneshot) 2138 return (gethrtime()); 2139 2140 2141 gettime_again: 2142 while ((old_hrtime_stamp = apic_hrtime_stamp) & 1) 2143 apic_ret(); 2144 2145 temp = apic_nsec_since_boot; 2146 2147 if (apic_hrtime_stamp != old_hrtime_stamp) { /* got an interrupt */ 2148 goto gettime_again; 2149 } 2150 return (temp); 2151 } 2152 2153 /* 2154 * Here we return the number of nanoseconds since booting. Note every 2155 * clock interrupt increments apic_nsec_since_boot by the appropriate 2156 * amount. 2157 */ 2158 static hrtime_t 2159 apic_gethrtime() 2160 { 2161 int curr_timeval, countval, elapsed_ticks, oflags; 2162 int old_hrtime_stamp, status; 2163 hrtime_t temp; 2164 uchar_t cpun; 2165 2166 2167 /* 2168 * In one-shot mode, we do not keep time, so if anyone 2169 * calls psm_gethrtime() directly, we vector over to 2170 * gethrtime(). 2171 * one-shot mode MUST NOT be enabled if this psm is the source of 2172 * hrtime. 2173 */ 2174 2175 if (apic_oneshot) 2176 return (gethrtime()); 2177 2178 oflags = intr_clear(); /* prevent migration */ 2179 2180 cpun = (uchar_t)((uint_t)apicadr[APIC_LID_REG] >> APIC_ID_BIT_OFFSET); 2181 2182 lock_set(&apic_gethrtime_lock); 2183 2184 gethrtime_again: 2185 while ((old_hrtime_stamp = apic_hrtime_stamp) & 1) 2186 apic_ret(); 2187 2188 /* 2189 * Check to see which CPU we are on. Note the time is kept on 2190 * the local APIC of CPU 0. If on CPU 0, simply read the current 2191 * counter. If on another CPU, issue a remote read command to CPU 0. 
2192 */ 2193 if (cpun == apic_cpus[0].aci_local_id) { 2194 countval = apicadr[APIC_CURR_COUNT]; 2195 } else { 2196 while (get_apic_cmd1() & AV_PENDING) 2197 apic_ret(); 2198 2199 apicadr[APIC_INT_CMD2] = 2200 apic_cpus[0].aci_local_id << APIC_ICR_ID_BIT_OFFSET; 2201 apicadr[APIC_INT_CMD1] = APIC_CURR_ADD|AV_REMOTE; 2202 2203 while ((status = get_apic_cmd1()) & AV_READ_PENDING) 2204 apic_ret(); 2205 2206 if (status & AV_REMOTE_STATUS) /* 1 = valid */ 2207 countval = apicadr[APIC_REMOTE_READ]; 2208 else { /* 0 = invalid */ 2209 apic_remote_hrterr++; 2210 /* 2211 * return last hrtime right now, will need more 2212 * testing if change to retry 2213 */ 2214 temp = apic_last_hrtime; 2215 2216 lock_clear(&apic_gethrtime_lock); 2217 2218 intr_restore(oflags); 2219 2220 return (temp); 2221 } 2222 } 2223 if (countval > last_count_read) 2224 countval = 0; 2225 else 2226 last_count_read = countval; 2227 2228 elapsed_ticks = apic_hertz_count - countval; 2229 2230 curr_timeval = elapsed_ticks * apic_nsec_per_tick; 2231 temp = apic_nsec_since_boot + curr_timeval; 2232 2233 if (apic_hrtime_stamp != old_hrtime_stamp) { /* got an interrupt */ 2234 /* we might have clobbered last_count_read. 
Restore it */ 2235 last_count_read = apic_hertz_count; 2236 goto gethrtime_again; 2237 } 2238 2239 if (temp < apic_last_hrtime) { 2240 /* return last hrtime if error occurs */ 2241 apic_hrtime_error++; 2242 temp = apic_last_hrtime; 2243 } 2244 else 2245 apic_last_hrtime = temp; 2246 2247 lock_clear(&apic_gethrtime_lock); 2248 intr_restore(oflags); 2249 2250 return (temp); 2251 } 2252 2253 /* apic NMI handler */ 2254 /*ARGSUSED*/ 2255 static void 2256 apic_nmi_intr(caddr_t arg) 2257 { 2258 if (apic_shutdown_processors) { 2259 apic_disable_local_apic(); 2260 return; 2261 } 2262 2263 if (lock_try(&apic_nmi_lock)) { 2264 if (apic_kmdb_on_nmi) { 2265 if (psm_debugger() == 0) { 2266 cmn_err(CE_PANIC, 2267 "NMI detected, kmdb is not available."); 2268 } else { 2269 debug_enter("\nNMI detected, entering kmdb.\n"); 2270 } 2271 } else { 2272 if (apic_panic_on_nmi) { 2273 /* Keep panic from entering kmdb. */ 2274 nopanicdebug = 1; 2275 cmn_err(CE_PANIC, "pcplusmp: NMI received"); 2276 } else { 2277 /* 2278 * prom_printf is the best shot we have 2279 * of something which is problem free from 2280 * high level/NMI type of interrupts 2281 */ 2282 prom_printf("pcplusmp: NMI received\n"); 2283 apic_error |= APIC_ERR_NMI; 2284 apic_num_nmis++; 2285 } 2286 } 2287 lock_clear(&apic_nmi_lock); 2288 } 2289 } 2290 2291 /* 2292 * Add mask bits to disable interrupt vector from happening 2293 * at or above IPL. In addition, it should remove mask bits 2294 * to enable interrupt vectors below the given IPL. 2295 * 2296 * Both add and delspl are complicated by the fact that different interrupts 2297 * may share IRQs. This can happen in two ways. 2298 * 1. The same H/W line is shared by more than 1 device 2299 * 1a. with interrupts at different IPLs 2300 * 1b. with interrupts at same IPL 2301 * 2. We ran out of vectors at a given IPL and started sharing vectors. 
2302 * 1b and 2 should be handled gracefully, except for the fact some ISRs 2303 * will get called often when no interrupt is pending for the device. 2304 * For 1a, we just hope that the machine blows up with the person who 2305 * set it up that way!. In the meantime, we handle it at the higher IPL. 2306 */ 2307 /*ARGSUSED*/ 2308 static int 2309 apic_addspl(int irqno, int ipl, int min_ipl, int max_ipl) 2310 { 2311 uchar_t vector; 2312 int iflag; 2313 apic_irq_t *irqptr, *irqheadptr; 2314 int irqindex; 2315 2316 ASSERT(max_ipl <= UCHAR_MAX); 2317 irqindex = IRQINDEX(irqno); 2318 2319 if ((irqindex == -1) || (!apic_irq_table[irqindex])) 2320 return (PSM_FAILURE); 2321 2322 irqptr = irqheadptr = apic_irq_table[irqindex]; 2323 2324 DDI_INTR_IMPLDBG((CE_CONT, "apic_addspl: dip=0x%p type=%d irqno=0x%x " 2325 "vector=0x%x\n", (void *)irqptr->airq_dip, 2326 irqptr->airq_mps_intr_index, irqno, irqptr->airq_vector)); 2327 2328 while (irqptr) { 2329 if (VIRTIRQ(irqindex, irqptr->airq_share_id) == irqno) 2330 break; 2331 irqptr = irqptr->airq_next; 2332 } 2333 irqptr->airq_share++; 2334 2335 /* return if it is not hardware interrupt */ 2336 if (irqptr->airq_mps_intr_index == RESERVE_INDEX) 2337 return (PSM_SUCCESS); 2338 2339 /* Or if there are more interupts at a higher IPL */ 2340 if (ipl != max_ipl) 2341 return (PSM_SUCCESS); 2342 2343 /* 2344 * if apic_picinit() has not been called yet, just return. 2345 * At the end of apic_picinit(), we will call setup_io_intr(). 2346 */ 2347 2348 if (!apic_flag) 2349 return (PSM_SUCCESS); 2350 2351 iflag = intr_clear(); 2352 2353 /* 2354 * Upgrade vector if max_ipl is not earlier ipl. If we cannot allocate, 2355 * return failure. Not very elegant, but then we hope the 2356 * machine will blow up with ... 
2357 */ 2358 if (irqptr->airq_ipl != max_ipl) { 2359 vector = apic_allocate_vector(max_ipl, irqindex, 1); 2360 if (vector == 0) { 2361 intr_restore(iflag); 2362 irqptr->airq_share--; 2363 return (PSM_FAILURE); 2364 } 2365 irqptr = irqheadptr; 2366 apic_mark_vector(irqptr->airq_vector, vector); 2367 while (irqptr) { 2368 irqptr->airq_vector = vector; 2369 irqptr->airq_ipl = (uchar_t)max_ipl; 2370 /* 2371 * reprogram irq being added and every one else 2372 * who is not in the UNINIT state 2373 */ 2374 if ((VIRTIRQ(irqindex, irqptr->airq_share_id) == 2375 irqno) || (irqptr->airq_temp_cpu != IRQ_UNINIT)) { 2376 apic_record_rdt_entry(irqptr, irqindex); 2377 (void) apic_setup_io_intr(irqptr, irqindex); 2378 } 2379 irqptr = irqptr->airq_next; 2380 } 2381 intr_restore(iflag); 2382 return (PSM_SUCCESS); 2383 } 2384 2385 ASSERT(irqptr); 2386 (void) apic_setup_io_intr(irqptr, irqindex); 2387 intr_restore(iflag); 2388 return (PSM_SUCCESS); 2389 } 2390 2391 /* 2392 * Recompute mask bits for the given interrupt vector. 2393 * If there is no interrupt servicing routine for this 2394 * vector, this function should disable interrupt vector 2395 * from happening at all IPLs. If there are still 2396 * handlers using the given vector, this function should 2397 * disable the given vector from happening below the lowest 2398 * IPL of the remaining hadlers. 
2399 */ 2400 /*ARGSUSED*/ 2401 static int 2402 apic_delspl(int irqno, int ipl, int min_ipl, int max_ipl) 2403 { 2404 uchar_t vector, bind_cpu; 2405 int iflag, intin, irqindex; 2406 volatile int32_t *ioapic; 2407 apic_irq_t *irqptr, *irqheadptr; 2408 2409 irqindex = IRQINDEX(irqno); 2410 irqptr = irqheadptr = apic_irq_table[irqindex]; 2411 2412 DDI_INTR_IMPLDBG((CE_CONT, "apic_delspl: dip=0x%p type=%d irqno=0x%x " 2413 "vector=0x%x\n", (void *)irqptr->airq_dip, 2414 irqptr->airq_mps_intr_index, irqno, irqptr->airq_vector)); 2415 2416 while (irqptr) { 2417 if (VIRTIRQ(irqindex, irqptr->airq_share_id) == irqno) 2418 break; 2419 irqptr = irqptr->airq_next; 2420 } 2421 ASSERT(irqptr); 2422 2423 irqptr->airq_share--; 2424 2425 if (ipl < max_ipl) 2426 return (PSM_SUCCESS); 2427 2428 /* return if it is not hardware interrupt */ 2429 if (irqptr->airq_mps_intr_index == RESERVE_INDEX) 2430 return (PSM_SUCCESS); 2431 2432 if (!apic_flag) { 2433 /* 2434 * Clear irq_struct. If two devices shared an intpt 2435 * line & 1 unloaded before picinit, we are hosed. But, then 2436 * we hope the machine will ... 2437 */ 2438 irqptr->airq_mps_intr_index = FREE_INDEX; 2439 irqptr->airq_temp_cpu = IRQ_UNINIT; 2440 apic_free_vector(irqptr->airq_vector); 2441 return (PSM_SUCCESS); 2442 } 2443 /* 2444 * Downgrade vector to new max_ipl if needed.If we cannot allocate, 2445 * use old IPL. Not very elegant, but then we hope ... 
2446 */ 2447 if ((irqptr->airq_ipl != max_ipl) && (max_ipl != PSM_INVALID_IPL)) { 2448 apic_irq_t *irqp; 2449 if (vector = apic_allocate_vector(max_ipl, irqno, 1)) { 2450 apic_mark_vector(irqheadptr->airq_vector, vector); 2451 irqp = irqheadptr; 2452 while (irqp) { 2453 irqp->airq_vector = vector; 2454 irqp->airq_ipl = (uchar_t)max_ipl; 2455 if (irqp->airq_temp_cpu != IRQ_UNINIT) { 2456 apic_record_rdt_entry(irqp, irqindex); 2457 (void) apic_setup_io_intr(irqp, 2458 irqindex); 2459 } 2460 irqp = irqp->airq_next; 2461 } 2462 } 2463 } 2464 2465 if (irqptr->airq_share) 2466 return (PSM_SUCCESS); 2467 2468 ioapic = apicioadr[irqptr->airq_ioapicindex]; 2469 intin = irqptr->airq_intin_no; 2470 iflag = intr_clear(); 2471 lock_set(&apic_ioapic_lock); 2472 ioapic[APIC_IO_REG] = APIC_RDT_CMD + 2 * intin; 2473 ioapic[APIC_IO_DATA] = AV_MASK; 2474 2475 /* Disable the MSI/X vector */ 2476 if (APIC_IS_MSI_OR_MSIX_INDEX(irqptr->airq_mps_intr_index)) { 2477 int type = (irqptr->airq_mps_intr_index == MSI_INDEX) ? 
2478 DDI_INTR_TYPE_MSI : DDI_INTR_TYPE_MSIX; 2479 2480 /* 2481 * Make sure we only disable on the last 2482 * of the multi-MSI support 2483 */ 2484 if (i_ddi_intr_get_current_nintrs(irqptr->airq_dip) == 1) { 2485 (void) pci_msi_unconfigure(irqptr->airq_dip, type, 2486 irqptr->airq_ioapicindex); 2487 2488 (void) pci_msi_disable_mode(irqptr->airq_dip, type, 2489 irqptr->airq_ioapicindex); 2490 } 2491 } 2492 2493 if (max_ipl == PSM_INVALID_IPL) { 2494 ASSERT(irqheadptr == irqptr); 2495 bind_cpu = irqptr->airq_temp_cpu; 2496 if (((uchar_t)bind_cpu != IRQ_UNBOUND) && 2497 ((uchar_t)bind_cpu != IRQ_UNINIT)) { 2498 ASSERT((bind_cpu & ~IRQ_USER_BOUND) < apic_nproc); 2499 if (bind_cpu & IRQ_USER_BOUND) { 2500 /* If hardbound, temp_cpu == cpu */ 2501 bind_cpu &= ~IRQ_USER_BOUND; 2502 apic_cpus[bind_cpu].aci_bound--; 2503 } else 2504 apic_cpus[bind_cpu].aci_temp_bound--; 2505 } 2506 lock_clear(&apic_ioapic_lock); 2507 intr_restore(iflag); 2508 irqptr->airq_temp_cpu = IRQ_UNINIT; 2509 irqptr->airq_mps_intr_index = FREE_INDEX; 2510 apic_free_vector(irqptr->airq_vector); 2511 return (PSM_SUCCESS); 2512 } 2513 lock_clear(&apic_ioapic_lock); 2514 intr_restore(iflag); 2515 2516 mutex_enter(&airq_mutex); 2517 if ((irqptr == apic_irq_table[irqindex])) { 2518 apic_irq_t *oldirqptr; 2519 /* Move valid irq entry to the head */ 2520 irqheadptr = oldirqptr = irqptr; 2521 irqptr = irqptr->airq_next; 2522 ASSERT(irqptr); 2523 while (irqptr) { 2524 if (irqptr->airq_mps_intr_index != FREE_INDEX) 2525 break; 2526 oldirqptr = irqptr; 2527 irqptr = irqptr->airq_next; 2528 } 2529 /* remove all invalid ones from the beginning */ 2530 apic_irq_table[irqindex] = irqptr; 2531 /* 2532 * and link them back after the head. 
The invalid ones 2533 * begin with irqheadptr and end at oldirqptr 2534 */ 2535 oldirqptr->airq_next = irqptr->airq_next; 2536 irqptr->airq_next = irqheadptr; 2537 } 2538 mutex_exit(&airq_mutex); 2539 2540 irqptr->airq_temp_cpu = IRQ_UNINIT; 2541 irqptr->airq_mps_intr_index = FREE_INDEX; 2542 return (PSM_SUCCESS); 2543 } 2544 2545 /* 2546 * Return HW interrupt number corresponding to the given IPL 2547 */ 2548 /*ARGSUSED*/ 2549 static int 2550 apic_softlvl_to_irq(int ipl) 2551 { 2552 /* 2553 * Do not use apic to trigger soft interrupt. 2554 * It will cause the system to hang when 2 hardware interrupts 2555 * at the same priority with the softint are already accepted 2556 * by the apic. Cause the AV_PENDING bit will not be cleared 2557 * until one of the hardware interrupt is eoi'ed. If we need 2558 * to send an ipi at this time, we will end up looping forever 2559 * to wait for the AV_PENDING bit to clear. 2560 */ 2561 return (PSM_SV_SOFTWARE); 2562 } 2563 2564 static int 2565 apic_post_cpu_start() 2566 { 2567 int i, cpun; 2568 apic_irq_t *irq_ptr; 2569 2570 apic_init_intr(); 2571 2572 /* 2573 * since some systems don't enable the internal cache on the non-boot 2574 * cpus, so we have to enable them here 2575 */ 2576 setcr0(getcr0() & ~(0x60000000)); 2577 2578 while (get_apic_cmd1() & AV_PENDING) 2579 apic_ret(); 2580 2581 cpun = psm_get_cpu_id(); 2582 apic_cpus[cpun].aci_status = APIC_CPU_ONLINE | APIC_CPU_INTR_ENABLE; 2583 2584 for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) { 2585 irq_ptr = apic_irq_table[i]; 2586 if ((irq_ptr == NULL) || 2587 ((irq_ptr->airq_cpu & ~IRQ_USER_BOUND) != cpun)) 2588 continue; 2589 2590 while (irq_ptr) { 2591 if (irq_ptr->airq_temp_cpu != IRQ_UNINIT) 2592 (void) apic_rebind(irq_ptr, cpun, 1, IMMEDIATE); 2593 irq_ptr = irq_ptr->airq_next; 2594 } 2595 } 2596 2597 return (PSM_SUCCESS); 2598 } 2599 2600 processorid_t 2601 apic_get_next_processorid(processorid_t cpu_id) 2602 { 2603 2604 int i; 2605 2606 if (cpu_id == -1) 
2607 return ((processorid_t)0); 2608 2609 for (i = cpu_id + 1; i < NCPU; i++) { 2610 if (apic_cpumask & (1 << i)) 2611 return (i); 2612 } 2613 2614 return ((processorid_t)-1); 2615 } 2616 2617 2618 /* 2619 * type == -1 indicates it is an internal request. Do not change 2620 * resv_vector for these requests 2621 */ 2622 static int 2623 apic_get_ipivect(int ipl, int type) 2624 { 2625 uchar_t vector; 2626 int irq; 2627 2628 if (irq = apic_allocate_irq(APIC_VECTOR(ipl))) { 2629 if (vector = apic_allocate_vector(ipl, irq, 1)) { 2630 apic_irq_table[irq]->airq_mps_intr_index = 2631 RESERVE_INDEX; 2632 apic_irq_table[irq]->airq_vector = vector; 2633 if (type != -1) { 2634 apic_resv_vector[ipl] = vector; 2635 } 2636 return (irq); 2637 } 2638 } 2639 apic_error |= APIC_ERR_GET_IPIVECT_FAIL; 2640 return (-1); /* shouldn't happen */ 2641 } 2642 2643 static int 2644 apic_getclkirq(int ipl) 2645 { 2646 int irq; 2647 2648 if ((irq = apic_get_ipivect(ipl, -1)) == -1) 2649 return (-1); 2650 /* 2651 * Note the vector in apic_clkvect for per clock handling. 2652 */ 2653 apic_clkvect = apic_irq_table[irq]->airq_vector - APIC_BASE_VECT; 2654 APIC_VERBOSE_IOAPIC((CE_NOTE, "get_clkirq: vector = %x\n", 2655 apic_clkvect)); 2656 return (irq); 2657 } 2658 2659 /* 2660 * Return the number of APIC clock ticks elapsed for 8245 to decrement 2661 * (APIC_TIME_COUNT + pit_ticks_adj) ticks. 
2662 */ 2663 static uint_t 2664 apic_calibrate(volatile uint32_t *addr, uint16_t *pit_ticks_adj) 2665 { 2666 uint8_t pit_tick_lo; 2667 uint16_t pit_tick, target_pit_tick; 2668 uint32_t start_apic_tick, end_apic_tick; 2669 int iflag; 2670 2671 addr += APIC_CURR_COUNT; 2672 2673 iflag = intr_clear(); 2674 2675 do { 2676 pit_tick_lo = inb(PITCTR0_PORT); 2677 pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo; 2678 } while (pit_tick < APIC_TIME_MIN || 2679 pit_tick_lo <= APIC_LB_MIN || pit_tick_lo >= APIC_LB_MAX); 2680 2681 /* 2682 * Wait for the 8254 to decrement by 5 ticks to ensure 2683 * we didn't start in the middle of a tick. 2684 * Compare with 0x10 for the wrap around case. 2685 */ 2686 target_pit_tick = pit_tick - 5; 2687 do { 2688 pit_tick_lo = inb(PITCTR0_PORT); 2689 pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo; 2690 } while (pit_tick > target_pit_tick || pit_tick_lo < 0x10); 2691 2692 start_apic_tick = *addr; 2693 2694 /* 2695 * Wait for the 8254 to decrement by 2696 * (APIC_TIME_COUNT + pit_ticks_adj) ticks 2697 */ 2698 target_pit_tick = pit_tick - APIC_TIME_COUNT; 2699 do { 2700 pit_tick_lo = inb(PITCTR0_PORT); 2701 pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo; 2702 } while (pit_tick > target_pit_tick || pit_tick_lo < 0x10); 2703 2704 end_apic_tick = *addr; 2705 2706 *pit_ticks_adj = target_pit_tick - pit_tick; 2707 2708 intr_restore(iflag); 2709 2710 return (start_apic_tick - end_apic_tick); 2711 } 2712 2713 /* 2714 * Initialise the APIC timer on the local APIC of CPU 0 to the desired 2715 * frequency. Note at this stage in the boot sequence, the boot processor 2716 * is the only active processor. 2717 * hertz value of 0 indicates a one-shot mode request. In this case 2718 * the function returns the resolution (in nanoseconds) for the hardware 2719 * timer interrupt. If one-shot mode capability is not available, 2720 * the return value will be 0. apic_enable_oneshot is a global switch 2721 * for disabling the functionality. 
2722 * A non-zero positive value for hertz indicates a periodic mode request. 2723 * In this case the hardware will be programmed to generate clock interrupts 2724 * at hertz frequency and returns the resolution of interrupts in 2725 * nanosecond. 2726 */ 2727 2728 static int 2729 apic_clkinit(int hertz) 2730 { 2731 2732 uint_t apic_ticks = 0; 2733 uint_t pit_time; 2734 int ret; 2735 uint16_t pit_ticks_adj; 2736 static int firsttime = 1; 2737 2738 if (firsttime) { 2739 /* first time calibrate */ 2740 2741 apicadr[APIC_DIVIDE_REG] = 0x0; 2742 apicadr[APIC_INIT_COUNT] = APIC_MAXVAL; 2743 2744 /* set periodic interrupt based on CLKIN */ 2745 apicadr[APIC_LOCAL_TIMER] = 2746 (apic_clkvect + APIC_BASE_VECT) | AV_TIME; 2747 tenmicrosec(); 2748 2749 apic_ticks = apic_calibrate(apicadr, &pit_ticks_adj); 2750 2751 apicadr[APIC_LOCAL_TIMER] = 2752 (apic_clkvect + APIC_BASE_VECT) | AV_MASK; 2753 /* 2754 * pit time is the amount of real time (in nanoseconds ) it took 2755 * the 8254 to decrement (APIC_TIME_COUNT + pit_ticks_adj) ticks 2756 */ 2757 pit_time = ((longlong_t)(APIC_TIME_COUNT + 2758 pit_ticks_adj) * NANOSEC) / PIT_HZ; 2759 2760 /* 2761 * Determine the number of nanoseconds per APIC clock tick 2762 * and then determine how many APIC ticks to interrupt at the 2763 * desired frequency 2764 */ 2765 apic_nsec_per_tick = pit_time / apic_ticks; 2766 if (apic_nsec_per_tick == 0) 2767 apic_nsec_per_tick = 1; 2768 2769 /* the interval timer initial count is 32 bit max */ 2770 apic_nsec_max = (hrtime_t)apic_nsec_per_tick * APIC_MAXVAL; 2771 firsttime = 0; 2772 } 2773 2774 if (hertz != 0) { 2775 /* periodic */ 2776 apic_nsec_per_intr = NANOSEC / hertz; 2777 apic_hertz_count = (longlong_t)apic_nsec_per_intr / 2778 apic_nsec_per_tick; 2779 apic_sample_factor_redistribution = hertz + 1; 2780 } 2781 2782 apic_int_busy_mark = (apic_int_busy_mark * 2783 apic_sample_factor_redistribution) / 100; 2784 apic_int_free_mark = (apic_int_free_mark * 2785 apic_sample_factor_redistribution) / 
100; 2786 apic_diff_for_redistribution = (apic_diff_for_redistribution * 2787 apic_sample_factor_redistribution) / 100; 2788 2789 if (hertz == 0) { 2790 /* requested one_shot */ 2791 if (!apic_oneshot_enable) 2792 return (0); 2793 apic_oneshot = 1; 2794 ret = (int)apic_nsec_per_tick; 2795 } else { 2796 /* program the local APIC to interrupt at the given frequency */ 2797 apicadr[APIC_INIT_COUNT] = apic_hertz_count; 2798 apicadr[APIC_LOCAL_TIMER] = 2799 (apic_clkvect + APIC_BASE_VECT) | AV_TIME; 2800 apic_oneshot = 0; 2801 ret = NANOSEC / hertz; 2802 } 2803 2804 return (ret); 2805 2806 } 2807 2808 /* 2809 * apic_preshutdown: 2810 * Called early in shutdown whilst we can still access filesystems to do 2811 * things like loading modules which will be required to complete shutdown 2812 * after filesystems are all unmounted. 2813 */ 2814 static void 2815 apic_preshutdown(int cmd, int fcn) 2816 { 2817 APIC_VERBOSE_POWEROFF(("apic_preshutdown(%d,%d); m=%d a=%d\n", 2818 cmd, fcn, apic_poweroff_method, apic_enable_acpi)); 2819 2820 if ((cmd != A_SHUTDOWN) || (fcn != AD_POWEROFF)) { 2821 return; 2822 } 2823 } 2824 2825 static void 2826 apic_shutdown(int cmd, int fcn) 2827 { 2828 int iflag, restarts, attempts; 2829 int i, j; 2830 volatile int32_t *ioapic; 2831 uchar_t byte; 2832 2833 /* Send NMI to all CPUs except self to do per processor shutdown */ 2834 iflag = intr_clear(); 2835 while (get_apic_cmd1() & AV_PENDING) 2836 apic_ret(); 2837 apic_shutdown_processors = 1; 2838 apicadr[APIC_INT_CMD1] = AV_NMI | AV_LEVEL | AV_SH_ALL_EXCSELF; 2839 2840 /* restore cmos shutdown byte before reboot */ 2841 if (apic_cmos_ssb_set) { 2842 outb(CMOS_ADDR, SSB); 2843 outb(CMOS_DATA, 0); 2844 } 2845 /* Disable the I/O APIC redirection entries */ 2846 for (j = 0; j < apic_io_max; j++) { 2847 int intin_max; 2848 ioapic = apicioadr[j]; 2849 ioapic[APIC_IO_REG] = APIC_VERS_CMD; 2850 /* Bits 23-16 define the maximum redirection entries */ 2851 intin_max = (ioapic[APIC_IO_DATA] >> 16) & 0xff; 
2852 for (i = 0; i < intin_max; i++) { 2853 ioapic[APIC_IO_REG] = APIC_RDT_CMD + 2 * i; 2854 ioapic[APIC_IO_DATA] = AV_MASK; 2855 } 2856 } 2857 2858 /* disable apic mode if imcr present */ 2859 if (apic_imcrp) { 2860 outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT); 2861 outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_PIC); 2862 } 2863 2864 apic_disable_local_apic(); 2865 2866 intr_restore(iflag); 2867 2868 if ((cmd != A_SHUTDOWN) || (fcn != AD_POWEROFF)) { 2869 return; 2870 } 2871 2872 switch (apic_poweroff_method) { 2873 case APIC_POWEROFF_VIA_RTC: 2874 2875 /* select the extended NVRAM bank in the RTC */ 2876 outb(CMOS_ADDR, RTC_REGA); 2877 byte = inb(CMOS_DATA); 2878 outb(CMOS_DATA, (byte | EXT_BANK)); 2879 2880 outb(CMOS_ADDR, PFR_REG); 2881 2882 /* for Predator must toggle the PAB bit */ 2883 byte = inb(CMOS_DATA); 2884 2885 /* 2886 * clear power active bar, wakeup alarm and 2887 * kickstart 2888 */ 2889 byte &= ~(PAB_CBIT | WF_FLAG | KS_FLAG); 2890 outb(CMOS_DATA, byte); 2891 2892 /* delay before next write */ 2893 drv_usecwait(1000); 2894 2895 /* for S40 the following would suffice */ 2896 byte = inb(CMOS_DATA); 2897 2898 /* power active bar control bit */ 2899 byte |= PAB_CBIT; 2900 outb(CMOS_DATA, byte); 2901 2902 break; 2903 2904 case APIC_POWEROFF_VIA_ASPEN_BMC: 2905 restarts = 0; 2906 restart_aspen_bmc: 2907 if (++restarts == 3) 2908 break; 2909 attempts = 0; 2910 do { 2911 byte = inb(MISMIC_FLAG_REGISTER); 2912 byte &= MISMIC_BUSY_MASK; 2913 if (byte != 0) { 2914 drv_usecwait(1000); 2915 if (attempts >= 3) 2916 goto restart_aspen_bmc; 2917 ++attempts; 2918 } 2919 } while (byte != 0); 2920 outb(MISMIC_CNTL_REGISTER, CC_SMS_GET_STATUS); 2921 byte = inb(MISMIC_FLAG_REGISTER); 2922 byte |= 0x1; 2923 outb(MISMIC_FLAG_REGISTER, byte); 2924 i = 0; 2925 for (; i < (sizeof (aspen_bmc)/sizeof (aspen_bmc[0])); 2926 i++) { 2927 attempts = 0; 2928 do { 2929 byte = inb(MISMIC_FLAG_REGISTER); 2930 byte &= MISMIC_BUSY_MASK; 2931 if (byte != 0) { 2932 drv_usecwait(1000); 2933 if 
(attempts >= 3) 2934 goto restart_aspen_bmc; 2935 ++attempts; 2936 } 2937 } while (byte != 0); 2938 outb(MISMIC_CNTL_REGISTER, aspen_bmc[i].cntl); 2939 outb(MISMIC_DATA_REGISTER, aspen_bmc[i].data); 2940 byte = inb(MISMIC_FLAG_REGISTER); 2941 byte |= 0x1; 2942 outb(MISMIC_FLAG_REGISTER, byte); 2943 } 2944 break; 2945 2946 case APIC_POWEROFF_VIA_SITKA_BMC: 2947 restarts = 0; 2948 restart_sitka_bmc: 2949 if (++restarts == 3) 2950 break; 2951 attempts = 0; 2952 do { 2953 byte = inb(SMS_STATUS_REGISTER); 2954 byte &= SMS_STATE_MASK; 2955 if ((byte == SMS_READ_STATE) || 2956 (byte == SMS_WRITE_STATE)) { 2957 drv_usecwait(1000); 2958 if (attempts >= 3) 2959 goto restart_sitka_bmc; 2960 ++attempts; 2961 } 2962 } while ((byte == SMS_READ_STATE) || 2963 (byte == SMS_WRITE_STATE)); 2964 outb(SMS_COMMAND_REGISTER, SMS_GET_STATUS); 2965 i = 0; 2966 for (; i < (sizeof (sitka_bmc)/sizeof (sitka_bmc[0])); 2967 i++) { 2968 attempts = 0; 2969 do { 2970 byte = inb(SMS_STATUS_REGISTER); 2971 byte &= SMS_IBF_MASK; 2972 if (byte != 0) { 2973 drv_usecwait(1000); 2974 if (attempts >= 3) 2975 goto restart_sitka_bmc; 2976 ++attempts; 2977 } 2978 } while (byte != 0); 2979 outb(sitka_bmc[i].port, sitka_bmc[i].data); 2980 } 2981 break; 2982 2983 case APIC_POWEROFF_NONE: 2984 2985 /* If no APIC direct method, we will try using ACPI */ 2986 if (apic_enable_acpi) { 2987 if (acpi_poweroff() == 1) 2988 return; 2989 } else 2990 return; 2991 2992 break; 2993 } 2994 /* 2995 * Wait a limited time here for power to go off. 2996 * If the power does not go off, then there was a 2997 * problem and we should continue to the halt which 2998 * prints a message for the user to press a key to 2999 * reboot. 3000 */ 3001 drv_usecwait(7000000); /* wait seven seconds */ 3002 3003 } 3004 3005 /* 3006 * Try and disable all interrupts. We just assign interrupts to other 3007 * processors based on policy. If any were bound by user request, we 3008 * let them continue and return failure. 
We do not bother to check 3009 * for cache affinity while rebinding. 3010 */ 3011 3012 static int 3013 apic_disable_intr(processorid_t cpun) 3014 { 3015 int bind_cpu = 0, i, hardbound = 0, iflag; 3016 apic_irq_t *irq_ptr; 3017 3018 if (cpun == 0) 3019 return (PSM_FAILURE); 3020 3021 iflag = intr_clear(); 3022 lock_set(&apic_ioapic_lock); 3023 apic_cpus[cpun].aci_status &= ~APIC_CPU_INTR_ENABLE; 3024 lock_clear(&apic_ioapic_lock); 3025 intr_restore(iflag); 3026 apic_cpus[cpun].aci_curipl = 0; 3027 i = apic_min_device_irq; 3028 for (; i <= apic_max_device_irq; i++) { 3029 /* 3030 * If there are bound interrupts on this cpu, then 3031 * rebind them to other processors. 3032 */ 3033 if ((irq_ptr = apic_irq_table[i]) != NULL) { 3034 ASSERT((irq_ptr->airq_temp_cpu == IRQ_UNBOUND) || 3035 (irq_ptr->airq_temp_cpu == IRQ_UNINIT) || 3036 ((irq_ptr->airq_temp_cpu & ~IRQ_USER_BOUND) < 3037 apic_nproc)); 3038 3039 if (irq_ptr->airq_temp_cpu == (cpun | IRQ_USER_BOUND)) { 3040 hardbound = 1; 3041 continue; 3042 } 3043 3044 if (irq_ptr->airq_temp_cpu == cpun) { 3045 do { 3046 apic_next_bind_cpu += 2; 3047 bind_cpu = apic_next_bind_cpu / 2; 3048 if (bind_cpu >= apic_nproc) { 3049 apic_next_bind_cpu = 1; 3050 bind_cpu = 0; 3051 3052 } 3053 } while (apic_rebind_all(irq_ptr, bind_cpu, 1)); 3054 } 3055 } 3056 } 3057 if (hardbound) { 3058 cmn_err(CE_WARN, "Could not disable interrupts on %d" 3059 "due to user bound interrupts", cpun); 3060 return (PSM_FAILURE); 3061 } 3062 else 3063 return (PSM_SUCCESS); 3064 } 3065 3066 static void 3067 apic_enable_intr(processorid_t cpun) 3068 { 3069 int i, iflag; 3070 apic_irq_t *irq_ptr; 3071 3072 iflag = intr_clear(); 3073 lock_set(&apic_ioapic_lock); 3074 apic_cpus[cpun].aci_status |= APIC_CPU_INTR_ENABLE; 3075 lock_clear(&apic_ioapic_lock); 3076 intr_restore(iflag); 3077 3078 i = apic_min_device_irq; 3079 for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) { 3080 if ((irq_ptr = apic_irq_table[i]) != NULL) { 3081 if ((irq_ptr->airq_cpu & 
~IRQ_USER_BOUND) == cpun) { 3082 (void) apic_rebind_all(irq_ptr, 3083 irq_ptr->airq_cpu, 1); 3084 } 3085 } 3086 } 3087 } 3088 3089 /* 3090 * apic_introp_xlate() replaces apic_translate_irq() and is 3091 * called only from apic_intr_ops(). With the new ADII framework, 3092 * the priority can no longer be retrived through i_ddi_get_intrspec(). 3093 * It has to be passed in from the caller. 3094 */ 3095 int 3096 apic_introp_xlate(dev_info_t *dip, struct intrspec *ispec, int type) 3097 { 3098 char dev_type[16]; 3099 int dev_len, pci_irq, newirq, bustype, devid, busid, i; 3100 int irqno = ispec->intrspec_vec; 3101 ddi_acc_handle_t cfg_handle; 3102 uchar_t ipin; 3103 struct apic_io_intr *intrp; 3104 iflag_t intr_flag; 3105 APIC_HEADER *hp; 3106 MADT_INTERRUPT_OVERRIDE *isop; 3107 apic_irq_t *airqp; 3108 3109 DDI_INTR_IMPLDBG((CE_CONT, "apic_introp_xlate: dip=0x%p name=%s " 3110 "type=%d irqno=0x%x\n", (void *)dip, ddi_get_name(dip), type, 3111 irqno)); 3112 3113 if (DDI_INTR_IS_MSI_OR_MSIX(type)) { 3114 if ((airqp = apic_find_irq(dip, ispec, type)) != NULL) 3115 return (apic_vector_to_irq[airqp->airq_vector]); 3116 return (apic_setup_irq_table(dip, irqno, NULL, ispec, 3117 NULL, type)); 3118 } 3119 3120 bustype = 0; 3121 3122 /* check if we have already translated this irq */ 3123 mutex_enter(&airq_mutex); 3124 newirq = apic_min_device_irq; 3125 for (; newirq <= apic_max_device_irq; newirq++) { 3126 airqp = apic_irq_table[newirq]; 3127 while (airqp) { 3128 if ((airqp->airq_dip == dip) && 3129 (airqp->airq_origirq == irqno) && 3130 (airqp->airq_mps_intr_index != FREE_INDEX)) { 3131 3132 mutex_exit(&airq_mutex); 3133 return (VIRTIRQ(newirq, airqp->airq_share_id)); 3134 } 3135 airqp = airqp->airq_next; 3136 } 3137 } 3138 mutex_exit(&airq_mutex); 3139 3140 if (apic_defconf) 3141 goto defconf; 3142 3143 if ((dip == NULL) || (!apic_irq_translate && !apic_enable_acpi)) 3144 goto nonpci; 3145 3146 dev_len = sizeof (dev_type); 3147 if (ddi_getlongprop_buf(DDI_DEV_T_ANY, 
ddi_get_parent(dip), 3148 DDI_PROP_DONTPASS, "device_type", (caddr_t)dev_type, 3149 &dev_len) != DDI_PROP_SUCCESS) { 3150 goto nonpci; 3151 } 3152 3153 if ((strcmp(dev_type, "pci") == 0) || 3154 (strcmp(dev_type, "pciex") == 0)) { 3155 /* pci device */ 3156 if (acpica_get_bdf(dip, &busid, &devid, NULL) != 0) 3157 goto nonpci; 3158 if (busid == 0 && apic_pci_bus_total == 1) 3159 busid = (int)apic_single_pci_busid; 3160 3161 if (pci_config_setup(dip, &cfg_handle) != DDI_SUCCESS) 3162 goto nonpci; 3163 ipin = pci_config_get8(cfg_handle, PCI_CONF_IPIN) - PCI_INTA; 3164 pci_config_teardown(&cfg_handle); 3165 if (apic_enable_acpi && !apic_use_acpi_madt_only) { 3166 if (apic_acpi_translate_pci_irq(dip, busid, devid, 3167 ipin, &pci_irq, &intr_flag) != ACPI_PSM_SUCCESS) 3168 goto nonpci; 3169 3170 intr_flag.bustype = BUS_PCI; 3171 if ((newirq = apic_setup_irq_table(dip, pci_irq, NULL, 3172 ispec, &intr_flag, type)) == -1) 3173 goto nonpci; 3174 return (newirq); 3175 } else { 3176 pci_irq = ((devid & 0x1f) << 2) | (ipin & 0x3); 3177 if ((intrp = apic_find_io_intr_w_busid(pci_irq, busid)) 3178 == NULL) { 3179 if ((pci_irq = apic_handle_pci_pci_bridge(dip, 3180 devid, ipin, &intrp)) == -1) 3181 goto nonpci; 3182 } 3183 if ((newirq = apic_setup_irq_table(dip, pci_irq, intrp, 3184 ispec, NULL, type)) == -1) 3185 goto nonpci; 3186 return (newirq); 3187 } 3188 } else if (strcmp(dev_type, "isa") == 0) 3189 bustype = BUS_ISA; 3190 else if (strcmp(dev_type, "eisa") == 0) 3191 bustype = BUS_EISA; 3192 3193 nonpci: 3194 if (apic_enable_acpi && !apic_use_acpi_madt_only) { 3195 /* search iso entries first */ 3196 if (acpi_iso_cnt != 0) { 3197 hp = (APIC_HEADER *)acpi_isop; 3198 i = 0; 3199 while (i < acpi_iso_cnt) { 3200 if (hp->Type == APIC_XRUPT_OVERRIDE) { 3201 isop = (MADT_INTERRUPT_OVERRIDE *)hp; 3202 if (isop->Bus == 0 && 3203 isop->Source == irqno) { 3204 newirq = isop->Interrupt; 3205 intr_flag.intr_po = 3206 isop->Polarity; 3207 intr_flag.intr_el = 3208 isop->TriggerMode; 3209 
intr_flag.bustype = BUS_ISA; 3210 3211 return (apic_setup_irq_table( 3212 dip, newirq, NULL, ispec, 3213 &intr_flag, type)); 3214 3215 } 3216 i++; 3217 } 3218 hp = (APIC_HEADER *)(((char *)hp) + 3219 hp->Length); 3220 } 3221 } 3222 intr_flag.intr_po = INTR_PO_ACTIVE_HIGH; 3223 intr_flag.intr_el = INTR_EL_EDGE; 3224 intr_flag.bustype = BUS_ISA; 3225 return (apic_setup_irq_table(dip, irqno, NULL, ispec, 3226 &intr_flag, type)); 3227 } else { 3228 if (bustype == 0) 3229 bustype = eisa_level_intr_mask ? BUS_EISA : BUS_ISA; 3230 for (i = 0; i < 2; i++) { 3231 if (((busid = apic_find_bus_id(bustype)) != -1) && 3232 ((intrp = apic_find_io_intr_w_busid(irqno, busid)) 3233 != NULL)) { 3234 if ((newirq = apic_setup_irq_table(dip, irqno, 3235 intrp, ispec, NULL, type)) != -1) { 3236 return (newirq); 3237 } 3238 goto defconf; 3239 } 3240 bustype = (bustype == BUS_EISA) ? BUS_ISA : BUS_EISA; 3241 } 3242 } 3243 3244 /* MPS default configuration */ 3245 defconf: 3246 newirq = apic_setup_irq_table(dip, irqno, NULL, ispec, NULL, type); 3247 if (newirq == -1) 3248 return (newirq); 3249 ASSERT(IRQINDEX(newirq) == irqno); 3250 ASSERT(apic_irq_table[irqno]); 3251 return (newirq); 3252 } 3253 3254 3255 3256 3257 3258 3259 /* 3260 * On machines with PCI-PCI bridges, a device behind a PCI-PCI bridge 3261 * needs special handling. We may need to chase up the device tree, 3262 * using the PCI-PCI Bridge specification's "rotating IPIN assumptions", 3263 * to find the IPIN at the root bus that relates to the IPIN on the 3264 * subsidiary bus (for ACPI or MP). We may, however, have an entry 3265 * in the MP table or the ACPI namespace for this device itself. 3266 * We handle both cases in the search below. 
3267 */ 3268 /* this is the non-acpi version */ 3269 static int 3270 apic_handle_pci_pci_bridge(dev_info_t *idip, int child_devno, int child_ipin, 3271 struct apic_io_intr **intrp) 3272 { 3273 dev_info_t *dipp, *dip; 3274 int pci_irq; 3275 ddi_acc_handle_t cfg_handle; 3276 int bridge_devno, bridge_bus; 3277 int ipin; 3278 3279 dip = idip; 3280 3281 /*CONSTCOND*/ 3282 while (1) { 3283 if ((dipp = ddi_get_parent(dip)) == (dev_info_t *)NULL) 3284 return (-1); 3285 if ((pci_config_setup(dipp, &cfg_handle) == DDI_SUCCESS) && 3286 (pci_config_get8(cfg_handle, PCI_CONF_BASCLASS) == 3287 PCI_CLASS_BRIDGE) && (pci_config_get8(cfg_handle, 3288 PCI_CONF_SUBCLASS) == PCI_BRIDGE_PCI)) { 3289 pci_config_teardown(&cfg_handle); 3290 if (acpica_get_bdf(dipp, &bridge_bus, &bridge_devno, 3291 NULL) != 0) 3292 return (-1); 3293 /* 3294 * This is the rotating scheme that Compaq is using 3295 * and documented in the pci to pci spec. Also, if 3296 * the pci to pci bridge is behind another pci to 3297 * pci bridge, then it need to keep transversing 3298 * up until an interrupt entry is found or reach 3299 * the top of the tree 3300 */ 3301 ipin = (child_devno + child_ipin) % PCI_INTD; 3302 if (bridge_bus == 0 && apic_pci_bus_total == 1) 3303 bridge_bus = (int)apic_single_pci_busid; 3304 pci_irq = ((bridge_devno & 0x1f) << 2) | 3305 (ipin & 0x3); 3306 if ((*intrp = apic_find_io_intr_w_busid(pci_irq, 3307 bridge_bus)) != NULL) { 3308 return (pci_irq); 3309 } 3310 dip = dipp; 3311 child_devno = bridge_devno; 3312 child_ipin = ipin; 3313 } else 3314 return (-1); 3315 } 3316 /*LINTED: function will not fall off the bottom */ 3317 } 3318 3319 3320 3321 3322 static uchar_t 3323 acpi_find_ioapic(int irq) 3324 { 3325 int i; 3326 3327 for (i = 0; i < apic_io_max; i++) { 3328 if (irq >= apic_io_vectbase[i] && irq <= apic_io_vectend[i]) 3329 return (i); 3330 } 3331 return (0xFF); /* shouldn't happen */ 3332 } 3333 3334 /* 3335 * See if two irqs are compatible for sharing a vector. 
3336 * Currently we only support sharing of PCI devices. 3337 */ 3338 static int 3339 acpi_intr_compatible(iflag_t iflag1, iflag_t iflag2) 3340 { 3341 uint_t level1, po1; 3342 uint_t level2, po2; 3343 3344 /* Assume active high by default */ 3345 po1 = 0; 3346 po2 = 0; 3347 3348 if (iflag1.bustype != iflag2.bustype || iflag1.bustype != BUS_PCI) 3349 return (0); 3350 3351 if (iflag1.intr_el == INTR_EL_CONFORM) 3352 level1 = AV_LEVEL; 3353 else 3354 level1 = (iflag1.intr_el == INTR_EL_LEVEL) ? AV_LEVEL : 0; 3355 3356 if (level1 && ((iflag1.intr_po == INTR_PO_ACTIVE_LOW) || 3357 (iflag1.intr_po == INTR_PO_CONFORM))) 3358 po1 = AV_ACTIVE_LOW; 3359 3360 if (iflag2.intr_el == INTR_EL_CONFORM) 3361 level2 = AV_LEVEL; 3362 else 3363 level2 = (iflag2.intr_el == INTR_EL_LEVEL) ? AV_LEVEL : 0; 3364 3365 if (level2 && ((iflag2.intr_po == INTR_PO_ACTIVE_LOW) || 3366 (iflag2.intr_po == INTR_PO_CONFORM))) 3367 po2 = AV_ACTIVE_LOW; 3368 3369 if ((level1 == level2) && (po1 == po2)) 3370 return (1); 3371 3372 return (0); 3373 } 3374 3375 /* 3376 * Attempt to share vector with someone else 3377 */ 3378 static int 3379 apic_share_vector(int irqno, iflag_t *intr_flagp, short intr_index, int ipl, 3380 uchar_t ioapicindex, uchar_t ipin, apic_irq_t **irqptrp) 3381 { 3382 #ifdef DEBUG 3383 apic_irq_t *tmpirqp = NULL; 3384 #endif /* DEBUG */ 3385 apic_irq_t *irqptr, dummyirq; 3386 int newirq, chosen_irq = -1, share = 127; 3387 int lowest, highest, i; 3388 uchar_t share_id; 3389 3390 DDI_INTR_IMPLDBG((CE_CONT, "apic_share_vector: irqno=0x%x " 3391 "intr_index=0x%x ipl=0x%x\n", irqno, intr_index, ipl)); 3392 3393 highest = apic_ipltopri[ipl] + APIC_VECTOR_MASK; 3394 lowest = apic_ipltopri[ipl-1] + APIC_VECTOR_PER_IPL; 3395 3396 if (highest < lowest) /* Both ipl and ipl-1 map to same pri */ 3397 lowest -= APIC_VECTOR_PER_IPL; 3398 dummyirq.airq_mps_intr_index = intr_index; 3399 dummyirq.airq_ioapicindex = ioapicindex; 3400 dummyirq.airq_intin_no = ipin; 3401 if (intr_flagp) 3402 
dummyirq.airq_iflag = *intr_flagp; 3403 apic_record_rdt_entry(&dummyirq, irqno); 3404 for (i = lowest; i <= highest; i++) { 3405 newirq = apic_vector_to_irq[i]; 3406 if (newirq == APIC_RESV_IRQ) 3407 continue; 3408 irqptr = apic_irq_table[newirq]; 3409 3410 if ((dummyirq.airq_rdt_entry & 0xFF00) != 3411 (irqptr->airq_rdt_entry & 0xFF00)) 3412 /* not compatible */ 3413 continue; 3414 3415 if (irqptr->airq_share < share) { 3416 share = irqptr->airq_share; 3417 chosen_irq = newirq; 3418 } 3419 } 3420 if (chosen_irq != -1) { 3421 /* 3422 * Assign a share id which is free or which is larger 3423 * than the largest one. 3424 */ 3425 share_id = 1; 3426 mutex_enter(&airq_mutex); 3427 irqptr = apic_irq_table[chosen_irq]; 3428 while (irqptr) { 3429 if (irqptr->airq_mps_intr_index == FREE_INDEX) { 3430 share_id = irqptr->airq_share_id; 3431 break; 3432 } 3433 if (share_id <= irqptr->airq_share_id) 3434 share_id = irqptr->airq_share_id + 1; 3435 #ifdef DEBUG 3436 tmpirqp = irqptr; 3437 #endif /* DEBUG */ 3438 irqptr = irqptr->airq_next; 3439 } 3440 if (!irqptr) { 3441 irqptr = kmem_zalloc(sizeof (apic_irq_t), KM_SLEEP); 3442 irqptr->airq_temp_cpu = IRQ_UNINIT; 3443 irqptr->airq_next = 3444 apic_irq_table[chosen_irq]->airq_next; 3445 apic_irq_table[chosen_irq]->airq_next = irqptr; 3446 #ifdef DEBUG 3447 tmpirqp = apic_irq_table[chosen_irq]; 3448 #endif /* DEBUG */ 3449 } 3450 irqptr->airq_mps_intr_index = intr_index; 3451 irqptr->airq_ioapicindex = ioapicindex; 3452 irqptr->airq_intin_no = ipin; 3453 if (intr_flagp) 3454 irqptr->airq_iflag = *intr_flagp; 3455 irqptr->airq_vector = apic_irq_table[chosen_irq]->airq_vector; 3456 irqptr->airq_share_id = share_id; 3457 apic_record_rdt_entry(irqptr, irqno); 3458 *irqptrp = irqptr; 3459 #ifdef DEBUG 3460 /* shuffle the pointers to test apic_delspl path */ 3461 if (tmpirqp) { 3462 tmpirqp->airq_next = irqptr->airq_next; 3463 irqptr->airq_next = apic_irq_table[chosen_irq]; 3464 apic_irq_table[chosen_irq] = irqptr; 3465 } 3466 #endif /* 
DEBUG */ 3467 mutex_exit(&airq_mutex); 3468 return (VIRTIRQ(chosen_irq, share_id)); 3469 } 3470 return (-1); 3471 } 3472 3473 /* 3474 * 3475 */ 3476 static int 3477 apic_setup_irq_table(dev_info_t *dip, int irqno, struct apic_io_intr *intrp, 3478 struct intrspec *ispec, iflag_t *intr_flagp, int type) 3479 { 3480 int origirq = ispec->intrspec_vec; 3481 uchar_t ipl = ispec->intrspec_pri; 3482 int newirq, intr_index; 3483 uchar_t ipin, ioapic, ioapicindex, vector; 3484 apic_irq_t *irqptr; 3485 major_t major; 3486 dev_info_t *sdip; 3487 3488 DDI_INTR_IMPLDBG((CE_CONT, "apic_setup_irq_table: dip=0x%p type=%d " 3489 "irqno=0x%x origirq=0x%x\n", (void *)dip, type, irqno, origirq)); 3490 3491 ASSERT(ispec != NULL); 3492 3493 major = (dip != NULL) ? ddi_name_to_major(ddi_get_name(dip)) : 0; 3494 3495 if (DDI_INTR_IS_MSI_OR_MSIX(type)) { 3496 /* MSI/X doesn't need to setup ioapic stuffs */ 3497 ioapicindex = 0xff; 3498 ioapic = 0xff; 3499 ipin = (uchar_t)0xff; 3500 intr_index = (type == DDI_INTR_TYPE_MSI) ? MSI_INDEX : 3501 MSIX_INDEX; 3502 mutex_enter(&airq_mutex); 3503 if ((irqno = apic_allocate_irq(apic_first_avail_irq)) == -1) { 3504 mutex_exit(&airq_mutex); 3505 /* need an irq for MSI/X to index into autovect[] */ 3506 cmn_err(CE_WARN, "No interrupt irq: %s instance %d", 3507 ddi_get_name(dip), ddi_get_instance(dip)); 3508 return (-1); 3509 } 3510 mutex_exit(&airq_mutex); 3511 3512 } else if (intrp != NULL) { 3513 intr_index = (int)(intrp - apic_io_intrp); 3514 ioapic = intrp->intr_destid; 3515 ipin = intrp->intr_destintin; 3516 /* Find ioapicindex. If destid was ALL, we will exit with 0. 
*/ 3517 for (ioapicindex = apic_io_max - 1; ioapicindex; ioapicindex--) 3518 if (apic_io_id[ioapicindex] == ioapic) 3519 break; 3520 ASSERT((ioapic == apic_io_id[ioapicindex]) || 3521 (ioapic == INTR_ALL_APIC)); 3522 3523 /* check whether this intin# has been used by another irqno */ 3524 if ((newirq = apic_find_intin(ioapicindex, ipin)) != -1) { 3525 return (newirq); 3526 } 3527 3528 } else if (intr_flagp != NULL) { 3529 /* ACPI case */ 3530 intr_index = ACPI_INDEX; 3531 ioapicindex = acpi_find_ioapic(irqno); 3532 ASSERT(ioapicindex != 0xFF); 3533 ioapic = apic_io_id[ioapicindex]; 3534 ipin = irqno - apic_io_vectbase[ioapicindex]; 3535 if (apic_irq_table[irqno] && 3536 apic_irq_table[irqno]->airq_mps_intr_index == ACPI_INDEX) { 3537 ASSERT(apic_irq_table[irqno]->airq_intin_no == ipin && 3538 apic_irq_table[irqno]->airq_ioapicindex == 3539 ioapicindex); 3540 return (irqno); 3541 } 3542 3543 } else { 3544 /* default configuration */ 3545 ioapicindex = 0; 3546 ioapic = apic_io_id[ioapicindex]; 3547 ipin = (uchar_t)irqno; 3548 intr_index = DEFAULT_INDEX; 3549 } 3550 3551 if (ispec == NULL) { 3552 APIC_VERBOSE_IOAPIC((CE_WARN, "No intrspec for irqno = %x\n", 3553 irqno)); 3554 } else if ((vector = apic_allocate_vector(ipl, irqno, 0)) == 0) { 3555 if ((newirq = apic_share_vector(irqno, intr_flagp, intr_index, 3556 ipl, ioapicindex, ipin, &irqptr)) != -1) { 3557 irqptr->airq_ipl = ipl; 3558 irqptr->airq_origirq = (uchar_t)origirq; 3559 irqptr->airq_dip = dip; 3560 irqptr->airq_major = major; 3561 sdip = apic_irq_table[IRQINDEX(newirq)]->airq_dip; 3562 /* This is OK to do really */ 3563 if (sdip == NULL) { 3564 cmn_err(CE_WARN, "Sharing vectors: %s" 3565 " instance %d and SCI", 3566 ddi_get_name(dip), ddi_get_instance(dip)); 3567 } else { 3568 cmn_err(CE_WARN, "Sharing vectors: %s" 3569 " instance %d and %s instance %d", 3570 ddi_get_name(sdip), ddi_get_instance(sdip), 3571 ddi_get_name(dip), ddi_get_instance(dip)); 3572 } 3573 return (newirq); 3574 } 3575 /* try high 
priority allocation now that share has failed */ 3576 if ((vector = apic_allocate_vector(ipl, irqno, 1)) == 0) { 3577 cmn_err(CE_WARN, "No interrupt vector: %s instance %d", 3578 ddi_get_name(dip), ddi_get_instance(dip)); 3579 return (-1); 3580 } 3581 } 3582 3583 mutex_enter(&airq_mutex); 3584 if (apic_irq_table[irqno] == NULL) { 3585 irqptr = kmem_zalloc(sizeof (apic_irq_t), KM_SLEEP); 3586 irqptr->airq_temp_cpu = IRQ_UNINIT; 3587 apic_irq_table[irqno] = irqptr; 3588 } else { 3589 irqptr = apic_irq_table[irqno]; 3590 if (irqptr->airq_mps_intr_index != FREE_INDEX) { 3591 /* 3592 * The slot is used by another irqno, so allocate 3593 * a free irqno for this interrupt 3594 */ 3595 newirq = apic_allocate_irq(apic_first_avail_irq); 3596 if (newirq == -1) { 3597 mutex_exit(&airq_mutex); 3598 return (-1); 3599 } 3600 irqno = newirq; 3601 irqptr = apic_irq_table[irqno]; 3602 if (irqptr == NULL) { 3603 irqptr = kmem_zalloc(sizeof (apic_irq_t), 3604 KM_SLEEP); 3605 irqptr->airq_temp_cpu = IRQ_UNINIT; 3606 apic_irq_table[irqno] = irqptr; 3607 } 3608 apic_modify_vector(vector, newirq); 3609 } 3610 } 3611 apic_max_device_irq = max(irqno, apic_max_device_irq); 3612 apic_min_device_irq = min(irqno, apic_min_device_irq); 3613 mutex_exit(&airq_mutex); 3614 irqptr->airq_ioapicindex = ioapicindex; 3615 irqptr->airq_intin_no = ipin; 3616 irqptr->airq_ipl = ipl; 3617 irqptr->airq_vector = vector; 3618 irqptr->airq_origirq = (uchar_t)origirq; 3619 irqptr->airq_share_id = 0; 3620 irqptr->airq_mps_intr_index = (short)intr_index; 3621 irqptr->airq_dip = dip; 3622 irqptr->airq_major = major; 3623 irqptr->airq_cpu = apic_bind_intr(dip, irqno, ioapic, ipin); 3624 if (intr_flagp) 3625 irqptr->airq_iflag = *intr_flagp; 3626 3627 if (!DDI_INTR_IS_MSI_OR_MSIX(type)) { 3628 /* setup I/O APIC entry for non-MSI/X interrupts */ 3629 apic_record_rdt_entry(irqptr, irqno); 3630 } 3631 return (irqno); 3632 } 3633 3634 /* 3635 * return the cpu to which this intr should be bound. 
3636 * Check properties or any other mechanism to see if user wants it 3637 * bound to a specific CPU. If so, return the cpu id with high bit set. 3638 * If not, use the policy to choose a cpu and return the id. 3639 */ 3640 uchar_t 3641 apic_bind_intr(dev_info_t *dip, int irq, uchar_t ioapicid, uchar_t intin) 3642 { 3643 int instance, instno, prop_len, bind_cpu, count; 3644 uint_t i, rc; 3645 uchar_t cpu; 3646 major_t major; 3647 char *name, *drv_name, *prop_val, *cptr; 3648 char prop_name[32]; 3649 3650 3651 if (apic_intr_policy == INTR_LOWEST_PRIORITY) 3652 return (IRQ_UNBOUND); 3653 3654 drv_name = NULL; 3655 rc = DDI_PROP_NOT_FOUND; 3656 major = (major_t)-1; 3657 if (dip != NULL) { 3658 name = ddi_get_name(dip); 3659 major = ddi_name_to_major(name); 3660 drv_name = ddi_major_to_name(major); 3661 instance = ddi_get_instance(dip); 3662 if (apic_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) { 3663 i = apic_min_device_irq; 3664 for (; i <= apic_max_device_irq; i++) { 3665 3666 if ((i == irq) || (apic_irq_table[i] == NULL) || 3667 (apic_irq_table[i]->airq_mps_intr_index 3668 == FREE_INDEX)) 3669 continue; 3670 3671 if ((apic_irq_table[i]->airq_major == major) && 3672 (!(apic_irq_table[i]->airq_cpu & 3673 IRQ_USER_BOUND))) { 3674 3675 cpu = apic_irq_table[i]->airq_cpu; 3676 3677 cmn_err(CE_CONT, 3678 "!pcplusmp: %s (%s) instance #%d " 3679 "vector 0x%x ioapic 0x%x " 3680 "intin 0x%x is bound to cpu %d\n", 3681 name, drv_name, instance, irq, 3682 ioapicid, intin, cpu); 3683 return (cpu); 3684 } 3685 } 3686 } 3687 /* 3688 * search for "drvname"_intpt_bind_cpus property first, the 3689 * syntax of the property should be "a[,b,c,...]" where 3690 * instance 0 binds to cpu a, instance 1 binds to cpu b, 3691 * instance 3 binds to cpu c... 3692 * ddi_getlongprop() will search /option first, then / 3693 * if "drvname"_intpt_bind_cpus doesn't exist, then find 3694 * intpt_bind_cpus property. 
The syntax is the same, and 3695 * it applies to all the devices if its "drvname" specific 3696 * property doesn't exist 3697 */ 3698 (void) strcpy(prop_name, drv_name); 3699 (void) strcat(prop_name, "_intpt_bind_cpus"); 3700 rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, 0, prop_name, 3701 (caddr_t)&prop_val, &prop_len); 3702 if (rc != DDI_PROP_SUCCESS) { 3703 rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, 0, 3704 "intpt_bind_cpus", (caddr_t)&prop_val, &prop_len); 3705 } 3706 } 3707 if (rc == DDI_PROP_SUCCESS) { 3708 for (i = count = 0; i < (prop_len - 1); i++) 3709 if (prop_val[i] == ',') 3710 count++; 3711 if (prop_val[i-1] != ',') 3712 count++; 3713 /* 3714 * if somehow the binding instances defined in the 3715 * property are not enough for this instno., then 3716 * reuse the pattern for the next instance until 3717 * it reaches the requested instno 3718 */ 3719 instno = instance % count; 3720 i = 0; 3721 cptr = prop_val; 3722 while (i < instno) 3723 if (*cptr++ == ',') 3724 i++; 3725 bind_cpu = stoi(&cptr); 3726 kmem_free(prop_val, prop_len); 3727 /* if specific cpu is bogus, then default to cpu 0 */ 3728 if (bind_cpu >= apic_nproc) { 3729 cmn_err(CE_WARN, "pcplusmp: %s=%s: CPU %d not present", 3730 prop_name, prop_val, bind_cpu); 3731 bind_cpu = 0; 3732 } else { 3733 /* indicate that we are bound at user request */ 3734 bind_cpu |= IRQ_USER_BOUND; 3735 } 3736 /* 3737 * no need to check apic_cpus[].aci_status, if specific cpu is 3738 * not up, then post_cpu_start will handle it. 
3739 */ 3740 } else { 3741 /* 3742 * We change bind_cpu only for every two calls 3743 * as most drivers still do 2 add_intrs for every 3744 * interrupt 3745 */ 3746 bind_cpu = (apic_next_bind_cpu++) / 2; 3747 if (bind_cpu >= apic_nproc) { 3748 apic_next_bind_cpu = 1; 3749 bind_cpu = 0; 3750 } 3751 } 3752 if (drv_name != NULL) 3753 cmn_err(CE_CONT, "!pcplusmp: %s (%s) instance %d " 3754 "vector 0x%x ioapic 0x%x intin 0x%x is bound to cpu %d\n", 3755 name, drv_name, instance, 3756 irq, ioapicid, intin, bind_cpu & ~IRQ_USER_BOUND); 3757 else 3758 cmn_err(CE_CONT, "!pcplusmp: " 3759 "vector 0x%x ioapic 0x%x intin 0x%x is bound to cpu %d\n", 3760 irq, ioapicid, intin, bind_cpu & ~IRQ_USER_BOUND); 3761 3762 return ((uchar_t)bind_cpu); 3763 } 3764 3765 static struct apic_io_intr * 3766 apic_find_io_intr_w_busid(int irqno, int busid) 3767 { 3768 struct apic_io_intr *intrp; 3769 3770 /* 3771 * It can have more than 1 entry with same source bus IRQ, 3772 * but unique with the source bus id 3773 */ 3774 intrp = apic_io_intrp; 3775 if (intrp != NULL) { 3776 while (intrp->intr_entry == APIC_IO_INTR_ENTRY) { 3777 if (intrp->intr_irq == irqno && 3778 intrp->intr_busid == busid && 3779 intrp->intr_type == IO_INTR_INT) 3780 return (intrp); 3781 intrp++; 3782 } 3783 } 3784 APIC_VERBOSE_IOAPIC((CE_NOTE, "Did not find io intr for irqno:" 3785 "busid %x:%x\n", irqno, busid)); 3786 return ((struct apic_io_intr *)NULL); 3787 } 3788 3789 3790 struct mps_bus_info { 3791 char *bus_name; 3792 int bus_id; 3793 } bus_info_array[] = { 3794 "ISA ", BUS_ISA, 3795 "PCI ", BUS_PCI, 3796 "EISA ", BUS_EISA, 3797 "XPRESS", BUS_XPRESS, 3798 "PCMCIA", BUS_PCMCIA, 3799 "VL ", BUS_VL, 3800 "CBUS ", BUS_CBUS, 3801 "CBUSII", BUS_CBUSII, 3802 "FUTURE", BUS_FUTURE, 3803 "INTERN", BUS_INTERN, 3804 "MBI ", BUS_MBI, 3805 "MBII ", BUS_MBII, 3806 "MPI ", BUS_MPI, 3807 "MPSA ", BUS_MPSA, 3808 "NUBUS ", BUS_NUBUS, 3809 "TC ", BUS_TC, 3810 "VME ", BUS_VME 3811 }; 3812 3813 static int 3814 apic_find_bus_type(char 
*bus) 3815 { 3816 int i = 0; 3817 3818 for (; i < sizeof (bus_info_array)/sizeof (struct mps_bus_info); i++) 3819 if (strncmp(bus, bus_info_array[i].bus_name, 3820 strlen(bus_info_array[i].bus_name)) == 0) 3821 return (bus_info_array[i].bus_id); 3822 APIC_VERBOSE_IOAPIC((CE_WARN, "Did not find bus type for bus %s", bus)); 3823 return (0); 3824 } 3825 3826 static int 3827 apic_find_bus(int busid) 3828 { 3829 struct apic_bus *busp; 3830 3831 busp = apic_busp; 3832 while (busp->bus_entry == APIC_BUS_ENTRY) { 3833 if (busp->bus_id == busid) 3834 return (apic_find_bus_type((char *)&busp->bus_str1)); 3835 busp++; 3836 } 3837 APIC_VERBOSE_IOAPIC((CE_WARN, "Did not find bus for bus id %x", busid)); 3838 return (0); 3839 } 3840 3841 static int 3842 apic_find_bus_id(int bustype) 3843 { 3844 struct apic_bus *busp; 3845 3846 busp = apic_busp; 3847 while (busp->bus_entry == APIC_BUS_ENTRY) { 3848 if (apic_find_bus_type((char *)&busp->bus_str1) == bustype) 3849 return (busp->bus_id); 3850 busp++; 3851 } 3852 APIC_VERBOSE_IOAPIC((CE_WARN, "Did not find bus id for bustype %x", 3853 bustype)); 3854 return (-1); 3855 } 3856 3857 /* 3858 * Check if a particular irq need to be reserved for any io_intr 3859 */ 3860 static struct apic_io_intr * 3861 apic_find_io_intr(int irqno) 3862 { 3863 struct apic_io_intr *intrp; 3864 3865 intrp = apic_io_intrp; 3866 if (intrp != NULL) { 3867 while (intrp->intr_entry == APIC_IO_INTR_ENTRY) { 3868 if (intrp->intr_irq == irqno && 3869 intrp->intr_type == IO_INTR_INT) 3870 return (intrp); 3871 intrp++; 3872 } 3873 } 3874 return ((struct apic_io_intr *)NULL); 3875 } 3876 3877 /* 3878 * Check if the given ioapicindex intin combination has already been assigned 3879 * an irq. If so return irqno. 
Else -1 3880 */ 3881 static int 3882 apic_find_intin(uchar_t ioapic, uchar_t intin) 3883 { 3884 apic_irq_t *irqptr; 3885 int i; 3886 3887 /* find ioapic and intin in the apic_irq_table[] and return the index */ 3888 for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) { 3889 irqptr = apic_irq_table[i]; 3890 while (irqptr) { 3891 if ((irqptr->airq_mps_intr_index >= 0) && 3892 (irqptr->airq_intin_no == intin) && 3893 (irqptr->airq_ioapicindex == ioapic)) { 3894 APIC_VERBOSE_IOAPIC((CE_NOTE, "!Found irq " 3895 "entry for ioapic:intin %x:%x " 3896 "shared interrupts ?", ioapic, intin)); 3897 return (i); 3898 } 3899 irqptr = irqptr->airq_next; 3900 } 3901 } 3902 return (-1); 3903 } 3904 3905 int 3906 apic_allocate_irq(int irq) 3907 { 3908 int freeirq, i; 3909 3910 if ((freeirq = apic_find_free_irq(irq, (APIC_RESV_IRQ - 1))) == -1) 3911 if ((freeirq = apic_find_free_irq(APIC_FIRST_FREE_IRQ, 3912 (irq - 1))) == -1) { 3913 /* 3914 * if BIOS really defines every single irq in the mps 3915 * table, then don't worry about conflicting with 3916 * them, just use any free slot in apic_irq_table 3917 */ 3918 for (i = APIC_FIRST_FREE_IRQ; i < APIC_RESV_IRQ; i++) { 3919 if ((apic_irq_table[i] == NULL) || 3920 apic_irq_table[i]->airq_mps_intr_index == 3921 FREE_INDEX) { 3922 freeirq = i; 3923 break; 3924 } 3925 } 3926 if (freeirq == -1) { 3927 /* This shouldn't happen, but just in case */ 3928 cmn_err(CE_WARN, "pcplusmp: NO available IRQ"); 3929 return (-1); 3930 } 3931 } 3932 if (apic_irq_table[freeirq] == NULL) { 3933 apic_irq_table[freeirq] = 3934 kmem_zalloc(sizeof (apic_irq_t), KM_NOSLEEP); 3935 if (apic_irq_table[freeirq] == NULL) { 3936 cmn_err(CE_WARN, "pcplusmp: NO memory to allocate IRQ"); 3937 return (-1); 3938 } 3939 apic_irq_table[freeirq]->airq_mps_intr_index = FREE_INDEX; 3940 } 3941 return (freeirq); 3942 } 3943 3944 static int 3945 apic_find_free_irq(int start, int end) 3946 { 3947 int i; 3948 3949 for (i = start; i <= end; i++) 3950 /* Check if any I/O 
entry needs this IRQ */ 3951 if (apic_find_io_intr(i) == NULL) { 3952 /* Then see if it is free */ 3953 if ((apic_irq_table[i] == NULL) || 3954 (apic_irq_table[i]->airq_mps_intr_index == 3955 FREE_INDEX)) { 3956 return (i); 3957 } 3958 } 3959 return (-1); 3960 } 3961 3962 /* 3963 * Allocate a free vector for irq at ipl. Takes care of merging of multiple 3964 * IPLs into a single APIC level as well as stretching some IPLs onto multiple 3965 * levels. APIC_HI_PRI_VECTS interrupts are reserved for high priority 3966 * requests and allocated only when pri is set. 3967 */ 3968 static uchar_t 3969 apic_allocate_vector(int ipl, int irq, int pri) 3970 { 3971 int lowest, highest, i; 3972 3973 highest = apic_ipltopri[ipl] + APIC_VECTOR_MASK; 3974 lowest = apic_ipltopri[ipl - 1] + APIC_VECTOR_PER_IPL; 3975 3976 if (highest < lowest) /* Both ipl and ipl - 1 map to same pri */ 3977 lowest -= APIC_VECTOR_PER_IPL; 3978 3979 #ifdef DEBUG 3980 if (apic_restrict_vector) /* for testing shared interrupt logic */ 3981 highest = lowest + apic_restrict_vector + APIC_HI_PRI_VECTS; 3982 #endif /* DEBUG */ 3983 if (pri == 0) 3984 highest -= APIC_HI_PRI_VECTS; 3985 3986 for (i = lowest; i < highest; i++) { 3987 if ((i == T_FASTTRAP) || (i == APIC_SPUR_INTR) || 3988 (i == T_SYSCALLINT) || (i == T_DTRACE_PROBE) || 3989 (i == T_DTRACE_RET)) 3990 continue; 3991 if (apic_vector_to_irq[i] == APIC_RESV_IRQ) { 3992 apic_vector_to_irq[i] = (uchar_t)irq; 3993 return (i); 3994 } 3995 } 3996 3997 return (0); 3998 } 3999 4000 static void 4001 apic_modify_vector(uchar_t vector, int irq) 4002 { 4003 apic_vector_to_irq[vector] = (uchar_t)irq; 4004 } 4005 4006 /* 4007 * Mark vector as being in the process of being deleted. Interrupts 4008 * may still come in on some CPU. The moment an interrupt comes with 4009 * the new vector, we know we can free the old one. Called only from 4010 * addspl and delspl with interrupts disabled. 
Because an interrupt 4011 * can be shared, but no interrupt from either device may come in, 4012 * we also use a timeout mechanism, which we arbitrarily set to 4013 * apic_revector_timeout microseconds. 4014 */ 4015 static void 4016 apic_mark_vector(uchar_t oldvector, uchar_t newvector) 4017 { 4018 int iflag = intr_clear(); 4019 lock_set(&apic_revector_lock); 4020 if (!apic_oldvec_to_newvec) { 4021 apic_oldvec_to_newvec = 4022 kmem_zalloc(sizeof (newvector) * APIC_MAX_VECTOR * 2, 4023 KM_NOSLEEP); 4024 4025 if (!apic_oldvec_to_newvec) { 4026 /* 4027 * This failure is not catastrophic. 4028 * But, the oldvec will never be freed. 4029 */ 4030 apic_error |= APIC_ERR_MARK_VECTOR_FAIL; 4031 lock_clear(&apic_revector_lock); 4032 intr_restore(iflag); 4033 return; 4034 } 4035 apic_newvec_to_oldvec = &apic_oldvec_to_newvec[APIC_MAX_VECTOR]; 4036 } 4037 4038 /* See if we already did this for drivers which do double addintrs */ 4039 if (apic_oldvec_to_newvec[oldvector] != newvector) { 4040 apic_oldvec_to_newvec[oldvector] = newvector; 4041 apic_newvec_to_oldvec[newvector] = oldvector; 4042 apic_revector_pending++; 4043 } 4044 lock_clear(&apic_revector_lock); 4045 intr_restore(iflag); 4046 (void) timeout(apic_xlate_vector_free_timeout_handler, 4047 (void *)(uintptr_t)oldvector, drv_usectohz(apic_revector_timeout)); 4048 } 4049 4050 /* 4051 * xlate_vector is called from intr_enter if revector_pending is set. 4052 * It will xlate it if needed and mark the old vector as free. 4053 */ 4054 static uchar_t 4055 apic_xlate_vector(uchar_t vector) 4056 { 4057 uchar_t newvector, oldvector = 0; 4058 4059 lock_set(&apic_revector_lock); 4060 /* Do we really need to do this ? */ 4061 if (!apic_revector_pending) { 4062 lock_clear(&apic_revector_lock); 4063 return (vector); 4064 } 4065 if ((newvector = apic_oldvec_to_newvec[vector]) != 0) 4066 oldvector = vector; 4067 else { 4068 /* 4069 * The incoming vector is new . 
See if a stale entry is 4070 * remaining 4071 */ 4072 if ((oldvector = apic_newvec_to_oldvec[vector]) != 0) 4073 newvector = vector; 4074 } 4075 4076 if (oldvector) { 4077 apic_revector_pending--; 4078 apic_oldvec_to_newvec[oldvector] = 0; 4079 apic_newvec_to_oldvec[newvector] = 0; 4080 apic_free_vector(oldvector); 4081 lock_clear(&apic_revector_lock); 4082 /* There could have been more than one reprogramming! */ 4083 return (apic_xlate_vector(newvector)); 4084 } 4085 lock_clear(&apic_revector_lock); 4086 return (vector); 4087 } 4088 4089 void 4090 apic_xlate_vector_free_timeout_handler(void *arg) 4091 { 4092 int iflag; 4093 uchar_t oldvector, newvector; 4094 4095 oldvector = (uchar_t)(uintptr_t)arg; 4096 iflag = intr_clear(); 4097 lock_set(&apic_revector_lock); 4098 if ((newvector = apic_oldvec_to_newvec[oldvector]) != 0) { 4099 apic_free_vector(oldvector); 4100 apic_oldvec_to_newvec[oldvector] = 0; 4101 apic_newvec_to_oldvec[newvector] = 0; 4102 apic_revector_pending--; 4103 } 4104 4105 lock_clear(&apic_revector_lock); 4106 intr_restore(iflag); 4107 } 4108 4109 4110 /* Mark vector as not being used by any irq */ 4111 static void 4112 apic_free_vector(uchar_t vector) 4113 { 4114 apic_vector_to_irq[vector] = APIC_RESV_IRQ; 4115 } 4116 4117 /* 4118 * compute the polarity, trigger mode and vector for programming into 4119 * the I/O apic and record in airq_rdt_entry. 
4120 */ 4121 static void 4122 apic_record_rdt_entry(apic_irq_t *irqptr, int irq) 4123 { 4124 int ioapicindex, bus_type, vector; 4125 short intr_index; 4126 uint_t level, po, io_po; 4127 struct apic_io_intr *iointrp; 4128 4129 intr_index = irqptr->airq_mps_intr_index; 4130 DDI_INTR_IMPLDBG((CE_CONT, "apic_record_rdt_entry: intr_index=%d " 4131 "irq = 0x%x dip = 0x%p vector = 0x%x\n", intr_index, irq, 4132 (void *)irqptr->airq_dip, irqptr->airq_vector)); 4133 4134 if (intr_index == RESERVE_INDEX) { 4135 apic_error |= APIC_ERR_INVALID_INDEX; 4136 return; 4137 } else if (APIC_IS_MSI_OR_MSIX_INDEX(intr_index)) { 4138 return; 4139 } 4140 4141 vector = irqptr->airq_vector; 4142 ioapicindex = irqptr->airq_ioapicindex; 4143 /* Assume edge triggered by default */ 4144 level = 0; 4145 /* Assume active high by default */ 4146 po = 0; 4147 4148 if (intr_index == DEFAULT_INDEX || intr_index == FREE_INDEX) { 4149 ASSERT(irq < 16); 4150 if (eisa_level_intr_mask & (1 << irq)) 4151 level = AV_LEVEL; 4152 if (intr_index == FREE_INDEX && apic_defconf == 0) 4153 apic_error |= APIC_ERR_INVALID_INDEX; 4154 } else if (intr_index == ACPI_INDEX) { 4155 bus_type = irqptr->airq_iflag.bustype; 4156 if (irqptr->airq_iflag.intr_el == INTR_EL_CONFORM) { 4157 if (bus_type == BUS_PCI) 4158 level = AV_LEVEL; 4159 } else 4160 level = (irqptr->airq_iflag.intr_el == INTR_EL_LEVEL) ? 4161 AV_LEVEL : 0; 4162 if (level && 4163 ((irqptr->airq_iflag.intr_po == INTR_PO_ACTIVE_LOW) || 4164 (irqptr->airq_iflag.intr_po == INTR_PO_CONFORM && 4165 bus_type == BUS_PCI))) 4166 po = AV_ACTIVE_LOW; 4167 } else { 4168 iointrp = apic_io_intrp + intr_index; 4169 bus_type = apic_find_bus(iointrp->intr_busid); 4170 if (iointrp->intr_el == INTR_EL_CONFORM) { 4171 if ((irq < 16) && (eisa_level_intr_mask & (1 << irq))) 4172 level = AV_LEVEL; 4173 else if (bus_type == BUS_PCI) 4174 level = AV_LEVEL; 4175 } else 4176 level = (iointrp->intr_el == INTR_EL_LEVEL) ? 
4177 AV_LEVEL : 0; 4178 if (level && ((iointrp->intr_po == INTR_PO_ACTIVE_LOW) || 4179 (iointrp->intr_po == INTR_PO_CONFORM && 4180 bus_type == BUS_PCI))) 4181 po = AV_ACTIVE_LOW; 4182 } 4183 if (level) 4184 apic_level_intr[irq] = 1; 4185 /* 4186 * The 82489DX External APIC cannot do active low polarity interrupts. 4187 */ 4188 if (po && (apic_io_ver[ioapicindex] != IOAPIC_VER_82489DX)) 4189 io_po = po; 4190 else 4191 io_po = 0; 4192 4193 if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) 4194 printf("setio: ioapic=%x intin=%x level=%x po=%x vector=%x\n", 4195 ioapicindex, irqptr->airq_intin_no, level, io_po, vector); 4196 4197 irqptr->airq_rdt_entry = level|io_po|vector; 4198 } 4199 4200 /* 4201 * Call rebind to do the actual programming. 4202 */ 4203 static int 4204 apic_setup_io_intr(apic_irq_t *irqptr, int irq) 4205 { 4206 int rv; 4207 4208 if (rv = apic_rebind(irqptr, apic_irq_table[irq]->airq_cpu, 1, 4209 IMMEDIATE)) 4210 /* CPU is not up or interrupt is disabled. Fall back to 0 */ 4211 rv = apic_rebind(irqptr, 0, 1, IMMEDIATE); 4212 4213 return (rv); 4214 } 4215 4216 /* 4217 * Deferred reprogramming: Call apic_rebind to do the real work. 4218 */ 4219 static int 4220 apic_setup_io_intr_deferred(apic_irq_t *irqptr, int irq) 4221 { 4222 int rv; 4223 4224 if (rv = apic_rebind(irqptr, apic_irq_table[irq]->airq_cpu, 1, 4225 DEFERRED)) 4226 /* CPU is not up or interrupt is disabled. Fall back to 0 */ 4227 rv = apic_rebind(irqptr, 0, 1, DEFERRED); 4228 4229 return (rv); 4230 } 4231 4232 /* 4233 * Bind interrupt corresponding to irq_ptr to bind_cpu. acquire_lock 4234 * if false (0) means lock is already held (e.g: in rebind_all). 
4235 */ 4236 static int 4237 apic_rebind(apic_irq_t *irq_ptr, int bind_cpu, int acquire_lock, int when) 4238 { 4239 int intin_no; 4240 volatile int32_t *ioapic; 4241 uchar_t airq_temp_cpu; 4242 apic_cpus_info_t *cpu_infop; 4243 int iflag; 4244 int which_irq = apic_vector_to_irq[irq_ptr->airq_vector]; 4245 4246 intin_no = irq_ptr->airq_intin_no; 4247 ioapic = apicioadr[irq_ptr->airq_ioapicindex]; 4248 airq_temp_cpu = irq_ptr->airq_temp_cpu; 4249 if (airq_temp_cpu != IRQ_UNINIT && airq_temp_cpu != IRQ_UNBOUND) { 4250 if (airq_temp_cpu & IRQ_USER_BOUND) 4251 /* Mask off high bit so it can be used as array index */ 4252 airq_temp_cpu &= ~IRQ_USER_BOUND; 4253 4254 ASSERT(airq_temp_cpu < apic_nproc); 4255 } 4256 4257 iflag = intr_clear(); 4258 4259 if (acquire_lock) 4260 lock_set(&apic_ioapic_lock); 4261 4262 /* 4263 * Can't bind to a CPU that's not online: 4264 */ 4265 cpu_infop = &apic_cpus[bind_cpu & ~IRQ_USER_BOUND]; 4266 if (!(cpu_infop->aci_status & APIC_CPU_INTR_ENABLE)) { 4267 4268 if (acquire_lock) 4269 lock_clear(&apic_ioapic_lock); 4270 4271 intr_restore(iflag); 4272 return (1); 4273 } 4274 4275 /* 4276 * If this is a deferred reprogramming attempt, ensure we have 4277 * not been passed stale data: 4278 */ 4279 if ((when == DEFERRED) && 4280 (apic_reprogram_info[which_irq].valid == 0)) { 4281 /* stale info, so just return */ 4282 if (acquire_lock) 4283 lock_clear(&apic_ioapic_lock); 4284 4285 intr_restore(iflag); 4286 return (0); 4287 } 4288 4289 /* 4290 * If this interrupt has been delivered to a CPU and that CPU 4291 * has not handled it yet, we cannot reprogram the IOAPIC now: 4292 */ 4293 if (!APIC_IS_MSI_OR_MSIX_INDEX(irq_ptr->airq_mps_intr_index) && 4294 apic_check_stuck_interrupt(irq_ptr, airq_temp_cpu, bind_cpu, 4295 ioapic, intin_no, which_irq) != 0) { 4296 4297 if (acquire_lock) 4298 lock_clear(&apic_ioapic_lock); 4299 4300 intr_restore(iflag); 4301 return (0); 4302 } 4303 4304 /* 4305 * NOTE: We do not unmask the RDT here, as an interrupt MAY still 
4306 * come in before we have a chance to reprogram it below. The 4307 * reprogramming below will simultaneously change and unmask the 4308 * RDT entry. 4309 */ 4310 4311 if ((uchar_t)bind_cpu == IRQ_UNBOUND) { 4312 /* Write the RDT entry -- no specific CPU binding */ 4313 WRITE_IOAPIC_RDT_ENTRY_HIGH_DWORD(ioapic, intin_no, AV_TOALL); 4314 4315 if (airq_temp_cpu != IRQ_UNINIT && airq_temp_cpu != IRQ_UNBOUND) 4316 apic_cpus[airq_temp_cpu].aci_temp_bound--; 4317 4318 /* Write the vector, trigger, and polarity portion of the RDT */ 4319 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no, 4320 AV_LDEST | AV_LOPRI | irq_ptr->airq_rdt_entry); 4321 if (acquire_lock) 4322 lock_clear(&apic_ioapic_lock); 4323 irq_ptr->airq_temp_cpu = IRQ_UNBOUND; 4324 intr_restore(iflag); 4325 return (0); 4326 } 4327 4328 if (bind_cpu & IRQ_USER_BOUND) { 4329 cpu_infop->aci_bound++; 4330 } else { 4331 cpu_infop->aci_temp_bound++; 4332 } 4333 ASSERT((bind_cpu & ~IRQ_USER_BOUND) < apic_nproc); 4334 if (!APIC_IS_MSI_OR_MSIX_INDEX(irq_ptr->airq_mps_intr_index)) { 4335 /* Write the RDT entry -- bind to a specific CPU: */ 4336 WRITE_IOAPIC_RDT_ENTRY_HIGH_DWORD(ioapic, intin_no, 4337 cpu_infop->aci_local_id << APIC_ID_BIT_OFFSET); 4338 } 4339 if ((airq_temp_cpu != IRQ_UNBOUND) && (airq_temp_cpu != IRQ_UNINIT)) { 4340 apic_cpus[airq_temp_cpu].aci_temp_bound--; 4341 } 4342 if (!APIC_IS_MSI_OR_MSIX_INDEX(irq_ptr->airq_mps_intr_index)) { 4343 /* Write the vector, trigger, and polarity portion of the RDT */ 4344 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no, 4345 AV_PDEST | AV_FIXED | irq_ptr->airq_rdt_entry); 4346 } else { 4347 int type = (irq_ptr->airq_mps_intr_index == MSI_INDEX) ? 
4348 DDI_INTR_TYPE_MSI : DDI_INTR_TYPE_MSIX; 4349 (void) pci_msi_disable_mode(irq_ptr->airq_dip, type, 4350 irq_ptr->airq_ioapicindex); 4351 if (irq_ptr->airq_ioapicindex == irq_ptr->airq_origirq) { 4352 /* first one */ 4353 DDI_INTR_IMPLDBG((CE_CONT, "apic_rebind: call " 4354 "apic_pci_msi_enable_vector\n")); 4355 if (apic_pci_msi_enable_vector(irq_ptr->airq_dip, type, 4356 which_irq, irq_ptr->airq_vector, 4357 irq_ptr->airq_intin_no, 4358 cpu_infop->aci_local_id) != PSM_SUCCESS) { 4359 cmn_err(CE_WARN, "pcplusmp: " 4360 "apic_pci_msi_enable_vector " 4361 "returned PSM_FAILURE"); 4362 } 4363 } 4364 if ((irq_ptr->airq_ioapicindex + irq_ptr->airq_intin_no - 1) == 4365 irq_ptr->airq_origirq) { /* last one */ 4366 DDI_INTR_IMPLDBG((CE_CONT, "apic_rebind: call " 4367 "pci_msi_enable_mode\n")); 4368 if (pci_msi_enable_mode(irq_ptr->airq_dip, type, 4369 which_irq) != DDI_SUCCESS) { 4370 DDI_INTR_IMPLDBG((CE_CONT, "pcplusmp: " 4371 "pci_msi_enable failed\n")); 4372 (void) pci_msi_unconfigure(irq_ptr->airq_dip, 4373 (irq_ptr->airq_mps_intr_index == MSI_INDEX) ? 4374 DDI_INTR_TYPE_MSI : DDI_INTR_TYPE_MSIX, 4375 which_irq); 4376 } 4377 } 4378 } 4379 if (acquire_lock) 4380 lock_clear(&apic_ioapic_lock); 4381 irq_ptr->airq_temp_cpu = (uchar_t)bind_cpu; 4382 apic_redist_cpu_skip &= ~(1 << (bind_cpu & ~IRQ_USER_BOUND)); 4383 intr_restore(iflag); 4384 return (0); 4385 } 4386 4387 /* 4388 * Checks to see if the IOAPIC interrupt entry specified has its Remote IRR 4389 * bit set. Sets up a timeout to perform the reprogramming at a later time 4390 * if it cannot wait for the Remote IRR bit to clear (or if waiting did not 4391 * result in the bit's clearing). 4392 * 4393 * This function will mask the RDT entry if the Remote IRR bit is set. 4394 * 4395 * Returns non-zero if the caller should defer IOAPIC reprogramming. 
4396 */ 4397 static int 4398 apic_check_stuck_interrupt(apic_irq_t *irq_ptr, int old_bind_cpu, 4399 int new_bind_cpu, volatile int32_t *ioapic, int intin_no, int which_irq) 4400 { 4401 int32_t rdt_entry; 4402 int waited; 4403 4404 /* Mask the RDT entry, but only if it's a level-triggered interrupt */ 4405 rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no); 4406 if ((rdt_entry & (AV_LEVEL|AV_MASK)) == AV_LEVEL) { 4407 4408 /* Mask it */ 4409 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no, 4410 AV_MASK | rdt_entry); 4411 } 4412 4413 /* 4414 * Wait for the delivery pending bit to clear. 4415 */ 4416 if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no) & 4417 (AV_LEVEL|AV_PENDING)) == (AV_LEVEL|AV_PENDING)) { 4418 4419 /* 4420 * If we're still waiting on the delivery of this interrupt, 4421 * continue to wait here until it is delivered (this should be 4422 * a very small amount of time, but include a timeout just in 4423 * case). 4424 */ 4425 for (waited = 0; waited < apic_max_usecs_clear_pending; 4426 waited += APIC_USECS_PER_WAIT_INTERVAL) { 4427 if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no) 4428 & AV_PENDING) == 0) { 4429 break; 4430 } 4431 drv_usecwait(APIC_USECS_PER_WAIT_INTERVAL); 4432 } 4433 4434 if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no) & 4435 AV_PENDING) != 0) { 4436 cmn_err(CE_WARN, "!IOAPIC %d intin %d: Could not " 4437 "deliver interrupt to local APIC within " 4438 "%d usecs.", irq_ptr->airq_ioapicindex, 4439 irq_ptr->airq_intin_no, 4440 apic_max_usecs_clear_pending); 4441 } 4442 } 4443 4444 /* 4445 * If the remote IRR bit is set, then the interrupt has been sent 4446 * to a CPU for processing. We have no choice but to wait for 4447 * that CPU to process the interrupt, at which point the remote IRR 4448 * bit will be cleared. 
4449 */ 4450 if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no) & 4451 (AV_LEVEL|AV_REMOTE_IRR)) == (AV_LEVEL|AV_REMOTE_IRR)) { 4452 4453 /* 4454 * If the CPU that this RDT is bound to is NOT the current 4455 * CPU, wait until that CPU handles the interrupt and ACKs 4456 * it. If this interrupt is not bound to any CPU (that is, 4457 * if it's bound to the logical destination of "anyone"), it 4458 * may have been delivered to the current CPU so handle that 4459 * case by deferring the reprogramming (below). 4460 */ 4461 kpreempt_disable(); 4462 if ((old_bind_cpu != IRQ_UNBOUND) && 4463 (old_bind_cpu != IRQ_UNINIT) && 4464 (old_bind_cpu != psm_get_cpu_id())) { 4465 for (waited = 0; waited < apic_max_usecs_clear_pending; 4466 waited += APIC_USECS_PER_WAIT_INTERVAL) { 4467 if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, 4468 intin_no) & AV_REMOTE_IRR) == 0) { 4469 4470 /* Clear the reprogramming state: */ 4471 lock_set(&apic_ioapic_reprogram_lock); 4472 4473 apic_reprogram_info[which_irq].valid 4474 = 0; 4475 apic_reprogram_info[which_irq].bindcpu 4476 = 0; 4477 apic_reprogram_info[which_irq].timeouts 4478 = 0; 4479 4480 lock_clear(&apic_ioapic_reprogram_lock); 4481 4482 /* Remote IRR has cleared! */ 4483 kpreempt_enable(); 4484 return (0); 4485 } 4486 drv_usecwait(APIC_USECS_PER_WAIT_INTERVAL); 4487 } 4488 } 4489 kpreempt_enable(); 4490 4491 /* 4492 * If we waited and the Remote IRR bit is still not cleared, 4493 * AND if we've invoked the timeout APIC_REPROGRAM_MAX_TIMEOUTS 4494 * times for this interrupt, try the last-ditch workarounds: 4495 */ 4496 if (apic_reprogram_info[which_irq].timeouts >= 4497 APIC_REPROGRAM_MAX_TIMEOUTS) { 4498 4499 if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no) 4500 & AV_REMOTE_IRR) != 0) { 4501 /* 4502 * Trying to clear the bit through normal 4503 * channels has failed. So as a last-ditch 4504 * effort, try to set the trigger mode to 4505 * edge, then to level. This has been 4506 * observed to work on many systems. 
4507 */ 4508 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, 4509 intin_no, 4510 READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, 4511 intin_no) & ~AV_LEVEL); 4512 4513 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, 4514 intin_no, 4515 READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, 4516 intin_no) | AV_LEVEL); 4517 4518 /* 4519 * If the bit's STILL set, declare total and 4520 * utter failure 4521 */ 4522 if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, 4523 intin_no) & AV_REMOTE_IRR) != 0) { 4524 cmn_err(CE_WARN, "!IOAPIC %d intin %d: " 4525 "Remote IRR failed to reset " 4526 "within %d usecs. Interrupts to " 4527 "this pin may cease to function.", 4528 irq_ptr->airq_ioapicindex, 4529 irq_ptr->airq_intin_no, 4530 apic_max_usecs_clear_pending); 4531 } 4532 } 4533 /* Clear the reprogramming state: */ 4534 lock_set(&apic_ioapic_reprogram_lock); 4535 4536 apic_reprogram_info[which_irq].valid = 0; 4537 apic_reprogram_info[which_irq].bindcpu = 0; 4538 apic_reprogram_info[which_irq].timeouts = 0; 4539 4540 lock_clear(&apic_ioapic_reprogram_lock); 4541 } else { 4542 #ifdef DEBUG 4543 cmn_err(CE_WARN, "Deferring reprogramming of irq %d", 4544 which_irq); 4545 #endif /* DEBUG */ 4546 /* 4547 * If waiting for the Remote IRR bit (above) didn't 4548 * allow it to clear, defer the reprogramming: 4549 */ 4550 lock_set(&apic_ioapic_reprogram_lock); 4551 4552 apic_reprogram_info[which_irq].valid = 1; 4553 apic_reprogram_info[which_irq].bindcpu = new_bind_cpu; 4554 apic_reprogram_info[which_irq].timeouts++; 4555 4556 lock_clear(&apic_ioapic_reprogram_lock); 4557 4558 /* Fire up a timeout to handle this later */ 4559 (void) timeout(apic_reprogram_timeout_handler, 4560 (void *) 0, 4561 drv_usectohz(APIC_REPROGRAM_TIMEOUT_DELAY)); 4562 4563 /* Inform caller to defer IOAPIC programming: */ 4564 return (1); 4565 } 4566 } 4567 return (0); 4568 } 4569 4570 /* 4571 * Timeout handler that performs the APIC reprogramming 4572 */ 4573 /*ARGSUSED*/ 4574 static void 4575 apic_reprogram_timeout_handler(void *arg) 4576 { 4577 
/*LINTED: set but not used in function*/ 4578 int i, result; 4579 4580 /* Serialize access to this function */ 4581 mutex_enter(&apic_reprogram_timeout_mutex); 4582 4583 /* 4584 * For each entry in the reprogramming state that's valid, 4585 * try the reprogramming again: 4586 */ 4587 for (i = 0; i < APIC_MAX_VECTOR; i++) { 4588 if (apic_reprogram_info[i].valid == 0) 4589 continue; 4590 /* 4591 * Though we can't really do anything about errors 4592 * at this point, keep track of them for reporting. 4593 * Note that it is very possible for apic_setup_io_intr 4594 * to re-register this very timeout if the Remote IRR bit 4595 * has not yet cleared. 4596 */ 4597 result = apic_setup_io_intr_deferred(apic_irq_table[i], i); 4598 4599 #ifdef DEBUG 4600 if (result) 4601 cmn_err(CE_WARN, "apic_reprogram_timeout: " 4602 "apic_setup_io_intr returned nonzero for " 4603 "irq=%d!", i); 4604 #endif /* DEBUG */ 4605 } 4606 4607 mutex_exit(&apic_reprogram_timeout_mutex); 4608 } 4609 4610 4611 /* 4612 * Called to migrate all interrupts at an irq to another cpu. safe 4613 * if true means we are not being called from an interrupt 4614 * context and hence it is safe to do a lock_set. 
If false 4615 * do only a lock_try and return failure ( non 0 ) if we cannot get it 4616 */ 4617 int 4618 apic_rebind_all(apic_irq_t *irq_ptr, int bind_cpu, int safe) 4619 { 4620 apic_irq_t *irqptr = irq_ptr; 4621 int retval = 0; 4622 int iflag; 4623 4624 iflag = intr_clear(); 4625 if (!safe) { 4626 if (lock_try(&apic_ioapic_lock) == 0) { 4627 intr_restore(iflag); 4628 return (1); 4629 } 4630 } else 4631 lock_set(&apic_ioapic_lock); 4632 4633 while (irqptr) { 4634 if (irqptr->airq_temp_cpu != IRQ_UNINIT) 4635 retval |= apic_rebind(irqptr, bind_cpu, 0, IMMEDIATE); 4636 irqptr = irqptr->airq_next; 4637 } 4638 lock_clear(&apic_ioapic_lock); 4639 intr_restore(iflag); 4640 return (retval); 4641 } 4642 4643 /* 4644 * apic_intr_redistribute does all the messy computations for identifying 4645 * which interrupt to move to which CPU. Currently we do just one interrupt 4646 * at a time. This reduces the time we spent doing all this within clock 4647 * interrupt. When it is done in idle, we could do more than 1. 4648 * First we find the most busy and the most free CPU (time in ISR only) 4649 * skipping those CPUs that has been identified as being ineligible (cpu_skip) 4650 * Then we look for IRQs which are closest to the difference between the 4651 * most busy CPU and the average ISR load. We try to find one whose load 4652 * is less than difference.If none exists, then we chose one larger than the 4653 * difference, provided it does not make the most idle CPU worse than the 4654 * most busy one. In the end, we clear all the busy fields for CPUs. For 4655 * IRQs, they are cleared as they are scanned. 
4656 */ 4657 static void 4658 apic_intr_redistribute() 4659 { 4660 int busiest_cpu, most_free_cpu; 4661 int cpu_free, cpu_busy, max_busy, min_busy; 4662 int min_free, diff; 4663 int average_busy, cpus_online; 4664 int i, busy; 4665 apic_cpus_info_t *cpu_infop; 4666 apic_irq_t *min_busy_irq = NULL; 4667 apic_irq_t *max_busy_irq = NULL; 4668 4669 busiest_cpu = most_free_cpu = -1; 4670 cpu_free = cpu_busy = max_busy = average_busy = 0; 4671 min_free = apic_sample_factor_redistribution; 4672 cpus_online = 0; 4673 /* 4674 * Below we will check for CPU_INTR_ENABLE, bound, temp_bound, temp_cpu 4675 * without ioapic_lock. That is OK as we are just doing statistical 4676 * sampling anyway and any inaccuracy now will get corrected next time 4677 * The call to rebind which actually changes things will make sure 4678 * we are consistent. 4679 */ 4680 for (i = 0; i < apic_nproc; i++) { 4681 if (!(apic_redist_cpu_skip & (1 << i)) && 4682 (apic_cpus[i].aci_status & APIC_CPU_INTR_ENABLE)) { 4683 4684 cpu_infop = &apic_cpus[i]; 4685 /* 4686 * If no unbound interrupts or only 1 total on this 4687 * CPU, skip 4688 */ 4689 if (!cpu_infop->aci_temp_bound || 4690 (cpu_infop->aci_bound + cpu_infop->aci_temp_bound) 4691 == 1) { 4692 apic_redist_cpu_skip |= 1 << i; 4693 continue; 4694 } 4695 4696 busy = cpu_infop->aci_busy; 4697 average_busy += busy; 4698 cpus_online++; 4699 if (max_busy < busy) { 4700 max_busy = busy; 4701 busiest_cpu = i; 4702 } 4703 if (min_free > busy) { 4704 min_free = busy; 4705 most_free_cpu = i; 4706 } 4707 if (busy > apic_int_busy_mark) { 4708 cpu_busy |= 1 << i; 4709 } else { 4710 if (busy < apic_int_free_mark) 4711 cpu_free |= 1 << i; 4712 } 4713 } 4714 } 4715 if ((cpu_busy && cpu_free) || 4716 (max_busy >= (min_free + apic_diff_for_redistribution))) { 4717 4718 apic_num_imbalance++; 4719 #ifdef DEBUG 4720 if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) { 4721 prom_printf( 4722 "redistribute busy=%x free=%x max=%x min=%x", 4723 cpu_busy, cpu_free, max_busy, 
min_free); 4724 } 4725 #endif /* DEBUG */ 4726 4727 4728 average_busy /= cpus_online; 4729 4730 diff = max_busy - average_busy; 4731 min_busy = max_busy; /* start with the max possible value */ 4732 max_busy = 0; 4733 min_busy_irq = max_busy_irq = NULL; 4734 i = apic_min_device_irq; 4735 for (; i < apic_max_device_irq; i++) { 4736 apic_irq_t *irq_ptr; 4737 /* Change to linked list per CPU ? */ 4738 if ((irq_ptr = apic_irq_table[i]) == NULL) 4739 continue; 4740 /* Check for irq_busy & decide which one to move */ 4741 /* Also zero them for next round */ 4742 if ((irq_ptr->airq_temp_cpu == busiest_cpu) && 4743 irq_ptr->airq_busy) { 4744 if (irq_ptr->airq_busy < diff) { 4745 /* 4746 * Check for least busy CPU, 4747 * best fit or what ? 4748 */ 4749 if (max_busy < irq_ptr->airq_busy) { 4750 /* 4751 * Most busy within the 4752 * required differential 4753 */ 4754 max_busy = irq_ptr->airq_busy; 4755 max_busy_irq = irq_ptr; 4756 } 4757 } else { 4758 if (min_busy > irq_ptr->airq_busy) { 4759 /* 4760 * least busy, but more than 4761 * the reqd diff 4762 */ 4763 if (min_busy < 4764 (diff + average_busy - 4765 min_free)) { 4766 /* 4767 * Making sure new cpu 4768 * will not end up 4769 * worse 4770 */ 4771 min_busy = 4772 irq_ptr->airq_busy; 4773 4774 min_busy_irq = irq_ptr; 4775 } 4776 } 4777 } 4778 } 4779 irq_ptr->airq_busy = 0; 4780 } 4781 4782 if (max_busy_irq != NULL) { 4783 #ifdef DEBUG 4784 if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) { 4785 prom_printf("rebinding %x to %x", 4786 max_busy_irq->airq_vector, most_free_cpu); 4787 } 4788 #endif /* DEBUG */ 4789 if (apic_rebind_all(max_busy_irq, most_free_cpu, 0) 4790 == 0) 4791 /* Make change permenant */ 4792 max_busy_irq->airq_cpu = (uchar_t)most_free_cpu; 4793 } else if (min_busy_irq != NULL) { 4794 #ifdef DEBUG 4795 if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) { 4796 prom_printf("rebinding %x to %x", 4797 min_busy_irq->airq_vector, most_free_cpu); 4798 } 4799 #endif /* DEBUG */ 4800 4801 if (apic_rebind_all(min_busy_irq, 
most_free_cpu, 0) == 4802 0) 4803 /* Make change permenant */ 4804 min_busy_irq->airq_cpu = (uchar_t)most_free_cpu; 4805 } else { 4806 if (cpu_busy != (1 << busiest_cpu)) { 4807 apic_redist_cpu_skip |= 1 << busiest_cpu; 4808 /* 4809 * We leave cpu_skip set so that next time we 4810 * can choose another cpu 4811 */ 4812 } 4813 } 4814 apic_num_rebind++; 4815 } else { 4816 /* 4817 * found nothing. Could be that we skipped over valid CPUs 4818 * or we have balanced everything. If we had a variable 4819 * ticks_for_redistribution, it could be increased here. 4820 * apic_int_busy, int_free etc would also need to be 4821 * changed. 4822 */ 4823 if (apic_redist_cpu_skip) 4824 apic_redist_cpu_skip = 0; 4825 } 4826 for (i = 0; i < apic_nproc; i++) { 4827 apic_cpus[i].aci_busy = 0; 4828 } 4829 } 4830 4831 static void 4832 apic_cleanup_busy() 4833 { 4834 int i; 4835 apic_irq_t *irq_ptr; 4836 4837 for (i = 0; i < apic_nproc; i++) { 4838 apic_cpus[i].aci_busy = 0; 4839 } 4840 4841 for (i = apic_min_device_irq; i < apic_max_device_irq; i++) { 4842 if ((irq_ptr = apic_irq_table[i]) != NULL) 4843 irq_ptr->airq_busy = 0; 4844 } 4845 apic_skipped_redistribute = 0; 4846 } 4847 4848 4849 /* 4850 * This function will reprogram the timer. 4851 * 4852 * When in oneshot mode the argument is the absolute time in future to 4853 * generate the interrupt at. 4854 * 4855 * When in periodic mode, the argument is the interval at which the 4856 * interrupts should be generated. There is no need to support the periodic 4857 * mode timer change at this time. 4858 */ 4859 static void 4860 apic_timer_reprogram(hrtime_t time) 4861 { 4862 hrtime_t now; 4863 uint_t ticks; 4864 4865 /* 4866 * We should be called from high PIL context (CBE_HIGH_PIL), 4867 * so kpreempt is disabled. 
4868 */ 4869 4870 if (!apic_oneshot) { 4871 /* time is the interval for periodic mode */ 4872 ticks = (uint_t)((time) / apic_nsec_per_tick); 4873 } else { 4874 /* one shot mode */ 4875 4876 now = gethrtime(); 4877 4878 if (time <= now) { 4879 /* 4880 * requested to generate an interrupt in the past 4881 * generate an interrupt as soon as possible 4882 */ 4883 ticks = apic_min_timer_ticks; 4884 } else if ((time - now) > apic_nsec_max) { 4885 /* 4886 * requested to generate an interrupt at a time 4887 * further than what we are capable of. Set to max 4888 * the hardware can handle 4889 */ 4890 4891 ticks = APIC_MAXVAL; 4892 #ifdef DEBUG 4893 cmn_err(CE_CONT, "apic_timer_reprogram, request at" 4894 " %lld too far in future, current time" 4895 " %lld \n", time, now); 4896 #endif /* DEBUG */ 4897 } else 4898 ticks = (uint_t)((time - now) / apic_nsec_per_tick); 4899 } 4900 4901 if (ticks < apic_min_timer_ticks) 4902 ticks = apic_min_timer_ticks; 4903 4904 apicadr[APIC_INIT_COUNT] = ticks; 4905 4906 } 4907 4908 /* 4909 * This function will enable timer interrupts. 4910 */ 4911 static void 4912 apic_timer_enable(void) 4913 { 4914 /* 4915 * We should be Called from high PIL context (CBE_HIGH_PIL), 4916 * so kpreempt is disabled. 4917 */ 4918 4919 if (!apic_oneshot) 4920 apicadr[APIC_LOCAL_TIMER] = 4921 (apic_clkvect + APIC_BASE_VECT) | AV_TIME; 4922 else { 4923 /* one shot */ 4924 apicadr[APIC_LOCAL_TIMER] = (apic_clkvect + APIC_BASE_VECT); 4925 } 4926 } 4927 4928 /* 4929 * This function will disable timer interrupts. 4930 */ 4931 static void 4932 apic_timer_disable(void) 4933 { 4934 /* 4935 * We should be Called from high PIL context (CBE_HIGH_PIL), 4936 * so kpreempt is disabled. 
4937 */ 4938 4939 apicadr[APIC_LOCAL_TIMER] = (apic_clkvect + APIC_BASE_VECT) | AV_MASK; 4940 } 4941 4942 4943 cyclic_id_t apic_cyclic_id; 4944 4945 /* 4946 * If this module needs to be a consumer of cyclic subsystem, they 4947 * can be added here, since at this time kernel cyclic subsystem is initialized 4948 * argument is not currently used, and is reserved for future. 4949 */ 4950 static void 4951 apic_post_cyclic_setup(void *arg) 4952 { 4953 _NOTE(ARGUNUSED(arg)) 4954 cyc_handler_t hdlr; 4955 cyc_time_t when; 4956 4957 /* cpu_lock is held */ 4958 4959 /* set up cyclics for intr redistribution */ 4960 4961 /* 4962 * In peridoc mode intr redistribution processing is done in 4963 * apic_intr_enter during clk intr processing 4964 */ 4965 if (!apic_oneshot) 4966 return; 4967 4968 hdlr.cyh_level = CY_LOW_LEVEL; 4969 hdlr.cyh_func = (cyc_func_t)apic_redistribute_compute; 4970 hdlr.cyh_arg = NULL; 4971 4972 when.cyt_when = 0; 4973 when.cyt_interval = apic_redistribute_sample_interval; 4974 apic_cyclic_id = cyclic_add(&hdlr, &when); 4975 4976 4977 } 4978 4979 static void 4980 apic_redistribute_compute(void) 4981 { 4982 int i, j, max_busy; 4983 4984 if (apic_enable_dynamic_migration) { 4985 if (++apic_nticks == apic_sample_factor_redistribution) { 4986 /* 4987 * Time to call apic_intr_redistribute(). 4988 * reset apic_nticks. 
This will cause max_busy 4989 * to be calculated below and if it is more than 4990 * apic_int_busy, we will do the whole thing 4991 */ 4992 apic_nticks = 0; 4993 } 4994 max_busy = 0; 4995 for (i = 0; i < apic_nproc; i++) { 4996 4997 /* 4998 * Check if curipl is non zero & if ISR is in 4999 * progress 5000 */ 5001 if (((j = apic_cpus[i].aci_curipl) != 0) && 5002 (apic_cpus[i].aci_ISR_in_progress & (1 << j))) { 5003 5004 int irq; 5005 apic_cpus[i].aci_busy++; 5006 irq = apic_cpus[i].aci_current[j]; 5007 apic_irq_table[irq]->airq_busy++; 5008 } 5009 5010 if (!apic_nticks && 5011 (apic_cpus[i].aci_busy > max_busy)) 5012 max_busy = apic_cpus[i].aci_busy; 5013 } 5014 if (!apic_nticks) { 5015 if (max_busy > apic_int_busy_mark) { 5016 /* 5017 * We could make the following check be 5018 * skipped > 1 in which case, we get a 5019 * redistribution at half the busy mark (due to 5020 * double interval). Need to be able to collect 5021 * more empirical data to decide if that is a 5022 * good strategy. Punt for now. 
5023 */ 5024 if (apic_skipped_redistribute) 5025 apic_cleanup_busy(); 5026 else 5027 apic_intr_redistribute(); 5028 } else 5029 apic_skipped_redistribute++; 5030 } 5031 } 5032 } 5033 5034 5035 static int 5036 apic_acpi_translate_pci_irq(dev_info_t *dip, int busid, int devid, 5037 int ipin, int *pci_irqp, iflag_t *intr_flagp) 5038 { 5039 5040 int status; 5041 acpi_psm_lnk_t acpipsmlnk; 5042 5043 if ((status = acpi_get_irq_cache_ent(busid, devid, ipin, pci_irqp, 5044 intr_flagp)) == ACPI_PSM_SUCCESS) { 5045 APIC_VERBOSE_IRQ((CE_CONT, "!pcplusmp: Found irqno %d " 5046 "from cache for device %s, instance #%d\n", *pci_irqp, 5047 ddi_get_name(dip), ddi_get_instance(dip))); 5048 return (status); 5049 } 5050 5051 bzero(&acpipsmlnk, sizeof (acpi_psm_lnk_t)); 5052 5053 if ((status = acpi_translate_pci_irq(dip, ipin, pci_irqp, intr_flagp, 5054 &acpipsmlnk)) == ACPI_PSM_FAILURE) { 5055 APIC_VERBOSE_IRQ((CE_WARN, "pcplusmp: " 5056 " acpi_translate_pci_irq failed for device %s, instance" 5057 " #%d", ddi_get_name(dip), ddi_get_instance(dip))); 5058 return (status); 5059 } 5060 5061 if (status == ACPI_PSM_PARTIAL && acpipsmlnk.lnkobj != NULL) { 5062 status = apic_acpi_irq_configure(&acpipsmlnk, dip, pci_irqp, 5063 intr_flagp); 5064 if (status != ACPI_PSM_SUCCESS) { 5065 status = acpi_get_current_irq_resource(&acpipsmlnk, 5066 pci_irqp, intr_flagp); 5067 } 5068 } 5069 5070 if (status == ACPI_PSM_SUCCESS) { 5071 acpi_new_irq_cache_ent(busid, devid, ipin, *pci_irqp, 5072 intr_flagp, &acpipsmlnk); 5073 5074 APIC_VERBOSE_IRQ((CE_CONT, "pcplusmp: [ACPI] " 5075 "new irq %d for device %s, instance #%d\n", 5076 *pci_irqp, ddi_get_name(dip), ddi_get_instance(dip))); 5077 } 5078 5079 return (status); 5080 } 5081 5082 /* 5083 * Configures the irq for the interrupt link device identified by 5084 * acpipsmlnkp. 5085 * 5086 * Gets the current and the list of possible irq settings for the 5087 * device. 
If apic_unconditional_srs is not set, and the current 5088 * resource setting is in the list of possible irq settings, 5089 * current irq resource setting is passed to the caller. 5090 * 5091 * Otherwise, picks an irq number from the list of possible irq 5092 * settings, and sets the irq of the device to this value. 5093 * If prefer_crs is set, among a set of irq numbers in the list that have 5094 * the least number of devices sharing the interrupt, we pick current irq 5095 * resource setting if it is a member of this set. 5096 * 5097 * Passes the irq number in the value pointed to by pci_irqp, and 5098 * polarity and sensitivity in the structure pointed to by dipintrflagp 5099 * to the caller. 5100 * 5101 * Note that if setting the irq resource failed, but successfuly obtained 5102 * the current irq resource settings, passes the current irq resources 5103 * and considers it a success. 5104 * 5105 * Returns: 5106 * ACPI_PSM_SUCCESS on success. 5107 * 5108 * ACPI_PSM_FAILURE if an error occured during the configuration or 5109 * if a suitable irq was not found for this device, or if setting the 5110 * irq resource and obtaining the current resource fails. 
5111 * 5112 */ 5113 static int 5114 apic_acpi_irq_configure(acpi_psm_lnk_t *acpipsmlnkp, dev_info_t *dip, 5115 int *pci_irqp, iflag_t *dipintr_flagp) 5116 { 5117 5118 int i, min_share, foundnow, done = 0; 5119 int32_t irq; 5120 int32_t share_irq = -1; 5121 int32_t chosen_irq = -1; 5122 int cur_irq = -1; 5123 acpi_irqlist_t *irqlistp; 5124 acpi_irqlist_t *irqlistent; 5125 5126 if ((acpi_get_possible_irq_resources(acpipsmlnkp, &irqlistp)) 5127 == ACPI_PSM_FAILURE) { 5128 APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: Unable to determine " 5129 "or assign IRQ for device %s, instance #%d: The system was " 5130 "unable to get the list of potential IRQs from ACPI.", 5131 ddi_get_name(dip), ddi_get_instance(dip))); 5132 5133 return (ACPI_PSM_FAILURE); 5134 } 5135 5136 if ((acpi_get_current_irq_resource(acpipsmlnkp, &cur_irq, 5137 dipintr_flagp) == ACPI_PSM_SUCCESS) && (!apic_unconditional_srs) && 5138 (cur_irq > 0)) { 5139 /* 5140 * If an IRQ is set in CRS and that IRQ exists in the set 5141 * returned from _PRS, return that IRQ, otherwise print 5142 * a warning 5143 */ 5144 5145 if (acpi_irqlist_find_irq(irqlistp, cur_irq, NULL) 5146 == ACPI_PSM_SUCCESS) { 5147 5148 acpi_free_irqlist(irqlistp); 5149 ASSERT(pci_irqp != NULL); 5150 *pci_irqp = cur_irq; 5151 return (ACPI_PSM_SUCCESS); 5152 } 5153 5154 APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: Could not find the " 5155 "current irq %d for device %s, instance #%d in ACPI's " 5156 "list of possible irqs for this device. 
Picking one from " 5157 " the latter list.", cur_irq, ddi_get_name(dip), 5158 ddi_get_instance(dip))); 5159 } 5160 5161 irqlistent = irqlistp; 5162 min_share = 255; 5163 5164 while (irqlistent != NULL) { 5165 irqlistent->intr_flags.bustype = BUS_PCI; 5166 5167 for (foundnow = 0, i = 0; i < irqlistent->num_irqs; i++) { 5168 5169 irq = irqlistent->irqs[i]; 5170 5171 if ((irq < 16) && (apic_reserved_irqlist[irq])) 5172 continue; 5173 5174 if (irq == 0) { 5175 /* invalid irq number */ 5176 continue; 5177 } 5178 5179 if ((apic_irq_table[irq] == NULL) || 5180 (apic_irq_table[irq]->airq_dip == dip)) { 5181 chosen_irq = irq; 5182 foundnow = 1; 5183 /* 5184 * If we do not prefer current irq from crs 5185 * or if we do and this irq is the same as 5186 * current irq from crs, this is the one 5187 * to pick. 5188 */ 5189 if (!(apic_prefer_crs) || (irq == cur_irq)) { 5190 done = 1; 5191 break; 5192 } 5193 continue; 5194 } 5195 5196 if (irqlistent->intr_flags.intr_el == INTR_EL_EDGE) 5197 continue; 5198 5199 if (!acpi_intr_compatible(irqlistent->intr_flags, 5200 apic_irq_table[irq]->airq_iflag)) 5201 continue; 5202 5203 if ((apic_irq_table[irq]->airq_share < min_share) || 5204 ((apic_irq_table[irq]->airq_share == min_share) && 5205 (cur_irq == irq) && (apic_prefer_crs))) { 5206 min_share = apic_irq_table[irq]->airq_share; 5207 share_irq = irq; 5208 foundnow = 1; 5209 } 5210 } 5211 5212 /* 5213 * If we found an IRQ in the inner loop this time, save the 5214 * details from the irqlist for later use. 5215 */ 5216 if (foundnow && ((chosen_irq != -1) || (share_irq != -1))) { 5217 /* 5218 * Copy the acpi_prs_private_t and flags from this 5219 * irq list entry, since we found an irq from this 5220 * entry. 
5221 */ 5222 acpipsmlnkp->acpi_prs_prv = irqlistent->acpi_prs_prv; 5223 *dipintr_flagp = irqlistent->intr_flags; 5224 } 5225 5226 if (done) 5227 break; 5228 5229 /* Go to the next irqlist entry */ 5230 irqlistent = irqlistent->next; 5231 } 5232 5233 5234 acpi_free_irqlist(irqlistp); 5235 if (chosen_irq != -1) 5236 irq = chosen_irq; 5237 else if (share_irq != -1) 5238 irq = share_irq; 5239 else { 5240 APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: Could not find a " 5241 "suitable irq from the list of possible irqs for device " 5242 "%s, instance #%d in ACPI's list of possible irqs", 5243 ddi_get_name(dip), ddi_get_instance(dip))); 5244 return (ACPI_PSM_FAILURE); 5245 } 5246 5247 APIC_VERBOSE_IRQ((CE_CONT, "!pcplusmp: Setting irq %d for device %s " 5248 "instance #%d\n", irq, ddi_get_name(dip), ddi_get_instance(dip))); 5249 5250 if ((acpi_set_irq_resource(acpipsmlnkp, irq)) == ACPI_PSM_SUCCESS) { 5251 /* 5252 * setting irq was successful, check to make sure CRS 5253 * reflects that. If CRS does not agree with what we 5254 * set, return the irq that was set. 5255 */ 5256 5257 if (acpi_get_current_irq_resource(acpipsmlnkp, &cur_irq, 5258 dipintr_flagp) == ACPI_PSM_SUCCESS) { 5259 5260 if (cur_irq != irq) 5261 APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: " 5262 "IRQ resource set (irqno %d) for device %s " 5263 "instance #%d, differs from current " 5264 "setting irqno %d", 5265 irq, ddi_get_name(dip), 5266 ddi_get_instance(dip), cur_irq)); 5267 } 5268 5269 /* 5270 * return the irq that was set, and not what CRS reports, 5271 * since CRS has been seen to be bogus on some systems 5272 */ 5273 cur_irq = irq; 5274 } else { 5275 APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: set resource irq %d " 5276 "failed for device %s instance #%d", 5277 irq, ddi_get_name(dip), ddi_get_instance(dip))); 5278 5279 if (cur_irq == -1) 5280 return (ACPI_PSM_FAILURE); 5281 } 5282 5283 ASSERT(pci_irqp != NULL); 5284 *pci_irqp = cur_irq; 5285 return (ACPI_PSM_SUCCESS); 5286 } 5287