/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

/*
 * PSMI 1.1 extensions are supported only in 2.6 and later versions.
 * PSMI 1.2 extensions are supported only in 2.7 and later versions.
 * PSMI 1.3 and 1.4 extensions are supported in Solaris 10.
 * PSMI 1.5 extensions are supported in Solaris Nevada.
 *
 * NOTE(review): PSMI_1_5 is defined ahead of every #include below;
 * presumably <sys/psm.h> keys the PSMI 1.5 interface off it (this file
 * registers itself with PSM_INFO_VER01_5) -- confirm before moving it.
 */
#define PSMI_1_5

#include <sys/processor.h>
#include <sys/time.h>
#include <sys/psm.h>
#include <sys/smp_impldefs.h>
#include <sys/cram.h>
#include <sys/acpi/acpi.h>
#include <sys/acpica.h>
#include <sys/psm_common.h>
#include "apic.h"
#include <sys/pit.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ddi_impldefs.h>
#include <sys/pci.h>
#include <sys/promif.h>
#include <sys/x86_archext.h>
#include <sys/cpc_impl.h>
#include <sys/uadmin.h>
#include <sys/panic.h>
#include <sys/debug.h>
#include <sys/archsystm.h>
#include <sys/trap.h>
#include <sys/machsystm.h>
#include <sys/cpuvar.h>
#include <sys/rm_platter.h>
#include <sys/privregs.h>
#include <sys/cyclic.h>
#include <sys/note.h>
#include <sys/pci_intr_lib.h>

/*
 * Local Function Prototypes
 *
 * NOTE(review): several declarations below use an empty parameter list
 * "()".  In C (before C23) that declares the function without a prototype,
 * so the compiler does not check the arguments passed to those functions.
 */
static void apic_init_intr();
static void apic_ret();
static int apic_handle_defconf();
static int apic_parse_mpct(caddr_t mpct, int bypass);
static struct apic_mpfps_hdr *apic_find_fps_sig(caddr_t fptr, int size);
static int apic_checksum(caddr_t bptr, int len);
static int get_apic_cmd1();
static int get_apic_pri();
static int apic_find_bus_type(char *bus);
static int apic_find_bus(int busid);
static int apic_find_bus_id(int bustype);
static struct apic_io_intr *apic_find_io_intr(int irqno);
int apic_allocate_irq(int irq);
static int apic_find_free_irq(int start, int end);
static uchar_t apic_allocate_vector(int ipl, int irq, int pri);
static void apic_modify_vector(uchar_t vector, int irq);
static void apic_mark_vector(uchar_t oldvector, uchar_t newvector);
static uchar_t apic_xlate_vector(uchar_t oldvector);
static void apic_xlate_vector_free_timeout_handler(void *arg);
static void apic_free_vector(uchar_t vector);
static void apic_reprogram_timeout_handler(void *arg);
static int apic_check_stuck_interrupt(apic_irq_t *irq_ptr, int old_bind_cpu,
    int new_bind_cpu, volatile int32_t *ioapic, int intin_no, int which_irq,
    int iflag, boolean_t *restore_intrp);
static int apic_setup_io_intr(apic_irq_t *irqptr, int irq);
static int apic_setup_io_intr_deferred(apic_irq_t *irqptr, int irq);
static void apic_record_rdt_entry(apic_irq_t *irqptr, int irq);
static struct apic_io_intr *apic_find_io_intr_w_busid(int irqno, int busid);
static int apic_find_intin(uchar_t ioapic, uchar_t intin);
static int apic_handle_pci_pci_bridge(dev_info_t *idip, int child_devno,
    int child_ipin, struct apic_io_intr **intrp);
static int apic_setup_irq_table(dev_info_t *dip, int irqno,
    struct apic_io_intr *intrp, struct intrspec *ispec, iflag_t *intr_flagp,
    int type);
static int apic_setup_sci_irq_table(int irqno, uchar_t ipl,
    iflag_t *intr_flagp);
static void apic_nmi_intr(caddr_t arg);
uchar_t apic_bind_intr(dev_info_t *dip, int irq, uchar_t ioapicid,
    uchar_t intin);
static int apic_rebind(apic_irq_t *irq_ptr, int bind_cpu, int acquire_lock,
    int when);
int apic_rebind_all(apic_irq_t *irq_ptr, int bind_cpu, int safe);
static void apic_intr_redistribute();
static void apic_cleanup_busy();
static void apic_set_pwroff_method_from_mpcnfhdr(struct apic_mp_cnf_hdr *hdrp);
int apic_introp_xlate(dev_info_t *dip, struct intrspec *ispec, int type);

/* ACPI support routines */
static int acpi_probe(void);
static int apic_acpi_irq_configure(acpi_psm_lnk_t *acpipsmlnkp, dev_info_t *dip,
    int *pci_irqp, iflag_t *intr_flagp);

static int apic_acpi_translate_pci_irq(dev_info_t *dip, int busid, int devid,
    int ipin, int *pci_irqp, iflag_t *intr_flagp);
static uchar_t acpi_find_ioapic(int irq);
static int acpi_intr_compatible(iflag_t iflag1, iflag_t iflag2);

/*
 * standard MP entries
 *
 * Most of these are the functions installed in the apic_ops table
 * further down in this file.
 */
static int apic_probe();
static int apic_clkinit();
static int apic_getclkirq(int ipl);
static uint_t apic_calibrate(volatile uint32_t *addr,
    uint16_t *pit_ticks_adj);
static hrtime_t apic_gettime();
static hrtime_t apic_gethrtime();
static void apic_init();
static void apic_picinit(void);
static void apic_cpu_start(processorid_t cpun, caddr_t rm_code);
static int apic_post_cpu_start(void);
static void apic_send_ipi(int cpun, int ipl);
static void apic_set_softintr(int softintr);
static void apic_set_idlecpu(processorid_t cpun);
static void apic_unset_idlecpu(processorid_t cpun);
static int apic_softlvl_to_irq(int ipl);
static int apic_intr_enter(int ipl, int *vect);
static void apic_intr_exit(int ipl, int vect);
static void apic_setspl(int ipl);
static int apic_addspl(int ipl, int vector, int min_ipl, int max_ipl);
static int apic_delspl(int ipl, int vector, int min_ipl, int max_ipl);
static void apic_shutdown(int cmd, int fcn);
static void apic_preshutdown(int cmd, int fcn);
static int apic_disable_intr(processorid_t cpun);
static void apic_enable_intr(processorid_t cpun);
static processorid_t apic_get_next_processorid(processorid_t cpun);
static int apic_get_ipivect(int ipl, int type);
static void apic_timer_reprogram(hrtime_t time);
static void apic_timer_enable(void);
static void apic_timer_disable(void);
static void apic_post_cyclic_setup(void *arg);
extern int apic_intr_ops(dev_info_t *, ddi_intr_handle_impl_t *,
    psm_intr_op_t, int *);

static int apic_oneshot = 0;
int apic_oneshot_enable = 1; /* to allow disabling one-shot capability */

/*
 * These variables are frequently accessed in apic_intr_enter(),
 * apic_intr_exit() and apic_setspl(), so group them together.
 *
 * apicadr is filled in by acpi_probe() or apic_probe() with the result of
 * psm_map_phys() on the local APIC's physical address.
 */
volatile uint32_t *apicadr = NULL; /* virtual addr of local APIC */
int apic_setspl_delay = 1; /* apic_setspl - delay enable */
int apic_clkvect;

/* ACPI SCI interrupt configuration; -1 if SCI not used */
int apic_sci_vect = -1;
iflag_t apic_sci_flags;

/* vector at which error interrupts come in */
int apic_errvect;
int apic_enable_error_intr = 1;
int apic_error_display_delay = 100;

/* vector at which performance counter overflow interrupts come in */
int apic_cpcovf_vect;
int apic_enable_cpcovf_intr = 1;

/* Max wait time (in microsecs) for flags to clear in an RDT entry. */
static int apic_max_usecs_clear_pending = 1000;

/* Amt of usecs to wait before checking if RDT flags have reset. */
#define APIC_USECS_PER_WAIT_INTERVAL 100

/* Maximum number of times to retry reprogramming via the timeout */
#define APIC_REPROGRAM_MAX_TIMEOUTS 10

/* timeout delay for IOAPIC delayed reprogramming */
#define APIC_REPROGRAM_TIMEOUT_DELAY 5 /* microseconds */

/* Parameter to apic_rebind(): Should reprogramming be done now or later? */
#define DEFERRED 1
#define IMMEDIATE 0

/*
 * Largest value that fits in an unsigned char: all NBBY bits set.
 * NBBY (the number of bits per byte) comes from <sys/param.h>.
 */
#define UCHAR_MAX ((1 << NBBY) - 1)

uchar_t apic_reserved_irqlist[MAX_ISA_IRQ + 1];

/*
 * The following vector assignments influence the value of ipltopri and
 * vectortoipl. Note that vectors 0 - 0x1f are not used. We can program
 * idle to 0 and IPL 0 to 0x10 to differentiate idle in case
 * we care to do so in future. Note some IPLs which are rarely used
 * will share the vector ranges and heavily used IPLs (5 and 6) have
 * a wide range.
 *	IPL		Vector range.		as passed to intr_enter
 *	0		none.
 *	1,2,3		0x20-0x2f		0x0-0xf
 *	4		0x30-0x3f		0x10-0x1f
 *	5		0x40-0x5f		0x20-0x3f
 *	6		0x60-0x7f		0x40-0x5f
 *	7,8,9		0x80-0x8f		0x60-0x6f
 *	10		0x90-0x9f		0x70-0x7f
 *	11		0xa0-0xaf		0x80-0x8f
 *	...		...
 *	16		0xf0-0xff		0xd0-0xdf
 */
uchar_t apic_vectortoipl[APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL] = {
	3, 4, 5, 5, 6, 6, 9, 10, 11, 12, 13, 14, 15, 16
};
/*
 * The ipl of an ISR at vector X is apic_vectortoipl[X>>4]: the table has
 * one entry per 0x10-wide vector range shown above.
 * NOTE that this is vector as passed into intr_enter which is
 * programmed vector - 0x20 (APIC_BASE_VECT)
 */

uchar_t apic_ipltopri[MAXIPL + 1]; /* unix ipl to apic pri */
/* The taskpri to be programmed into apic to mask given ipl */

#if defined(__amd64)
uchar_t apic_cr8pri[MAXIPL + 1]; /* unix ipl to cr8 pri */
#endif

/*
 * Patchable global variables.
 */
int apic_forceload = 0; /* apic_probe() gives up at once if this is < 0 */

/* Values for apic_intr_policy */
#define INTR_ROUND_ROBIN_WITH_AFFINITY 0
#define INTR_ROUND_ROBIN 1
#define INTR_LOWEST_PRIORITY 2

int apic_intr_policy = INTR_ROUND_ROBIN_WITH_AFFINITY;

static int apic_next_bind_cpu = 1; /* For round robin assignment */
				/* start with cpu 1 */

/* _init() installs apic_gettime as psm_gethrtime when this is non-zero */
int apic_coarse_hrtime = 1; /* 0 - use accurate slow gethrtime() */
			/* 1 - use gettime() for performance */
int apic_flat_model = 0; /* 0 - clustered. 1 - flat */
int apic_enable_hwsoftint = 0; /* 0 - disable, 1 - enable */
int apic_enable_bind_log = 1; /* 1 - display interrupt binding log */
int apic_panic_on_nmi = 0;
int apic_panic_on_apic_error = 0;

int apic_verbose = 0;

/* Flag definitions for apic_verbose */
#define APIC_VERBOSE_IOAPIC_FLAG 0x00000001
#define APIC_VERBOSE_IRQ_FLAG 0x00000002
#define APIC_VERBOSE_POWEROFF_FLAG 0x00000004
#define APIC_VERBOSE_POWEROFF_PAUSE_FLAG 0x00000008


/*
 * Conditional message macros, one per apic_verbose flag.  "fmt" is the
 * complete, parenthesized argument list handed to cmn_err()/prom_printf(),
 * e.g. APIC_VERBOSE_IRQ((CE_CONT, "...", arg)).
 *
 * NOTE(review): each macro expands to a bare "if" statement that already
 * ends in ';'.  An invocation therefore cannot be followed by "else", and
 * inside an unbraced if/else it changes which "if" the "else" pairs with.
 * Always put braces around the enclosing statement.
 */
#define APIC_VERBOSE_IOAPIC(fmt) \
	if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) \
		cmn_err fmt;

#define APIC_VERBOSE_IRQ(fmt) \
	if (apic_verbose & APIC_VERBOSE_IRQ_FLAG) \
		cmn_err fmt;

#define APIC_VERBOSE_POWEROFF(fmt) \
	if (apic_verbose & APIC_VERBOSE_POWEROFF_FLAG) \
		prom_printf fmt;


/* Now the ones for Dynamic Interrupt distribution */
int apic_enable_dynamic_migration = 0;

/*
 * If enabled, the distribution works as follows:
 * On every interrupt entry, the current ipl for the CPU is set in cpu_info
 * and the irq corresponding to the ipl is also set in the aci_current array.
 * interrupt exit and setspl (due to soft interrupts) will cause the current
 * ipl to be changed. This is cache friendly as these frequently used
 * paths write into a per cpu structure.
 *
 * Sampling is done by checking the structures for all CPUs and incrementing
 * the busy field of the irq (if any) executing on each CPU and the busy field
 * of the corresponding CPU.
 * In periodic mode this is done on every clock interrupt.
 * In one-shot mode, this is done thru a cyclic with an interval of
 * apic_redistribute_sample_interval (default 10 milli sec).
 *
 * Every apic_sample_factor_redistribution times we sample, we do computations
 * to decide which interrupt needs to be migrated (see comments
 * before apic_intr_redistribute()).
 */

/*
 * Following 3 variables start as % and can be patched or set using an
 * API to be defined in future. They will be scaled to
 * sample_factor_redistribution which is in turn set to hertz+1 (in periodic
 * mode), or 101 in one-shot mode to stagger it away from one sec processing
 */

int apic_int_busy_mark = 60;
int apic_int_free_mark = 20;
int apic_diff_for_redistribution = 10;

/* sampling interval for interrupt redistribution for dynamic migration */
int apic_redistribute_sample_interval = NANOSEC / 100; /* 10 millisec */

/*
 * number of times we sample before deciding to redistribute interrupts
 * for dynamic migration
 */
int apic_sample_factor_redistribution = 101;

/* timeout for xlate_vector, mark_vector */
int apic_revector_timeout = 16 * 10000; /* 160 millisec */

int apic_redist_cpu_skip = 0;
int apic_num_imbalance = 0;
int apic_num_rebind = 0;

int apic_nproc = 0;
int apic_defconf = 0;
int apic_irq_translate = 0;
int apic_spec_rev = 0;
int apic_imcrp = 0;

int apic_use_acpi = 1; /* 1 = use ACPI, 0 = don't use ACPI */
int apic_use_acpi_madt_only = 0; /* 1=ONLY use MADT from ACPI */

/*
 * For interrupt link devices, if apic_unconditional_srs is set, an irq resource
 * will be assigned (via _SRS). If it is not set, use the current
 * irq setting (via _CRS), but only if that irq is in the set of possible
 * irqs (returned by _PRS) for the device.
 */
int apic_unconditional_srs = 1;

/*
 * For interrupt link devices, if apic_prefer_crs is set when we are
 * assigning an IRQ resource to a device, prefer the current IRQ setting
 * over other possible irq settings under same conditions.
 */

int apic_prefer_crs = 1;


/* minimum number of timer ticks to program to */
int apic_min_timer_ticks = 1;
/*
 * Local static data
 */

/*
 * PSM operations vector handed to the PSM framework through apic_psm_info.
 * The initializer is positional, so entries must stay in the order of the
 * members of struct psm_ops; unsupported operations are typed NULLs.
 * _init() overwrites the psm_gethrtime member with apic_gettime when
 * apic_coarse_hrtime is set.
 */
static struct psm_ops apic_ops = {
	apic_probe,

	apic_init,
	apic_picinit,
	apic_intr_enter,
	apic_intr_exit,
	apic_setspl,
	apic_addspl,
	apic_delspl,
	apic_disable_intr,
	apic_enable_intr,
	apic_softlvl_to_irq,
	apic_set_softintr,

	apic_set_idlecpu,
	apic_unset_idlecpu,

	apic_clkinit,
	apic_getclkirq,
	(void (*)(void))NULL, /* psm_hrtimeinit */
	apic_gethrtime,

	apic_get_next_processorid,
	apic_cpu_start,
	apic_post_cpu_start,
	apic_shutdown,
	apic_get_ipivect,
	apic_send_ipi,

	(int (*)(dev_info_t *, int))NULL, /* psm_translate_irq */
	(int (*)(todinfo_t *))NULL, /* psm_tod_get */
	(int (*)(todinfo_t *))NULL, /* psm_tod_set */
	(void (*)(int, char *))NULL, /* psm_notify_error */
	(void (*)(int))NULL, /* psm_notify_func */
	apic_timer_reprogram,
	apic_timer_enable,
	apic_timer_disable,
	apic_post_cyclic_setup,
	apic_preshutdown,
	apic_intr_ops /* Advanced DDI Interrupt framework */
};


/* Module description passed to psm_mod_init/psm_mod_fini/psm_mod_info */
static struct psm_info apic_psm_info = {
	PSM_INFO_VER01_5, /* version */
	PSM_OWN_EXCLUSIVE, /* ownership */
	(struct psm_ops *)&apic_ops, /* operation */
	"pcplusmp", /* machine name */
	"pcplusmp v1.4 compatible %I%",
};

/* Handle filled in and consumed by the psm_mod_*() calls */
static void *apic_hdlp;

#ifdef DEBUG
#define DENT 0x0001
int apic_debug = 0;
/*
 * set apic_restrict_vector to the # of vectors we want to allow per range
 * useful in testing shared interrupt logic by setting it to 2 or 3
 */
int apic_restrict_vector = 0;

#define APIC_DEBUG_MSGBUFSIZE 2048
int apic_debug_msgbuf[APIC_DEBUG_MSGBUFSIZE];
int apic_debug_msgbufindex = 0;

/*
 * Put "int" info into debug buffer. No MP consistency, but light weight.
 * Good enough for most debugging.
 *
 * The index wraps back to 0 once it reaches APIC_DEBUG_MSGBUFSIZE - NCPU.
 *
 * NOTE(review): this expands to two statements and evaluates "x"
 * unparenthesized.  Used as the body of an unbraced if/else/loop, only the
 * store would be controlled by the condition; always brace the caller.
 */
#define APIC_DEBUG_BUF_PUT(x) \
	apic_debug_msgbuf[apic_debug_msgbufindex++] = x; \
	if (apic_debug_msgbufindex >= (APIC_DEBUG_MSGBUFSIZE - NCPU)) \
		apic_debug_msgbufindex = 0;

#endif /* DEBUG */

apic_cpus_info_t *apic_cpus;

static cpuset_t apic_cpumask;
static uint_t apic_flag;

/* Flag to indicate that we need to shut down all processors */
static uint_t apic_shutdown_processors;

uint_t apic_nsec_per_intr = 0;

/*
 * apic_let_idle_redistribute can have the following values:
 * 0 - If clock decremented it from 1 to 0, clock has to call redistribute.
 * apic_redistribute_lock prevents multiple idle cpus from redistributing
 */
int apic_num_idle_redistributions = 0;
static int apic_let_idle_redistribute = 0;
static uint_t apic_nticks = 0;
static uint_t apic_skipped_redistribute = 0;

/* to gather intr data and redistribute */
static void apic_redistribute_compute(void);

static uint_t last_count_read = 0;
static lock_t apic_gethrtime_lock;
volatile int apic_hrtime_stamp = 0;
volatile hrtime_t apic_nsec_since_boot = 0;
static uint_t apic_hertz_count, apic_nsec_per_tick;
static hrtime_t apic_nsec_max;

static hrtime_t apic_last_hrtime = 0;
int apic_hrtime_error = 0;
int apic_remote_hrterr = 0;
int apic_num_nmis = 0;
int apic_apic_error = 0;
int apic_num_apic_errors = 0;
int apic_num_cksum_errors = 0;

/* Per-I/O-APIC state; the first apic_io_max entries of each are in use */
static uchar_t apic_io_id[MAX_IO_APIC];
static uchar_t apic_io_ver[MAX_IO_APIC];
static uchar_t apic_io_vectbase[MAX_IO_APIC];
static uchar_t apic_io_vectend[MAX_IO_APIC];
volatile int32_t *apicioadr[MAX_IO_APIC];

/*
 * First available slot to be used as IRQ index into the apic_irq_table
 * for those interrupts (like MSI/X) that don't have a physical IRQ.
 */
int apic_first_avail_irq = APIC_FIRST_FREE_IRQ;

/*
 * apic_ioapic_lock protects the ioapics (reg select), the status, temp_bound
 * and bound elements of cpus_info and the temp_cpu element of irq_struct
 */
lock_t apic_ioapic_lock;

/*
 * apic_ioapic_reprogram_lock prevents a CPU from exiting
 * apic_intr_exit before IOAPIC reprogramming information
 * is collected.
 */
static lock_t apic_ioapic_reprogram_lock;
static int apic_io_max = 0; /* no. of i/o apics enabled */

static struct apic_io_intr *apic_io_intrp = 0;
static struct apic_bus *apic_busp;

uchar_t apic_vector_to_irq[APIC_MAX_VECTOR+1];
static uchar_t apic_resv_vector[MAXIPL+1];

static char apic_level_intr[APIC_MAX_VECTOR+1];
static int apic_error = 0;
/* values which apic_error can take. Not catastrophic, but may help debug */
#define APIC_ERR_BOOT_EOI 0x1
#define APIC_ERR_GET_IPIVECT_FAIL 0x2
#define APIC_ERR_INVALID_INDEX 0x4
#define APIC_ERR_MARK_VECTOR_FAIL 0x8
#define APIC_ERR_APIC_ERROR 0x40000000
#define APIC_ERR_NMI 0x80000000

static int apic_cmos_ssb_set = 0;

static uint32_t eisa_level_intr_mask = 0;
	/* At least MSB will be set if EISA bus */

static int apic_pci_bus_total = 0;
static uchar_t apic_single_pci_busid = 0;


/*
 * airq_mutex protects additions to the apic_irq_table - the first
 * pointer and any airq_nexts off of that one. It also protects
 * apic_max_device_irq & apic_min_device_irq. It also guarantees
 * that share_id is unique as new ids are generated only when new
 * irq_t structs are linked in. Once linked in the structs are never
 * deleted. temp_cpu & mps_intr_index field indicate if it is programmed
 * or allocated. Note that there is a slight gap between allocating in
 * apic_introp_xlate and programming in addspl.
 */
kmutex_t airq_mutex;
apic_irq_t *apic_irq_table[APIC_MAX_VECTOR+1];
int apic_max_device_irq = 0;
int apic_min_device_irq = APIC_MAX_VECTOR;

/* use to make sure only one cpu handles the nmi */
static lock_t apic_nmi_lock;
/* use to make sure only one cpu handles the error interrupt */
static lock_t apic_error_lock;

/*
 * Following declarations are for revectoring; used when ISRs at different
 * IPLs share an irq.
 */
static lock_t apic_revector_lock;
static int apic_revector_pending = 0;
static uchar_t *apic_oldvec_to_newvec;
static uchar_t *apic_newvec_to_oldvec;

/* Ensures that the IOAPIC-reprogramming timeout is not reentrant */
static kmutex_t apic_reprogram_timeout_mutex;

static struct ioapic_reprogram_data {
	int valid; /* This entry is valid */
	int bindcpu; /* The CPU to which the int will be bound */
	unsigned timeouts; /* # times the reprogram timeout was called */
} apic_reprogram_info[APIC_MAX_VECTOR+1];
/*
 * APIC_MAX_VECTOR + 1 is the maximum # of IRQs as well. apic_reprogram_info
 * is indexed by IRQ number, NOT by vector number.
 */


/*
 * The following added to identify a software poweroff method if available.
 *
 * apic_set_pwroff_method_from_mpcnfhdr() walks this table and picks the
 * first entry whose oem_id and prod_id are both prefixes of the OEM and
 * product strings found in the MP configuration table header.
 */

static struct {
	int poweroff_method;
	char oem_id[APIC_MPS_OEM_ID_LEN + 1]; /* MAX + 1 for NULL */
	char prod_id[APIC_MPS_PROD_ID_LEN + 1]; /* MAX + 1 for NULL */
} apic_mps_ids[] = {
	{ APIC_POWEROFF_VIA_RTC, "INTEL", "ALDER" }, /* 4300 */
	{ APIC_POWEROFF_VIA_RTC, "NCR", "AMC" }, /* 4300 */
	{ APIC_POWEROFF_VIA_ASPEN_BMC, "INTEL", "A450NX" }, /* 4400? */
	{ APIC_POWEROFF_VIA_ASPEN_BMC, "INTEL", "AD450NX" }, /* 4400 */
	{ APIC_POWEROFF_VIA_ASPEN_BMC, "INTEL", "AC450NX" }, /* 4400R */
	{ APIC_POWEROFF_VIA_SITKA_BMC, "INTEL", "S450NX" }, /* S50 */
	{ APIC_POWEROFF_VIA_SITKA_BMC, "INTEL", "SC450NX" } /* S50? */
};

int apic_poweroff_method = APIC_POWEROFF_NONE;

/*
 * Control/data byte pairs for the APIC_POWEROFF_VIA_ASPEN_BMC method:
 * a SET_WATCHDOG_TIMER request followed by a RESET_WATCHDOG_TIMER request
 * (see the per-entry comments).
 */
static struct {
	uchar_t cntl;
	uchar_t data;
} aspen_bmc[] = {
	{ CC_SMS_WR_START, 0x18 }, /* NetFn/LUN */
	{ CC_SMS_WR_NEXT, 0x24 }, /* Cmd SET_WATCHDOG_TIMER */
	{ CC_SMS_WR_NEXT, 0x84 }, /* DataByte 1: SMS/OS no log */
	{ CC_SMS_WR_NEXT, 0x2 }, /* DataByte 2: Power Down */
	{ CC_SMS_WR_NEXT, 0x0 }, /* DataByte 3: no pre-timeout */
	{ CC_SMS_WR_NEXT, 0x0 }, /* DataByte 4: timer expir. */
	{ CC_SMS_WR_NEXT, 0xa }, /* DataByte 5: init countdown */
	{ CC_SMS_WR_END, 0x0 }, /* DataByte 6: init countdown */

	{ CC_SMS_WR_START, 0x18 }, /* NetFn/LUN */
	{ CC_SMS_WR_END, 0x22 } /* Cmd RESET_WATCHDOG_TIMER */
};

/*
 * Port/data pairs for the APIC_POWEROFF_VIA_SITKA_BMC method: the same two
 * watchdog requests as aspen_bmc, expressed as writes to the SMS command
 * and data registers.
 */
static struct {
	int port;
	uchar_t data;
} sitka_bmc[] = {
	{ SMS_COMMAND_REGISTER, SMS_WRITE_START },
	{ SMS_DATA_REGISTER, 0x18 }, /* NetFn/LUN */
	{ SMS_DATA_REGISTER, 0x24 }, /* Cmd SET_WATCHDOG_TIMER */
	{ SMS_DATA_REGISTER, 0x84 }, /* DataByte 1: SMS/OS no log */
	{ SMS_DATA_REGISTER, 0x2 }, /* DataByte 2: Power Down */
	{ SMS_DATA_REGISTER, 0x0 }, /* DataByte 3: no pre-timeout */
	{ SMS_DATA_REGISTER, 0x0 }, /* DataByte 4: timer expir. */
	{ SMS_DATA_REGISTER, 0xa }, /* DataByte 5: init countdown */
	{ SMS_COMMAND_REGISTER, SMS_WRITE_END },
	{ SMS_DATA_REGISTER, 0x0 }, /* DataByte 6: init countdown */

	{ SMS_COMMAND_REGISTER, SMS_WRITE_START },
	{ SMS_DATA_REGISTER, 0x18 }, /* NetFn/LUN */
	{ SMS_COMMAND_REGISTER, SMS_WRITE_END },
	{ SMS_DATA_REGISTER, 0x22 } /* Cmd RESET_WATCHDOG_TIMER */
};


/* Patchable global variables.
*/ 640 int apic_kmdb_on_nmi = 0; /* 0 - no, 1 - yes enter kmdb */ 641 int apic_debug_mps_id = 0; /* 1 - print MPS ID strings */ 642 643 /* 644 * ACPI definitions 645 */ 646 /* _PIC method arguments */ 647 #define ACPI_PIC_MODE 0 648 #define ACPI_APIC_MODE 1 649 650 /* APIC error flags we care about */ 651 #define APIC_SEND_CS_ERROR 0x01 652 #define APIC_RECV_CS_ERROR 0x02 653 #define APIC_CS_ERRORS (APIC_SEND_CS_ERROR|APIC_RECV_CS_ERROR) 654 655 /* 656 * ACPI variables 657 */ 658 /* 1 = acpi is enabled & working, 0 = acpi is not enabled or not there */ 659 static int apic_enable_acpi = 0; 660 661 /* ACPI Multiple APIC Description Table ptr */ 662 static MULTIPLE_APIC_TABLE *acpi_mapic_dtp = NULL; 663 664 /* ACPI Interrupt Source Override Structure ptr */ 665 static MADT_INTERRUPT_OVERRIDE *acpi_isop = NULL; 666 static int acpi_iso_cnt = 0; 667 668 /* ACPI Non-maskable Interrupt Sources ptr */ 669 static MADT_NMI_SOURCE *acpi_nmi_sp = NULL; 670 static int acpi_nmi_scnt = 0; 671 static MADT_LOCAL_APIC_NMI *acpi_nmi_cp = NULL; 672 static int acpi_nmi_ccnt = 0; 673 674 /* 675 * extern declarations 676 */ 677 extern int intr_clear(void); 678 extern void intr_restore(uint_t); 679 #if defined(__amd64) 680 extern int intpri_use_cr8; 681 #endif /* __amd64 */ 682 683 extern int apic_pci_msi_enable_vector(dev_info_t *, int, int, 684 int, int, int); 685 extern apic_irq_t *apic_find_irq(dev_info_t *, struct intrspec *, int); 686 extern int apic_pci_msi_unconfigure(dev_info_t *, int, int); 687 extern int apic_pci_msi_disable_mode(dev_info_t *, int, int); 688 extern int apic_pci_msi_enable_mode(dev_info_t *, int, int); 689 690 /* 691 * This is the loadable module wrapper 692 */ 693 694 int 695 _init(void) 696 { 697 if (apic_coarse_hrtime) 698 apic_ops.psm_gethrtime = &apic_gettime; 699 return (psm_mod_init(&apic_hdlp, &apic_psm_info)); 700 } 701 702 int 703 _fini(void) 704 { 705 return (psm_mod_fini(&apic_hdlp, &apic_psm_info)); 706 } 707 708 int 709 _info(struct modinfo 
*modinfop) 710 { 711 return (psm_mod_info(&apic_hdlp, &apic_psm_info, modinfop)); 712 } 713 714 /* 715 * Auto-configuration routines 716 */ 717 718 /* 719 * Look at MPSpec 1.4 (Intel Order # 242016-005) for details of what we do here 720 * May work with 1.1 - but not guaranteed. 721 * According to the MP Spec, the MP floating pointer structure 722 * will be searched in the order described below: 723 * 1. In the first kilobyte of Extended BIOS Data Area (EBDA) 724 * 2. Within the last kilobyte of system base memory 725 * 3. In the BIOS ROM address space between 0F0000h and 0FFFFh 726 * Once we find the right signature with proper checksum, we call 727 * either handle_defconf or parse_mpct to get all info necessary for 728 * subsequent operations. 729 */ 730 static int 731 apic_probe() 732 { 733 uint32_t mpct_addr, ebda_start = 0, base_mem_end; 734 caddr_t biosdatap; 735 caddr_t mpct; 736 caddr_t fptr; 737 int i, mpct_size, mapsize, retval = PSM_FAILURE; 738 ushort_t ebda_seg, base_mem_size; 739 struct apic_mpfps_hdr *fpsp; 740 struct apic_mp_cnf_hdr *hdrp; 741 int bypass_cpu_and_ioapics_in_mptables; 742 int acpi_user_options; 743 744 if (apic_forceload < 0) 745 return (retval); 746 747 /* Allow override for MADT-only mode */ 748 acpi_user_options = ddi_prop_get_int(DDI_DEV_T_ANY, ddi_root_node(), 0, 749 "acpi-user-options", 0); 750 apic_use_acpi_madt_only = ((acpi_user_options & ACPI_OUSER_MADT) != 0); 751 752 /* Allow apic_use_acpi to override MADT-only mode */ 753 if (!apic_use_acpi) 754 apic_use_acpi_madt_only = 0; 755 756 retval = acpi_probe(); 757 758 /* 759 * mapin the bios data area 40:0 760 * 40:13h - two-byte location reports the base memory size 761 * 40:0Eh - two-byte location for the exact starting address of 762 * the EBDA segment for EISA 763 */ 764 biosdatap = psm_map_phys(0x400, 0x20, PROT_READ); 765 if (!biosdatap) 766 return (retval); 767 fpsp = (struct apic_mpfps_hdr *)NULL; 768 mapsize = MPFPS_RAM_WIN_LEN; 769 /*LINTED: pointer cast may result in 
improper alignment */ 770 ebda_seg = *((ushort_t *)(biosdatap+0xe)); 771 /* check the 1k of EBDA */ 772 if (ebda_seg) { 773 ebda_start = ((uint32_t)ebda_seg) << 4; 774 fptr = psm_map_phys(ebda_start, MPFPS_RAM_WIN_LEN, PROT_READ); 775 if (fptr) { 776 if (!(fpsp = 777 apic_find_fps_sig(fptr, MPFPS_RAM_WIN_LEN))) 778 psm_unmap_phys(fptr, MPFPS_RAM_WIN_LEN); 779 } 780 } 781 /* If not in EBDA, check the last k of system base memory */ 782 if (!fpsp) { 783 /*LINTED: pointer cast may result in improper alignment */ 784 base_mem_size = *((ushort_t *)(biosdatap + 0x13)); 785 786 if (base_mem_size > 512) 787 base_mem_end = 639 * 1024; 788 else 789 base_mem_end = 511 * 1024; 790 /* if ebda == last k of base mem, skip to check BIOS ROM */ 791 if (base_mem_end != ebda_start) { 792 793 fptr = psm_map_phys(base_mem_end, MPFPS_RAM_WIN_LEN, 794 PROT_READ); 795 796 if (fptr) { 797 if (!(fpsp = apic_find_fps_sig(fptr, 798 MPFPS_RAM_WIN_LEN))) 799 psm_unmap_phys(fptr, MPFPS_RAM_WIN_LEN); 800 } 801 } 802 } 803 psm_unmap_phys(biosdatap, 0x20); 804 805 /* If still cannot find it, check the BIOS ROM space */ 806 if (!fpsp) { 807 mapsize = MPFPS_ROM_WIN_LEN; 808 fptr = psm_map_phys(MPFPS_ROM_WIN_START, 809 MPFPS_ROM_WIN_LEN, PROT_READ); 810 if (fptr) { 811 if (!(fpsp = 812 apic_find_fps_sig(fptr, MPFPS_ROM_WIN_LEN))) { 813 psm_unmap_phys(fptr, MPFPS_ROM_WIN_LEN); 814 return (retval); 815 } 816 } 817 } 818 819 if (apic_checksum((caddr_t)fpsp, fpsp->mpfps_length * 16) != 0) { 820 psm_unmap_phys(fptr, MPFPS_ROM_WIN_LEN); 821 return (retval); 822 } 823 824 apic_spec_rev = fpsp->mpfps_spec_rev; 825 if ((apic_spec_rev != 04) && (apic_spec_rev != 01)) { 826 psm_unmap_phys(fptr, MPFPS_ROM_WIN_LEN); 827 return (retval); 828 } 829 830 /* check IMCR is present or not */ 831 apic_imcrp = fpsp->mpfps_featinfo2 & MPFPS_FEATINFO2_IMCRP; 832 833 /* check default configuration (dual CPUs) */ 834 if ((apic_defconf = fpsp->mpfps_featinfo1) != 0) { 835 psm_unmap_phys(fptr, mapsize); 836 return 
(apic_handle_defconf()); 837 } 838 839 /* MP Configuration Table */ 840 mpct_addr = (uint32_t)(fpsp->mpfps_mpct_paddr); 841 842 psm_unmap_phys(fptr, mapsize); /* unmap floating ptr struct */ 843 844 /* 845 * Map in enough memory for the MP Configuration Table Header. 846 * Use this table to read the total length of the BIOS data and 847 * map in all the info 848 */ 849 /*LINTED: pointer cast may result in improper alignment */ 850 hdrp = (struct apic_mp_cnf_hdr *)psm_map_phys(mpct_addr, 851 sizeof (struct apic_mp_cnf_hdr), PROT_READ); 852 if (!hdrp) 853 return (retval); 854 855 /* check mp configuration table signature PCMP */ 856 if (hdrp->mpcnf_sig != 0x504d4350) { 857 psm_unmap_phys((caddr_t)hdrp, sizeof (struct apic_mp_cnf_hdr)); 858 return (retval); 859 } 860 mpct_size = (int)hdrp->mpcnf_tbl_length; 861 862 apic_set_pwroff_method_from_mpcnfhdr(hdrp); 863 864 psm_unmap_phys((caddr_t)hdrp, sizeof (struct apic_mp_cnf_hdr)); 865 866 if ((retval == PSM_SUCCESS) && !apic_use_acpi_madt_only) { 867 /* This is an ACPI machine No need for further checks */ 868 return (retval); 869 } 870 871 /* 872 * Map in the entries for this machine, ie. Processor 873 * Entry Tables, Bus Entry Tables, etc. 
874 * They are in fixed order following one another 875 */ 876 mpct = psm_map_phys(mpct_addr, mpct_size, PROT_READ); 877 if (!mpct) 878 return (retval); 879 880 if (apic_checksum(mpct, mpct_size) != 0) 881 goto apic_fail1; 882 883 884 /*LINTED: pointer cast may result in improper alignment */ 885 hdrp = (struct apic_mp_cnf_hdr *)mpct; 886 /*LINTED: pointer cast may result in improper alignment */ 887 apicadr = (uint32_t *)psm_map_phys((uint32_t)hdrp->mpcnf_local_apic, 888 APIC_LOCAL_MEMLEN, PROT_READ | PROT_WRITE); 889 if (!apicadr) 890 goto apic_fail1; 891 892 /* Parse all information in the tables */ 893 bypass_cpu_and_ioapics_in_mptables = (retval == PSM_SUCCESS); 894 if (apic_parse_mpct(mpct, bypass_cpu_and_ioapics_in_mptables) == 895 PSM_SUCCESS) 896 return (PSM_SUCCESS); 897 898 for (i = 0; i < apic_io_max; i++) 899 psm_unmap_phys((caddr_t)apicioadr[i], APIC_IO_MEMLEN); 900 if (apic_cpus) 901 kmem_free(apic_cpus, sizeof (*apic_cpus) * apic_nproc); 902 if (apicadr) 903 psm_unmap_phys((caddr_t)apicadr, APIC_LOCAL_MEMLEN); 904 apic_fail1: 905 psm_unmap_phys(mpct, mpct_size); 906 return (retval); 907 } 908 909 static void 910 apic_set_pwroff_method_from_mpcnfhdr(struct apic_mp_cnf_hdr *hdrp) 911 { 912 int i; 913 914 for (i = 0; i < (sizeof (apic_mps_ids) / sizeof (apic_mps_ids[0])); 915 i++) { 916 if ((strncmp(hdrp->mpcnf_oem_str, apic_mps_ids[i].oem_id, 917 strlen(apic_mps_ids[i].oem_id)) == 0) && 918 (strncmp(hdrp->mpcnf_prod_str, apic_mps_ids[i].prod_id, 919 strlen(apic_mps_ids[i].prod_id)) == 0)) { 920 921 apic_poweroff_method = apic_mps_ids[i].poweroff_method; 922 break; 923 } 924 } 925 926 if (apic_debug_mps_id != 0) { 927 cmn_err(CE_CONT, "pcplusmp: MPS OEM ID = '%c%c%c%c%c%c%c%c'" 928 "Product ID = '%c%c%c%c%c%c%c%c%c%c%c%c'\n", 929 hdrp->mpcnf_oem_str[0], 930 hdrp->mpcnf_oem_str[1], 931 hdrp->mpcnf_oem_str[2], 932 hdrp->mpcnf_oem_str[3], 933 hdrp->mpcnf_oem_str[4], 934 hdrp->mpcnf_oem_str[5], 935 hdrp->mpcnf_oem_str[6], 936 hdrp->mpcnf_oem_str[7], 937 
hdrp->mpcnf_prod_str[0], 938 hdrp->mpcnf_prod_str[1], 939 hdrp->mpcnf_prod_str[2], 940 hdrp->mpcnf_prod_str[3], 941 hdrp->mpcnf_prod_str[4], 942 hdrp->mpcnf_prod_str[5], 943 hdrp->mpcnf_prod_str[6], 944 hdrp->mpcnf_prod_str[7], 945 hdrp->mpcnf_prod_str[8], 946 hdrp->mpcnf_prod_str[9], 947 hdrp->mpcnf_prod_str[10], 948 hdrp->mpcnf_prod_str[11]); 949 } 950 } 951 952 static int 953 acpi_probe(void) 954 { 955 int i, id, intmax, ver, index, rv; 956 int acpi_verboseflags = 0; 957 int madt_seen, madt_size; 958 APIC_HEADER *ap; 959 MADT_PROCESSOR_APIC *mpa; 960 MADT_IO_APIC *mia; 961 MADT_IO_SAPIC *misa; 962 MADT_INTERRUPT_OVERRIDE *mio; 963 MADT_NMI_SOURCE *mns; 964 MADT_INTERRUPT_SOURCE *mis; 965 MADT_LOCAL_APIC_NMI *mlan; 966 MADT_ADDRESS_OVERRIDE *mao; 967 ACPI_OBJECT_LIST arglist; 968 ACPI_OBJECT arg; 969 int sci; 970 iflag_t sci_flags; 971 volatile int32_t *ioapic; 972 char local_ids[NCPU]; 973 char proc_ids[NCPU]; 974 uchar_t hid; 975 976 if (!apic_use_acpi) 977 return (PSM_FAILURE); 978 979 if (AcpiGetFirmwareTable(APIC_SIG, 1, ACPI_LOGICAL_ADDRESSING, 980 (ACPI_TABLE_HEADER **) &acpi_mapic_dtp) != AE_OK) 981 return (PSM_FAILURE); 982 983 apicadr = (uint32_t *)psm_map_phys( 984 (uint32_t)acpi_mapic_dtp->LocalApicAddress, 985 APIC_LOCAL_MEMLEN, PROT_READ | PROT_WRITE); 986 if (!apicadr) 987 return (PSM_FAILURE); 988 989 id = apicadr[APIC_LID_REG]; 990 local_ids[0] = (uchar_t)(((uint_t)id) >> 24); 991 apic_nproc = index = 1; 992 CPUSET_ONLY(apic_cpumask, 0); 993 apic_io_max = 0; 994 995 ap = (APIC_HEADER *) (acpi_mapic_dtp + 1); 996 madt_size = acpi_mapic_dtp->Length; 997 madt_seen = sizeof (*acpi_mapic_dtp); 998 999 while (madt_seen < madt_size) { 1000 switch (ap->Type) { 1001 case APIC_PROCESSOR: 1002 mpa = (MADT_PROCESSOR_APIC *) ap; 1003 if (mpa->ProcessorEnabled) { 1004 if (mpa->LocalApicId == local_ids[0]) 1005 proc_ids[0] = mpa->ProcessorId; 1006 else if (apic_nproc < NCPU) { 1007 local_ids[index] = mpa->LocalApicId; 1008 proc_ids[index] = mpa->ProcessorId; 
1009 CPUSET_ADD(apic_cpumask, index); 1010 index++; 1011 apic_nproc++; 1012 } else 1013 cmn_err(CE_WARN, "pcplusmp: exceeded " 1014 "maximum no. of CPUs (= %d)", NCPU); 1015 } 1016 break; 1017 1018 case APIC_IO: 1019 mia = (MADT_IO_APIC *) ap; 1020 if (apic_io_max < MAX_IO_APIC) { 1021 apic_io_id[apic_io_max] = mia->IoApicId; 1022 apic_io_vectbase[apic_io_max] = 1023 mia->Interrupt; 1024 ioapic = apicioadr[apic_io_max] = 1025 (int32_t *)psm_map_phys( 1026 (uint32_t)mia->Address, 1027 APIC_IO_MEMLEN, PROT_READ | PROT_WRITE); 1028 if (!ioapic) 1029 goto cleanup; 1030 apic_io_max++; 1031 } 1032 break; 1033 1034 case APIC_XRUPT_OVERRIDE: 1035 mio = (MADT_INTERRUPT_OVERRIDE *) ap; 1036 if (acpi_isop == NULL) 1037 acpi_isop = mio; 1038 acpi_iso_cnt++; 1039 break; 1040 1041 case APIC_NMI: 1042 /* UNIMPLEMENTED */ 1043 mns = (MADT_NMI_SOURCE *) ap; 1044 if (acpi_nmi_sp == NULL) 1045 acpi_nmi_sp = mns; 1046 acpi_nmi_scnt++; 1047 1048 cmn_err(CE_NOTE, "!apic: nmi source: %d %d %d\n", 1049 mns->Interrupt, mns->Polarity, 1050 mns->TriggerMode); 1051 break; 1052 1053 case APIC_LOCAL_NMI: 1054 /* UNIMPLEMENTED */ 1055 mlan = (MADT_LOCAL_APIC_NMI *) ap; 1056 if (acpi_nmi_cp == NULL) 1057 acpi_nmi_cp = mlan; 1058 acpi_nmi_ccnt++; 1059 1060 cmn_err(CE_NOTE, "!apic: local nmi: %d %d %d %d\n", 1061 mlan->ProcessorId, mlan->Polarity, 1062 mlan->TriggerMode, mlan->Lint); 1063 break; 1064 1065 case APIC_ADDRESS_OVERRIDE: 1066 /* UNIMPLEMENTED */ 1067 mao = (MADT_ADDRESS_OVERRIDE *) ap; 1068 cmn_err(CE_NOTE, "!apic: address override: %lx\n", 1069 (long)mao->Address); 1070 break; 1071 1072 case APIC_IO_SAPIC: 1073 /* UNIMPLEMENTED */ 1074 misa = (MADT_IO_SAPIC *) ap; 1075 1076 cmn_err(CE_NOTE, "!apic: io sapic: %d %d %lx\n", 1077 misa->IoSapicId, misa->InterruptBase, 1078 (long)misa->Address); 1079 break; 1080 1081 case APIC_XRUPT_SOURCE: 1082 /* UNIMPLEMENTED */ 1083 mis = (MADT_INTERRUPT_SOURCE *) ap; 1084 1085 cmn_err(CE_NOTE, 1086 "!apic: irq source: %d %d %d %d %d %d %d\n", 1087 
mis->ProcessorId, mis->ProcessorEid, 1088 mis->Interrupt, mis->Polarity, 1089 mis->TriggerMode, mis->InterruptType, 1090 mis->IoSapicVector); 1091 break; 1092 case APIC_RESERVED: 1093 default: 1094 break; /* ignore unknown items as per ACPI spec */ 1095 } 1096 1097 /* advance to next entry */ 1098 madt_seen += ap->Length; 1099 ap = (APIC_HEADER *)(((char *)ap) + ap->Length); 1100 } 1101 1102 if ((apic_cpus = kmem_zalloc(sizeof (*apic_cpus) * apic_nproc, 1103 KM_NOSLEEP)) == NULL) 1104 goto cleanup; 1105 1106 /* 1107 * ACPI doesn't provide the local apic ver, get it directly from the 1108 * local apic 1109 */ 1110 ver = apicadr[APIC_VERS_REG]; 1111 for (i = 0; i < apic_nproc; i++) { 1112 apic_cpus[i].aci_local_id = local_ids[i]; 1113 apic_cpus[i].aci_local_ver = (uchar_t)(ver & 0xFF); 1114 } 1115 for (i = 0; i < apic_io_max; i++) { 1116 ioapic = apicioadr[i]; 1117 1118 /* 1119 * need to check Sitka on the following acpi problem 1120 * On the Sitka, the ioapic's apic_id field isn't reporting 1121 * the actual io apic id. We have reported this problem 1122 * to Intel. Until they fix the problem, we will get the 1123 * actual id directly from the ioapic. 
1124 */ 1125 ioapic[APIC_IO_REG] = APIC_ID_CMD; 1126 id = ioapic[APIC_IO_DATA]; 1127 hid = (uchar_t)(((uint_t)id) >> 24); 1128 1129 if (hid != apic_io_id[i]) { 1130 if (apic_io_id[i] == 0) 1131 apic_io_id[i] = hid; 1132 else { /* set ioapic id to whatever reported by ACPI */ 1133 id = ((int32_t)apic_io_id[i]) << 24; 1134 ioapic[APIC_IO_REG] = APIC_ID_CMD; 1135 ioapic[APIC_IO_DATA] = id; 1136 } 1137 } 1138 ioapic[APIC_IO_REG] = APIC_VERS_CMD; 1139 ver = ioapic[APIC_IO_DATA]; 1140 apic_io_ver[i] = (uchar_t)(ver & 0xff); 1141 intmax = (ver >> 16) & 0xff; 1142 apic_io_vectend[i] = apic_io_vectbase[i] + intmax; 1143 if (apic_first_avail_irq <= apic_io_vectend[i]) 1144 apic_first_avail_irq = apic_io_vectend[i] + 1; 1145 } 1146 1147 1148 /* 1149 * Process SCI configuration here 1150 * An error may be returned here if 1151 * acpi-user-options specifies legacy mode 1152 * (no SCI, no ACPI mode) 1153 */ 1154 if (acpica_get_sci(&sci, &sci_flags) != AE_OK) 1155 sci = -1; 1156 1157 /* 1158 * Now call acpi_init() to generate namespaces 1159 * If this fails, we don't attempt to use ACPI 1160 * even if we were able to get a MADT above 1161 */ 1162 if (acpica_init() != AE_OK) 1163 goto cleanup; 1164 1165 /* 1166 * Squirrel away the SCI and flags for later on 1167 * in apic_picinit() when we're ready 1168 */ 1169 apic_sci_vect = sci; 1170 apic_sci_flags = sci_flags; 1171 1172 if (apic_verbose & APIC_VERBOSE_IRQ_FLAG) 1173 acpi_verboseflags |= PSM_VERBOSE_IRQ_FLAG; 1174 1175 if (apic_verbose & APIC_VERBOSE_POWEROFF_FLAG) 1176 acpi_verboseflags |= PSM_VERBOSE_POWEROFF_FLAG; 1177 1178 if (apic_verbose & APIC_VERBOSE_POWEROFF_PAUSE_FLAG) 1179 acpi_verboseflags |= PSM_VERBOSE_POWEROFF_PAUSE_FLAG; 1180 1181 if (acpi_psm_init(apic_psm_info.p_mach_idstring, acpi_verboseflags) == 1182 ACPI_PSM_FAILURE) 1183 goto cleanup; 1184 1185 /* Enable ACPI APIC interrupt routing */ 1186 arglist.Count = 1; 1187 arglist.Pointer = &arg; 1188 arg.Type = ACPI_TYPE_INTEGER; 1189 arg.Integer.Value = 
ACPI_APIC_MODE; /* 1 */ 1190 rv = AcpiEvaluateObject(NULL, "\\_PIC", &arglist, NULL); 1191 if (rv == AE_OK) { 1192 build_reserved_irqlist((uchar_t *)apic_reserved_irqlist); 1193 apic_enable_acpi = 1; 1194 if (apic_use_acpi_madt_only) { 1195 cmn_err(CE_CONT, 1196 "?Using ACPI for CPU/IOAPIC information ONLY\n"); 1197 } 1198 return (PSM_SUCCESS); 1199 } 1200 /* if setting APIC mode failed above, we fall through to cleanup */ 1201 1202 cleanup: 1203 if (apicadr != NULL) { 1204 psm_unmap_phys((caddr_t)apicadr, APIC_LOCAL_MEMLEN); 1205 apicadr = NULL; 1206 } 1207 apic_nproc = 0; 1208 for (i = 0; i < apic_io_max; i++) { 1209 psm_unmap_phys((caddr_t)apicioadr[i], APIC_IO_MEMLEN); 1210 apicioadr[i] = NULL; 1211 } 1212 apic_io_max = 0; 1213 acpi_isop = NULL; 1214 acpi_iso_cnt = 0; 1215 acpi_nmi_sp = NULL; 1216 acpi_nmi_scnt = 0; 1217 acpi_nmi_cp = NULL; 1218 acpi_nmi_ccnt = 0; 1219 return (PSM_FAILURE); 1220 } 1221 1222 /* 1223 * Handle default configuration. Fill in reqd global variables & tables 1224 * Fill all details as MP table does not give any more info 1225 */ 1226 static int 1227 apic_handle_defconf() 1228 { 1229 uint_t lid; 1230 1231 /*LINTED: pointer cast may result in improper alignment */ 1232 apicioadr[0] = (int32_t *)psm_map_phys(APIC_IO_ADDR, 1233 APIC_IO_MEMLEN, PROT_READ | PROT_WRITE); 1234 /*LINTED: pointer cast may result in improper alignment */ 1235 apicadr = (uint32_t *)psm_map_phys(APIC_LOCAL_ADDR, 1236 APIC_LOCAL_MEMLEN, PROT_READ | PROT_WRITE); 1237 apic_cpus = (apic_cpus_info_t *) 1238 kmem_zalloc(sizeof (*apic_cpus) * 2, KM_NOSLEEP); 1239 if ((!apicadr) || (!apicioadr[0]) || (!apic_cpus)) 1240 goto apic_handle_defconf_fail; 1241 CPUSET_ONLY(apic_cpumask, 0); 1242 CPUSET_ADD(apic_cpumask, 1); 1243 apic_nproc = 2; 1244 lid = apicadr[APIC_LID_REG]; 1245 apic_cpus[0].aci_local_id = (uchar_t)(lid >> APIC_ID_BIT_OFFSET); 1246 /* 1247 * According to the PC+MP spec 1.1, the local ids 1248 * for the default configuration has to be 0 or 1 1249 */ 1250 if 
(apic_cpus[0].aci_local_id == 1) 1251 apic_cpus[1].aci_local_id = 0; 1252 else if (apic_cpus[0].aci_local_id == 0) 1253 apic_cpus[1].aci_local_id = 1; 1254 else 1255 goto apic_handle_defconf_fail; 1256 1257 apic_io_id[0] = 2; 1258 apic_io_max = 1; 1259 if (apic_defconf >= 5) { 1260 apic_cpus[0].aci_local_ver = APIC_INTEGRATED_VERS; 1261 apic_cpus[1].aci_local_ver = APIC_INTEGRATED_VERS; 1262 apic_io_ver[0] = APIC_INTEGRATED_VERS; 1263 } else { 1264 apic_cpus[0].aci_local_ver = 0; /* 82489 DX */ 1265 apic_cpus[1].aci_local_ver = 0; 1266 apic_io_ver[0] = 0; 1267 } 1268 if (apic_defconf == 2 || apic_defconf == 3 || apic_defconf == 6) 1269 eisa_level_intr_mask = (inb(EISA_LEVEL_CNTL + 1) << 8) | 1270 inb(EISA_LEVEL_CNTL) | ((uint_t)INT32_MAX + 1); 1271 return (PSM_SUCCESS); 1272 1273 apic_handle_defconf_fail: 1274 if (apic_cpus) 1275 kmem_free(apic_cpus, sizeof (*apic_cpus) * 2); 1276 if (apicadr) 1277 psm_unmap_phys((caddr_t)apicadr, APIC_LOCAL_MEMLEN); 1278 if (apicioadr[0]) 1279 psm_unmap_phys((caddr_t)apicioadr[0], APIC_IO_MEMLEN); 1280 return (PSM_FAILURE); 1281 } 1282 1283 /* Parse the entries in MP configuration table and collect info that we need */ 1284 static int 1285 apic_parse_mpct(caddr_t mpct, int bypass_cpus_and_ioapics) 1286 { 1287 struct apic_procent *procp; 1288 struct apic_bus *busp; 1289 struct apic_io_entry *ioapicp; 1290 struct apic_io_intr *intrp; 1291 volatile int32_t *ioapic; 1292 uint_t lid; 1293 int id; 1294 uchar_t hid; 1295 1296 /*LINTED: pointer cast may result in improper alignment */ 1297 procp = (struct apic_procent *)(mpct + sizeof (struct apic_mp_cnf_hdr)); 1298 1299 /* No need to count cpu entries if we won't use them */ 1300 if (!bypass_cpus_and_ioapics) { 1301 1302 /* Find max # of CPUS and allocate structure accordingly */ 1303 apic_nproc = 0; 1304 CPUSET_ZERO(apic_cpumask); 1305 while (procp->proc_entry == APIC_CPU_ENTRY) { 1306 if (procp->proc_cpuflags & CPUFLAGS_EN) { 1307 if (apic_nproc < NCPU) 1308 CPUSET_ADD(apic_cpumask, 
apic_nproc); 1309 apic_nproc++; 1310 } 1311 procp++; 1312 } 1313 if (apic_nproc > NCPU) 1314 cmn_err(CE_WARN, "pcplusmp: exceeded " 1315 "maximum no. of CPUs (= %d)", NCPU); 1316 if (!apic_nproc || !(apic_cpus = (apic_cpus_info_t *) 1317 kmem_zalloc(sizeof (*apic_cpus)*apic_nproc, KM_NOSLEEP))) 1318 return (PSM_FAILURE); 1319 } 1320 1321 /*LINTED: pointer cast may result in improper alignment */ 1322 procp = (struct apic_procent *)(mpct + sizeof (struct apic_mp_cnf_hdr)); 1323 1324 /* 1325 * start with index 1 as 0 needs to be filled in with Boot CPU, but 1326 * if we're bypassing this information, it has already been filled 1327 * in by acpi_probe(), so don't overwrite it. 1328 */ 1329 if (!bypass_cpus_and_ioapics) 1330 apic_nproc = 1; 1331 1332 while (procp->proc_entry == APIC_CPU_ENTRY) { 1333 /* check whether the cpu exists or not */ 1334 if (!bypass_cpus_and_ioapics && 1335 procp->proc_cpuflags & CPUFLAGS_EN) { 1336 if (procp->proc_cpuflags & CPUFLAGS_BP) { /* Boot CPU */ 1337 lid = apicadr[APIC_LID_REG]; 1338 apic_cpus[0].aci_local_id = procp->proc_apicid; 1339 if (apic_cpus[0].aci_local_id != 1340 (uchar_t)(lid >> APIC_ID_BIT_OFFSET)) { 1341 return (PSM_FAILURE); 1342 } 1343 apic_cpus[0].aci_local_ver = 1344 procp->proc_version; 1345 } else { 1346 1347 apic_cpus[apic_nproc].aci_local_id = 1348 procp->proc_apicid; 1349 apic_cpus[apic_nproc].aci_local_ver = 1350 procp->proc_version; 1351 apic_nproc++; 1352 1353 } 1354 } 1355 procp++; 1356 } 1357 1358 /* 1359 * Save start of bus entries for later use. 1360 * Get EISA level cntrl if EISA bus is present. 
1361 * Also get the CPI bus id for single CPI bus case 1362 */ 1363 apic_busp = busp = (struct apic_bus *)procp; 1364 while (busp->bus_entry == APIC_BUS_ENTRY) { 1365 lid = apic_find_bus_type((char *)&busp->bus_str1); 1366 if (lid == BUS_EISA) { 1367 eisa_level_intr_mask = (inb(EISA_LEVEL_CNTL + 1) << 8) | 1368 inb(EISA_LEVEL_CNTL) | ((uint_t)INT32_MAX + 1); 1369 } else if (lid == BUS_PCI) { 1370 /* 1371 * apic_single_pci_busid will be used only if 1372 * apic_pic_bus_total is equal to 1 1373 */ 1374 apic_pci_bus_total++; 1375 apic_single_pci_busid = busp->bus_id; 1376 } 1377 busp++; 1378 } 1379 1380 ioapicp = (struct apic_io_entry *)busp; 1381 1382 if (!bypass_cpus_and_ioapics) 1383 apic_io_max = 0; 1384 do { 1385 if (!bypass_cpus_and_ioapics && apic_io_max < MAX_IO_APIC) { 1386 if (ioapicp->io_flags & IOAPIC_FLAGS_EN) { 1387 apic_io_id[apic_io_max] = ioapicp->io_apicid; 1388 apic_io_ver[apic_io_max] = ioapicp->io_version; 1389 /*LINTED: pointer cast may result in improper alignment */ 1390 apicioadr[apic_io_max] = 1391 (int32_t *)psm_map_phys( 1392 (uint32_t)ioapicp->io_apic_addr, 1393 APIC_IO_MEMLEN, PROT_READ | PROT_WRITE); 1394 1395 if (!apicioadr[apic_io_max]) 1396 return (PSM_FAILURE); 1397 1398 ioapic = apicioadr[apic_io_max]; 1399 ioapic[APIC_IO_REG] = APIC_ID_CMD; 1400 id = ioapic[APIC_IO_DATA]; 1401 hid = (uchar_t)(((uint_t)id) >> 24); 1402 1403 if (hid != apic_io_id[apic_io_max]) { 1404 if (apic_io_id[apic_io_max] == 0) 1405 apic_io_id[apic_io_max] = hid; 1406 else { 1407 /* 1408 * set ioapic id to whatever 1409 * reported by MPS 1410 * 1411 * may not need to set index 1412 * again ??? 
1413 * take it out and try 1414 */ 1415 1416 id = ((int32_t) 1417 apic_io_id[apic_io_max]) << 1418 24; 1419 1420 ioapic[APIC_IO_REG] = 1421 APIC_ID_CMD; 1422 1423 ioapic[APIC_IO_DATA] = id; 1424 1425 } 1426 } 1427 apic_io_max++; 1428 } 1429 } 1430 ioapicp++; 1431 } while (ioapicp->io_entry == APIC_IO_ENTRY); 1432 1433 apic_io_intrp = (struct apic_io_intr *)ioapicp; 1434 1435 intrp = apic_io_intrp; 1436 while (intrp->intr_entry == APIC_IO_INTR_ENTRY) { 1437 if ((intrp->intr_irq > APIC_MAX_ISA_IRQ) || 1438 (apic_find_bus(intrp->intr_busid) == BUS_PCI)) { 1439 apic_irq_translate = 1; 1440 break; 1441 } 1442 intrp++; 1443 } 1444 1445 return (PSM_SUCCESS); 1446 } 1447 1448 boolean_t 1449 apic_cpu_in_range(int cpu) 1450 { 1451 return ((cpu & ~IRQ_USER_BOUND) < apic_nproc); 1452 } 1453 1454 static struct apic_mpfps_hdr * 1455 apic_find_fps_sig(caddr_t cptr, int len) 1456 { 1457 int i; 1458 1459 /* Look for the pattern "_MP_" */ 1460 for (i = 0; i < len; i += 16) { 1461 if ((*(cptr+i) == '_') && 1462 (*(cptr+i+1) == 'M') && 1463 (*(cptr+i+2) == 'P') && 1464 (*(cptr+i+3) == '_')) 1465 /*LINTED: pointer cast may result in improper alignment */ 1466 return ((struct apic_mpfps_hdr *)(cptr + i)); 1467 } 1468 return (NULL); 1469 } 1470 1471 static int 1472 apic_checksum(caddr_t bptr, int len) 1473 { 1474 int i; 1475 uchar_t cksum; 1476 1477 cksum = 0; 1478 for (i = 0; i < len; i++) 1479 cksum += *bptr++; 1480 return ((int)cksum); 1481 } 1482 1483 1484 /* 1485 * Initialise vector->ipl and ipl->pri arrays. level_intr and irqtable 1486 * are also set to NULL. vector->irq is set to a value which cannot map 1487 * to a real irq to show that it is free. 
1488 */ 1489 void 1490 apic_init() 1491 { 1492 int i; 1493 int *iptr; 1494 1495 int j = 1; 1496 apic_ipltopri[0] = APIC_VECTOR_PER_IPL; /* leave 0 for idle */ 1497 for (i = 0; i < (APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL); i++) { 1498 if ((i < ((APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL) - 1)) && 1499 (apic_vectortoipl[i + 1] == apic_vectortoipl[i])) 1500 /* get to highest vector at the same ipl */ 1501 continue; 1502 for (; j <= apic_vectortoipl[i]; j++) { 1503 apic_ipltopri[j] = (i << APIC_IPL_SHIFT) + 1504 APIC_BASE_VECT; 1505 } 1506 } 1507 for (; j < MAXIPL + 1; j++) 1508 /* fill up any empty ipltopri slots */ 1509 apic_ipltopri[j] = (i << APIC_IPL_SHIFT) + APIC_BASE_VECT; 1510 1511 /* cpu 0 is always up */ 1512 apic_cpus[0].aci_status = APIC_CPU_ONLINE | APIC_CPU_INTR_ENABLE; 1513 1514 iptr = (int *)&apic_irq_table[0]; 1515 for (i = 0; i <= APIC_MAX_VECTOR; i++) { 1516 apic_level_intr[i] = 0; 1517 *iptr++ = NULL; 1518 apic_vector_to_irq[i] = APIC_RESV_IRQ; 1519 apic_reprogram_info[i].valid = 0; 1520 apic_reprogram_info[i].bindcpu = 0; 1521 apic_reprogram_info[i].timeouts = 0; 1522 } 1523 1524 /* 1525 * Allocate a dummy irq table entry for the reserved entry. 1526 * This takes care of the race between removing an irq and 1527 * clock detecting a CPU in that irq during interrupt load 1528 * sampling. 1529 */ 1530 apic_irq_table[APIC_RESV_IRQ] = 1531 kmem_zalloc(sizeof (apic_irq_t), KM_NOSLEEP); 1532 1533 mutex_init(&airq_mutex, NULL, MUTEX_DEFAULT, NULL); 1534 mutex_init(&apic_reprogram_timeout_mutex, NULL, MUTEX_DEFAULT, NULL); 1535 #if defined(__amd64) 1536 /* 1537 * Make cpu-specific interrupt info point to cr8pri vector 1538 */ 1539 for (i = 0; i <= MAXIPL; i++) 1540 apic_cr8pri[i] = apic_ipltopri[i] >> APIC_IPL_SHIFT; 1541 CPU->cpu_pri_data = apic_cr8pri; 1542 intpri_use_cr8 = 1; 1543 #endif /* __amd64 */ 1544 } 1545 1546 /* 1547 * handler for APIC Error interrupt. 
Just print a warning and continue 1548 */ 1549 static int 1550 apic_error_intr() 1551 { 1552 uint_t error0, error1, error; 1553 uint_t i; 1554 1555 /* 1556 * We need to write before read as per 7.4.17 of system prog manual. 1557 * We do both and or the results to be safe 1558 */ 1559 error0 = apicadr[APIC_ERROR_STATUS]; 1560 apicadr[APIC_ERROR_STATUS] = 0; 1561 error1 = apicadr[APIC_ERROR_STATUS]; 1562 error = error0 | error1; 1563 1564 /* 1565 * Clear the APIC error status (do this on all cpus that enter here) 1566 * (two writes are required due to the semantics of accessing the 1567 * error status register.) 1568 */ 1569 apicadr[APIC_ERROR_STATUS] = 0; 1570 apicadr[APIC_ERROR_STATUS] = 0; 1571 1572 /* 1573 * Prevent more than 1 CPU from handling error interrupt causing 1574 * double printing (interleave of characters from multiple 1575 * CPU's when using prom_printf) 1576 */ 1577 if (lock_try(&apic_error_lock) == 0) 1578 return (error ? DDI_INTR_CLAIMED : DDI_INTR_UNCLAIMED); 1579 if (error) { 1580 #if DEBUG 1581 if (apic_debug) 1582 debug_enter("pcplusmp: APIC Error interrupt received"); 1583 #endif /* DEBUG */ 1584 if (apic_panic_on_apic_error) 1585 cmn_err(CE_PANIC, 1586 "APIC Error interrupt on CPU %d. Status = %x\n", 1587 psm_get_cpu_id(), error); 1588 else { 1589 if ((error & ~APIC_CS_ERRORS) == 0) { 1590 /* cksum error only */ 1591 apic_error |= APIC_ERR_APIC_ERROR; 1592 apic_apic_error |= error; 1593 apic_num_apic_errors++; 1594 apic_num_cksum_errors++; 1595 } else { 1596 /* 1597 * prom_printf is the best shot we have of 1598 * something which is problem free from 1599 * high level/NMI type of interrupts 1600 */ 1601 prom_printf("APIC Error interrupt on CPU %d. 
" 1602 "Status 0 = %x, Status 1 = %x\n", 1603 psm_get_cpu_id(), error0, error1); 1604 apic_error |= APIC_ERR_APIC_ERROR; 1605 apic_apic_error |= error; 1606 apic_num_apic_errors++; 1607 for (i = 0; i < apic_error_display_delay; i++) { 1608 tenmicrosec(); 1609 } 1610 /* 1611 * provide more delay next time limited to 1612 * roughly 1 clock tick time 1613 */ 1614 if (apic_error_display_delay < 500) 1615 apic_error_display_delay *= 2; 1616 } 1617 } 1618 lock_clear(&apic_error_lock); 1619 return (DDI_INTR_CLAIMED); 1620 } else { 1621 lock_clear(&apic_error_lock); 1622 return (DDI_INTR_UNCLAIMED); 1623 } 1624 /* NOTREACHED */ 1625 } 1626 1627 /* 1628 * Turn off the mask bit in the performance counter Local Vector Table entry. 1629 */ 1630 static void 1631 apic_cpcovf_mask_clear(void) 1632 { 1633 apicadr[APIC_PCINT_VECT] &= ~APIC_LVT_MASK; 1634 } 1635 1636 static void 1637 apic_init_intr() 1638 { 1639 processorid_t cpun = psm_get_cpu_id(); 1640 1641 #if defined(__amd64) 1642 setcr8((ulong_t)(APIC_MASK_ALL >> APIC_IPL_SHIFT)); 1643 #else 1644 apicadr[APIC_TASK_REG] = APIC_MASK_ALL; 1645 #endif 1646 1647 if (apic_flat_model) 1648 apicadr[APIC_FORMAT_REG] = APIC_FLAT_MODEL; 1649 else 1650 apicadr[APIC_FORMAT_REG] = APIC_CLUSTER_MODEL; 1651 apicadr[APIC_DEST_REG] = AV_HIGH_ORDER >> cpun; 1652 1653 /* need to enable APIC before unmasking NMI */ 1654 apicadr[APIC_SPUR_INT_REG] = AV_UNIT_ENABLE | APIC_SPUR_INTR; 1655 1656 apicadr[APIC_LOCAL_TIMER] = AV_MASK; 1657 apicadr[APIC_INT_VECT0] = AV_MASK; /* local intr reg 0 */ 1658 apicadr[APIC_INT_VECT1] = AV_NMI; /* enable NMI */ 1659 1660 if (apic_cpus[cpun].aci_local_ver < APIC_INTEGRATED_VERS) 1661 return; 1662 1663 /* Enable performance counter overflow interrupt */ 1664 1665 if ((x86_feature & X86_MSR) != X86_MSR) 1666 apic_enable_cpcovf_intr = 0; 1667 if (apic_enable_cpcovf_intr) { 1668 if (apic_cpcovf_vect == 0) { 1669 int ipl = APIC_PCINT_IPL; 1670 int irq = apic_get_ipivect(ipl, -1); 1671 1672 ASSERT(irq != -1); 1673 
apic_cpcovf_vect = apic_irq_table[irq]->airq_vector; 1674 ASSERT(apic_cpcovf_vect); 1675 (void) add_avintr(NULL, ipl, 1676 (avfunc)kcpc_hw_overflow_intr, 1677 "apic pcint", irq, NULL, NULL, NULL, NULL); 1678 kcpc_hw_overflow_intr_installed = 1; 1679 kcpc_hw_enable_cpc_intr = apic_cpcovf_mask_clear; 1680 } 1681 apicadr[APIC_PCINT_VECT] = apic_cpcovf_vect; 1682 } 1683 1684 /* Enable error interrupt */ 1685 1686 if (apic_enable_error_intr) { 1687 if (apic_errvect == 0) { 1688 int ipl = 0xf; /* get highest priority intr */ 1689 int irq = apic_get_ipivect(ipl, -1); 1690 1691 ASSERT(irq != -1); 1692 apic_errvect = apic_irq_table[irq]->airq_vector; 1693 ASSERT(apic_errvect); 1694 /* 1695 * Not PSMI compliant, but we are going to merge 1696 * with ON anyway 1697 */ 1698 (void) add_avintr((void *)NULL, ipl, 1699 (avfunc)apic_error_intr, "apic error intr", 1700 irq, NULL, NULL, NULL, NULL); 1701 } 1702 apicadr[APIC_ERR_VECT] = apic_errvect; 1703 apicadr[APIC_ERROR_STATUS] = 0; 1704 apicadr[APIC_ERROR_STATUS] = 0; 1705 } 1706 } 1707 1708 static void 1709 apic_disable_local_apic() 1710 { 1711 apicadr[APIC_TASK_REG] = APIC_MASK_ALL; 1712 apicadr[APIC_LOCAL_TIMER] = AV_MASK; 1713 apicadr[APIC_INT_VECT0] = AV_MASK; /* local intr reg 0 */ 1714 apicadr[APIC_INT_VECT1] = AV_MASK; /* disable NMI */ 1715 apicadr[APIC_ERR_VECT] = AV_MASK; /* and error interrupt */ 1716 apicadr[APIC_PCINT_VECT] = AV_MASK; /* and perf counter intr */ 1717 apicadr[APIC_SPUR_INT_REG] = APIC_SPUR_INTR; 1718 } 1719 1720 static void 1721 apic_picinit(void) 1722 { 1723 int i, j; 1724 uint_t isr; 1725 volatile int32_t *ioapic; 1726 apic_irq_t *irqptr; 1727 struct intrspec ispec; 1728 1729 /* 1730 * On UniSys Model 6520, the BIOS leaves vector 0x20 isr 1731 * bit on without clearing it with EOI. Since softint 1732 * uses vector 0x20 to interrupt itself, so softint will 1733 * not work on this machine. In order to fix this problem 1734 * a check is made to verify all the isr bits are clear. 
1735 * If not, EOIs are issued to clear the bits. 1736 */ 1737 for (i = 7; i >= 1; i--) { 1738 if ((isr = apicadr[APIC_ISR_REG + (i * 4)]) != 0) 1739 for (j = 0; ((j < 32) && (isr != 0)); j++) 1740 if (isr & (1 << j)) { 1741 apicadr[APIC_EOI_REG] = 0; 1742 isr &= ~(1 << j); 1743 apic_error |= APIC_ERR_BOOT_EOI; 1744 } 1745 } 1746 1747 /* set a flag so we know we have run apic_picinit() */ 1748 apic_flag = 1; 1749 LOCK_INIT_CLEAR(&apic_gethrtime_lock); 1750 LOCK_INIT_CLEAR(&apic_ioapic_lock); 1751 LOCK_INIT_CLEAR(&apic_revector_lock); 1752 LOCK_INIT_CLEAR(&apic_ioapic_reprogram_lock); 1753 LOCK_INIT_CLEAR(&apic_error_lock); 1754 1755 picsetup(); /* initialise the 8259 */ 1756 1757 /* add nmi handler - least priority nmi handler */ 1758 LOCK_INIT_CLEAR(&apic_nmi_lock); 1759 1760 if (!psm_add_nmintr(0, (avfunc) apic_nmi_intr, 1761 "pcplusmp NMI handler", (caddr_t)NULL)) 1762 cmn_err(CE_WARN, "pcplusmp: Unable to add nmi handler"); 1763 1764 apic_init_intr(); 1765 1766 /* enable apic mode if imcr present */ 1767 if (apic_imcrp) { 1768 outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT); 1769 outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_APIC); 1770 } 1771 1772 /* mask interrupt vectors */ 1773 for (j = 0; j < apic_io_max; j++) { 1774 int intin_max; 1775 ioapic = apicioadr[j]; 1776 ioapic[APIC_IO_REG] = APIC_VERS_CMD; 1777 /* Bits 23-16 define the maximum redirection entries */ 1778 intin_max = (ioapic[APIC_IO_DATA] >> 16) & 0xff; 1779 for (i = 0; i < intin_max; i++) { 1780 ioapic[APIC_IO_REG] = APIC_RDT_CMD + 2 * i; 1781 ioapic[APIC_IO_DATA] = AV_MASK; 1782 } 1783 } 1784 1785 /* 1786 * Hack alert: deal with ACPI SCI interrupt chicken/egg here 1787 */ 1788 if (apic_sci_vect > 0) { 1789 /* 1790 * acpica has already done add_avintr(); we just 1791 * to finish the job by mimicing translate_irq() 1792 * 1793 * Fake up an intrspec and setup the tables 1794 */ 1795 ispec.intrspec_vec = apic_sci_vect; 1796 ispec.intrspec_pri = SCI_IPL; 1797 1798 if (apic_setup_irq_table(NULL, apic_sci_vect, 
NULL, 1799 &ispec, &apic_sci_flags, DDI_INTR_TYPE_FIXED) < 0) { 1800 cmn_err(CE_WARN, "!apic: SCI setup failed"); 1801 return; 1802 } 1803 irqptr = apic_irq_table[apic_sci_vect]; 1804 1805 /* Program I/O APIC */ 1806 (void) apic_setup_io_intr(irqptr, apic_sci_vect); 1807 1808 irqptr->airq_share++; 1809 } 1810 } 1811 1812 1813 static void 1814 apic_cpu_start(processorid_t cpun, caddr_t rm_code) 1815 { 1816 int loop_count; 1817 uint32_t vector; 1818 uint_t cpu_id, iflag; 1819 1820 cpu_id = apic_cpus[cpun].aci_local_id; 1821 1822 apic_cmos_ssb_set = 1; 1823 1824 /* 1825 * Interrupts on BSP cpu will be disabled during these startup 1826 * steps in order to avoid unwanted side effects from 1827 * executing interrupt handlers on a problematic BIOS. 1828 */ 1829 1830 iflag = intr_clear(); 1831 outb(CMOS_ADDR, SSB); 1832 outb(CMOS_DATA, BIOS_SHUTDOWN); 1833 1834 while (get_apic_cmd1() & AV_PENDING) 1835 apic_ret(); 1836 1837 /* for integrated - make sure there is one INIT IPI in buffer */ 1838 /* for external - it will wake up the cpu */ 1839 apicadr[APIC_INT_CMD2] = cpu_id << APIC_ICR_ID_BIT_OFFSET; 1840 apicadr[APIC_INT_CMD1] = AV_ASSERT | AV_RESET; 1841 1842 /* If only 1 CPU is installed, PENDING bit will not go low */ 1843 for (loop_count = 0x1000; loop_count; loop_count--) 1844 if (get_apic_cmd1() & AV_PENDING) 1845 apic_ret(); 1846 else 1847 break; 1848 1849 apicadr[APIC_INT_CMD2] = cpu_id << APIC_ICR_ID_BIT_OFFSET; 1850 apicadr[APIC_INT_CMD1] = AV_DEASSERT | AV_RESET; 1851 1852 drv_usecwait(20000); /* 20 milli sec */ 1853 1854 if (apic_cpus[cpun].aci_local_ver >= APIC_INTEGRATED_VERS) { 1855 /* integrated apic */ 1856 1857 rm_code = (caddr_t)(uintptr_t)rm_platter_pa; 1858 vector = (rm_platter_pa >> MMU_PAGESHIFT) & 1859 (APIC_VECTOR_MASK | APIC_IPL_MASK); 1860 1861 /* to offset the INIT IPI queue up in the buffer */ 1862 apicadr[APIC_INT_CMD2] = cpu_id << APIC_ICR_ID_BIT_OFFSET; 1863 apicadr[APIC_INT_CMD1] = vector | AV_STARTUP; 1864 1865 drv_usecwait(200); /* 20 
micro sec */ 1866 1867 apicadr[APIC_INT_CMD2] = cpu_id << APIC_ICR_ID_BIT_OFFSET; 1868 apicadr[APIC_INT_CMD1] = vector | AV_STARTUP; 1869 1870 drv_usecwait(200); /* 20 micro sec */ 1871 } 1872 intr_restore(iflag); 1873 } 1874 1875 1876 #ifdef DEBUG 1877 int apic_break_on_cpu = 9; 1878 int apic_stretch_interrupts = 0; 1879 int apic_stretch_ISR = 1 << 3; /* IPL of 3 matches nothing now */ 1880 1881 void 1882 apic_break() 1883 { 1884 } 1885 #endif /* DEBUG */ 1886 1887 /* 1888 * platform_intr_enter 1889 * 1890 * Called at the beginning of the interrupt service routine to 1891 * mask all level equal to and below the interrupt priority 1892 * of the interrupting vector. An EOI should be given to 1893 * the interrupt controller to enable other HW interrupts. 1894 * 1895 * Return -1 for spurious interrupts 1896 * 1897 */ 1898 /*ARGSUSED*/ 1899 static int 1900 apic_intr_enter(int ipl, int *vectorp) 1901 { 1902 uchar_t vector; 1903 int nipl; 1904 int irq, iflag; 1905 apic_cpus_info_t *cpu_infop; 1906 1907 /* 1908 * The real vector programmed in APIC is *vectorp + 0x20 1909 * But, cmnint code subtracts 0x20 before pushing it. 1910 * Hence APIC_BASE_VECT is 0x20. 
1911 */ 1912 1913 vector = (uchar_t)*vectorp; 1914 1915 /* if interrupted by the clock, increment apic_nsec_since_boot */ 1916 if (vector == apic_clkvect) { 1917 if (!apic_oneshot) { 1918 /* NOTE: this is not MT aware */ 1919 apic_hrtime_stamp++; 1920 apic_nsec_since_boot += apic_nsec_per_intr; 1921 apic_hrtime_stamp++; 1922 last_count_read = apic_hertz_count; 1923 apic_redistribute_compute(); 1924 } 1925 1926 /* We will avoid all the book keeping overhead for clock */ 1927 nipl = apic_vectortoipl[vector >> APIC_IPL_SHIFT]; 1928 #if defined(__amd64) 1929 setcr8((ulong_t)apic_cr8pri[nipl]); 1930 #else 1931 apicadr[APIC_TASK_REG] = apic_ipltopri[nipl]; 1932 #endif 1933 *vectorp = apic_vector_to_irq[vector + APIC_BASE_VECT]; 1934 apicadr[APIC_EOI_REG] = 0; 1935 return (nipl); 1936 } 1937 1938 cpu_infop = &apic_cpus[psm_get_cpu_id()]; 1939 1940 if (vector == (APIC_SPUR_INTR - APIC_BASE_VECT)) { 1941 cpu_infop->aci_spur_cnt++; 1942 return (APIC_INT_SPURIOUS); 1943 } 1944 1945 /* Check if the vector we got is really what we need */ 1946 if (apic_revector_pending) { 1947 /* 1948 * Disable interrupts for the duration of 1949 * the vector translation to prevent a self-race for 1950 * the apic_revector_lock. This cannot be done 1951 * in apic_xlate_vector because it is recursive and 1952 * we want the vector translation to be atomic with 1953 * respect to other (higher-priority) interrupts. 
1954 */ 1955 iflag = intr_clear(); 1956 vector = apic_xlate_vector(vector + APIC_BASE_VECT) - 1957 APIC_BASE_VECT; 1958 intr_restore(iflag); 1959 } 1960 1961 nipl = apic_vectortoipl[vector >> APIC_IPL_SHIFT]; 1962 *vectorp = irq = apic_vector_to_irq[vector + APIC_BASE_VECT]; 1963 1964 #if defined(__amd64) 1965 setcr8((ulong_t)apic_cr8pri[nipl]); 1966 #else 1967 apicadr[APIC_TASK_REG] = apic_ipltopri[nipl]; 1968 #endif 1969 1970 cpu_infop->aci_current[nipl] = (uchar_t)irq; 1971 cpu_infop->aci_curipl = (uchar_t)nipl; 1972 cpu_infop->aci_ISR_in_progress |= 1 << nipl; 1973 1974 /* 1975 * apic_level_intr could have been assimilated into the irq struct. 1976 * but, having it as a character array is more efficient in terms of 1977 * cache usage. So, we leave it as is. 1978 */ 1979 if (!apic_level_intr[irq]) 1980 apicadr[APIC_EOI_REG] = 0; 1981 1982 #ifdef DEBUG 1983 APIC_DEBUG_BUF_PUT(vector); 1984 APIC_DEBUG_BUF_PUT(irq); 1985 APIC_DEBUG_BUF_PUT(nipl); 1986 APIC_DEBUG_BUF_PUT(psm_get_cpu_id()); 1987 if ((apic_stretch_interrupts) && (apic_stretch_ISR & (1 << nipl))) 1988 drv_usecwait(apic_stretch_interrupts); 1989 1990 if (apic_break_on_cpu == psm_get_cpu_id()) 1991 apic_break(); 1992 #endif /* DEBUG */ 1993 return (nipl); 1994 } 1995 1996 static void 1997 apic_intr_exit(int prev_ipl, int irq) 1998 { 1999 apic_cpus_info_t *cpu_infop; 2000 2001 #if defined(__amd64) 2002 setcr8((ulong_t)apic_cr8pri[prev_ipl]); 2003 #else 2004 apicadr[APIC_TASK_REG] = apic_ipltopri[prev_ipl]; 2005 #endif 2006 2007 cpu_infop = &apic_cpus[psm_get_cpu_id()]; 2008 if (apic_level_intr[irq]) 2009 apicadr[APIC_EOI_REG] = 0; 2010 2011 cpu_infop->aci_curipl = (uchar_t)prev_ipl; 2012 /* ISR above current pri could not be in progress */ 2013 cpu_infop->aci_ISR_in_progress &= (2 << prev_ipl) - 1; 2014 } 2015 2016 /* 2017 * Mask all interrupts below or equal to the given IPL 2018 */ 2019 static void 2020 apic_setspl(int ipl) 2021 { 2022 2023 #if defined(__amd64) 2024 setcr8((ulong_t)apic_cr8pri[ipl]); 
2025 #else 2026 apicadr[APIC_TASK_REG] = apic_ipltopri[ipl]; 2027 #endif 2028 2029 /* interrupts at ipl above this cannot be in progress */ 2030 apic_cpus[psm_get_cpu_id()].aci_ISR_in_progress &= (2 << ipl) - 1; 2031 /* 2032 * this is a patch fix for the ALR QSMP P5 machine, so that interrupts 2033 * have enough time to come in before the priority is raised again 2034 * during the idle() loop. 2035 */ 2036 if (apic_setspl_delay) 2037 (void) get_apic_pri(); 2038 } 2039 2040 /* 2041 * trigger a software interrupt at the given IPL 2042 */ 2043 static void 2044 apic_set_softintr(int ipl) 2045 { 2046 int vector; 2047 uint_t flag; 2048 2049 vector = apic_resv_vector[ipl]; 2050 2051 flag = intr_clear(); 2052 2053 while (get_apic_cmd1() & AV_PENDING) 2054 apic_ret(); 2055 2056 /* generate interrupt at vector on itself only */ 2057 apicadr[APIC_INT_CMD1] = AV_SH_SELF | vector; 2058 2059 intr_restore(flag); 2060 } 2061 2062 /* 2063 * generates an interprocessor interrupt to another CPU 2064 */ 2065 static void 2066 apic_send_ipi(int cpun, int ipl) 2067 { 2068 int vector; 2069 uint_t flag; 2070 2071 vector = apic_resv_vector[ipl]; 2072 2073 flag = intr_clear(); 2074 2075 while (get_apic_cmd1() & AV_PENDING) 2076 apic_ret(); 2077 2078 apicadr[APIC_INT_CMD2] = 2079 apic_cpus[cpun].aci_local_id << APIC_ICR_ID_BIT_OFFSET; 2080 apicadr[APIC_INT_CMD1] = vector; 2081 2082 intr_restore(flag); 2083 } 2084 2085 2086 /*ARGSUSED*/ 2087 static void 2088 apic_set_idlecpu(processorid_t cpun) 2089 { 2090 } 2091 2092 /*ARGSUSED*/ 2093 static void 2094 apic_unset_idlecpu(processorid_t cpun) 2095 { 2096 } 2097 2098 2099 static void 2100 apic_ret() 2101 { 2102 } 2103 2104 static int 2105 get_apic_cmd1() 2106 { 2107 return (apicadr[APIC_INT_CMD1]); 2108 } 2109 2110 static int 2111 get_apic_pri() 2112 { 2113 #if defined(__amd64) 2114 return ((int)getcr8()); 2115 #else 2116 return (apicadr[APIC_TASK_REG]); 2117 #endif 2118 } 2119 2120 /* 2121 * If apic_coarse_time == 1, then apic_gettime() is used 
instead of 2122 * apic_gethrtime(). This is used for performance instead of accuracy. 2123 */ 2124 2125 static hrtime_t 2126 apic_gettime() 2127 { 2128 int old_hrtime_stamp; 2129 hrtime_t temp; 2130 2131 /* 2132 * In one-shot mode, we do not keep time, so if anyone 2133 * calls psm_gettime() directly, we vector over to 2134 * gethrtime(). 2135 * one-shot mode MUST NOT be enabled if this psm is the source of 2136 * hrtime. 2137 */ 2138 2139 if (apic_oneshot) 2140 return (gethrtime()); 2141 2142 2143 gettime_again: 2144 while ((old_hrtime_stamp = apic_hrtime_stamp) & 1) 2145 apic_ret(); 2146 2147 temp = apic_nsec_since_boot; 2148 2149 if (apic_hrtime_stamp != old_hrtime_stamp) { /* got an interrupt */ 2150 goto gettime_again; 2151 } 2152 return (temp); 2153 } 2154 2155 /* 2156 * Here we return the number of nanoseconds since booting. Note every 2157 * clock interrupt increments apic_nsec_since_boot by the appropriate 2158 * amount. 2159 */ 2160 static hrtime_t 2161 apic_gethrtime() 2162 { 2163 int curr_timeval, countval, elapsed_ticks, oflags; 2164 int old_hrtime_stamp, status; 2165 hrtime_t temp; 2166 uchar_t cpun; 2167 2168 2169 /* 2170 * In one-shot mode, we do not keep time, so if anyone 2171 * calls psm_gethrtime() directly, we vector over to 2172 * gethrtime(). 2173 * one-shot mode MUST NOT be enabled if this psm is the source of 2174 * hrtime. 2175 */ 2176 2177 if (apic_oneshot) 2178 return (gethrtime()); 2179 2180 oflags = intr_clear(); /* prevent migration */ 2181 2182 cpun = (uchar_t)((uint_t)apicadr[APIC_LID_REG] >> APIC_ID_BIT_OFFSET); 2183 2184 lock_set(&apic_gethrtime_lock); 2185 2186 gethrtime_again: 2187 while ((old_hrtime_stamp = apic_hrtime_stamp) & 1) 2188 apic_ret(); 2189 2190 /* 2191 * Check to see which CPU we are on. Note the time is kept on 2192 * the local APIC of CPU 0. If on CPU 0, simply read the current 2193 * counter. If on another CPU, issue a remote read command to CPU 0. 
2194 */ 2195 if (cpun == apic_cpus[0].aci_local_id) { 2196 countval = apicadr[APIC_CURR_COUNT]; 2197 } else { 2198 while (get_apic_cmd1() & AV_PENDING) 2199 apic_ret(); 2200 2201 apicadr[APIC_INT_CMD2] = 2202 apic_cpus[0].aci_local_id << APIC_ICR_ID_BIT_OFFSET; 2203 apicadr[APIC_INT_CMD1] = APIC_CURR_ADD|AV_REMOTE; 2204 2205 while ((status = get_apic_cmd1()) & AV_READ_PENDING) 2206 apic_ret(); 2207 2208 if (status & AV_REMOTE_STATUS) /* 1 = valid */ 2209 countval = apicadr[APIC_REMOTE_READ]; 2210 else { /* 0 = invalid */ 2211 apic_remote_hrterr++; 2212 /* 2213 * return last hrtime right now, will need more 2214 * testing if change to retry 2215 */ 2216 temp = apic_last_hrtime; 2217 2218 lock_clear(&apic_gethrtime_lock); 2219 2220 intr_restore(oflags); 2221 2222 return (temp); 2223 } 2224 } 2225 if (countval > last_count_read) 2226 countval = 0; 2227 else 2228 last_count_read = countval; 2229 2230 elapsed_ticks = apic_hertz_count - countval; 2231 2232 curr_timeval = elapsed_ticks * apic_nsec_per_tick; 2233 temp = apic_nsec_since_boot + curr_timeval; 2234 2235 if (apic_hrtime_stamp != old_hrtime_stamp) { /* got an interrupt */ 2236 /* we might have clobbered last_count_read. 
Restore it */ 2237 last_count_read = apic_hertz_count; 2238 goto gethrtime_again; 2239 } 2240 2241 if (temp < apic_last_hrtime) { 2242 /* return last hrtime if error occurs */ 2243 apic_hrtime_error++; 2244 temp = apic_last_hrtime; 2245 } 2246 else 2247 apic_last_hrtime = temp; 2248 2249 lock_clear(&apic_gethrtime_lock); 2250 intr_restore(oflags); 2251 2252 return (temp); 2253 } 2254 2255 /* apic NMI handler */ 2256 /*ARGSUSED*/ 2257 static void 2258 apic_nmi_intr(caddr_t arg) 2259 { 2260 if (apic_shutdown_processors) { 2261 apic_disable_local_apic(); 2262 return; 2263 } 2264 2265 if (lock_try(&apic_nmi_lock)) { 2266 if (apic_kmdb_on_nmi) { 2267 if (psm_debugger() == 0) { 2268 cmn_err(CE_PANIC, 2269 "NMI detected, kmdb is not available."); 2270 } else { 2271 debug_enter("\nNMI detected, entering kmdb.\n"); 2272 } 2273 } else { 2274 if (apic_panic_on_nmi) { 2275 /* Keep panic from entering kmdb. */ 2276 nopanicdebug = 1; 2277 cmn_err(CE_PANIC, "pcplusmp: NMI received"); 2278 } else { 2279 /* 2280 * prom_printf is the best shot we have 2281 * of something which is problem free from 2282 * high level/NMI type of interrupts 2283 */ 2284 prom_printf("pcplusmp: NMI received\n"); 2285 apic_error |= APIC_ERR_NMI; 2286 apic_num_nmis++; 2287 } 2288 } 2289 lock_clear(&apic_nmi_lock); 2290 } 2291 } 2292 2293 /* 2294 * Add mask bits to disable interrupt vector from happening 2295 * at or above IPL. In addition, it should remove mask bits 2296 * to enable interrupt vectors below the given IPL. 2297 * 2298 * Both add and delspl are complicated by the fact that different interrupts 2299 * may share IRQs. This can happen in two ways. 2300 * 1. The same H/W line is shared by more than 1 device 2301 * 1a. with interrupts at different IPLs 2302 * 1b. with interrupts at same IPL 2303 * 2. We ran out of vectors at a given IPL and started sharing vectors. 
2304 * 1b and 2 should be handled gracefully, except for the fact some ISRs 2305 * will get called often when no interrupt is pending for the device. 2306 * For 1a, we just hope that the machine blows up with the person who 2307 * set it up that way!. In the meantime, we handle it at the higher IPL. 2308 */ 2309 /*ARGSUSED*/ 2310 static int 2311 apic_addspl(int irqno, int ipl, int min_ipl, int max_ipl) 2312 { 2313 uchar_t vector; 2314 int iflag; 2315 apic_irq_t *irqptr, *irqheadptr; 2316 int irqindex; 2317 2318 ASSERT(max_ipl <= UCHAR_MAX); 2319 irqindex = IRQINDEX(irqno); 2320 2321 if ((irqindex == -1) || (!apic_irq_table[irqindex])) 2322 return (PSM_FAILURE); 2323 2324 irqptr = irqheadptr = apic_irq_table[irqindex]; 2325 2326 DDI_INTR_IMPLDBG((CE_CONT, "apic_addspl: dip=0x%p type=%d irqno=0x%x " 2327 "vector=0x%x\n", (void *)irqptr->airq_dip, 2328 irqptr->airq_mps_intr_index, irqno, irqptr->airq_vector)); 2329 2330 while (irqptr) { 2331 if (VIRTIRQ(irqindex, irqptr->airq_share_id) == irqno) 2332 break; 2333 irqptr = irqptr->airq_next; 2334 } 2335 irqptr->airq_share++; 2336 2337 /* return if it is not hardware interrupt */ 2338 if (irqptr->airq_mps_intr_index == RESERVE_INDEX) 2339 return (PSM_SUCCESS); 2340 2341 /* Or if there are more interupts at a higher IPL */ 2342 if (ipl != max_ipl) 2343 return (PSM_SUCCESS); 2344 2345 /* 2346 * if apic_picinit() has not been called yet, just return. 2347 * At the end of apic_picinit(), we will call setup_io_intr(). 2348 */ 2349 2350 if (!apic_flag) 2351 return (PSM_SUCCESS); 2352 2353 iflag = intr_clear(); 2354 2355 /* 2356 * Upgrade vector if max_ipl is not earlier ipl. If we cannot allocate, 2357 * return failure. Not very elegant, but then we hope the 2358 * machine will blow up with ... 
2359 */ 2360 if (irqptr->airq_ipl != max_ipl) { 2361 vector = apic_allocate_vector(max_ipl, irqindex, 1); 2362 if (vector == 0) { 2363 intr_restore(iflag); 2364 irqptr->airq_share--; 2365 return (PSM_FAILURE); 2366 } 2367 irqptr = irqheadptr; 2368 apic_mark_vector(irqptr->airq_vector, vector); 2369 while (irqptr) { 2370 irqptr->airq_vector = vector; 2371 irqptr->airq_ipl = (uchar_t)max_ipl; 2372 /* 2373 * reprogram irq being added and every one else 2374 * who is not in the UNINIT state 2375 */ 2376 if ((VIRTIRQ(irqindex, irqptr->airq_share_id) == 2377 irqno) || (irqptr->airq_temp_cpu != IRQ_UNINIT)) { 2378 apic_record_rdt_entry(irqptr, irqindex); 2379 (void) apic_setup_io_intr(irqptr, irqindex); 2380 } 2381 irqptr = irqptr->airq_next; 2382 } 2383 intr_restore(iflag); 2384 return (PSM_SUCCESS); 2385 } 2386 2387 ASSERT(irqptr); 2388 (void) apic_setup_io_intr(irqptr, irqindex); 2389 intr_restore(iflag); 2390 return (PSM_SUCCESS); 2391 } 2392 2393 /* 2394 * Recompute mask bits for the given interrupt vector. 2395 * If there is no interrupt servicing routine for this 2396 * vector, this function should disable interrupt vector 2397 * from happening at all IPLs. If there are still 2398 * handlers using the given vector, this function should 2399 * disable the given vector from happening below the lowest 2400 * IPL of the remaining hadlers. 
2401 */ 2402 /*ARGSUSED*/ 2403 static int 2404 apic_delspl(int irqno, int ipl, int min_ipl, int max_ipl) 2405 { 2406 uchar_t vector, bind_cpu; 2407 int iflag, intin, irqindex; 2408 volatile int32_t *ioapic; 2409 apic_irq_t *irqptr, *irqheadptr; 2410 2411 irqindex = IRQINDEX(irqno); 2412 irqptr = irqheadptr = apic_irq_table[irqindex]; 2413 2414 DDI_INTR_IMPLDBG((CE_CONT, "apic_delspl: dip=0x%p type=%d irqno=0x%x " 2415 "vector=0x%x\n", (void *)irqptr->airq_dip, 2416 irqptr->airq_mps_intr_index, irqno, irqptr->airq_vector)); 2417 2418 while (irqptr) { 2419 if (VIRTIRQ(irqindex, irqptr->airq_share_id) == irqno) 2420 break; 2421 irqptr = irqptr->airq_next; 2422 } 2423 ASSERT(irqptr); 2424 2425 irqptr->airq_share--; 2426 2427 if (ipl < max_ipl) 2428 return (PSM_SUCCESS); 2429 2430 /* return if it is not hardware interrupt */ 2431 if (irqptr->airq_mps_intr_index == RESERVE_INDEX) 2432 return (PSM_SUCCESS); 2433 2434 if (!apic_flag) { 2435 /* 2436 * Clear irq_struct. If two devices shared an intpt 2437 * line & 1 unloaded before picinit, we are hosed. But, then 2438 * we hope the machine will ... 2439 */ 2440 irqptr->airq_mps_intr_index = FREE_INDEX; 2441 irqptr->airq_temp_cpu = IRQ_UNINIT; 2442 apic_free_vector(irqptr->airq_vector); 2443 return (PSM_SUCCESS); 2444 } 2445 /* 2446 * Downgrade vector to new max_ipl if needed.If we cannot allocate, 2447 * use old IPL. Not very elegant, but then we hope ... 
2448 */ 2449 if ((irqptr->airq_ipl != max_ipl) && (max_ipl != PSM_INVALID_IPL)) { 2450 apic_irq_t *irqp; 2451 if (vector = apic_allocate_vector(max_ipl, irqno, 1)) { 2452 apic_mark_vector(irqheadptr->airq_vector, vector); 2453 irqp = irqheadptr; 2454 while (irqp) { 2455 irqp->airq_vector = vector; 2456 irqp->airq_ipl = (uchar_t)max_ipl; 2457 if (irqp->airq_temp_cpu != IRQ_UNINIT) { 2458 apic_record_rdt_entry(irqp, irqindex); 2459 (void) apic_setup_io_intr(irqp, 2460 irqindex); 2461 } 2462 irqp = irqp->airq_next; 2463 } 2464 } 2465 } 2466 2467 if (irqptr->airq_share) 2468 return (PSM_SUCCESS); 2469 2470 ioapic = apicioadr[irqptr->airq_ioapicindex]; 2471 intin = irqptr->airq_intin_no; 2472 iflag = intr_clear(); 2473 lock_set(&apic_ioapic_lock); 2474 ioapic[APIC_IO_REG] = APIC_RDT_CMD + 2 * intin; 2475 ioapic[APIC_IO_DATA] = AV_MASK; 2476 2477 /* Disable the MSI/X vector */ 2478 if (APIC_IS_MSI_OR_MSIX_INDEX(irqptr->airq_mps_intr_index)) { 2479 int type = (irqptr->airq_mps_intr_index == MSI_INDEX) ? 
2480 DDI_INTR_TYPE_MSI : DDI_INTR_TYPE_MSIX; 2481 2482 /* 2483 * Make sure we only disable on the last 2484 * of the multi-MSI support 2485 */ 2486 if (i_ddi_intr_get_current_nintrs(irqptr->airq_dip) == 1) { 2487 (void) apic_pci_msi_unconfigure(irqptr->airq_dip, 2488 type, irqptr->airq_ioapicindex); 2489 (void) apic_pci_msi_disable_mode(irqptr->airq_dip, 2490 type, irqptr->airq_ioapicindex); 2491 } 2492 } 2493 2494 if (max_ipl == PSM_INVALID_IPL) { 2495 ASSERT(irqheadptr == irqptr); 2496 bind_cpu = irqptr->airq_temp_cpu; 2497 if (((uchar_t)bind_cpu != IRQ_UNBOUND) && 2498 ((uchar_t)bind_cpu != IRQ_UNINIT)) { 2499 ASSERT((bind_cpu & ~IRQ_USER_BOUND) < apic_nproc); 2500 if (bind_cpu & IRQ_USER_BOUND) { 2501 /* If hardbound, temp_cpu == cpu */ 2502 bind_cpu &= ~IRQ_USER_BOUND; 2503 apic_cpus[bind_cpu].aci_bound--; 2504 } else 2505 apic_cpus[bind_cpu].aci_temp_bound--; 2506 } 2507 lock_clear(&apic_ioapic_lock); 2508 intr_restore(iflag); 2509 irqptr->airq_temp_cpu = IRQ_UNINIT; 2510 irqptr->airq_mps_intr_index = FREE_INDEX; 2511 apic_free_vector(irqptr->airq_vector); 2512 return (PSM_SUCCESS); 2513 } 2514 lock_clear(&apic_ioapic_lock); 2515 intr_restore(iflag); 2516 2517 mutex_enter(&airq_mutex); 2518 if ((irqptr == apic_irq_table[irqindex])) { 2519 apic_irq_t *oldirqptr; 2520 /* Move valid irq entry to the head */ 2521 irqheadptr = oldirqptr = irqptr; 2522 irqptr = irqptr->airq_next; 2523 ASSERT(irqptr); 2524 while (irqptr) { 2525 if (irqptr->airq_mps_intr_index != FREE_INDEX) 2526 break; 2527 oldirqptr = irqptr; 2528 irqptr = irqptr->airq_next; 2529 } 2530 /* remove all invalid ones from the beginning */ 2531 apic_irq_table[irqindex] = irqptr; 2532 /* 2533 * and link them back after the head. 
The invalid ones 2534 * begin with irqheadptr and end at oldirqptr 2535 */ 2536 oldirqptr->airq_next = irqptr->airq_next; 2537 irqptr->airq_next = irqheadptr; 2538 } 2539 mutex_exit(&airq_mutex); 2540 2541 irqptr->airq_temp_cpu = IRQ_UNINIT; 2542 irqptr->airq_mps_intr_index = FREE_INDEX; 2543 return (PSM_SUCCESS); 2544 } 2545 2546 /* 2547 * Return HW interrupt number corresponding to the given IPL 2548 */ 2549 /*ARGSUSED*/ 2550 static int 2551 apic_softlvl_to_irq(int ipl) 2552 { 2553 /* 2554 * Do not use apic to trigger soft interrupt. 2555 * It will cause the system to hang when 2 hardware interrupts 2556 * at the same priority with the softint are already accepted 2557 * by the apic. Cause the AV_PENDING bit will not be cleared 2558 * until one of the hardware interrupt is eoi'ed. If we need 2559 * to send an ipi at this time, we will end up looping forever 2560 * to wait for the AV_PENDING bit to clear. 2561 */ 2562 return (PSM_SV_SOFTWARE); 2563 } 2564 2565 static int 2566 apic_post_cpu_start() 2567 { 2568 int i, cpun; 2569 apic_irq_t *irq_ptr; 2570 2571 apic_init_intr(); 2572 2573 /* 2574 * since some systems don't enable the internal cache on the non-boot 2575 * cpus, so we have to enable them here 2576 */ 2577 setcr0(getcr0() & ~(0x60000000)); 2578 2579 while (get_apic_cmd1() & AV_PENDING) 2580 apic_ret(); 2581 2582 cpun = psm_get_cpu_id(); 2583 apic_cpus[cpun].aci_status = APIC_CPU_ONLINE | APIC_CPU_INTR_ENABLE; 2584 2585 for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) { 2586 irq_ptr = apic_irq_table[i]; 2587 if ((irq_ptr == NULL) || 2588 ((irq_ptr->airq_cpu & ~IRQ_USER_BOUND) != cpun)) 2589 continue; 2590 2591 while (irq_ptr) { 2592 if (irq_ptr->airq_temp_cpu != IRQ_UNINIT) 2593 (void) apic_rebind(irq_ptr, cpun, 1, IMMEDIATE); 2594 irq_ptr = irq_ptr->airq_next; 2595 } 2596 } 2597 2598 return (PSM_SUCCESS); 2599 } 2600 2601 processorid_t 2602 apic_get_next_processorid(processorid_t cpu_id) 2603 { 2604 2605 int i; 2606 2607 if (cpu_id == -1) 
2608 return ((processorid_t)0); 2609 2610 for (i = cpu_id + 1; i < NCPU; i++) { 2611 if (CPU_IN_SET(apic_cpumask, i)) 2612 return (i); 2613 } 2614 2615 return ((processorid_t)-1); 2616 } 2617 2618 2619 /* 2620 * type == -1 indicates it is an internal request. Do not change 2621 * resv_vector for these requests 2622 */ 2623 static int 2624 apic_get_ipivect(int ipl, int type) 2625 { 2626 uchar_t vector; 2627 int irq; 2628 2629 if (irq = apic_allocate_irq(APIC_VECTOR(ipl))) { 2630 if (vector = apic_allocate_vector(ipl, irq, 1)) { 2631 apic_irq_table[irq]->airq_mps_intr_index = 2632 RESERVE_INDEX; 2633 apic_irq_table[irq]->airq_vector = vector; 2634 if (type != -1) { 2635 apic_resv_vector[ipl] = vector; 2636 } 2637 return (irq); 2638 } 2639 } 2640 apic_error |= APIC_ERR_GET_IPIVECT_FAIL; 2641 return (-1); /* shouldn't happen */ 2642 } 2643 2644 static int 2645 apic_getclkirq(int ipl) 2646 { 2647 int irq; 2648 2649 if ((irq = apic_get_ipivect(ipl, -1)) == -1) 2650 return (-1); 2651 /* 2652 * Note the vector in apic_clkvect for per clock handling. 2653 */ 2654 apic_clkvect = apic_irq_table[irq]->airq_vector - APIC_BASE_VECT; 2655 APIC_VERBOSE_IOAPIC((CE_NOTE, "get_clkirq: vector = %x\n", 2656 apic_clkvect)); 2657 return (irq); 2658 } 2659 2660 /* 2661 * Return the number of APIC clock ticks elapsed for 8245 to decrement 2662 * (APIC_TIME_COUNT + pit_ticks_adj) ticks. 
 */
/*
 * addr is the base of the local APIC register block; the current-count
 * register is located by adding APIC_CURR_COUNT to it.  *pit_ticks_adj
 * receives the number of PIT ticks by which the final sample overshot the
 * APIC_TIME_COUNT target.  Interrupts are disabled for the whole measurement.
 */
static uint_t
apic_calibrate(volatile uint32_t *addr, uint16_t *pit_ticks_adj)
{
	uint8_t		pit_tick_lo;
	uint16_t	pit_tick, target_pit_tick;
	uint32_t	start_apic_tick, end_apic_tick;
	int		iflag;

	addr += APIC_CURR_COUNT;

	iflag = intr_clear();

	/*
	 * Each PIT sample is two reads of the same port, low byte first.
	 * Keep sampling until the counter is at least APIC_TIME_MIN and its
	 * low byte lies strictly between APIC_LB_MIN and APIC_LB_MAX.
	 */
	do {
		pit_tick_lo = inb(PITCTR0_PORT);
		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
	} while (pit_tick < APIC_TIME_MIN ||
	    pit_tick_lo <= APIC_LB_MIN || pit_tick_lo >= APIC_LB_MAX);

	/*
	 * Wait for the 8254 to decrement by 5 ticks to ensure
	 * we didn't start in the middle of a tick.
	 * Compare with 0x10 for the wrap around case.
	 */
	target_pit_tick = pit_tick - 5;
	do {
		pit_tick_lo = inb(PITCTR0_PORT);
		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
	} while (pit_tick > target_pit_tick || pit_tick_lo < 0x10);

	/* first APIC sample, taken right after the PIT edge */
	start_apic_tick = *addr;

	/*
	 * Wait for the 8254 to decrement by
	 * (APIC_TIME_COUNT + pit_ticks_adj) ticks
	 */
	target_pit_tick = pit_tick - APIC_TIME_COUNT;
	do {
		pit_tick_lo = inb(PITCTR0_PORT);
		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
	} while (pit_tick > target_pit_tick || pit_tick_lo < 0x10);

	/* second APIC sample */
	end_apic_tick = *addr;

	/* how far past the target the PIT had counted when we sampled */
	*pit_ticks_adj = target_pit_tick - pit_tick;

	intr_restore(iflag);

	/* the APIC counter counts down, so start - end is the elapsed count */
	return (start_apic_tick - end_apic_tick);
}

/*
 * Initialise the APIC timer on the local APIC of CPU 0 to the desired
 * frequency.  Note at this stage in the boot sequence, the boot processor
 * is the only active processor.
 * hertz value of 0 indicates a one-shot mode request.  In this case
 * the function returns the resolution (in nanoseconds) for the hardware
 * timer interrupt.  If one-shot mode capability is not available,
 * the return value will be 0. apic_oneshot_enable is a global switch
 * for disabling the functionality.
 * A non-zero positive value for hertz indicates a periodic mode request.
 * In this case the hardware will be programmed to generate clock interrupts
 * at hertz frequency and returns the resolution of interrupts in
 * nanosecond.
 *
 * The timer is calibrated against the 8254 on the first call only; later
 * calls reuse apic_nsec_per_tick.
 */

static int
apic_clkinit(int hertz)
{

	uint_t		apic_ticks = 0;
	uint_t		pit_time;
	int		ret;
	uint16_t	pit_ticks_adj;
	static int	firsttime = 1;

	if (firsttime) {
		/* first time calibrate */

		apicadr[APIC_DIVIDE_REG] = 0x0;
		apicadr[APIC_INIT_COUNT] = APIC_MAXVAL;

		/* set periodic interrupt based on CLKIN */
		apicadr[APIC_LOCAL_TIMER] =
		    (apic_clkvect + APIC_BASE_VECT) | AV_TIME;
		tenmicrosec();

		apic_ticks = apic_calibrate(apicadr, &pit_ticks_adj);

		/* mask the timer again now that the measurement is done */
		apicadr[APIC_LOCAL_TIMER] =
		    (apic_clkvect + APIC_BASE_VECT) | AV_MASK;
		/*
		 * pit time is the amount of real time (in nanoseconds ) it took
		 * the 8254 to decrement (APIC_TIME_COUNT + pit_ticks_adj) ticks
		 */
		pit_time = ((longlong_t)(APIC_TIME_COUNT +
		    pit_ticks_adj) * NANOSEC) / PIT_HZ;

		/*
		 * Determine the number of nanoseconds per APIC clock tick
		 * and then determine how many APIC ticks to interrupt at the
		 * desired frequency
		 *
		 * NOTE(review): apic_ticks is the divisor here and a zero
		 * result from apic_calibrate() is not checked -- confirm it
		 * cannot happen.
		 */
		apic_nsec_per_tick = pit_time / apic_ticks;
		if (apic_nsec_per_tick == 0)
			apic_nsec_per_tick = 1;

		/* the interval timer initial count is 32 bit max */
		apic_nsec_max = (hrtime_t)apic_nsec_per_tick * APIC_MAXVAL;
		firsttime = 0;
	}

	if (hertz != 0) {
		/* periodic */
		apic_nsec_per_intr = NANOSEC / hertz;
		apic_hertz_count = (longlong_t)apic_nsec_per_intr /
		    apic_nsec_per_tick;
		apic_sample_factor_redistribution = hertz + 1;
	}

	/*
	 * NOTE(review): these three marks are rescaled on every call, not
	 * only on the first one -- confirm repeated calls are not expected.
	 */
	apic_int_busy_mark = (apic_int_busy_mark *
	    apic_sample_factor_redistribution) / 100;
	apic_int_free_mark = (apic_int_free_mark *
	    apic_sample_factor_redistribution) / 100;
	apic_diff_for_redistribution = (apic_diff_for_redistribution *
	    apic_sample_factor_redistribution) / 100;

	if (hertz == 0) {
		/* requested one_shot */
		if (!apic_oneshot_enable)
			return (0);
		apic_oneshot = 1;
		ret = (int)apic_nsec_per_tick;
	} else {
		/* program the local APIC to interrupt at the given frequency */
		apicadr[APIC_INIT_COUNT] = apic_hertz_count;
		apicadr[APIC_LOCAL_TIMER] =
		    (apic_clkvect + APIC_BASE_VECT) | AV_TIME;
		apic_oneshot = 0;
		ret = NANOSEC / hertz;
	}

	return (ret);

}

/*
 * apic_preshutdown:
 * Called early in shutdown whilst we can still access filesystems to do
 * things like loading modules which will be required to complete shutdown
 * after filesystems are all unmounted.
 *
 * As written it only emits the verbose trace; both paths after that return
 * without doing any work.
 */
static void
apic_preshutdown(int cmd, int fcn)
{
	APIC_VERBOSE_POWEROFF(("apic_preshutdown(%d,%d); m=%d a=%d\n",
	    cmd, fcn, apic_poweroff_method, apic_enable_acpi));

	if ((cmd != A_SHUTDOWN) || (fcn != AD_POWEROFF)) {
		return;
	}
}

/*
 * apic_shutdown:
 * Quiesce the interrupt hardware (NMI the other CPUs, mask every I/O APIC
 * redirection entry, switch the IMCR back to PIC mode if present, disable
 * the local APIC).  If the request is a power-off (cmd == A_SHUTDOWN and
 * fcn == AD_POWEROFF), then attempt it with the method selected by
 * apic_poweroff_method.
 */
static void
apic_shutdown(int cmd, int fcn)
{
	int iflag, restarts, attempts;
	int i, j;
	volatile int32_t *ioapic;
	uchar_t	byte;

	/* Send NMI to all CPUs except self to do per processor shutdown */
	iflag = intr_clear();
	while (get_apic_cmd1() & AV_PENDING)
		apic_ret();
	/* apic_nmi_intr() checks this flag and disables the local APIC */
	apic_shutdown_processors = 1;
	apicadr[APIC_INT_CMD1] = AV_NMI | AV_LEVEL | AV_SH_ALL_EXCSELF;

	/* restore cmos shutdown byte before reboot */
	if (apic_cmos_ssb_set) {
		outb(CMOS_ADDR, SSB);
		outb(CMOS_DATA, 0);
	}
	/* Disable the I/O APIC redirection entries */
	for (j = 0; j < apic_io_max; j++) {
		int intin_max;
		ioapic = apicioadr[j];
		ioapic[APIC_IO_REG] = APIC_VERS_CMD;
		/* Bits 23-16 define the maximum redirection entries */
		intin_max = (ioapic[APIC_IO_DATA] >> 16) & 0xff;
		for (i = 0; i < intin_max; i++) {
			ioapic[APIC_IO_REG] = APIC_RDT_CMD + 2 * i;
			ioapic[APIC_IO_DATA] = AV_MASK;
		}
	}

	/* disable apic mode if imcr present */
	if (apic_imcrp) {
		outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT);
		outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_PIC);
	}

	apic_disable_local_apic();

	intr_restore(iflag);

	/* anything other than a power-off request is finished here */
	if ((cmd != A_SHUTDOWN) || (fcn != AD_POWEROFF)) {
		return;
	}

	switch (apic_poweroff_method) {
		case APIC_POWEROFF_VIA_RTC:

			/* select the extended NVRAM bank in the RTC */
			outb(CMOS_ADDR, RTC_REGA);
			byte = inb(CMOS_DATA);
			outb(CMOS_DATA, (byte | EXT_BANK));

			outb(CMOS_ADDR, PFR_REG);

			/* for Predator must toggle the PAB bit */
			byte = inb(CMOS_DATA);

			/*
			 * clear power active bar, wakeup alarm and
			 * kickstart
			 */
			byte &= ~(PAB_CBIT | WF_FLAG | KS_FLAG);
			outb(CMOS_DATA, byte);

			/* delay before next write */
			drv_usecwait(1000);

			/* for S40 the following would suffice */
			byte = inb(CMOS_DATA);

			/* power active bar control bit */
			byte |= PAB_CBIT;
			outb(CMOS_DATA, byte);

			break;

		case APIC_POWEROFF_VIA_ASPEN_BMC:
			restarts = 0;
restart_aspen_bmc:
			/* the third pass through this label gives up */
			if (++restarts == 3)
				break;
			attempts = 0;
			/* poll the busy flag, 1ms apart, restarting when stuck */
			do {
				byte = inb(MISMIC_FLAG_REGISTER);
				byte &= MISMIC_BUSY_MASK;
				if (byte != 0) {
					drv_usecwait(1000);
					if (attempts >= 3)
						goto restart_aspen_bmc;
					++attempts;
				}
			} while (byte != 0);
			outb(MISMIC_CNTL_REGISTER, CC_SMS_GET_STATUS);
			byte = inb(MISMIC_FLAG_REGISTER);
			byte |= 0x1;
			outb(MISMIC_FLAG_REGISTER, byte);
			i = 0;
			/* send each cntl/data pair of the aspen_bmc[] table */
			for (; i < (sizeof (aspen_bmc)/sizeof (aspen_bmc[0]));
			    i++) {
				attempts = 0;
				do {
					byte = inb(MISMIC_FLAG_REGISTER);
					byte &= MISMIC_BUSY_MASK;
					if (byte != 0) {
						drv_usecwait(1000);
						if (attempts >= 3)
							goto restart_aspen_bmc;
						++attempts;
					}
				} while (byte != 0);
				outb(MISMIC_CNTL_REGISTER, aspen_bmc[i].cntl);
				outb(MISMIC_DATA_REGISTER, aspen_bmc[i].data);
				byte = inb(MISMIC_FLAG_REGISTER);
				byte |= 0x1;
				outb(MISMIC_FLAG_REGISTER, byte);
			}
			break;

		case APIC_POWEROFF_VIA_SITKA_BMC:
			restarts = 0;
restart_sitka_bmc:
			/* the third pass through this label gives up */
			if (++restarts == 3)
				break;
			attempts = 0;
			/* wait until the interface has left read/write state */
			do {
				byte = inb(SMS_STATUS_REGISTER);
				byte &= SMS_STATE_MASK;
				if ((byte == SMS_READ_STATE) ||
				    (byte == SMS_WRITE_STATE)) {
					drv_usecwait(1000);
					if (attempts >= 3)
						goto restart_sitka_bmc;
					++attempts;
				}
			} while ((byte == SMS_READ_STATE) ||
			    (byte == SMS_WRITE_STATE));
			outb(SMS_COMMAND_REGISTER, SMS_GET_STATUS);
			i = 0;
			/* send each port/data pair of the sitka_bmc[] table */
			for (; i < (sizeof (sitka_bmc)/sizeof (sitka_bmc[0]));
			    i++) {
				attempts = 0;
				do {
					byte = inb(SMS_STATUS_REGISTER);
					byte &= SMS_IBF_MASK;
					if (byte != 0) {
						drv_usecwait(1000);
						if (attempts >= 3)
							goto restart_sitka_bmc;
						++attempts;
					}
				} while (byte != 0);
				outb(sitka_bmc[i].port, sitka_bmc[i].data);
			}
			break;

		case APIC_POWEROFF_NONE:

			/* If no APIC direct method, we will try using ACPI */
			if (apic_enable_acpi) {
				if (acpi_poweroff() == 1)
					return;
			} else
				return;

			break;
	}
	/*
	 * Wait a limited time here for power to go off.
	 * If the power does not go off, then there was a
	 * problem and we should continue to the halt which
	 * prints a message for the user to press a key to
	 * reboot.
	 */
	drv_usecwait(7000000); /* wait seven seconds */

}

/*
 * Try and disable all interrupts. We just assign interrupts to other
 * processors based on policy. If any were bound by user request, we
 * let them continue and return failure.
We do not bother to check 3010 * for cache affinity while rebinding. 3011 */ 3012 3013 static int 3014 apic_disable_intr(processorid_t cpun) 3015 { 3016 int bind_cpu = 0, i, hardbound = 0, iflag; 3017 apic_irq_t *irq_ptr; 3018 3019 iflag = intr_clear(); 3020 lock_set(&apic_ioapic_lock); 3021 apic_cpus[cpun].aci_status &= ~APIC_CPU_INTR_ENABLE; 3022 lock_clear(&apic_ioapic_lock); 3023 intr_restore(iflag); 3024 apic_cpus[cpun].aci_curipl = 0; 3025 i = apic_min_device_irq; 3026 for (; i <= apic_max_device_irq; i++) { 3027 /* 3028 * If there are bound interrupts on this cpu, then 3029 * rebind them to other processors. 3030 */ 3031 if ((irq_ptr = apic_irq_table[i]) != NULL) { 3032 ASSERT((irq_ptr->airq_temp_cpu == IRQ_UNBOUND) || 3033 (irq_ptr->airq_temp_cpu == IRQ_UNINIT) || 3034 ((irq_ptr->airq_temp_cpu & ~IRQ_USER_BOUND) < 3035 apic_nproc)); 3036 3037 if (irq_ptr->airq_temp_cpu == (cpun | IRQ_USER_BOUND)) { 3038 hardbound = 1; 3039 continue; 3040 } 3041 3042 if (irq_ptr->airq_temp_cpu == cpun) { 3043 do { 3044 apic_next_bind_cpu += 2; 3045 bind_cpu = apic_next_bind_cpu / 2; 3046 if (bind_cpu >= apic_nproc) { 3047 apic_next_bind_cpu = 1; 3048 bind_cpu = 0; 3049 3050 } 3051 } while (apic_rebind_all(irq_ptr, bind_cpu, 1)); 3052 } 3053 } 3054 } 3055 if (hardbound) { 3056 cmn_err(CE_WARN, "Could not disable interrupts on %d" 3057 "due to user bound interrupts", cpun); 3058 return (PSM_FAILURE); 3059 } 3060 else 3061 return (PSM_SUCCESS); 3062 } 3063 3064 static void 3065 apic_enable_intr(processorid_t cpun) 3066 { 3067 int i, iflag; 3068 apic_irq_t *irq_ptr; 3069 3070 iflag = intr_clear(); 3071 lock_set(&apic_ioapic_lock); 3072 apic_cpus[cpun].aci_status |= APIC_CPU_INTR_ENABLE; 3073 lock_clear(&apic_ioapic_lock); 3074 intr_restore(iflag); 3075 3076 i = apic_min_device_irq; 3077 for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) { 3078 if ((irq_ptr = apic_irq_table[i]) != NULL) { 3079 if ((irq_ptr->airq_cpu & ~IRQ_USER_BOUND) == cpun) { 3080 (void) 
apic_rebind_all(irq_ptr, 3081 irq_ptr->airq_cpu, 1); 3082 } 3083 } 3084 } 3085 } 3086 3087 /* 3088 * apic_introp_xlate() replaces apic_translate_irq() and is 3089 * called only from apic_intr_ops(). With the new ADII framework, 3090 * the priority can no longer be retrived through i_ddi_get_intrspec(). 3091 * It has to be passed in from the caller. 3092 */ 3093 int 3094 apic_introp_xlate(dev_info_t *dip, struct intrspec *ispec, int type) 3095 { 3096 char dev_type[16]; 3097 int dev_len, pci_irq, newirq, bustype, devid, busid, i; 3098 int irqno = ispec->intrspec_vec; 3099 ddi_acc_handle_t cfg_handle; 3100 uchar_t ipin; 3101 struct apic_io_intr *intrp; 3102 iflag_t intr_flag; 3103 APIC_HEADER *hp; 3104 MADT_INTERRUPT_OVERRIDE *isop; 3105 apic_irq_t *airqp; 3106 3107 DDI_INTR_IMPLDBG((CE_CONT, "apic_introp_xlate: dip=0x%p name=%s " 3108 "type=%d irqno=0x%x\n", (void *)dip, ddi_get_name(dip), type, 3109 irqno)); 3110 3111 if (DDI_INTR_IS_MSI_OR_MSIX(type)) { 3112 if ((airqp = apic_find_irq(dip, ispec, type)) != NULL) 3113 return (apic_vector_to_irq[airqp->airq_vector]); 3114 return (apic_setup_irq_table(dip, irqno, NULL, ispec, 3115 NULL, type)); 3116 } 3117 3118 bustype = 0; 3119 3120 /* check if we have already translated this irq */ 3121 mutex_enter(&airq_mutex); 3122 newirq = apic_min_device_irq; 3123 for (; newirq <= apic_max_device_irq; newirq++) { 3124 airqp = apic_irq_table[newirq]; 3125 while (airqp) { 3126 if ((airqp->airq_dip == dip) && 3127 (airqp->airq_origirq == irqno) && 3128 (airqp->airq_mps_intr_index != FREE_INDEX)) { 3129 3130 mutex_exit(&airq_mutex); 3131 return (VIRTIRQ(newirq, airqp->airq_share_id)); 3132 } 3133 airqp = airqp->airq_next; 3134 } 3135 } 3136 mutex_exit(&airq_mutex); 3137 3138 if (apic_defconf) 3139 goto defconf; 3140 3141 if ((dip == NULL) || (!apic_irq_translate && !apic_enable_acpi)) 3142 goto nonpci; 3143 3144 dev_len = sizeof (dev_type); 3145 if (ddi_getlongprop_buf(DDI_DEV_T_ANY, ddi_get_parent(dip), 3146 DDI_PROP_DONTPASS, 
"device_type", (caddr_t)dev_type, 3147 &dev_len) != DDI_PROP_SUCCESS) { 3148 goto nonpci; 3149 } 3150 3151 if ((strcmp(dev_type, "pci") == 0) || 3152 (strcmp(dev_type, "pciex") == 0)) { 3153 /* pci device */ 3154 if (acpica_get_bdf(dip, &busid, &devid, NULL) != 0) 3155 goto nonpci; 3156 if (busid == 0 && apic_pci_bus_total == 1) 3157 busid = (int)apic_single_pci_busid; 3158 3159 if (pci_config_setup(dip, &cfg_handle) != DDI_SUCCESS) 3160 goto nonpci; 3161 ipin = pci_config_get8(cfg_handle, PCI_CONF_IPIN) - PCI_INTA; 3162 pci_config_teardown(&cfg_handle); 3163 if (apic_enable_acpi && !apic_use_acpi_madt_only) { 3164 if (apic_acpi_translate_pci_irq(dip, busid, devid, 3165 ipin, &pci_irq, &intr_flag) != ACPI_PSM_SUCCESS) 3166 goto nonpci; 3167 3168 intr_flag.bustype = BUS_PCI; 3169 if ((newirq = apic_setup_irq_table(dip, pci_irq, NULL, 3170 ispec, &intr_flag, type)) == -1) 3171 goto nonpci; 3172 return (newirq); 3173 } else { 3174 pci_irq = ((devid & 0x1f) << 2) | (ipin & 0x3); 3175 if ((intrp = apic_find_io_intr_w_busid(pci_irq, busid)) 3176 == NULL) { 3177 if ((pci_irq = apic_handle_pci_pci_bridge(dip, 3178 devid, ipin, &intrp)) == -1) 3179 goto nonpci; 3180 } 3181 if ((newirq = apic_setup_irq_table(dip, pci_irq, intrp, 3182 ispec, NULL, type)) == -1) 3183 goto nonpci; 3184 return (newirq); 3185 } 3186 } else if (strcmp(dev_type, "isa") == 0) 3187 bustype = BUS_ISA; 3188 else if (strcmp(dev_type, "eisa") == 0) 3189 bustype = BUS_EISA; 3190 3191 nonpci: 3192 if (apic_enable_acpi && !apic_use_acpi_madt_only) { 3193 /* search iso entries first */ 3194 if (acpi_iso_cnt != 0) { 3195 hp = (APIC_HEADER *)acpi_isop; 3196 i = 0; 3197 while (i < acpi_iso_cnt) { 3198 if (hp->Type == APIC_XRUPT_OVERRIDE) { 3199 isop = (MADT_INTERRUPT_OVERRIDE *)hp; 3200 if (isop->Bus == 0 && 3201 isop->Source == irqno) { 3202 newirq = isop->Interrupt; 3203 intr_flag.intr_po = 3204 isop->Polarity; 3205 intr_flag.intr_el = 3206 isop->TriggerMode; 3207 intr_flag.bustype = BUS_ISA; 3208 3209 return 
(apic_setup_irq_table( 3210 dip, newirq, NULL, ispec, 3211 &intr_flag, type)); 3212 3213 } 3214 i++; 3215 } 3216 hp = (APIC_HEADER *)(((char *)hp) + 3217 hp->Length); 3218 } 3219 } 3220 intr_flag.intr_po = INTR_PO_ACTIVE_HIGH; 3221 intr_flag.intr_el = INTR_EL_EDGE; 3222 intr_flag.bustype = BUS_ISA; 3223 return (apic_setup_irq_table(dip, irqno, NULL, ispec, 3224 &intr_flag, type)); 3225 } else { 3226 if (bustype == 0) 3227 bustype = eisa_level_intr_mask ? BUS_EISA : BUS_ISA; 3228 for (i = 0; i < 2; i++) { 3229 if (((busid = apic_find_bus_id(bustype)) != -1) && 3230 ((intrp = apic_find_io_intr_w_busid(irqno, busid)) 3231 != NULL)) { 3232 if ((newirq = apic_setup_irq_table(dip, irqno, 3233 intrp, ispec, NULL, type)) != -1) { 3234 return (newirq); 3235 } 3236 goto defconf; 3237 } 3238 bustype = (bustype == BUS_EISA) ? BUS_ISA : BUS_EISA; 3239 } 3240 } 3241 3242 /* MPS default configuration */ 3243 defconf: 3244 newirq = apic_setup_irq_table(dip, irqno, NULL, ispec, NULL, type); 3245 if (newirq == -1) 3246 return (newirq); 3247 ASSERT(IRQINDEX(newirq) == irqno); 3248 ASSERT(apic_irq_table[irqno]); 3249 return (newirq); 3250 } 3251 3252 3253 3254 3255 3256 3257 /* 3258 * On machines with PCI-PCI bridges, a device behind a PCI-PCI bridge 3259 * needs special handling. We may need to chase up the device tree, 3260 * using the PCI-PCI Bridge specification's "rotating IPIN assumptions", 3261 * to find the IPIN at the root bus that relates to the IPIN on the 3262 * subsidiary bus (for ACPI or MP). We may, however, have an entry 3263 * in the MP table or the ACPI namespace for this device itself. 3264 * We handle both cases in the search below. 
3265 */ 3266 /* this is the non-acpi version */ 3267 static int 3268 apic_handle_pci_pci_bridge(dev_info_t *idip, int child_devno, int child_ipin, 3269 struct apic_io_intr **intrp) 3270 { 3271 dev_info_t *dipp, *dip; 3272 int pci_irq; 3273 ddi_acc_handle_t cfg_handle; 3274 int bridge_devno, bridge_bus; 3275 int ipin; 3276 3277 dip = idip; 3278 3279 /*CONSTCOND*/ 3280 while (1) { 3281 if ((dipp = ddi_get_parent(dip)) == (dev_info_t *)NULL) 3282 return (-1); 3283 if ((pci_config_setup(dipp, &cfg_handle) == DDI_SUCCESS) && 3284 (pci_config_get8(cfg_handle, PCI_CONF_BASCLASS) == 3285 PCI_CLASS_BRIDGE) && (pci_config_get8(cfg_handle, 3286 PCI_CONF_SUBCLASS) == PCI_BRIDGE_PCI)) { 3287 pci_config_teardown(&cfg_handle); 3288 if (acpica_get_bdf(dipp, &bridge_bus, &bridge_devno, 3289 NULL) != 0) 3290 return (-1); 3291 /* 3292 * This is the rotating scheme that Compaq is using 3293 * and documented in the pci to pci spec. Also, if 3294 * the pci to pci bridge is behind another pci to 3295 * pci bridge, then it need to keep transversing 3296 * up until an interrupt entry is found or reach 3297 * the top of the tree 3298 */ 3299 ipin = (child_devno + child_ipin) % PCI_INTD; 3300 if (bridge_bus == 0 && apic_pci_bus_total == 1) 3301 bridge_bus = (int)apic_single_pci_busid; 3302 pci_irq = ((bridge_devno & 0x1f) << 2) | 3303 (ipin & 0x3); 3304 if ((*intrp = apic_find_io_intr_w_busid(pci_irq, 3305 bridge_bus)) != NULL) { 3306 return (pci_irq); 3307 } 3308 dip = dipp; 3309 child_devno = bridge_devno; 3310 child_ipin = ipin; 3311 } else 3312 return (-1); 3313 } 3314 /*LINTED: function will not fall off the bottom */ 3315 } 3316 3317 3318 3319 3320 static uchar_t 3321 acpi_find_ioapic(int irq) 3322 { 3323 int i; 3324 3325 for (i = 0; i < apic_io_max; i++) { 3326 if (irq >= apic_io_vectbase[i] && irq <= apic_io_vectend[i]) 3327 return (i); 3328 } 3329 return (0xFF); /* shouldn't happen */ 3330 } 3331 3332 /* 3333 * See if two irqs are compatible for sharing a vector. 
3334 * Currently we only support sharing of PCI devices. 3335 */ 3336 static int 3337 acpi_intr_compatible(iflag_t iflag1, iflag_t iflag2) 3338 { 3339 uint_t level1, po1; 3340 uint_t level2, po2; 3341 3342 /* Assume active high by default */ 3343 po1 = 0; 3344 po2 = 0; 3345 3346 if (iflag1.bustype != iflag2.bustype || iflag1.bustype != BUS_PCI) 3347 return (0); 3348 3349 if (iflag1.intr_el == INTR_EL_CONFORM) 3350 level1 = AV_LEVEL; 3351 else 3352 level1 = (iflag1.intr_el == INTR_EL_LEVEL) ? AV_LEVEL : 0; 3353 3354 if (level1 && ((iflag1.intr_po == INTR_PO_ACTIVE_LOW) || 3355 (iflag1.intr_po == INTR_PO_CONFORM))) 3356 po1 = AV_ACTIVE_LOW; 3357 3358 if (iflag2.intr_el == INTR_EL_CONFORM) 3359 level2 = AV_LEVEL; 3360 else 3361 level2 = (iflag2.intr_el == INTR_EL_LEVEL) ? AV_LEVEL : 0; 3362 3363 if (level2 && ((iflag2.intr_po == INTR_PO_ACTIVE_LOW) || 3364 (iflag2.intr_po == INTR_PO_CONFORM))) 3365 po2 = AV_ACTIVE_LOW; 3366 3367 if ((level1 == level2) && (po1 == po2)) 3368 return (1); 3369 3370 return (0); 3371 } 3372 3373 /* 3374 * Attempt to share vector with someone else 3375 */ 3376 static int 3377 apic_share_vector(int irqno, iflag_t *intr_flagp, short intr_index, int ipl, 3378 uchar_t ioapicindex, uchar_t ipin, apic_irq_t **irqptrp) 3379 { 3380 #ifdef DEBUG 3381 apic_irq_t *tmpirqp = NULL; 3382 #endif /* DEBUG */ 3383 apic_irq_t *irqptr, dummyirq; 3384 int newirq, chosen_irq = -1, share = 127; 3385 int lowest, highest, i; 3386 uchar_t share_id; 3387 3388 DDI_INTR_IMPLDBG((CE_CONT, "apic_share_vector: irqno=0x%x " 3389 "intr_index=0x%x ipl=0x%x\n", irqno, intr_index, ipl)); 3390 3391 highest = apic_ipltopri[ipl] + APIC_VECTOR_MASK; 3392 lowest = apic_ipltopri[ipl-1] + APIC_VECTOR_PER_IPL; 3393 3394 if (highest < lowest) /* Both ipl and ipl-1 map to same pri */ 3395 lowest -= APIC_VECTOR_PER_IPL; 3396 dummyirq.airq_mps_intr_index = intr_index; 3397 dummyirq.airq_ioapicindex = ioapicindex; 3398 dummyirq.airq_intin_no = ipin; 3399 if (intr_flagp) 3400 
dummyirq.airq_iflag = *intr_flagp; 3401 apic_record_rdt_entry(&dummyirq, irqno); 3402 for (i = lowest; i <= highest; i++) { 3403 newirq = apic_vector_to_irq[i]; 3404 if (newirq == APIC_RESV_IRQ) 3405 continue; 3406 irqptr = apic_irq_table[newirq]; 3407 3408 if ((dummyirq.airq_rdt_entry & 0xFF00) != 3409 (irqptr->airq_rdt_entry & 0xFF00)) 3410 /* not compatible */ 3411 continue; 3412 3413 if (irqptr->airq_share < share) { 3414 share = irqptr->airq_share; 3415 chosen_irq = newirq; 3416 } 3417 } 3418 if (chosen_irq != -1) { 3419 /* 3420 * Assign a share id which is free or which is larger 3421 * than the largest one. 3422 */ 3423 share_id = 1; 3424 mutex_enter(&airq_mutex); 3425 irqptr = apic_irq_table[chosen_irq]; 3426 while (irqptr) { 3427 if (irqptr->airq_mps_intr_index == FREE_INDEX) { 3428 share_id = irqptr->airq_share_id; 3429 break; 3430 } 3431 if (share_id <= irqptr->airq_share_id) 3432 share_id = irqptr->airq_share_id + 1; 3433 #ifdef DEBUG 3434 tmpirqp = irqptr; 3435 #endif /* DEBUG */ 3436 irqptr = irqptr->airq_next; 3437 } 3438 if (!irqptr) { 3439 irqptr = kmem_zalloc(sizeof (apic_irq_t), KM_SLEEP); 3440 irqptr->airq_temp_cpu = IRQ_UNINIT; 3441 irqptr->airq_next = 3442 apic_irq_table[chosen_irq]->airq_next; 3443 apic_irq_table[chosen_irq]->airq_next = irqptr; 3444 #ifdef DEBUG 3445 tmpirqp = apic_irq_table[chosen_irq]; 3446 #endif /* DEBUG */ 3447 } 3448 irqptr->airq_mps_intr_index = intr_index; 3449 irqptr->airq_ioapicindex = ioapicindex; 3450 irqptr->airq_intin_no = ipin; 3451 if (intr_flagp) 3452 irqptr->airq_iflag = *intr_flagp; 3453 irqptr->airq_vector = apic_irq_table[chosen_irq]->airq_vector; 3454 irqptr->airq_share_id = share_id; 3455 apic_record_rdt_entry(irqptr, irqno); 3456 *irqptrp = irqptr; 3457 #ifdef DEBUG 3458 /* shuffle the pointers to test apic_delspl path */ 3459 if (tmpirqp) { 3460 tmpirqp->airq_next = irqptr->airq_next; 3461 irqptr->airq_next = apic_irq_table[chosen_irq]; 3462 apic_irq_table[chosen_irq] = irqptr; 3463 } 3464 #endif /* 
DEBUG */ 3465 mutex_exit(&airq_mutex); 3466 return (VIRTIRQ(chosen_irq, share_id)); 3467 } 3468 return (-1); 3469 } 3470 3471 /* 3472 * 3473 */ 3474 static int 3475 apic_setup_irq_table(dev_info_t *dip, int irqno, struct apic_io_intr *intrp, 3476 struct intrspec *ispec, iflag_t *intr_flagp, int type) 3477 { 3478 int origirq = ispec->intrspec_vec; 3479 uchar_t ipl = ispec->intrspec_pri; 3480 int newirq, intr_index; 3481 uchar_t ipin, ioapic, ioapicindex, vector; 3482 apic_irq_t *irqptr; 3483 major_t major; 3484 dev_info_t *sdip; 3485 3486 DDI_INTR_IMPLDBG((CE_CONT, "apic_setup_irq_table: dip=0x%p type=%d " 3487 "irqno=0x%x origirq=0x%x\n", (void *)dip, type, irqno, origirq)); 3488 3489 ASSERT(ispec != NULL); 3490 3491 major = (dip != NULL) ? ddi_name_to_major(ddi_get_name(dip)) : 0; 3492 3493 if (DDI_INTR_IS_MSI_OR_MSIX(type)) { 3494 /* MSI/X doesn't need to setup ioapic stuffs */ 3495 ioapicindex = 0xff; 3496 ioapic = 0xff; 3497 ipin = (uchar_t)0xff; 3498 intr_index = (type == DDI_INTR_TYPE_MSI) ? MSI_INDEX : 3499 MSIX_INDEX; 3500 mutex_enter(&airq_mutex); 3501 if ((irqno = apic_allocate_irq(apic_first_avail_irq)) == -1) { 3502 mutex_exit(&airq_mutex); 3503 /* need an irq for MSI/X to index into autovect[] */ 3504 cmn_err(CE_WARN, "No interrupt irq: %s instance %d", 3505 ddi_get_name(dip), ddi_get_instance(dip)); 3506 return (-1); 3507 } 3508 mutex_exit(&airq_mutex); 3509 3510 } else if (intrp != NULL) { 3511 intr_index = (int)(intrp - apic_io_intrp); 3512 ioapic = intrp->intr_destid; 3513 ipin = intrp->intr_destintin; 3514 /* Find ioapicindex. If destid was ALL, we will exit with 0. 
*/ 3515 for (ioapicindex = apic_io_max - 1; ioapicindex; ioapicindex--) 3516 if (apic_io_id[ioapicindex] == ioapic) 3517 break; 3518 ASSERT((ioapic == apic_io_id[ioapicindex]) || 3519 (ioapic == INTR_ALL_APIC)); 3520 3521 /* check whether this intin# has been used by another irqno */ 3522 if ((newirq = apic_find_intin(ioapicindex, ipin)) != -1) { 3523 return (newirq); 3524 } 3525 3526 } else if (intr_flagp != NULL) { 3527 /* ACPI case */ 3528 intr_index = ACPI_INDEX; 3529 ioapicindex = acpi_find_ioapic(irqno); 3530 ASSERT(ioapicindex != 0xFF); 3531 ioapic = apic_io_id[ioapicindex]; 3532 ipin = irqno - apic_io_vectbase[ioapicindex]; 3533 if (apic_irq_table[irqno] && 3534 apic_irq_table[irqno]->airq_mps_intr_index == ACPI_INDEX) { 3535 ASSERT(apic_irq_table[irqno]->airq_intin_no == ipin && 3536 apic_irq_table[irqno]->airq_ioapicindex == 3537 ioapicindex); 3538 return (irqno); 3539 } 3540 3541 } else { 3542 /* default configuration */ 3543 ioapicindex = 0; 3544 ioapic = apic_io_id[ioapicindex]; 3545 ipin = (uchar_t)irqno; 3546 intr_index = DEFAULT_INDEX; 3547 } 3548 3549 if (ispec == NULL) { 3550 APIC_VERBOSE_IOAPIC((CE_WARN, "No intrspec for irqno = %x\n", 3551 irqno)); 3552 } else if ((vector = apic_allocate_vector(ipl, irqno, 0)) == 0) { 3553 if ((newirq = apic_share_vector(irqno, intr_flagp, intr_index, 3554 ipl, ioapicindex, ipin, &irqptr)) != -1) { 3555 irqptr->airq_ipl = ipl; 3556 irqptr->airq_origirq = (uchar_t)origirq; 3557 irqptr->airq_dip = dip; 3558 irqptr->airq_major = major; 3559 sdip = apic_irq_table[IRQINDEX(newirq)]->airq_dip; 3560 /* This is OK to do really */ 3561 if (sdip == NULL) { 3562 cmn_err(CE_WARN, "Sharing vectors: %s" 3563 " instance %d and SCI", 3564 ddi_get_name(dip), ddi_get_instance(dip)); 3565 } else { 3566 cmn_err(CE_WARN, "Sharing vectors: %s" 3567 " instance %d and %s instance %d", 3568 ddi_get_name(sdip), ddi_get_instance(sdip), 3569 ddi_get_name(dip), ddi_get_instance(dip)); 3570 } 3571 return (newirq); 3572 } 3573 /* try high 
priority allocation now that share has failed */ 3574 if ((vector = apic_allocate_vector(ipl, irqno, 1)) == 0) { 3575 cmn_err(CE_WARN, "No interrupt vector: %s instance %d", 3576 ddi_get_name(dip), ddi_get_instance(dip)); 3577 return (-1); 3578 } 3579 } 3580 3581 mutex_enter(&airq_mutex); 3582 if (apic_irq_table[irqno] == NULL) { 3583 irqptr = kmem_zalloc(sizeof (apic_irq_t), KM_SLEEP); 3584 irqptr->airq_temp_cpu = IRQ_UNINIT; 3585 apic_irq_table[irqno] = irqptr; 3586 } else { 3587 irqptr = apic_irq_table[irqno]; 3588 if (irqptr->airq_mps_intr_index != FREE_INDEX) { 3589 /* 3590 * The slot is used by another irqno, so allocate 3591 * a free irqno for this interrupt 3592 */ 3593 newirq = apic_allocate_irq(apic_first_avail_irq); 3594 if (newirq == -1) { 3595 mutex_exit(&airq_mutex); 3596 return (-1); 3597 } 3598 irqno = newirq; 3599 irqptr = apic_irq_table[irqno]; 3600 if (irqptr == NULL) { 3601 irqptr = kmem_zalloc(sizeof (apic_irq_t), 3602 KM_SLEEP); 3603 irqptr->airq_temp_cpu = IRQ_UNINIT; 3604 apic_irq_table[irqno] = irqptr; 3605 } 3606 apic_modify_vector(vector, newirq); 3607 } 3608 } 3609 apic_max_device_irq = max(irqno, apic_max_device_irq); 3610 apic_min_device_irq = min(irqno, apic_min_device_irq); 3611 mutex_exit(&airq_mutex); 3612 irqptr->airq_ioapicindex = ioapicindex; 3613 irqptr->airq_intin_no = ipin; 3614 irqptr->airq_ipl = ipl; 3615 irqptr->airq_vector = vector; 3616 irqptr->airq_origirq = (uchar_t)origirq; 3617 irqptr->airq_share_id = 0; 3618 irqptr->airq_mps_intr_index = (short)intr_index; 3619 irqptr->airq_dip = dip; 3620 irqptr->airq_major = major; 3621 irqptr->airq_cpu = apic_bind_intr(dip, irqno, ioapic, ipin); 3622 if (intr_flagp) 3623 irqptr->airq_iflag = *intr_flagp; 3624 3625 if (!DDI_INTR_IS_MSI_OR_MSIX(type)) { 3626 /* setup I/O APIC entry for non-MSI/X interrupts */ 3627 apic_record_rdt_entry(irqptr, irqno); 3628 } 3629 return (irqno); 3630 } 3631 3632 /* 3633 * return the cpu to which this intr should be bound. 
3634 * Check properties or any other mechanism to see if user wants it 3635 * bound to a specific CPU. If so, return the cpu id with high bit set. 3636 * If not, use the policy to choose a cpu and return the id. 3637 */ 3638 uchar_t 3639 apic_bind_intr(dev_info_t *dip, int irq, uchar_t ioapicid, uchar_t intin) 3640 { 3641 int instance, instno, prop_len, bind_cpu, count; 3642 uint_t i, rc; 3643 uchar_t cpu; 3644 major_t major; 3645 char *name, *drv_name, *prop_val, *cptr; 3646 char prop_name[32]; 3647 3648 3649 if (apic_intr_policy == INTR_LOWEST_PRIORITY) 3650 return (IRQ_UNBOUND); 3651 3652 drv_name = NULL; 3653 rc = DDI_PROP_NOT_FOUND; 3654 major = (major_t)-1; 3655 if (dip != NULL) { 3656 name = ddi_get_name(dip); 3657 major = ddi_name_to_major(name); 3658 drv_name = ddi_major_to_name(major); 3659 instance = ddi_get_instance(dip); 3660 if (apic_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) { 3661 i = apic_min_device_irq; 3662 for (; i <= apic_max_device_irq; i++) { 3663 3664 if ((i == irq) || (apic_irq_table[i] == NULL) || 3665 (apic_irq_table[i]->airq_mps_intr_index 3666 == FREE_INDEX)) 3667 continue; 3668 3669 if ((apic_irq_table[i]->airq_major == major) && 3670 (!(apic_irq_table[i]->airq_cpu & 3671 IRQ_USER_BOUND))) { 3672 3673 cpu = apic_irq_table[i]->airq_cpu; 3674 3675 cmn_err(CE_CONT, 3676 "!pcplusmp: %s (%s) instance #%d " 3677 "vector 0x%x ioapic 0x%x " 3678 "intin 0x%x is bound to cpu %d\n", 3679 name, drv_name, instance, irq, 3680 ioapicid, intin, cpu); 3681 return (cpu); 3682 } 3683 } 3684 } 3685 /* 3686 * search for "drvname"_intpt_bind_cpus property first, the 3687 * syntax of the property should be "a[,b,c,...]" where 3688 * instance 0 binds to cpu a, instance 1 binds to cpu b, 3689 * instance 3 binds to cpu c... 3690 * ddi_getlongprop() will search /option first, then / 3691 * if "drvname"_intpt_bind_cpus doesn't exist, then find 3692 * intpt_bind_cpus property. 
The syntax is the same, and 3693 * it applies to all the devices if its "drvname" specific 3694 * property doesn't exist 3695 */ 3696 (void) strcpy(prop_name, drv_name); 3697 (void) strcat(prop_name, "_intpt_bind_cpus"); 3698 rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, 0, prop_name, 3699 (caddr_t)&prop_val, &prop_len); 3700 if (rc != DDI_PROP_SUCCESS) { 3701 rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, 0, 3702 "intpt_bind_cpus", (caddr_t)&prop_val, &prop_len); 3703 } 3704 } 3705 if (rc == DDI_PROP_SUCCESS) { 3706 for (i = count = 0; i < (prop_len - 1); i++) 3707 if (prop_val[i] == ',') 3708 count++; 3709 if (prop_val[i-1] != ',') 3710 count++; 3711 /* 3712 * if somehow the binding instances defined in the 3713 * property are not enough for this instno., then 3714 * reuse the pattern for the next instance until 3715 * it reaches the requested instno 3716 */ 3717 instno = instance % count; 3718 i = 0; 3719 cptr = prop_val; 3720 while (i < instno) 3721 if (*cptr++ == ',') 3722 i++; 3723 bind_cpu = stoi(&cptr); 3724 kmem_free(prop_val, prop_len); 3725 /* if specific cpu is bogus, then default to cpu 0 */ 3726 if (bind_cpu >= apic_nproc) { 3727 cmn_err(CE_WARN, "pcplusmp: %s=%s: CPU %d not present", 3728 prop_name, prop_val, bind_cpu); 3729 bind_cpu = 0; 3730 } else { 3731 /* indicate that we are bound at user request */ 3732 bind_cpu |= IRQ_USER_BOUND; 3733 } 3734 /* 3735 * no need to check apic_cpus[].aci_status, if specific cpu is 3736 * not up, then post_cpu_start will handle it. 
3737 */ 3738 } else { 3739 bind_cpu = apic_next_bind_cpu++; 3740 if (bind_cpu >= apic_nproc) { 3741 apic_next_bind_cpu = 1; 3742 bind_cpu = 0; 3743 } 3744 } 3745 if (drv_name != NULL) 3746 cmn_err(CE_CONT, "!pcplusmp: %s (%s) instance %d " 3747 "vector 0x%x ioapic 0x%x intin 0x%x is bound to cpu %d\n", 3748 name, drv_name, instance, 3749 irq, ioapicid, intin, bind_cpu & ~IRQ_USER_BOUND); 3750 else 3751 cmn_err(CE_CONT, "!pcplusmp: " 3752 "vector 0x%x ioapic 0x%x intin 0x%x is bound to cpu %d\n", 3753 irq, ioapicid, intin, bind_cpu & ~IRQ_USER_BOUND); 3754 3755 return ((uchar_t)bind_cpu); 3756 } 3757 3758 static struct apic_io_intr * 3759 apic_find_io_intr_w_busid(int irqno, int busid) 3760 { 3761 struct apic_io_intr *intrp; 3762 3763 /* 3764 * It can have more than 1 entry with same source bus IRQ, 3765 * but unique with the source bus id 3766 */ 3767 intrp = apic_io_intrp; 3768 if (intrp != NULL) { 3769 while (intrp->intr_entry == APIC_IO_INTR_ENTRY) { 3770 if (intrp->intr_irq == irqno && 3771 intrp->intr_busid == busid && 3772 intrp->intr_type == IO_INTR_INT) 3773 return (intrp); 3774 intrp++; 3775 } 3776 } 3777 APIC_VERBOSE_IOAPIC((CE_NOTE, "Did not find io intr for irqno:" 3778 "busid %x:%x\n", irqno, busid)); 3779 return ((struct apic_io_intr *)NULL); 3780 } 3781 3782 3783 struct mps_bus_info { 3784 char *bus_name; 3785 int bus_id; 3786 } bus_info_array[] = { 3787 "ISA ", BUS_ISA, 3788 "PCI ", BUS_PCI, 3789 "EISA ", BUS_EISA, 3790 "XPRESS", BUS_XPRESS, 3791 "PCMCIA", BUS_PCMCIA, 3792 "VL ", BUS_VL, 3793 "CBUS ", BUS_CBUS, 3794 "CBUSII", BUS_CBUSII, 3795 "FUTURE", BUS_FUTURE, 3796 "INTERN", BUS_INTERN, 3797 "MBI ", BUS_MBI, 3798 "MBII ", BUS_MBII, 3799 "MPI ", BUS_MPI, 3800 "MPSA ", BUS_MPSA, 3801 "NUBUS ", BUS_NUBUS, 3802 "TC ", BUS_TC, 3803 "VME ", BUS_VME 3804 }; 3805 3806 static int 3807 apic_find_bus_type(char *bus) 3808 { 3809 int i = 0; 3810 3811 for (; i < sizeof (bus_info_array)/sizeof (struct mps_bus_info); i++) 3812 if (strncmp(bus, 
bus_info_array[i].bus_name, 3813 strlen(bus_info_array[i].bus_name)) == 0) 3814 return (bus_info_array[i].bus_id); 3815 APIC_VERBOSE_IOAPIC((CE_WARN, "Did not find bus type for bus %s", bus)); 3816 return (0); 3817 } 3818 3819 static int 3820 apic_find_bus(int busid) 3821 { 3822 struct apic_bus *busp; 3823 3824 busp = apic_busp; 3825 while (busp->bus_entry == APIC_BUS_ENTRY) { 3826 if (busp->bus_id == busid) 3827 return (apic_find_bus_type((char *)&busp->bus_str1)); 3828 busp++; 3829 } 3830 APIC_VERBOSE_IOAPIC((CE_WARN, "Did not find bus for bus id %x", busid)); 3831 return (0); 3832 } 3833 3834 static int 3835 apic_find_bus_id(int bustype) 3836 { 3837 struct apic_bus *busp; 3838 3839 busp = apic_busp; 3840 while (busp->bus_entry == APIC_BUS_ENTRY) { 3841 if (apic_find_bus_type((char *)&busp->bus_str1) == bustype) 3842 return (busp->bus_id); 3843 busp++; 3844 } 3845 APIC_VERBOSE_IOAPIC((CE_WARN, "Did not find bus id for bustype %x", 3846 bustype)); 3847 return (-1); 3848 } 3849 3850 /* 3851 * Check if a particular irq need to be reserved for any io_intr 3852 */ 3853 static struct apic_io_intr * 3854 apic_find_io_intr(int irqno) 3855 { 3856 struct apic_io_intr *intrp; 3857 3858 intrp = apic_io_intrp; 3859 if (intrp != NULL) { 3860 while (intrp->intr_entry == APIC_IO_INTR_ENTRY) { 3861 if (intrp->intr_irq == irqno && 3862 intrp->intr_type == IO_INTR_INT) 3863 return (intrp); 3864 intrp++; 3865 } 3866 } 3867 return ((struct apic_io_intr *)NULL); 3868 } 3869 3870 /* 3871 * Check if the given ioapicindex intin combination has already been assigned 3872 * an irq. If so return irqno. 
Else -1 3873 */ 3874 static int 3875 apic_find_intin(uchar_t ioapic, uchar_t intin) 3876 { 3877 apic_irq_t *irqptr; 3878 int i; 3879 3880 /* find ioapic and intin in the apic_irq_table[] and return the index */ 3881 for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) { 3882 irqptr = apic_irq_table[i]; 3883 while (irqptr) { 3884 if ((irqptr->airq_mps_intr_index >= 0) && 3885 (irqptr->airq_intin_no == intin) && 3886 (irqptr->airq_ioapicindex == ioapic)) { 3887 APIC_VERBOSE_IOAPIC((CE_NOTE, "!Found irq " 3888 "entry for ioapic:intin %x:%x " 3889 "shared interrupts ?", ioapic, intin)); 3890 return (i); 3891 } 3892 irqptr = irqptr->airq_next; 3893 } 3894 } 3895 return (-1); 3896 } 3897 3898 int 3899 apic_allocate_irq(int irq) 3900 { 3901 int freeirq, i; 3902 3903 if ((freeirq = apic_find_free_irq(irq, (APIC_RESV_IRQ - 1))) == -1) 3904 if ((freeirq = apic_find_free_irq(APIC_FIRST_FREE_IRQ, 3905 (irq - 1))) == -1) { 3906 /* 3907 * if BIOS really defines every single irq in the mps 3908 * table, then don't worry about conflicting with 3909 * them, just use any free slot in apic_irq_table 3910 */ 3911 for (i = APIC_FIRST_FREE_IRQ; i < APIC_RESV_IRQ; i++) { 3912 if ((apic_irq_table[i] == NULL) || 3913 apic_irq_table[i]->airq_mps_intr_index == 3914 FREE_INDEX) { 3915 freeirq = i; 3916 break; 3917 } 3918 } 3919 if (freeirq == -1) { 3920 /* This shouldn't happen, but just in case */ 3921 cmn_err(CE_WARN, "pcplusmp: NO available IRQ"); 3922 return (-1); 3923 } 3924 } 3925 if (apic_irq_table[freeirq] == NULL) { 3926 apic_irq_table[freeirq] = 3927 kmem_zalloc(sizeof (apic_irq_t), KM_NOSLEEP); 3928 if (apic_irq_table[freeirq] == NULL) { 3929 cmn_err(CE_WARN, "pcplusmp: NO memory to allocate IRQ"); 3930 return (-1); 3931 } 3932 apic_irq_table[freeirq]->airq_mps_intr_index = FREE_INDEX; 3933 } 3934 return (freeirq); 3935 } 3936 3937 static int 3938 apic_find_free_irq(int start, int end) 3939 { 3940 int i; 3941 3942 for (i = start; i <= end; i++) 3943 /* Check if any I/O 
entry needs this IRQ */ 3944 if (apic_find_io_intr(i) == NULL) { 3945 /* Then see if it is free */ 3946 if ((apic_irq_table[i] == NULL) || 3947 (apic_irq_table[i]->airq_mps_intr_index == 3948 FREE_INDEX)) { 3949 return (i); 3950 } 3951 } 3952 return (-1); 3953 } 3954 3955 /* 3956 * Allocate a free vector for irq at ipl. Takes care of merging of multiple 3957 * IPLs into a single APIC level as well as stretching some IPLs onto multiple 3958 * levels. APIC_HI_PRI_VECTS interrupts are reserved for high priority 3959 * requests and allocated only when pri is set. 3960 */ 3961 static uchar_t 3962 apic_allocate_vector(int ipl, int irq, int pri) 3963 { 3964 int lowest, highest, i; 3965 3966 highest = apic_ipltopri[ipl] + APIC_VECTOR_MASK; 3967 lowest = apic_ipltopri[ipl - 1] + APIC_VECTOR_PER_IPL; 3968 3969 if (highest < lowest) /* Both ipl and ipl - 1 map to same pri */ 3970 lowest -= APIC_VECTOR_PER_IPL; 3971 3972 #ifdef DEBUG 3973 if (apic_restrict_vector) /* for testing shared interrupt logic */ 3974 highest = lowest + apic_restrict_vector + APIC_HI_PRI_VECTS; 3975 #endif /* DEBUG */ 3976 if (pri == 0) 3977 highest -= APIC_HI_PRI_VECTS; 3978 3979 for (i = lowest; i < highest; i++) { 3980 if (APIC_CHECK_RESERVE_VECTORS(i)) 3981 continue; 3982 if (apic_vector_to_irq[i] == APIC_RESV_IRQ) { 3983 apic_vector_to_irq[i] = (uchar_t)irq; 3984 return (i); 3985 } 3986 } 3987 3988 return (0); 3989 } 3990 3991 static void 3992 apic_modify_vector(uchar_t vector, int irq) 3993 { 3994 apic_vector_to_irq[vector] = (uchar_t)irq; 3995 } 3996 3997 /* 3998 * Mark vector as being in the process of being deleted. Interrupts 3999 * may still come in on some CPU. The moment an interrupt comes with 4000 * the new vector, we know we can free the old one. Called only from 4001 * addspl and delspl with interrupts disabled. 
Because an interrupt 4002 * can be shared, but no interrupt from either device may come in, 4003 * we also use a timeout mechanism, which we arbitrarily set to 4004 * apic_revector_timeout microseconds. 4005 */ 4006 static void 4007 apic_mark_vector(uchar_t oldvector, uchar_t newvector) 4008 { 4009 int iflag = intr_clear(); 4010 lock_set(&apic_revector_lock); 4011 if (!apic_oldvec_to_newvec) { 4012 apic_oldvec_to_newvec = 4013 kmem_zalloc(sizeof (newvector) * APIC_MAX_VECTOR * 2, 4014 KM_NOSLEEP); 4015 4016 if (!apic_oldvec_to_newvec) { 4017 /* 4018 * This failure is not catastrophic. 4019 * But, the oldvec will never be freed. 4020 */ 4021 apic_error |= APIC_ERR_MARK_VECTOR_FAIL; 4022 lock_clear(&apic_revector_lock); 4023 intr_restore(iflag); 4024 return; 4025 } 4026 apic_newvec_to_oldvec = &apic_oldvec_to_newvec[APIC_MAX_VECTOR]; 4027 } 4028 4029 /* See if we already did this for drivers which do double addintrs */ 4030 if (apic_oldvec_to_newvec[oldvector] != newvector) { 4031 apic_oldvec_to_newvec[oldvector] = newvector; 4032 apic_newvec_to_oldvec[newvector] = oldvector; 4033 apic_revector_pending++; 4034 } 4035 lock_clear(&apic_revector_lock); 4036 intr_restore(iflag); 4037 (void) timeout(apic_xlate_vector_free_timeout_handler, 4038 (void *)(uintptr_t)oldvector, drv_usectohz(apic_revector_timeout)); 4039 } 4040 4041 /* 4042 * xlate_vector is called from intr_enter if revector_pending is set. 4043 * It will xlate it if needed and mark the old vector as free. 4044 */ 4045 static uchar_t 4046 apic_xlate_vector(uchar_t vector) 4047 { 4048 uchar_t newvector, oldvector = 0; 4049 4050 lock_set(&apic_revector_lock); 4051 /* Do we really need to do this ? */ 4052 if (!apic_revector_pending) { 4053 lock_clear(&apic_revector_lock); 4054 return (vector); 4055 } 4056 if ((newvector = apic_oldvec_to_newvec[vector]) != 0) 4057 oldvector = vector; 4058 else { 4059 /* 4060 * The incoming vector is new . 
See if a stale entry is 4061 * remaining 4062 */ 4063 if ((oldvector = apic_newvec_to_oldvec[vector]) != 0) 4064 newvector = vector; 4065 } 4066 4067 if (oldvector) { 4068 apic_revector_pending--; 4069 apic_oldvec_to_newvec[oldvector] = 0; 4070 apic_newvec_to_oldvec[newvector] = 0; 4071 apic_free_vector(oldvector); 4072 lock_clear(&apic_revector_lock); 4073 /* There could have been more than one reprogramming! */ 4074 return (apic_xlate_vector(newvector)); 4075 } 4076 lock_clear(&apic_revector_lock); 4077 return (vector); 4078 } 4079 4080 void 4081 apic_xlate_vector_free_timeout_handler(void *arg) 4082 { 4083 int iflag; 4084 uchar_t oldvector, newvector; 4085 4086 oldvector = (uchar_t)(uintptr_t)arg; 4087 iflag = intr_clear(); 4088 lock_set(&apic_revector_lock); 4089 if ((newvector = apic_oldvec_to_newvec[oldvector]) != 0) { 4090 apic_free_vector(oldvector); 4091 apic_oldvec_to_newvec[oldvector] = 0; 4092 apic_newvec_to_oldvec[newvector] = 0; 4093 apic_revector_pending--; 4094 } 4095 4096 lock_clear(&apic_revector_lock); 4097 intr_restore(iflag); 4098 } 4099 4100 4101 /* Mark vector as not being used by any irq */ 4102 static void 4103 apic_free_vector(uchar_t vector) 4104 { 4105 apic_vector_to_irq[vector] = APIC_RESV_IRQ; 4106 } 4107 4108 /* 4109 * compute the polarity, trigger mode and vector for programming into 4110 * the I/O apic and record in airq_rdt_entry. 
4111 */ 4112 static void 4113 apic_record_rdt_entry(apic_irq_t *irqptr, int irq) 4114 { 4115 int ioapicindex, bus_type, vector; 4116 short intr_index; 4117 uint_t level, po, io_po; 4118 struct apic_io_intr *iointrp; 4119 4120 intr_index = irqptr->airq_mps_intr_index; 4121 DDI_INTR_IMPLDBG((CE_CONT, "apic_record_rdt_entry: intr_index=%d " 4122 "irq = 0x%x dip = 0x%p vector = 0x%x\n", intr_index, irq, 4123 (void *)irqptr->airq_dip, irqptr->airq_vector)); 4124 4125 if (intr_index == RESERVE_INDEX) { 4126 apic_error |= APIC_ERR_INVALID_INDEX; 4127 return; 4128 } else if (APIC_IS_MSI_OR_MSIX_INDEX(intr_index)) { 4129 return; 4130 } 4131 4132 vector = irqptr->airq_vector; 4133 ioapicindex = irqptr->airq_ioapicindex; 4134 /* Assume edge triggered by default */ 4135 level = 0; 4136 /* Assume active high by default */ 4137 po = 0; 4138 4139 if (intr_index == DEFAULT_INDEX || intr_index == FREE_INDEX) { 4140 ASSERT(irq < 16); 4141 if (eisa_level_intr_mask & (1 << irq)) 4142 level = AV_LEVEL; 4143 if (intr_index == FREE_INDEX && apic_defconf == 0) 4144 apic_error |= APIC_ERR_INVALID_INDEX; 4145 } else if (intr_index == ACPI_INDEX) { 4146 bus_type = irqptr->airq_iflag.bustype; 4147 if (irqptr->airq_iflag.intr_el == INTR_EL_CONFORM) { 4148 if (bus_type == BUS_PCI) 4149 level = AV_LEVEL; 4150 } else 4151 level = (irqptr->airq_iflag.intr_el == INTR_EL_LEVEL) ? 4152 AV_LEVEL : 0; 4153 if (level && 4154 ((irqptr->airq_iflag.intr_po == INTR_PO_ACTIVE_LOW) || 4155 (irqptr->airq_iflag.intr_po == INTR_PO_CONFORM && 4156 bus_type == BUS_PCI))) 4157 po = AV_ACTIVE_LOW; 4158 } else { 4159 iointrp = apic_io_intrp + intr_index; 4160 bus_type = apic_find_bus(iointrp->intr_busid); 4161 if (iointrp->intr_el == INTR_EL_CONFORM) { 4162 if ((irq < 16) && (eisa_level_intr_mask & (1 << irq))) 4163 level = AV_LEVEL; 4164 else if (bus_type == BUS_PCI) 4165 level = AV_LEVEL; 4166 } else 4167 level = (iointrp->intr_el == INTR_EL_LEVEL) ? 
4168 AV_LEVEL : 0; 4169 if (level && ((iointrp->intr_po == INTR_PO_ACTIVE_LOW) || 4170 (iointrp->intr_po == INTR_PO_CONFORM && 4171 bus_type == BUS_PCI))) 4172 po = AV_ACTIVE_LOW; 4173 } 4174 if (level) 4175 apic_level_intr[irq] = 1; 4176 /* 4177 * The 82489DX External APIC cannot do active low polarity interrupts. 4178 */ 4179 if (po && (apic_io_ver[ioapicindex] != IOAPIC_VER_82489DX)) 4180 io_po = po; 4181 else 4182 io_po = 0; 4183 4184 if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) 4185 printf("setio: ioapic=%x intin=%x level=%x po=%x vector=%x\n", 4186 ioapicindex, irqptr->airq_intin_no, level, io_po, vector); 4187 4188 irqptr->airq_rdt_entry = level|io_po|vector; 4189 } 4190 4191 /* 4192 * Call rebind to do the actual programming. 4193 */ 4194 static int 4195 apic_setup_io_intr(apic_irq_t *irqptr, int irq) 4196 { 4197 int rv; 4198 4199 if (rv = apic_rebind(irqptr, apic_irq_table[irq]->airq_cpu, 1, 4200 IMMEDIATE)) 4201 /* CPU is not up or interrupt is disabled. Fall back to 0 */ 4202 rv = apic_rebind(irqptr, 0, 1, IMMEDIATE); 4203 4204 return (rv); 4205 } 4206 4207 /* 4208 * Deferred reprogramming: Call apic_rebind to do the real work. 4209 */ 4210 static int 4211 apic_setup_io_intr_deferred(apic_irq_t *irqptr, int irq) 4212 { 4213 int rv; 4214 4215 if (rv = apic_rebind(irqptr, apic_irq_table[irq]->airq_cpu, 1, 4216 DEFERRED)) 4217 /* CPU is not up or interrupt is disabled. Fall back to 0 */ 4218 rv = apic_rebind(irqptr, 0, 1, DEFERRED); 4219 4220 return (rv); 4221 } 4222 4223 /* 4224 * Bind interrupt corresponding to irq_ptr to bind_cpu. acquire_lock 4225 * if false (0) means lock is already held (e.g: in rebind_all). 
*/
/*
 *	irq_ptr		table entry of the interrupt to (re)program
 *	bind_cpu	target cpu, optionally with IRQ_USER_BOUND set, or
 *			IRQ_UNBOUND for no specific cpu
 *	acquire_lock	non-zero: take and release apic_ioapic_lock here
 *	when		IMMEDIATE or DEFERRED; a DEFERRED call is dropped
 *			when apic_reprogram_info[] for the irq is not valid
 *
 * Returns 1 if the target cpu does not have APIC_CPU_INTR_ENABLE set
 * (nothing is programmed); returns 0 in every other case, including
 * when the work was skipped as stale or left to the stuck-interrupt
 * handling.
 */
static int
apic_rebind(apic_irq_t *irq_ptr, int bind_cpu, int acquire_lock, int when)
{
	int			intin_no;
	volatile int32_t	*ioapic;
	uchar_t			airq_temp_cpu;
	apic_cpus_info_t	*cpu_infop;
	int			iflag;
	int		which_irq = apic_vector_to_irq[irq_ptr->airq_vector];
	boolean_t		restore_iflag = B_TRUE;

	intin_no = irq_ptr->airq_intin_no;
	ioapic = apicioadr[irq_ptr->airq_ioapicindex];
	/* cpu the interrupt is currently (temporarily) bound to, if any */
	airq_temp_cpu = irq_ptr->airq_temp_cpu;
	if (airq_temp_cpu != IRQ_UNINIT && airq_temp_cpu != IRQ_UNBOUND) {
		if (airq_temp_cpu & IRQ_USER_BOUND)
			/* Mask off high bit so it can be used as array index */
			airq_temp_cpu &= ~IRQ_USER_BOUND;

		ASSERT(airq_temp_cpu < apic_nproc);
	}

	/* interrupts stay disabled until one of the intr_restore() calls */
	iflag = intr_clear();

	if (acquire_lock)
		lock_set(&apic_ioapic_lock);

	/*
	 * Can't bind to a CPU that's not online:
	 */
	cpu_infop = &apic_cpus[bind_cpu & ~IRQ_USER_BOUND];
	if (!(cpu_infop->aci_status & APIC_CPU_INTR_ENABLE)) {

		if (acquire_lock)
			lock_clear(&apic_ioapic_lock);

		intr_restore(iflag);
		return (1);
	}

	/*
	 * If this is a deferred reprogramming attempt, ensure we have
	 * not been passed stale data:
	 */
	if ((when == DEFERRED) &&
	    (apic_reprogram_info[which_irq].valid == 0)) {
		/* stale info, so just return */
		if (acquire_lock)
			lock_clear(&apic_ioapic_lock);

		intr_restore(iflag);
		return (0);
	}

	/*
	 * If this interrupt has been delivered to a CPU and that CPU
	 * has not handled it yet, we cannot reprogram the IOAPIC now:
	 */
	if (!APIC_IS_MSI_OR_MSIX_INDEX(irq_ptr->airq_mps_intr_index) &&
	    apic_check_stuck_interrupt(irq_ptr, airq_temp_cpu, bind_cpu,
	    ioapic, intin_no, which_irq, iflag, &restore_iflag) != 0) {

		if (acquire_lock)
			lock_clear(&apic_ioapic_lock);

		/*
		 * restore_iflag is an out-parameter of the check above;
		 * when it comes back false the flags are not restored here.
		 */
		if (restore_iflag)
			intr_restore(iflag);
		return (0);
	}

	/*
	 * NOTE: We do not unmask the RDT here, as an interrupt MAY still
	 * come in before we have a chance to reprogram it below.  The
	 * reprogramming below will simultaneously change and unmask the
	 * RDT entry.
	 */

	if ((uchar_t)bind_cpu == IRQ_UNBOUND) {
		/* Write the RDT entry -- no specific CPU binding */
		WRITE_IOAPIC_RDT_ENTRY_HIGH_DWORD(ioapic, intin_no, AV_TOALL);

		/* the previous cpu no longer carries this interrupt */
		if (airq_temp_cpu != IRQ_UNINIT && airq_temp_cpu != IRQ_UNBOUND)
			apic_cpus[airq_temp_cpu].aci_temp_bound--;

		/* Write the vector, trigger, and polarity portion of the RDT */
		WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no,
		    AV_LDEST | AV_LOPRI | irq_ptr->airq_rdt_entry);
		if (acquire_lock)
			lock_clear(&apic_ioapic_lock);
		irq_ptr->airq_temp_cpu = IRQ_UNBOUND;
		intr_restore(iflag);
		return (0);
	}

	/* account for the new binding on the target cpu */
	if (bind_cpu & IRQ_USER_BOUND) {
		cpu_infop->aci_bound++;
	} else {
		cpu_infop->aci_temp_bound++;
	}
	ASSERT((bind_cpu & ~IRQ_USER_BOUND) < apic_nproc);
	if (!APIC_IS_MSI_OR_MSIX_INDEX(irq_ptr->airq_mps_intr_index)) {
		/* Write the RDT entry -- bind to a specific CPU: */
		WRITE_IOAPIC_RDT_ENTRY_HIGH_DWORD(ioapic, intin_no,
		    cpu_infop->aci_local_id << APIC_ID_BIT_OFFSET);
	}
	if ((airq_temp_cpu != IRQ_UNBOUND) && (airq_temp_cpu != IRQ_UNINIT)) {
		apic_cpus[airq_temp_cpu].aci_temp_bound--;
	}
	if (!APIC_IS_MSI_OR_MSIX_INDEX(irq_ptr->airq_mps_intr_index)) {
		/* Write the vector, trigger, and polarity portion of the RDT */
		WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no,
		    AV_PDEST | AV_FIXED | irq_ptr->airq_rdt_entry);
	} else {
		/* MSI/MSI-X: program the device instead of an I/O APIC */
		int type = (irq_ptr->airq_mps_intr_index == MSI_INDEX) ?
		    DDI_INTR_TYPE_MSI : DDI_INTR_TYPE_MSIX;
		/*
		 * NOTE(review): for MSI/X entries airq_ioapicindex and
		 * airq_intin_no appear to be reused for other data in the
		 * comparisons with airq_origirq below; their exact meaning
		 * is not visible in this part of the file -- confirm
		 * against the MSI allocation code.
		 */
		(void) apic_pci_msi_disable_mode(irq_ptr->airq_dip, type,
		    irq_ptr->airq_ioapicindex);
		if (irq_ptr->airq_ioapicindex == irq_ptr->airq_origirq) {
			/* first one */
			DDI_INTR_IMPLDBG((CE_CONT, "apic_rebind: call "
			    "apic_pci_msi_enable_vector\n"));
			if (apic_pci_msi_enable_vector(irq_ptr->airq_dip, type,
			    which_irq, irq_ptr->airq_vector,
			    irq_ptr->airq_intin_no,
			    cpu_infop->aci_local_id) != PSM_SUCCESS) {
				cmn_err(CE_WARN, "pcplusmp: "
				    "apic_pci_msi_enable_vector "
				    "returned PSM_FAILURE");
			}
		}
		if ((irq_ptr->airq_ioapicindex + irq_ptr->airq_intin_no - 1) ==
		    irq_ptr->airq_origirq) { /* last one */
			DDI_INTR_IMPLDBG((CE_CONT, "apic_rebind: call "
			    "pci_msi_enable_mode\n"));
			if (apic_pci_msi_enable_mode(irq_ptr->airq_dip,
			    type, which_irq) != PSM_SUCCESS) {
				DDI_INTR_IMPLDBG((CE_CONT, "pcplusmp: "
				    "pci_msi_enable failed\n"));
				(void) apic_pci_msi_unconfigure(
				    irq_ptr->airq_dip, type, which_irq);
			}
		}
	}
	if (acquire_lock)
		lock_clear(&apic_ioapic_lock);
	/* remember the binding (user-bound bit included) */
	irq_ptr->airq_temp_cpu = (uchar_t)bind_cpu;
	apic_redist_cpu_skip &= ~(1 << (bind_cpu & ~IRQ_USER_BOUND));
	intr_restore(iflag);
	return (0);
}

/*
 * Checks to see if the IOAPIC interrupt entry specified has its Remote IRR
 * bit set.  Sets up a timeout to perform the reprogramming at a later time
 * if it cannot wait for the Remote IRR bit to clear (or if waiting did not
 * result in the bit's clearing).
 *
 * This function will mask the RDT entry if the Remote IRR bit is set.
 *
 * Returns non-zero if the caller should defer IOAPIC reprogramming.
4387 */ 4388 static int 4389 apic_check_stuck_interrupt(apic_irq_t *irq_ptr, int old_bind_cpu, 4390 int new_bind_cpu, volatile int32_t *ioapic, int intin_no, int which_irq, 4391 int iflag, boolean_t *intr_restorep) 4392 { 4393 int32_t rdt_entry; 4394 int waited; 4395 4396 /* Mask the RDT entry, but only if it's a level-triggered interrupt */ 4397 rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no); 4398 if ((rdt_entry & (AV_LEVEL|AV_MASK)) == AV_LEVEL) { 4399 4400 /* Mask it */ 4401 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no, 4402 AV_MASK | rdt_entry); 4403 } 4404 4405 /* 4406 * Wait for the delivery pending bit to clear. 4407 */ 4408 if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no) & 4409 (AV_LEVEL|AV_PENDING)) == (AV_LEVEL|AV_PENDING)) { 4410 4411 /* 4412 * If we're still waiting on the delivery of this interrupt, 4413 * continue to wait here until it is delivered (this should be 4414 * a very small amount of time, but include a timeout just in 4415 * case). 4416 */ 4417 for (waited = 0; waited < apic_max_usecs_clear_pending; 4418 waited += APIC_USECS_PER_WAIT_INTERVAL) { 4419 if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no) 4420 & AV_PENDING) == 0) { 4421 break; 4422 } 4423 drv_usecwait(APIC_USECS_PER_WAIT_INTERVAL); 4424 } 4425 4426 if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no) & 4427 AV_PENDING) != 0) { 4428 cmn_err(CE_WARN, "!IOAPIC %d intin %d: Could not " 4429 "deliver interrupt to local APIC within " 4430 "%d usecs.", irq_ptr->airq_ioapicindex, 4431 irq_ptr->airq_intin_no, 4432 apic_max_usecs_clear_pending); 4433 } 4434 } 4435 4436 /* 4437 * If the remote IRR bit is set, then the interrupt has been sent 4438 * to a CPU for processing. We have no choice but to wait for 4439 * that CPU to process the interrupt, at which point the remote IRR 4440 * bit will be cleared. 
4441 */ 4442 if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no) & 4443 (AV_LEVEL|AV_REMOTE_IRR)) == (AV_LEVEL|AV_REMOTE_IRR)) { 4444 4445 /* 4446 * If the CPU that this RDT is bound to is NOT the current 4447 * CPU, wait until that CPU handles the interrupt and ACKs 4448 * it. If this interrupt is not bound to any CPU (that is, 4449 * if it's bound to the logical destination of "anyone"), it 4450 * may have been delivered to the current CPU so handle that 4451 * case by deferring the reprogramming (below). 4452 */ 4453 kpreempt_disable(); 4454 if ((old_bind_cpu != IRQ_UNBOUND) && 4455 (old_bind_cpu != IRQ_UNINIT) && 4456 (old_bind_cpu != psm_get_cpu_id())) { 4457 for (waited = 0; waited < apic_max_usecs_clear_pending; 4458 waited += APIC_USECS_PER_WAIT_INTERVAL) { 4459 if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, 4460 intin_no) & AV_REMOTE_IRR) == 0) { 4461 4462 /* Clear the reprogramming state: */ 4463 lock_set(&apic_ioapic_reprogram_lock); 4464 4465 apic_reprogram_info[which_irq].valid 4466 = 0; 4467 apic_reprogram_info[which_irq].bindcpu 4468 = 0; 4469 apic_reprogram_info[which_irq].timeouts 4470 = 0; 4471 4472 lock_clear(&apic_ioapic_reprogram_lock); 4473 4474 /* Remote IRR has cleared! */ 4475 kpreempt_enable(); 4476 return (0); 4477 } 4478 drv_usecwait(APIC_USECS_PER_WAIT_INTERVAL); 4479 } 4480 } 4481 kpreempt_enable(); 4482 4483 /* 4484 * If we waited and the Remote IRR bit is still not cleared, 4485 * AND if we've invoked the timeout APIC_REPROGRAM_MAX_TIMEOUTS 4486 * times for this interrupt, try the last-ditch workarounds: 4487 */ 4488 if (apic_reprogram_info[which_irq].timeouts >= 4489 APIC_REPROGRAM_MAX_TIMEOUTS) { 4490 4491 if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no) 4492 & AV_REMOTE_IRR) != 0) { 4493 /* 4494 * Trying to clear the bit through normal 4495 * channels has failed. So as a last-ditch 4496 * effort, try to set the trigger mode to 4497 * edge, then to level. This has been 4498 * observed to work on many systems. 
4499 */ 4500 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, 4501 intin_no, 4502 READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, 4503 intin_no) & ~AV_LEVEL); 4504 4505 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, 4506 intin_no, 4507 READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, 4508 intin_no) | AV_LEVEL); 4509 4510 /* 4511 * If the bit's STILL set, declare total and 4512 * utter failure 4513 */ 4514 if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, 4515 intin_no) & AV_REMOTE_IRR) != 0) { 4516 cmn_err(CE_WARN, "!IOAPIC %d intin %d: " 4517 "Remote IRR failed to reset " 4518 "within %d usecs. Interrupts to " 4519 "this pin may cease to function.", 4520 irq_ptr->airq_ioapicindex, 4521 irq_ptr->airq_intin_no, 4522 apic_max_usecs_clear_pending); 4523 } 4524 } 4525 /* Clear the reprogramming state: */ 4526 lock_set(&apic_ioapic_reprogram_lock); 4527 4528 apic_reprogram_info[which_irq].valid = 0; 4529 apic_reprogram_info[which_irq].bindcpu = 0; 4530 apic_reprogram_info[which_irq].timeouts = 0; 4531 4532 lock_clear(&apic_ioapic_reprogram_lock); 4533 } else { 4534 #ifdef DEBUG 4535 cmn_err(CE_WARN, "Deferring reprogramming of irq %d", 4536 which_irq); 4537 #endif /* DEBUG */ 4538 /* 4539 * If waiting for the Remote IRR bit (above) didn't 4540 * allow it to clear, defer the reprogramming: 4541 */ 4542 lock_set(&apic_ioapic_reprogram_lock); 4543 4544 apic_reprogram_info[which_irq].valid = 1; 4545 apic_reprogram_info[which_irq].bindcpu = new_bind_cpu; 4546 apic_reprogram_info[which_irq].timeouts++; 4547 4548 lock_clear(&apic_ioapic_reprogram_lock); 4549 4550 *intr_restorep = B_FALSE; 4551 intr_restore(iflag); 4552 4553 /* Fire up a timeout to handle this later */ 4554 (void) timeout(apic_reprogram_timeout_handler, 4555 (void *) 0, 4556 drv_usectohz(APIC_REPROGRAM_TIMEOUT_DELAY)); 4557 4558 /* Inform caller to defer IOAPIC programming: */ 4559 return (1); 4560 } 4561 } 4562 return (0); 4563 } 4564 4565 /* 4566 * Timeout handler that performs the APIC reprogramming 4567 */ 4568 /*ARGSUSED*/ 4569 static void 
4570 apic_reprogram_timeout_handler(void *arg) 4571 { 4572 /*LINTED: set but not used in function*/ 4573 int i, result; 4574 4575 /* Serialize access to this function */ 4576 mutex_enter(&apic_reprogram_timeout_mutex); 4577 4578 /* 4579 * For each entry in the reprogramming state that's valid, 4580 * try the reprogramming again: 4581 */ 4582 for (i = 0; i < APIC_MAX_VECTOR; i++) { 4583 if (apic_reprogram_info[i].valid == 0) 4584 continue; 4585 /* 4586 * Though we can't really do anything about errors 4587 * at this point, keep track of them for reporting. 4588 * Note that it is very possible for apic_setup_io_intr 4589 * to re-register this very timeout if the Remote IRR bit 4590 * has not yet cleared. 4591 */ 4592 result = apic_setup_io_intr_deferred(apic_irq_table[i], i); 4593 4594 #ifdef DEBUG 4595 if (result) 4596 cmn_err(CE_WARN, "apic_reprogram_timeout: " 4597 "apic_setup_io_intr returned nonzero for " 4598 "irq=%d!", i); 4599 #endif /* DEBUG */ 4600 } 4601 4602 mutex_exit(&apic_reprogram_timeout_mutex); 4603 } 4604 4605 4606 /* 4607 * Called to migrate all interrupts at an irq to another cpu. safe 4608 * if true means we are not being called from an interrupt 4609 * context and hence it is safe to do a lock_set. 
If false 4610 * do only a lock_try and return failure ( non 0 ) if we cannot get it 4611 */ 4612 int 4613 apic_rebind_all(apic_irq_t *irq_ptr, int bind_cpu, int safe) 4614 { 4615 apic_irq_t *irqptr = irq_ptr; 4616 int retval = 0; 4617 int iflag; 4618 4619 iflag = intr_clear(); 4620 if (!safe) { 4621 if (lock_try(&apic_ioapic_lock) == 0) { 4622 intr_restore(iflag); 4623 return (1); 4624 } 4625 } else 4626 lock_set(&apic_ioapic_lock); 4627 4628 while (irqptr) { 4629 if (irqptr->airq_temp_cpu != IRQ_UNINIT) 4630 retval |= apic_rebind(irqptr, bind_cpu, 0, IMMEDIATE); 4631 irqptr = irqptr->airq_next; 4632 } 4633 lock_clear(&apic_ioapic_lock); 4634 intr_restore(iflag); 4635 return (retval); 4636 } 4637 4638 /* 4639 * apic_intr_redistribute does all the messy computations for identifying 4640 * which interrupt to move to which CPU. Currently we do just one interrupt 4641 * at a time. This reduces the time we spent doing all this within clock 4642 * interrupt. When it is done in idle, we could do more than 1. 4643 * First we find the most busy and the most free CPU (time in ISR only) 4644 * skipping those CPUs that has been identified as being ineligible (cpu_skip) 4645 * Then we look for IRQs which are closest to the difference between the 4646 * most busy CPU and the average ISR load. We try to find one whose load 4647 * is less than difference.If none exists, then we chose one larger than the 4648 * difference, provided it does not make the most idle CPU worse than the 4649 * most busy one. In the end, we clear all the busy fields for CPUs. For 4650 * IRQs, they are cleared as they are scanned. 
4651 */ 4652 static void 4653 apic_intr_redistribute() 4654 { 4655 int busiest_cpu, most_free_cpu; 4656 int cpu_free, cpu_busy, max_busy, min_busy; 4657 int min_free, diff; 4658 int average_busy, cpus_online; 4659 int i, busy; 4660 apic_cpus_info_t *cpu_infop; 4661 apic_irq_t *min_busy_irq = NULL; 4662 apic_irq_t *max_busy_irq = NULL; 4663 4664 busiest_cpu = most_free_cpu = -1; 4665 cpu_free = cpu_busy = max_busy = average_busy = 0; 4666 min_free = apic_sample_factor_redistribution; 4667 cpus_online = 0; 4668 /* 4669 * Below we will check for CPU_INTR_ENABLE, bound, temp_bound, temp_cpu 4670 * without ioapic_lock. That is OK as we are just doing statistical 4671 * sampling anyway and any inaccuracy now will get corrected next time 4672 * The call to rebind which actually changes things will make sure 4673 * we are consistent. 4674 */ 4675 for (i = 0; i < apic_nproc; i++) { 4676 if (!(apic_redist_cpu_skip & (1 << i)) && 4677 (apic_cpus[i].aci_status & APIC_CPU_INTR_ENABLE)) { 4678 4679 cpu_infop = &apic_cpus[i]; 4680 /* 4681 * If no unbound interrupts or only 1 total on this 4682 * CPU, skip 4683 */ 4684 if (!cpu_infop->aci_temp_bound || 4685 (cpu_infop->aci_bound + cpu_infop->aci_temp_bound) 4686 == 1) { 4687 apic_redist_cpu_skip |= 1 << i; 4688 continue; 4689 } 4690 4691 busy = cpu_infop->aci_busy; 4692 average_busy += busy; 4693 cpus_online++; 4694 if (max_busy < busy) { 4695 max_busy = busy; 4696 busiest_cpu = i; 4697 } 4698 if (min_free > busy) { 4699 min_free = busy; 4700 most_free_cpu = i; 4701 } 4702 if (busy > apic_int_busy_mark) { 4703 cpu_busy |= 1 << i; 4704 } else { 4705 if (busy < apic_int_free_mark) 4706 cpu_free |= 1 << i; 4707 } 4708 } 4709 } 4710 if ((cpu_busy && cpu_free) || 4711 (max_busy >= (min_free + apic_diff_for_redistribution))) { 4712 4713 apic_num_imbalance++; 4714 #ifdef DEBUG 4715 if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) { 4716 prom_printf( 4717 "redistribute busy=%x free=%x max=%x min=%x", 4718 cpu_busy, cpu_free, max_busy, 
min_free); 4719 } 4720 #endif /* DEBUG */ 4721 4722 4723 average_busy /= cpus_online; 4724 4725 diff = max_busy - average_busy; 4726 min_busy = max_busy; /* start with the max possible value */ 4727 max_busy = 0; 4728 min_busy_irq = max_busy_irq = NULL; 4729 i = apic_min_device_irq; 4730 for (; i < apic_max_device_irq; i++) { 4731 apic_irq_t *irq_ptr; 4732 /* Change to linked list per CPU ? */ 4733 if ((irq_ptr = apic_irq_table[i]) == NULL) 4734 continue; 4735 /* Check for irq_busy & decide which one to move */ 4736 /* Also zero them for next round */ 4737 if ((irq_ptr->airq_temp_cpu == busiest_cpu) && 4738 irq_ptr->airq_busy) { 4739 if (irq_ptr->airq_busy < diff) { 4740 /* 4741 * Check for least busy CPU, 4742 * best fit or what ? 4743 */ 4744 if (max_busy < irq_ptr->airq_busy) { 4745 /* 4746 * Most busy within the 4747 * required differential 4748 */ 4749 max_busy = irq_ptr->airq_busy; 4750 max_busy_irq = irq_ptr; 4751 } 4752 } else { 4753 if (min_busy > irq_ptr->airq_busy) { 4754 /* 4755 * least busy, but more than 4756 * the reqd diff 4757 */ 4758 if (min_busy < 4759 (diff + average_busy - 4760 min_free)) { 4761 /* 4762 * Making sure new cpu 4763 * will not end up 4764 * worse 4765 */ 4766 min_busy = 4767 irq_ptr->airq_busy; 4768 4769 min_busy_irq = irq_ptr; 4770 } 4771 } 4772 } 4773 } 4774 irq_ptr->airq_busy = 0; 4775 } 4776 4777 if (max_busy_irq != NULL) { 4778 #ifdef DEBUG 4779 if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) { 4780 prom_printf("rebinding %x to %x", 4781 max_busy_irq->airq_vector, most_free_cpu); 4782 } 4783 #endif /* DEBUG */ 4784 if (apic_rebind_all(max_busy_irq, most_free_cpu, 0) 4785 == 0) 4786 /* Make change permenant */ 4787 max_busy_irq->airq_cpu = (uchar_t)most_free_cpu; 4788 } else if (min_busy_irq != NULL) { 4789 #ifdef DEBUG 4790 if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) { 4791 prom_printf("rebinding %x to %x", 4792 min_busy_irq->airq_vector, most_free_cpu); 4793 } 4794 #endif /* DEBUG */ 4795 4796 if (apic_rebind_all(min_busy_irq, 
most_free_cpu, 0) == 4797 0) 4798 /* Make change permenant */ 4799 min_busy_irq->airq_cpu = (uchar_t)most_free_cpu; 4800 } else { 4801 if (cpu_busy != (1 << busiest_cpu)) { 4802 apic_redist_cpu_skip |= 1 << busiest_cpu; 4803 /* 4804 * We leave cpu_skip set so that next time we 4805 * can choose another cpu 4806 */ 4807 } 4808 } 4809 apic_num_rebind++; 4810 } else { 4811 /* 4812 * found nothing. Could be that we skipped over valid CPUs 4813 * or we have balanced everything. If we had a variable 4814 * ticks_for_redistribution, it could be increased here. 4815 * apic_int_busy, int_free etc would also need to be 4816 * changed. 4817 */ 4818 if (apic_redist_cpu_skip) 4819 apic_redist_cpu_skip = 0; 4820 } 4821 for (i = 0; i < apic_nproc; i++) { 4822 apic_cpus[i].aci_busy = 0; 4823 } 4824 } 4825 4826 static void 4827 apic_cleanup_busy() 4828 { 4829 int i; 4830 apic_irq_t *irq_ptr; 4831 4832 for (i = 0; i < apic_nproc; i++) { 4833 apic_cpus[i].aci_busy = 0; 4834 } 4835 4836 for (i = apic_min_device_irq; i < apic_max_device_irq; i++) { 4837 if ((irq_ptr = apic_irq_table[i]) != NULL) 4838 irq_ptr->airq_busy = 0; 4839 } 4840 apic_skipped_redistribute = 0; 4841 } 4842 4843 4844 /* 4845 * This function will reprogram the timer. 4846 * 4847 * When in oneshot mode the argument is the absolute time in future to 4848 * generate the interrupt at. 4849 * 4850 * When in periodic mode, the argument is the interval at which the 4851 * interrupts should be generated. There is no need to support the periodic 4852 * mode timer change at this time. 4853 */ 4854 static void 4855 apic_timer_reprogram(hrtime_t time) 4856 { 4857 hrtime_t now; 4858 uint_t ticks; 4859 4860 /* 4861 * We should be called from high PIL context (CBE_HIGH_PIL), 4862 * so kpreempt is disabled. 
4863 */ 4864 4865 if (!apic_oneshot) { 4866 /* time is the interval for periodic mode */ 4867 ticks = (uint_t)((time) / apic_nsec_per_tick); 4868 } else { 4869 /* one shot mode */ 4870 4871 now = gethrtime(); 4872 4873 if (time <= now) { 4874 /* 4875 * requested to generate an interrupt in the past 4876 * generate an interrupt as soon as possible 4877 */ 4878 ticks = apic_min_timer_ticks; 4879 } else if ((time - now) > apic_nsec_max) { 4880 /* 4881 * requested to generate an interrupt at a time 4882 * further than what we are capable of. Set to max 4883 * the hardware can handle 4884 */ 4885 4886 ticks = APIC_MAXVAL; 4887 #ifdef DEBUG 4888 cmn_err(CE_CONT, "apic_timer_reprogram, request at" 4889 " %lld too far in future, current time" 4890 " %lld \n", time, now); 4891 #endif /* DEBUG */ 4892 } else 4893 ticks = (uint_t)((time - now) / apic_nsec_per_tick); 4894 } 4895 4896 if (ticks < apic_min_timer_ticks) 4897 ticks = apic_min_timer_ticks; 4898 4899 apicadr[APIC_INIT_COUNT] = ticks; 4900 4901 } 4902 4903 /* 4904 * This function will enable timer interrupts. 4905 */ 4906 static void 4907 apic_timer_enable(void) 4908 { 4909 /* 4910 * We should be Called from high PIL context (CBE_HIGH_PIL), 4911 * so kpreempt is disabled. 4912 */ 4913 4914 if (!apic_oneshot) 4915 apicadr[APIC_LOCAL_TIMER] = 4916 (apic_clkvect + APIC_BASE_VECT) | AV_TIME; 4917 else { 4918 /* one shot */ 4919 apicadr[APIC_LOCAL_TIMER] = (apic_clkvect + APIC_BASE_VECT); 4920 } 4921 } 4922 4923 /* 4924 * This function will disable timer interrupts. 4925 */ 4926 static void 4927 apic_timer_disable(void) 4928 { 4929 /* 4930 * We should be Called from high PIL context (CBE_HIGH_PIL), 4931 * so kpreempt is disabled. 
4932 */ 4933 4934 apicadr[APIC_LOCAL_TIMER] = (apic_clkvect + APIC_BASE_VECT) | AV_MASK; 4935 } 4936 4937 4938 cyclic_id_t apic_cyclic_id; 4939 4940 /* 4941 * If this module needs to be a consumer of cyclic subsystem, they 4942 * can be added here, since at this time kernel cyclic subsystem is initialized 4943 * argument is not currently used, and is reserved for future. 4944 */ 4945 static void 4946 apic_post_cyclic_setup(void *arg) 4947 { 4948 _NOTE(ARGUNUSED(arg)) 4949 cyc_handler_t hdlr; 4950 cyc_time_t when; 4951 4952 /* cpu_lock is held */ 4953 4954 /* set up cyclics for intr redistribution */ 4955 4956 /* 4957 * In peridoc mode intr redistribution processing is done in 4958 * apic_intr_enter during clk intr processing 4959 */ 4960 if (!apic_oneshot) 4961 return; 4962 4963 hdlr.cyh_level = CY_LOW_LEVEL; 4964 hdlr.cyh_func = (cyc_func_t)apic_redistribute_compute; 4965 hdlr.cyh_arg = NULL; 4966 4967 when.cyt_when = 0; 4968 when.cyt_interval = apic_redistribute_sample_interval; 4969 apic_cyclic_id = cyclic_add(&hdlr, &when); 4970 4971 4972 } 4973 4974 static void 4975 apic_redistribute_compute(void) 4976 { 4977 int i, j, max_busy; 4978 4979 if (apic_enable_dynamic_migration) { 4980 if (++apic_nticks == apic_sample_factor_redistribution) { 4981 /* 4982 * Time to call apic_intr_redistribute(). 4983 * reset apic_nticks. 
This will cause max_busy 4984 * to be calculated below and if it is more than 4985 * apic_int_busy, we will do the whole thing 4986 */ 4987 apic_nticks = 0; 4988 } 4989 max_busy = 0; 4990 for (i = 0; i < apic_nproc; i++) { 4991 4992 /* 4993 * Check if curipl is non zero & if ISR is in 4994 * progress 4995 */ 4996 if (((j = apic_cpus[i].aci_curipl) != 0) && 4997 (apic_cpus[i].aci_ISR_in_progress & (1 << j))) { 4998 4999 int irq; 5000 apic_cpus[i].aci_busy++; 5001 irq = apic_cpus[i].aci_current[j]; 5002 apic_irq_table[irq]->airq_busy++; 5003 } 5004 5005 if (!apic_nticks && 5006 (apic_cpus[i].aci_busy > max_busy)) 5007 max_busy = apic_cpus[i].aci_busy; 5008 } 5009 if (!apic_nticks) { 5010 if (max_busy > apic_int_busy_mark) { 5011 /* 5012 * We could make the following check be 5013 * skipped > 1 in which case, we get a 5014 * redistribution at half the busy mark (due to 5015 * double interval). Need to be able to collect 5016 * more empirical data to decide if that is a 5017 * good strategy. Punt for now. 
5018 */ 5019 if (apic_skipped_redistribute) 5020 apic_cleanup_busy(); 5021 else 5022 apic_intr_redistribute(); 5023 } else 5024 apic_skipped_redistribute++; 5025 } 5026 } 5027 } 5028 5029 5030 static int 5031 apic_acpi_translate_pci_irq(dev_info_t *dip, int busid, int devid, 5032 int ipin, int *pci_irqp, iflag_t *intr_flagp) 5033 { 5034 5035 int status; 5036 acpi_psm_lnk_t acpipsmlnk; 5037 5038 if ((status = acpi_get_irq_cache_ent(busid, devid, ipin, pci_irqp, 5039 intr_flagp)) == ACPI_PSM_SUCCESS) { 5040 APIC_VERBOSE_IRQ((CE_CONT, "!pcplusmp: Found irqno %d " 5041 "from cache for device %s, instance #%d\n", *pci_irqp, 5042 ddi_get_name(dip), ddi_get_instance(dip))); 5043 return (status); 5044 } 5045 5046 bzero(&acpipsmlnk, sizeof (acpi_psm_lnk_t)); 5047 5048 if ((status = acpi_translate_pci_irq(dip, ipin, pci_irqp, intr_flagp, 5049 &acpipsmlnk)) == ACPI_PSM_FAILURE) { 5050 APIC_VERBOSE_IRQ((CE_WARN, "pcplusmp: " 5051 " acpi_translate_pci_irq failed for device %s, instance" 5052 " #%d", ddi_get_name(dip), ddi_get_instance(dip))); 5053 return (status); 5054 } 5055 5056 if (status == ACPI_PSM_PARTIAL && acpipsmlnk.lnkobj != NULL) { 5057 status = apic_acpi_irq_configure(&acpipsmlnk, dip, pci_irqp, 5058 intr_flagp); 5059 if (status != ACPI_PSM_SUCCESS) { 5060 status = acpi_get_current_irq_resource(&acpipsmlnk, 5061 pci_irqp, intr_flagp); 5062 } 5063 } 5064 5065 if (status == ACPI_PSM_SUCCESS) { 5066 acpi_new_irq_cache_ent(busid, devid, ipin, *pci_irqp, 5067 intr_flagp, &acpipsmlnk); 5068 5069 APIC_VERBOSE_IRQ((CE_CONT, "pcplusmp: [ACPI] " 5070 "new irq %d for device %s, instance #%d\n", 5071 *pci_irqp, ddi_get_name(dip), ddi_get_instance(dip))); 5072 } 5073 5074 return (status); 5075 } 5076 5077 /* 5078 * Configures the irq for the interrupt link device identified by 5079 * acpipsmlnkp. 5080 * 5081 * Gets the current and the list of possible irq settings for the 5082 * device. 
If apic_unconditional_srs is not set, and the current 5083 * resource setting is in the list of possible irq settings, 5084 * current irq resource setting is passed to the caller. 5085 * 5086 * Otherwise, picks an irq number from the list of possible irq 5087 * settings, and sets the irq of the device to this value. 5088 * If prefer_crs is set, among a set of irq numbers in the list that have 5089 * the least number of devices sharing the interrupt, we pick current irq 5090 * resource setting if it is a member of this set. 5091 * 5092 * Passes the irq number in the value pointed to by pci_irqp, and 5093 * polarity and sensitivity in the structure pointed to by dipintrflagp 5094 * to the caller. 5095 * 5096 * Note that if setting the irq resource failed, but successfuly obtained 5097 * the current irq resource settings, passes the current irq resources 5098 * and considers it a success. 5099 * 5100 * Returns: 5101 * ACPI_PSM_SUCCESS on success. 5102 * 5103 * ACPI_PSM_FAILURE if an error occured during the configuration or 5104 * if a suitable irq was not found for this device, or if setting the 5105 * irq resource and obtaining the current resource fails. 
5106 * 5107 */ 5108 static int 5109 apic_acpi_irq_configure(acpi_psm_lnk_t *acpipsmlnkp, dev_info_t *dip, 5110 int *pci_irqp, iflag_t *dipintr_flagp) 5111 { 5112 5113 int i, min_share, foundnow, done = 0; 5114 int32_t irq; 5115 int32_t share_irq = -1; 5116 int32_t chosen_irq = -1; 5117 int cur_irq = -1; 5118 acpi_irqlist_t *irqlistp; 5119 acpi_irqlist_t *irqlistent; 5120 5121 if ((acpi_get_possible_irq_resources(acpipsmlnkp, &irqlistp)) 5122 == ACPI_PSM_FAILURE) { 5123 APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: Unable to determine " 5124 "or assign IRQ for device %s, instance #%d: The system was " 5125 "unable to get the list of potential IRQs from ACPI.", 5126 ddi_get_name(dip), ddi_get_instance(dip))); 5127 5128 return (ACPI_PSM_FAILURE); 5129 } 5130 5131 if ((acpi_get_current_irq_resource(acpipsmlnkp, &cur_irq, 5132 dipintr_flagp) == ACPI_PSM_SUCCESS) && (!apic_unconditional_srs) && 5133 (cur_irq > 0)) { 5134 /* 5135 * If an IRQ is set in CRS and that IRQ exists in the set 5136 * returned from _PRS, return that IRQ, otherwise print 5137 * a warning 5138 */ 5139 5140 if (acpi_irqlist_find_irq(irqlistp, cur_irq, NULL) 5141 == ACPI_PSM_SUCCESS) { 5142 5143 acpi_free_irqlist(irqlistp); 5144 ASSERT(pci_irqp != NULL); 5145 *pci_irqp = cur_irq; 5146 return (ACPI_PSM_SUCCESS); 5147 } 5148 5149 APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: Could not find the " 5150 "current irq %d for device %s, instance #%d in ACPI's " 5151 "list of possible irqs for this device. 
Picking one from " 5152 " the latter list.", cur_irq, ddi_get_name(dip), 5153 ddi_get_instance(dip))); 5154 } 5155 5156 irqlistent = irqlistp; 5157 min_share = 255; 5158 5159 while (irqlistent != NULL) { 5160 irqlistent->intr_flags.bustype = BUS_PCI; 5161 5162 for (foundnow = 0, i = 0; i < irqlistent->num_irqs; i++) { 5163 5164 irq = irqlistent->irqs[i]; 5165 5166 if ((irq < 16) && (apic_reserved_irqlist[irq])) 5167 continue; 5168 5169 if (irq == 0) { 5170 /* invalid irq number */ 5171 continue; 5172 } 5173 5174 if ((apic_irq_table[irq] == NULL) || 5175 (apic_irq_table[irq]->airq_dip == dip)) { 5176 chosen_irq = irq; 5177 foundnow = 1; 5178 /* 5179 * If we do not prefer current irq from crs 5180 * or if we do and this irq is the same as 5181 * current irq from crs, this is the one 5182 * to pick. 5183 */ 5184 if (!(apic_prefer_crs) || (irq == cur_irq)) { 5185 done = 1; 5186 break; 5187 } 5188 continue; 5189 } 5190 5191 if (irqlistent->intr_flags.intr_el == INTR_EL_EDGE) 5192 continue; 5193 5194 if (!acpi_intr_compatible(irqlistent->intr_flags, 5195 apic_irq_table[irq]->airq_iflag)) 5196 continue; 5197 5198 if ((apic_irq_table[irq]->airq_share < min_share) || 5199 ((apic_irq_table[irq]->airq_share == min_share) && 5200 (cur_irq == irq) && (apic_prefer_crs))) { 5201 min_share = apic_irq_table[irq]->airq_share; 5202 share_irq = irq; 5203 foundnow = 1; 5204 } 5205 } 5206 5207 /* 5208 * If we found an IRQ in the inner loop this time, save the 5209 * details from the irqlist for later use. 5210 */ 5211 if (foundnow && ((chosen_irq != -1) || (share_irq != -1))) { 5212 /* 5213 * Copy the acpi_prs_private_t and flags from this 5214 * irq list entry, since we found an irq from this 5215 * entry. 
5216 */ 5217 acpipsmlnkp->acpi_prs_prv = irqlistent->acpi_prs_prv; 5218 *dipintr_flagp = irqlistent->intr_flags; 5219 } 5220 5221 if (done) 5222 break; 5223 5224 /* Go to the next irqlist entry */ 5225 irqlistent = irqlistent->next; 5226 } 5227 5228 5229 acpi_free_irqlist(irqlistp); 5230 if (chosen_irq != -1) 5231 irq = chosen_irq; 5232 else if (share_irq != -1) 5233 irq = share_irq; 5234 else { 5235 APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: Could not find a " 5236 "suitable irq from the list of possible irqs for device " 5237 "%s, instance #%d in ACPI's list of possible irqs", 5238 ddi_get_name(dip), ddi_get_instance(dip))); 5239 return (ACPI_PSM_FAILURE); 5240 } 5241 5242 APIC_VERBOSE_IRQ((CE_CONT, "!pcplusmp: Setting irq %d for device %s " 5243 "instance #%d\n", irq, ddi_get_name(dip), ddi_get_instance(dip))); 5244 5245 if ((acpi_set_irq_resource(acpipsmlnkp, irq)) == ACPI_PSM_SUCCESS) { 5246 /* 5247 * setting irq was successful, check to make sure CRS 5248 * reflects that. If CRS does not agree with what we 5249 * set, return the irq that was set. 5250 */ 5251 5252 if (acpi_get_current_irq_resource(acpipsmlnkp, &cur_irq, 5253 dipintr_flagp) == ACPI_PSM_SUCCESS) { 5254 5255 if (cur_irq != irq) 5256 APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: " 5257 "IRQ resource set (irqno %d) for device %s " 5258 "instance #%d, differs from current " 5259 "setting irqno %d", 5260 irq, ddi_get_name(dip), 5261 ddi_get_instance(dip), cur_irq)); 5262 } 5263 5264 /* 5265 * return the irq that was set, and not what CRS reports, 5266 * since CRS has been seen to be bogus on some systems 5267 */ 5268 cur_irq = irq; 5269 } else { 5270 APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: set resource irq %d " 5271 "failed for device %s instance #%d", 5272 irq, ddi_get_name(dip), ddi_get_instance(dip))); 5273 5274 if (cur_irq == -1) 5275 return (ACPI_PSM_FAILURE); 5276 } 5277 5278 ASSERT(pci_irqp != NULL); 5279 *pci_irqp = cur_irq; 5280 return (ACPI_PSM_SUCCESS); 5281 } 5282