1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * PSMI 1.1 extensions are supported only in 2.6 and later versions. 31 * PSMI 1.2 extensions are supported only in 2.7 and later versions. 32 * PSMI 1.3 and 1.4 extensions are supported in Solaris 10. 33 * PSMI 1.5 extensions are supported in Solaris Nevada. 
34 */ 35 #define PSMI_1_5 36 37 #include <sys/processor.h> 38 #include <sys/time.h> 39 #include <sys/psm.h> 40 #include <sys/smp_impldefs.h> 41 #include <sys/cram.h> 42 #include <sys/acpi/acpi.h> 43 #include <sys/acpica.h> 44 #include <sys/psm_common.h> 45 #include "apic.h" 46 #include <sys/pit.h> 47 #include <sys/ddi.h> 48 #include <sys/sunddi.h> 49 #include <sys/ddi_impldefs.h> 50 #include <sys/pci.h> 51 #include <sys/promif.h> 52 #include <sys/x86_archext.h> 53 #include <sys/cpc_impl.h> 54 #include <sys/uadmin.h> 55 #include <sys/panic.h> 56 #include <sys/debug.h> 57 #include <sys/archsystm.h> 58 #include <sys/trap.h> 59 #include <sys/machsystm.h> 60 #include <sys/cpuvar.h> 61 #include <sys/rm_platter.h> 62 #include <sys/privregs.h> 63 #include <sys/cyclic.h> 64 #include <sys/note.h> 65 #include <sys/pci_intr_lib.h> 66 67 /* 68 * Local Function Prototypes 69 */ 70 static void apic_init_intr(); 71 static void apic_ret(); 72 static int apic_handle_defconf(); 73 static int apic_parse_mpct(caddr_t mpct, int bypass); 74 static struct apic_mpfps_hdr *apic_find_fps_sig(caddr_t fptr, int size); 75 static int apic_checksum(caddr_t bptr, int len); 76 static int get_apic_cmd1(); 77 static int get_apic_pri(); 78 static int apic_find_bus_type(char *bus); 79 static int apic_find_bus(int busid); 80 static int apic_find_bus_id(int bustype); 81 static struct apic_io_intr *apic_find_io_intr(int irqno); 82 int apic_allocate_irq(int irq); 83 static int apic_find_free_irq(int start, int end); 84 static uchar_t apic_allocate_vector(int ipl, int irq, int pri); 85 static void apic_modify_vector(uchar_t vector, int irq); 86 static void apic_mark_vector(uchar_t oldvector, uchar_t newvector); 87 static uchar_t apic_xlate_vector(uchar_t oldvector); 88 static void apic_xlate_vector_free_timeout_handler(void *arg); 89 static void apic_free_vector(uchar_t vector); 90 static void apic_reprogram_timeout_handler(void *arg); 91 static int apic_check_stuck_interrupt(apic_irq_t *irq_ptr, int 
old_bind_cpu, 92 int new_bind_cpu, volatile int32_t *ioapic, int intin_no, int which_irq); 93 static int apic_setup_io_intr(apic_irq_t *irqptr, int irq); 94 static int apic_setup_io_intr_deferred(apic_irq_t *irqptr, int irq); 95 static void apic_record_rdt_entry(apic_irq_t *irqptr, int irq); 96 static struct apic_io_intr *apic_find_io_intr_w_busid(int irqno, int busid); 97 static int apic_find_intin(uchar_t ioapic, uchar_t intin); 98 static int apic_handle_pci_pci_bridge(dev_info_t *idip, int child_devno, 99 int child_ipin, struct apic_io_intr **intrp); 100 static int apic_setup_irq_table(dev_info_t *dip, int irqno, 101 struct apic_io_intr *intrp, struct intrspec *ispec, iflag_t *intr_flagp, 102 int type); 103 static int apic_setup_sci_irq_table(int irqno, uchar_t ipl, 104 iflag_t *intr_flagp); 105 static void apic_nmi_intr(caddr_t arg); 106 uchar_t apic_bind_intr(dev_info_t *dip, int irq, uchar_t ioapicid, 107 uchar_t intin); 108 static int apic_rebind(apic_irq_t *irq_ptr, int bind_cpu, int acquire_lock, 109 int when); 110 static int apic_rebind_all(apic_irq_t *irq_ptr, int bind_cpu, int safe); 111 static void apic_intr_redistribute(); 112 static void apic_cleanup_busy(); 113 static void apic_set_pwroff_method_from_mpcnfhdr(struct apic_mp_cnf_hdr *hdrp); 114 int apic_introp_xlate(dev_info_t *dip, struct intrspec *ispec, int type); 115 116 /* ACPI support routines */ 117 static int acpi_probe(void); 118 static int apic_acpi_irq_configure(acpi_psm_lnk_t *acpipsmlnkp, dev_info_t *dip, 119 int *pci_irqp, iflag_t *intr_flagp); 120 121 static int apic_acpi_translate_pci_irq(dev_info_t *dip, int busid, int devid, 122 int ipin, int *pci_irqp, iflag_t *intr_flagp); 123 static uchar_t acpi_find_ioapic(int irq); 124 static int acpi_intr_compatible(iflag_t iflag1, iflag_t iflag2); 125 126 /* 127 * standard MP entries 128 */ 129 static int apic_probe(); 130 static int apic_clkinit(); 131 static int apic_getclkirq(int ipl); 132 static uint_t apic_calibrate(volatile uint32_t 
*addr,
    uint16_t *pit_ticks_adj);
static hrtime_t apic_gettime();
static hrtime_t apic_gethrtime();
static void apic_init();
static void apic_picinit(void);
static void apic_cpu_start(processorid_t cpun, caddr_t rm_code);
static int apic_post_cpu_start(void);
static void apic_send_ipi(int cpun, int ipl);
static void apic_set_softintr(int softintr);
static void apic_set_idlecpu(processorid_t cpun);
static void apic_unset_idlecpu(processorid_t cpun);
static int apic_softlvl_to_irq(int ipl);
static int apic_intr_enter(int ipl, int *vect);
static void apic_intr_exit(int ipl, int vect);
static void apic_setspl(int ipl);
static int apic_addspl(int ipl, int vector, int min_ipl, int max_ipl);
static int apic_delspl(int ipl, int vector, int min_ipl, int max_ipl);
static void apic_shutdown(int cmd, int fcn);
static void apic_preshutdown(int cmd, int fcn);
static int apic_disable_intr(processorid_t cpun);
static void apic_enable_intr(processorid_t cpun);
static processorid_t apic_get_next_processorid(processorid_t cpun);
static int apic_get_ipivect(int ipl, int type);
static void apic_timer_reprogram(hrtime_t time);
static void apic_timer_enable(void);
static void apic_timer_disable(void);
static void apic_post_cyclic_setup(void *arg);
/* Not static: declared extern here and used as the final apic_ops entry */
extern int apic_intr_ops(dev_info_t *, ddi_intr_handle_impl_t *,
    psm_intr_op_t, int *);

/*
 * NOTE(review): presumably nonzero while the timer runs in one-shot mode;
 * nothing in this part of the file sets it - confirm against apic_clkinit().
 */
static int apic_oneshot = 0;
int apic_oneshot_enable = 1; /* to allow disabling one-shot capability */

/*
 * These variables are frequently accessed in apic_intr_enter(),
 * apic_intr_exit and apic_setspl, so group them together
 */
volatile uint32_t *apicadr = NULL;	/* virtual addr of local APIC */
int apic_setspl_delay = 1;		/* apic_setspl - delay enable */
int apic_clkvect;

/* ACPI SCI interrupt configuration; -1 if SCI not used */
int apic_sci_vect = -1;
iflag_t apic_sci_flags;

/* vector at which error interrupts come in */
int apic_errvect;
int apic_enable_error_intr = 1;
int apic_error_display_delay = 100;

/* vector at which performance counter overflow interrupts come in */
int apic_cpcovf_vect;
int apic_enable_cpcovf_intr = 1;

/* Max wait time (in microsecs) for flags to clear in an RDT entry. */
static int apic_max_usecs_clear_pending = 1000;

/* Amt of usecs to wait before checking if RDT flags have reset. */
#define	APIC_USECS_PER_WAIT_INTERVAL 100

/* Maximum number of times to retry reprogramming via the timeout */
#define	APIC_REPROGRAM_MAX_TIMEOUTS 10

/* timeout delay for IOAPIC delayed reprogramming */
#define	APIC_REPROGRAM_TIMEOUT_DELAY 5 /* microseconds */

/* Parameter to apic_rebind(): Should reprogramming be done now or later? */
#define	DEFERRED 1
#define	IMMEDIATE 0

/*
 * All-ones value of NBBY bits, i.e. the largest value an unsigned char
 * can hold. NBBY is the number of bits per byte, from <sys/param.h>.
 */
#define	UCHAR_MAX	((1 << NBBY) - 1)

uchar_t	apic_reserved_irqlist[MAX_ISA_IRQ];

/*
 * The following vector assignments influence the value of ipltopri and
 * vectortoipl. Note that vectors 0 - 0x1f are not used. We can program
 * idle to 0 and IPL 0 to 0x10 to differentiate idle in case
 * we care to do so in future. Note some IPLs which are rarely used
 * will share the vector ranges and heavily used IPLs (5 and 6) have
 * a wide range.
 *	IPL		Vector range.		as passed to intr_enter
 *	0		none.
 *	1,2,3		0x20-0x2f		0x0-0xf
 *	4		0x30-0x3f		0x10-0x1f
 *	5		0x40-0x5f		0x20-0x3f
 *	6		0x60-0x7f		0x40-0x5f
 *	7,8,9		0x80-0x8f		0x60-0x6f
 *	10		0x90-0x9f		0x70-0x7f
 *	11		0xa0-0xaf		0x80-0x8f
 *	...		...
*	16		0xf0-0xff		0xd0-0xdf
 */
uchar_t apic_vectortoipl[APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL] = {
	3, 4, 5, 5, 6, 6, 9, 10, 11, 12, 13, 14, 15, 16
};
/*
 * The ipl of an ISR at vector X is apic_vectortoipl[X>>4]
 * (the table holds one entry per 16-vector range listed above).
 * NOTE that this is vector as passed into intr_enter which is
 * programmed vector - 0x20 (APIC_BASE_VECT)
 */

uchar_t	apic_ipltopri[MAXIPL + 1];	/* unix ipl to apic pri	*/
	/* The taskpri to be programmed into apic to mask given ipl */

#if defined(__amd64)
uchar_t	apic_cr8pri[MAXIPL + 1];	/* unix ipl to cr8 pri	*/
#endif

/*
 * Patchable global variables.
 */
/* A negative value makes apic_probe() return failure immediately */
int	apic_forceload = 0;

#define	INTR_ROUND_ROBIN_WITH_AFFINITY	0
#define	INTR_ROUND_ROBIN		1
#define	INTR_LOWEST_PRIORITY		2

int	apic_intr_policy = INTR_ROUND_ROBIN_WITH_AFFINITY;

static	int	apic_next_bind_cpu = 2; /* For round robin assignment */
				/* start with cpu 1 */

int	apic_coarse_hrtime = 1;		/* 0 - use accurate slow gethrtime() */
					/* 1 - use gettime() for performance */
int	apic_flat_model = 0;		/* 0 - clustered.
1 - flat */
int	apic_enable_hwsoftint = 0;	/* 0 - disable, 1 - enable	*/
int	apic_enable_bind_log = 1;	/* 1 - display interrupt binding log */
int	apic_panic_on_nmi = 0;
int	apic_panic_on_apic_error = 0;

/* Bit mask of the APIC_VERBOSE_*_FLAG values below; 0 = quiet */
int	apic_verbose = 0;

/* Flag definitions for apic_verbose */
#define	APIC_VERBOSE_IOAPIC_FLAG		0x00000001
#define	APIC_VERBOSE_IRQ_FLAG			0x00000002
#define	APIC_VERBOSE_POWEROFF_FLAG		0x00000004
#define	APIC_VERBOSE_POWEROFF_PAUSE_FLAG	0x00000008


/*
 * Conditional logging helpers. "fmt" is pasted directly after the function
 * name, so it must be a complete parenthesized argument list, e.g.
 * APIC_VERBOSE_IRQ((CE_CONT, "...")).
 * Each macro expands to an unbraced "if" statement that already ends in ';'.
 * Do not use one as the sole body of an if/else without adding braces.
 */
#define	APIC_VERBOSE_IOAPIC(fmt) \
	if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) \
		cmn_err fmt;

#define	APIC_VERBOSE_IRQ(fmt) \
	if (apic_verbose & APIC_VERBOSE_IRQ_FLAG) \
		cmn_err fmt;

#define	APIC_VERBOSE_POWEROFF(fmt) \
	if (apic_verbose & APIC_VERBOSE_POWEROFF_FLAG) \
		prom_printf fmt;


/* Now the ones for Dynamic Interrupt distribution */
int	apic_enable_dynamic_migration = 1;

/*
 * If enabled, the distribution works as follows:
 * On every interrupt entry, the current ipl for the CPU is set in cpu_info
 * and the irq corresponding to the ipl is also set in the aci_current array.
 * interrupt exit and setspl (due to soft interrupts) will cause the current
 * ipl to be changed. This is cache friendly as these frequently used
 * paths write into a per cpu structure.
 *
 * Sampling is done by checking the structures for all CPUs and incrementing
 * the busy field of the irq (if any) executing on each CPU and the busy field
 * of the corresponding CPU.
 * In periodic mode this is done on every clock interrupt.
 * In one-shot mode, this is done thru a cyclic with an interval of
 * apic_redistribute_sample_interval (default 10 milli sec).
 *
 * Every apic_sample_factor_redistribution times we sample, we do computations
 * to decide which interrupt needs to be migrated (see comments
 * before apic_intr_redistribute()).
*/

/*
 * Following 3 variables start as % and can be patched or set using an
 * API to be defined in future. They will be scaled to
 * sample_factor_redistribution which is in turn set to hertz+1 (in periodic
 * mode), or 101 in one-shot mode to stagger it away from one sec processing
 */

int	apic_int_busy_mark = 60;
int	apic_int_free_mark = 20;
int	apic_diff_for_redistribution = 10;

/* sampling interval for interrupt redistribution for dynamic migration */
int	apic_redistribute_sample_interval = NANOSEC / 100; /* 10 millisec */

/*
 * number of times we sample before deciding to redistribute interrupts
 * for dynamic migration
 */
int	apic_sample_factor_redistribution = 101;

/* timeout for xlate_vector, mark_vector */
int	apic_revector_timeout = 16 * 10000; /* 160 millisec */

int	apic_redist_cpu_skip = 0;
int	apic_num_imbalance = 0;
int	apic_num_rebind = 0;

/*
 * CPU count. acpi_probe() starts it at 1 (the CPU running the probe) and
 * increments it for each further enabled processor entry in the MADT.
 */
int	apic_nproc = 0;
/*
 * mpfps_featinfo1 from the MP floating pointer structure; when nonzero
 * apic_probe() hands off to apic_handle_defconf().
 */
int	apic_defconf = 0;
int	apic_irq_translate = 0;
/* mpfps_spec_rev from the floating pointer; apic_probe() accepts 1 and 4 */
int	apic_spec_rev = 0;
/* MPFPS_FEATINFO2_IMCRP bit of mpfps_featinfo2 (IMCR present) */
int	apic_imcrp = 0;

int	apic_use_acpi = 1;	/* 1 = use ACPI, 0 = don't use ACPI */
/*
 * Note: apic_probe() overwrites this from the "acpi-user-options" property
 * (ACPI_OUSER_MADT bit), and clears it when apic_use_acpi is 0.
 */
int	apic_use_acpi_madt_only = 0;	/* 1=ONLY use MADT from ACPI */

/*
 * For interrupt link devices, if apic_unconditional_srs is set, an irq resource
 * will be assigned (via _SRS). If it is not set, use the current
 * irq setting (via _CRS), but only if that irq is in the set of possible
 * irqs (returned by _PRS) for the device.
 */
int	apic_unconditional_srs = 1;

/*
 * For interrupt link devices, if apic_prefer_crs is set when we are
 * assigning an IRQ resource to a device, prefer the current IRQ setting
 * over other possible irq settings under same conditions.
*/

int	apic_prefer_crs = 1;


/* minimum number of timer ticks to program to */
int	apic_min_timer_ticks = 1;
/*
 *	Local static data
 */
/*
 * PSM operations vector. The initializer is positional, so the entries must
 * stay in the member order of struct psm_ops; unimplemented hooks are typed
 * NULLs. _init() replaces the psm_gethrtime slot (apic_gethrtime) with
 * apic_gettime when apic_coarse_hrtime is set.
 */
static struct	psm_ops apic_ops = {
	apic_probe,

	apic_init,
	apic_picinit,
	apic_intr_enter,
	apic_intr_exit,
	apic_setspl,
	apic_addspl,
	apic_delspl,
	apic_disable_intr,
	apic_enable_intr,
	apic_softlvl_to_irq,
	apic_set_softintr,

	apic_set_idlecpu,
	apic_unset_idlecpu,

	apic_clkinit,
	apic_getclkirq,
	(void (*)(void))NULL,		/* psm_hrtimeinit */
	apic_gethrtime,

	apic_get_next_processorid,
	apic_cpu_start,
	apic_post_cpu_start,
	apic_shutdown,
	apic_get_ipivect,
	apic_send_ipi,

	(int (*)(dev_info_t *, int))NULL,	/* psm_translate_irq */
	(int (*)(todinfo_t *))NULL,	/* psm_tod_get */
	(int (*)(todinfo_t *))NULL,	/* psm_tod_set */
	(void (*)(int, char *))NULL,	/* psm_notify_error */
	(void (*)(int))NULL,		/* psm_notify_func */
	apic_timer_reprogram,
	apic_timer_enable,
	apic_timer_disable,
	apic_post_cyclic_setup,
	apic_preshutdown,
	apic_intr_ops			/* Advanced DDI Interrupt framework */
};


/* Module description handed to psm_mod_init/psm_mod_fini/psm_mod_info */
static struct	psm_info apic_psm_info = {
	PSM_INFO_VER01_5,			/* version */
	PSM_OWN_EXCLUSIVE,			/* ownership */
	(struct psm_ops *)&apic_ops,		/* operation */
	"pcplusmp",				/* machine name */
	"pcplusmp v1.4 compatible %I%",
};

/* Handle shared by the psm_mod_* calls in _init(), _fini() and _info() */
static void *apic_hdlp;

#ifdef DEBUG
#define	DENT		0x0001
int	apic_debug = 0;
/*
 * set apic_restrict_vector to the # of vectors we want to allow per range
 * useful in testing shared interrupt logic by setting it to 2 or 3
 */
int	apic_restrict_vector = 0;

#define	APIC_DEBUG_MSGBUFSIZE	2048
int	apic_debug_msgbuf[APIC_DEBUG_MSGBUFSIZE];
int	apic_debug_msgbufindex = 0;

/*
 * Put "int" info into debug buffer. No MP consistency, but light weight.
* Good enough for most debugging.
 *
 * The index wraps to 0 once it reaches APIC_DEBUG_MSGBUFSIZE - NCPU, leaving
 * NCPU slots of slack at the end of the buffer.
 * The macro expands to two statements and is not wrapped in do/while, so it
 * must not be used as the unbraced body of an if/else/loop.
 */
#define	APIC_DEBUG_BUF_PUT(x) \
	apic_debug_msgbuf[apic_debug_msgbufindex++] = x; \
	if (apic_debug_msgbufindex >= (APIC_DEBUG_MSGBUFSIZE - NCPU)) \
		apic_debug_msgbufindex = 0;

#endif /* DEBUG */

/* Per-CPU info array; apic_probe() frees apic_nproc entries on failure */
apic_cpus_info_t	*apic_cpus;

static uint_t	apic_cpumask = 0;
static uint_t	apic_flag;

/* Flag to indicate that we need to shut down all processors */
static uint_t	apic_shutdown_processors;

uint_t	apic_nsec_per_intr = 0;

/*
 * apic_let_idle_redistribute can have the following values:
 * 0 - If clock decremented it from 1 to 0, clock has to call redistribute.
 * apic_redistribute_lock prevents multiple idle cpus from redistributing
 *
 * NOTE(review): only the 0 case is described above; the meaning of the
 * other values is not documented here - confirm against the users.
 */
int	apic_num_idle_redistributions = 0;
static	int apic_let_idle_redistribute = 0;
static	uint_t apic_nticks = 0;
static	uint_t apic_skipped_redistribute = 0;

/* to gather intr data and redistribute */
static void apic_redistribute_compute(void);

static	uint_t last_count_read = 0;
static	lock_t	apic_gethrtime_lock;
volatile int	apic_hrtime_stamp = 0;
volatile hrtime_t apic_nsec_since_boot = 0;
static uint_t apic_hertz_count, apic_nsec_per_tick;
static hrtime_t apic_nsec_max;

static	hrtime_t	apic_last_hrtime = 0;
int		apic_hrtime_error = 0;
int		apic_remote_hrterr = 0;
int		apic_num_nmis = 0;
int		apic_apic_error = 0;
int		apic_num_apic_errors = 0;
int		apic_num_cksum_errors = 0;

/*
 * Per-I/O-APIC tables, indexed by slot 0 .. apic_io_max - 1. acpi_probe()
 * fills apic_io_id, apic_io_vectbase and apicioadr from the MADT entries.
 */
static	uchar_t	apic_io_id[MAX_IO_APIC];
static	uchar_t	apic_io_ver[MAX_IO_APIC];
static	uchar_t	apic_io_vectbase[MAX_IO_APIC];
static	uchar_t	apic_io_vectend[MAX_IO_APIC];
volatile int32_t *apicioadr[MAX_IO_APIC];
/*
 * apic_ioapic_lock protects the ioapics (reg select), the status, temp_bound
 * and bound elements of cpus_info and the temp_cpu element of irq_struct
 */
lock_t	apic_ioapic_lock;

/*
 * 
apic_ioapic_reprogram_lock prevents a CPU from exiting
 * apic_intr_exit before IOAPIC reprogramming information
 * is collected.
 */
static	lock_t	apic_ioapic_reprogram_lock;
static	int	apic_io_max = 0;	/* no. of i/o apics enabled */

static	struct apic_io_intr *apic_io_intrp = 0;
static	struct apic_bus	*apic_busp;

uchar_t	apic_vector_to_irq[APIC_MAX_VECTOR+1];
static	uchar_t	apic_resv_vector[MAXIPL+1];

static	char	apic_level_intr[APIC_MAX_VECTOR+1];
static	int	apic_error = 0;
/*
 * values which apic_error can take. Not catastrophic, but may help debug
 * Each value is a distinct bit, so several can be recorded at once.
 * NOTE(review): APIC_ERR_NMI (0x80000000) does not fit in a positive
 * signed int; apic_error is declared int - confirm this is intended.
 */
#define	APIC_ERR_BOOT_EOI		0x1
#define	APIC_ERR_GET_IPIVECT_FAIL	0x2
#define	APIC_ERR_INVALID_INDEX		0x4
#define	APIC_ERR_MARK_VECTOR_FAIL	0x8
#define	APIC_ERR_APIC_ERROR		0x40000000
#define	APIC_ERR_NMI			0x80000000

static	int	apic_cmos_ssb_set = 0;

static	uint32_t	eisa_level_intr_mask = 0;
	/* At least MSB will be set if EISA bus */

static	int	apic_pci_bus_total = 0;
static	uchar_t	apic_single_pci_busid = 0;


/*
 * airq_mutex protects additions to the apic_irq_table - the first
 * pointer and any airq_nexts off of that one. It also protects
 * apic_max_device_irq & apic_min_device_irq. It also guarantees
 * that share_id is unique as new ids are generated only when new
 * irq_t structs are linked in. Once linked in the structs are never
 * deleted. temp_cpu & mps_intr_index field indicate if it is programmed
 * or allocated. Note that there is a slight gap between allocating in
 * apic_introp_xlate and programming in addspl.
*/
kmutex_t	airq_mutex;
apic_irq_t	*apic_irq_table[APIC_MAX_VECTOR+1];
/*
 * Initialized to the opposite extremes (max = 0, min = APIC_MAX_VECTOR).
 * NOTE(review): presumably so the first device irq recorded sets both
 * bounds - confirm against the code that updates them.
 */
int		apic_max_device_irq = 0;
int		apic_min_device_irq = APIC_MAX_VECTOR;

/* use to make sure only one cpu handles the nmi */
static	lock_t	apic_nmi_lock;
/* use to make sure only one cpu handles the error interrupt */
static	lock_t	apic_error_lock;

/*
 * Following declarations are for revectoring; used when ISRs at different
 * IPLs share an irq.
 */
static	lock_t	apic_revector_lock;
static	int	apic_revector_pending = 0;
static	uchar_t	*apic_oldvec_to_newvec;
static	uchar_t	*apic_newvec_to_oldvec;

/* Ensures that the IOAPIC-reprogramming timeout is not reentrant */
static	kmutex_t	apic_reprogram_timeout_mutex;

static	struct	ioapic_reprogram_data {
	int		valid;	 /* This entry is valid */
	int		bindcpu; /* The CPU to which the int will be bound */
	unsigned	timeouts; /* # times the reprogram timeout was called */
} apic_reprogram_info[APIC_MAX_VECTOR+1];
/*
 * APIC_MAX_VECTOR + 1 is the maximum # of IRQs as well. apic_reprogram_info
 * is indexed by IRQ number, NOT by vector number.
 */


/*
 * The following added to identify a software poweroff method if available.
 *
 * apic_set_pwroff_method_from_mpcnfhdr() compares each oem_id/prod_id as a
 * prefix of the MP configuration header's OEM and product strings and takes
 * the poweroff_method of the first entry that matches, so entry order
 * matters when one ID is a prefix of another.
 */

static struct {
	int	poweroff_method;
	char	oem_id[APIC_MPS_OEM_ID_LEN + 1];	/* MAX + 1 for NULL */
	char	prod_id[APIC_MPS_PROD_ID_LEN + 1];	/* MAX + 1 for NULL */
} apic_mps_ids[] = {
	{ APIC_POWEROFF_VIA_RTC,	"INTEL", "ALDER" },   /* 4300 */
	{ APIC_POWEROFF_VIA_RTC,	"NCR",	"AMC" },    /* 4300 */
	{ APIC_POWEROFF_VIA_ASPEN_BMC,	"INTEL", "A450NX" },  /* 4400? */
	{ APIC_POWEROFF_VIA_ASPEN_BMC,	"INTEL", "AD450NX" }, /* 4400 */
	{ APIC_POWEROFF_VIA_ASPEN_BMC,	"INTEL", "AC450NX" }, /* 4400R */
	{ APIC_POWEROFF_VIA_SITKA_BMC,	"INTEL", "S450NX" },  /* S50  */
	{ APIC_POWEROFF_VIA_SITKA_BMC,	"INTEL", "SC450NX" }  /* S50? 
*/
};

/*
 * Selected power-off mechanism. Stays APIC_POWEROFF_NONE unless
 * apic_set_pwroff_method_from_mpcnfhdr() finds a matching apic_mps_ids entry.
 */
int	apic_poweroff_method = APIC_POWEROFF_NONE;

/*
 * (control code, data byte) pairs for the "Aspen" BMC. Per the inline
 * comments they encode a SET_WATCHDOG_TIMER request followed by a
 * RESET_WATCHDOG_TIMER request.
 * NOTE(review): presumably replayed by the shutdown path when
 * apic_poweroff_method is APIC_POWEROFF_VIA_ASPEN_BMC - confirm there.
 */
static struct {
	uchar_t	cntl;
	uchar_t	data;
} aspen_bmc[] = {
	{ CC_SMS_WR_START,	0x18 },		/* NetFn/LUN */
	{ CC_SMS_WR_NEXT,	0x24 },		/* Cmd SET_WATCHDOG_TIMER */
	{ CC_SMS_WR_NEXT,	0x84 },		/* DataByte 1: SMS/OS no log */
	{ CC_SMS_WR_NEXT,	0x2 },		/* DataByte 2: Power Down */
	{ CC_SMS_WR_NEXT,	0x0 },		/* DataByte 3: no pre-timeout */
	{ CC_SMS_WR_NEXT,	0x0 },		/* DataByte 4: timer expir. */
	{ CC_SMS_WR_NEXT,	0xa },		/* DataByte 5: init countdown */
	{ CC_SMS_WR_END,	0x0 },		/* DataByte 6: init countdown */

	{ CC_SMS_WR_START,	0x18 },		/* NetFn/LUN */
	{ CC_SMS_WR_END,	0x22 }		/* Cmd RESET_WATCHDOG_TIMER */
};

/*
 * (I/O port, byte) pairs for the "Sitka" BMC carrying the same two watchdog
 * requests; here the start/end markers are separate writes to the command
 * register rather than a per-byte control code.
 */
static struct {
	int	port;
	uchar_t	data;
} sitka_bmc[] = {
	{ SMS_COMMAND_REGISTER,	SMS_WRITE_START },
	{ SMS_DATA_REGISTER,	0x18 },		/* NetFn/LUN */
	{ SMS_DATA_REGISTER,	0x24 },		/* Cmd SET_WATCHDOG_TIMER */
	{ SMS_DATA_REGISTER,	0x84 },		/* DataByte 1: SMS/OS no log */
	{ SMS_DATA_REGISTER,	0x2 },		/* DataByte 2: Power Down */
	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 3: no pre-timeout */
	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 4: timer expir. */
	{ SMS_DATA_REGISTER,	0xa },		/* DataByte 5: init countdown */
	{ SMS_COMMAND_REGISTER,	SMS_WRITE_END },
	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 6: init countdown */

	{ SMS_COMMAND_REGISTER,	SMS_WRITE_START },
	{ SMS_DATA_REGISTER,	0x18 },		/* NetFn/LUN */
	{ SMS_COMMAND_REGISTER,	SMS_WRITE_END },
	{ SMS_DATA_REGISTER,	0x22 }		/* Cmd RESET_WATCHDOG_TIMER */
};


/* Patchable global variables.
*/
int	apic_kmdb_on_nmi = 0;		/* 0 - no, 1 - yes enter kmdb */
int	apic_debug_mps_id = 0;		/* 1 - print MPS ID strings */

/*
 * ACPI definitions
 */
/* _PIC method arguments */
#define	ACPI_PIC_MODE	0
#define	ACPI_APIC_MODE	1

/* APIC error flags we care about */
#define	APIC_SEND_CS_ERROR	0x01
#define	APIC_RECV_CS_ERROR	0x02
#define	APIC_CS_ERRORS		(APIC_SEND_CS_ERROR|APIC_RECV_CS_ERROR)

/*
 * ACPI variables
 */
/* 1 = acpi is enabled & working, 0 = acpi is not enabled or not there */
static	int apic_enable_acpi = 0;

/* ACPI Multiple APIC Description Table ptr */
static	MULTIPLE_APIC_TABLE *acpi_mapic_dtp = NULL;

/*
 * ACPI Interrupt Source Override Structure ptr
 * acpi_probe() records the first override entry seen and counts them all.
 */
static	MADT_INTERRUPT_OVERRIDE *acpi_isop = NULL;
static	int acpi_iso_cnt = 0;

/*
 * ACPI Non-maskable Interrupt Sources ptr
 * As above: first entry of each kind plus a running count.
 */
static	MADT_NMI_SOURCE *acpi_nmi_sp = NULL;
static	int acpi_nmi_scnt = 0;
static	MADT_LOCAL_APIC_NMI *acpi_nmi_cp = NULL;
static	int acpi_nmi_ccnt = 0;

/*
 * extern declarations
 */
extern	int	intr_clear(void);
extern	void	intr_restore(uint_t);
#if defined(__amd64)
extern	int	intpri_use_cr8;
#endif	/* __amd64 */

extern int	apic_pci_msi_enable_vector(dev_info_t *, int, int,
		    int, int, int);
extern apic_irq_t *apic_find_irq(dev_info_t *, struct intrspec *, int);

/*
 *	This is the loadable module wrapper
 */

/*
 * Module load entry point. When apic_coarse_hrtime is set, the psm_gethrtime
 * slot of apic_ops is switched to apic_gettime before the PSM is registered
 * with psm_mod_init(), whose result is returned.
 */
int
_init(void)
{
	if (apic_coarse_hrtime)
		apic_ops.psm_gethrtime = &apic_gettime;
	return (psm_mod_init(&apic_hdlp, &apic_psm_info));
}

/* Module unload entry point; returns the result of psm_mod_fini(). */
int
_fini(void)
{
	return (psm_mod_fini(&apic_hdlp, &apic_psm_info));
}

/* Module info entry point; returns the result of psm_mod_info(). */
int
_info(struct modinfo *modinfop)
{
	return (psm_mod_info(&apic_hdlp, &apic_psm_info, modinfop));
}

/*
 * Auto-configuration routines
 */

/*
 * Look at MPSpec 1.4 (Intel Order # 242016-005)
for details of what we do here 710 * May work with 1.1 - but not guaranteed. 711 * According to the MP Spec, the MP floating pointer structure 712 * will be searched in the order described below: 713 * 1. In the first kilobyte of Extended BIOS Data Area (EBDA) 714 * 2. Within the last kilobyte of system base memory 715 * 3. In the BIOS ROM address space between 0F0000h and 0FFFFh 716 * Once we find the right signature with proper checksum, we call 717 * either handle_defconf or parse_mpct to get all info necessary for 718 * subsequent operations. 719 */ 720 static int 721 apic_probe() 722 { 723 uint32_t mpct_addr, ebda_start = 0, base_mem_end; 724 caddr_t biosdatap; 725 caddr_t mpct; 726 caddr_t fptr; 727 int i, mpct_size, mapsize, retval = PSM_FAILURE; 728 ushort_t ebda_seg, base_mem_size; 729 struct apic_mpfps_hdr *fpsp; 730 struct apic_mp_cnf_hdr *hdrp; 731 int bypass_cpu_and_ioapics_in_mptables; 732 int acpi_user_options; 733 734 if (apic_forceload < 0) 735 return (retval); 736 737 /* Allow override for MADT-only mode */ 738 acpi_user_options = ddi_prop_get_int(DDI_DEV_T_ANY, ddi_root_node(), 0, 739 "acpi-user-options", 0); 740 apic_use_acpi_madt_only = ((acpi_user_options & ACPI_OUSER_MADT) != 0); 741 742 /* Allow apic_use_acpi to override MADT-only mode */ 743 if (!apic_use_acpi) 744 apic_use_acpi_madt_only = 0; 745 746 retval = acpi_probe(); 747 748 /* 749 * mapin the bios data area 40:0 750 * 40:13h - two-byte location reports the base memory size 751 * 40:0Eh - two-byte location for the exact starting address of 752 * the EBDA segment for EISA 753 */ 754 biosdatap = psm_map_phys(0x400, 0x20, PROT_READ); 755 if (!biosdatap) 756 return (retval); 757 fpsp = (struct apic_mpfps_hdr *)NULL; 758 mapsize = MPFPS_RAM_WIN_LEN; 759 /*LINTED: pointer cast may result in improper alignment */ 760 ebda_seg = *((ushort_t *)(biosdatap+0xe)); 761 /* check the 1k of EBDA */ 762 if (ebda_seg) { 763 ebda_start = ((uint32_t)ebda_seg) << 4; 764 fptr = psm_map_phys(ebda_start, 
MPFPS_RAM_WIN_LEN, PROT_READ); 765 if (fptr) { 766 if (!(fpsp = 767 apic_find_fps_sig(fptr, MPFPS_RAM_WIN_LEN))) 768 psm_unmap_phys(fptr, MPFPS_RAM_WIN_LEN); 769 } 770 } 771 /* If not in EBDA, check the last k of system base memory */ 772 if (!fpsp) { 773 /*LINTED: pointer cast may result in improper alignment */ 774 base_mem_size = *((ushort_t *)(biosdatap + 0x13)); 775 776 if (base_mem_size > 512) 777 base_mem_end = 639 * 1024; 778 else 779 base_mem_end = 511 * 1024; 780 /* if ebda == last k of base mem, skip to check BIOS ROM */ 781 if (base_mem_end != ebda_start) { 782 783 fptr = psm_map_phys(base_mem_end, MPFPS_RAM_WIN_LEN, 784 PROT_READ); 785 786 if (fptr) { 787 if (!(fpsp = apic_find_fps_sig(fptr, 788 MPFPS_RAM_WIN_LEN))) 789 psm_unmap_phys(fptr, MPFPS_RAM_WIN_LEN); 790 } 791 } 792 } 793 psm_unmap_phys(biosdatap, 0x20); 794 795 /* If still cannot find it, check the BIOS ROM space */ 796 if (!fpsp) { 797 mapsize = MPFPS_ROM_WIN_LEN; 798 fptr = psm_map_phys(MPFPS_ROM_WIN_START, 799 MPFPS_ROM_WIN_LEN, PROT_READ); 800 if (fptr) { 801 if (!(fpsp = 802 apic_find_fps_sig(fptr, MPFPS_ROM_WIN_LEN))) { 803 psm_unmap_phys(fptr, MPFPS_ROM_WIN_LEN); 804 return (retval); 805 } 806 } 807 } 808 809 if (apic_checksum((caddr_t)fpsp, fpsp->mpfps_length * 16) != 0) { 810 psm_unmap_phys(fptr, MPFPS_ROM_WIN_LEN); 811 return (retval); 812 } 813 814 apic_spec_rev = fpsp->mpfps_spec_rev; 815 if ((apic_spec_rev != 04) && (apic_spec_rev != 01)) { 816 psm_unmap_phys(fptr, MPFPS_ROM_WIN_LEN); 817 return (retval); 818 } 819 820 /* check IMCR is present or not */ 821 apic_imcrp = fpsp->mpfps_featinfo2 & MPFPS_FEATINFO2_IMCRP; 822 823 /* check default configuration (dual CPUs) */ 824 if ((apic_defconf = fpsp->mpfps_featinfo1) != 0) { 825 psm_unmap_phys(fptr, mapsize); 826 return (apic_handle_defconf()); 827 } 828 829 /* MP Configuration Table */ 830 mpct_addr = (uint32_t)(fpsp->mpfps_mpct_paddr); 831 832 psm_unmap_phys(fptr, mapsize); /* unmap floating ptr struct */ 833 834 /* 835 * Map in 
enough memory for the MP Configuration Table Header. 836 * Use this table to read the total length of the BIOS data and 837 * map in all the info 838 */ 839 /*LINTED: pointer cast may result in improper alignment */ 840 hdrp = (struct apic_mp_cnf_hdr *)psm_map_phys(mpct_addr, 841 sizeof (struct apic_mp_cnf_hdr), PROT_READ); 842 if (!hdrp) 843 return (retval); 844 845 /* check mp configuration table signature PCMP */ 846 if (hdrp->mpcnf_sig != 0x504d4350) { 847 psm_unmap_phys((caddr_t)hdrp, sizeof (struct apic_mp_cnf_hdr)); 848 return (retval); 849 } 850 mpct_size = (int)hdrp->mpcnf_tbl_length; 851 852 apic_set_pwroff_method_from_mpcnfhdr(hdrp); 853 854 psm_unmap_phys((caddr_t)hdrp, sizeof (struct apic_mp_cnf_hdr)); 855 856 if ((retval == PSM_SUCCESS) && !apic_use_acpi_madt_only) { 857 /* This is an ACPI machine No need for further checks */ 858 return (retval); 859 } 860 861 /* 862 * Map in the entries for this machine, ie. Processor 863 * Entry Tables, Bus Entry Tables, etc. 864 * They are in fixed order following one another 865 */ 866 mpct = psm_map_phys(mpct_addr, mpct_size, PROT_READ); 867 if (!mpct) 868 return (retval); 869 870 if (apic_checksum(mpct, mpct_size) != 0) 871 goto apic_fail1; 872 873 874 /*LINTED: pointer cast may result in improper alignment */ 875 hdrp = (struct apic_mp_cnf_hdr *)mpct; 876 /*LINTED: pointer cast may result in improper alignment */ 877 apicadr = (uint32_t *)psm_map_phys((uint32_t)hdrp->mpcnf_local_apic, 878 APIC_LOCAL_MEMLEN, PROT_READ | PROT_WRITE); 879 if (!apicadr) 880 goto apic_fail1; 881 882 /* Parse all information in the tables */ 883 bypass_cpu_and_ioapics_in_mptables = (retval == PSM_SUCCESS); 884 if (apic_parse_mpct(mpct, bypass_cpu_and_ioapics_in_mptables) == 885 PSM_SUCCESS) 886 return (PSM_SUCCESS); 887 888 for (i = 0; i < apic_io_max; i++) 889 psm_unmap_phys((caddr_t)apicioadr[i], APIC_IO_MEMLEN); 890 if (apic_cpus) 891 kmem_free(apic_cpus, sizeof (*apic_cpus) * apic_nproc); 892 if (apicadr) 893 
psm_unmap_phys((caddr_t)apicadr, APIC_LOCAL_MEMLEN); 894 apic_fail1: 895 psm_unmap_phys(mpct, mpct_size); 896 return (retval); 897 } 898 899 static void 900 apic_set_pwroff_method_from_mpcnfhdr(struct apic_mp_cnf_hdr *hdrp) 901 { 902 int i; 903 904 for (i = 0; i < (sizeof (apic_mps_ids) / sizeof (apic_mps_ids[0])); 905 i++) { 906 if ((strncmp(hdrp->mpcnf_oem_str, apic_mps_ids[i].oem_id, 907 strlen(apic_mps_ids[i].oem_id)) == 0) && 908 (strncmp(hdrp->mpcnf_prod_str, apic_mps_ids[i].prod_id, 909 strlen(apic_mps_ids[i].prod_id)) == 0)) { 910 911 apic_poweroff_method = apic_mps_ids[i].poweroff_method; 912 break; 913 } 914 } 915 916 if (apic_debug_mps_id != 0) { 917 cmn_err(CE_CONT, "pcplusmp: MPS OEM ID = '%c%c%c%c%c%c%c%c'" 918 "Product ID = '%c%c%c%c%c%c%c%c%c%c%c%c'\n", 919 hdrp->mpcnf_oem_str[0], 920 hdrp->mpcnf_oem_str[1], 921 hdrp->mpcnf_oem_str[2], 922 hdrp->mpcnf_oem_str[3], 923 hdrp->mpcnf_oem_str[4], 924 hdrp->mpcnf_oem_str[5], 925 hdrp->mpcnf_oem_str[6], 926 hdrp->mpcnf_oem_str[7], 927 hdrp->mpcnf_prod_str[0], 928 hdrp->mpcnf_prod_str[1], 929 hdrp->mpcnf_prod_str[2], 930 hdrp->mpcnf_prod_str[3], 931 hdrp->mpcnf_prod_str[4], 932 hdrp->mpcnf_prod_str[5], 933 hdrp->mpcnf_prod_str[6], 934 hdrp->mpcnf_prod_str[7], 935 hdrp->mpcnf_prod_str[8], 936 hdrp->mpcnf_prod_str[9], 937 hdrp->mpcnf_prod_str[10], 938 hdrp->mpcnf_prod_str[11]); 939 } 940 } 941 942 static int 943 acpi_probe(void) 944 { 945 int i, id, intmax, ver, index, rv; 946 int acpi_verboseflags = 0; 947 int madt_seen, madt_size; 948 APIC_HEADER *ap; 949 MADT_PROCESSOR_APIC *mpa; 950 MADT_IO_APIC *mia; 951 MADT_IO_SAPIC *misa; 952 MADT_INTERRUPT_OVERRIDE *mio; 953 MADT_NMI_SOURCE *mns; 954 MADT_INTERRUPT_SOURCE *mis; 955 MADT_LOCAL_APIC_NMI *mlan; 956 MADT_ADDRESS_OVERRIDE *mao; 957 ACPI_OBJECT_LIST arglist; 958 ACPI_OBJECT arg; 959 int sci; 960 iflag_t sci_flags; 961 volatile int32_t *ioapic; 962 char local_ids[NCPU]; 963 char proc_ids[NCPU]; 964 uchar_t hid; 965 966 if (!apic_use_acpi) 967 return 
(PSM_FAILURE); 968 969 if (AcpiGetFirmwareTable(APIC_SIG, 1, ACPI_LOGICAL_ADDRESSING, 970 (ACPI_TABLE_HEADER **) &acpi_mapic_dtp) != AE_OK) 971 return (PSM_FAILURE); 972 973 apicadr = (uint32_t *)psm_map_phys( 974 (uint32_t)acpi_mapic_dtp->LocalApicAddress, 975 APIC_LOCAL_MEMLEN, PROT_READ | PROT_WRITE); 976 if (!apicadr) 977 return (PSM_FAILURE); 978 979 id = apicadr[APIC_LID_REG]; 980 local_ids[0] = (uchar_t)(((uint_t)id) >> 24); 981 apic_nproc = index = 1; 982 apic_io_max = 0; 983 984 ap = (APIC_HEADER *) (acpi_mapic_dtp + 1); 985 madt_size = acpi_mapic_dtp->Length; 986 madt_seen = sizeof (*acpi_mapic_dtp); 987 988 while (madt_seen < madt_size) { 989 switch (ap->Type) { 990 case APIC_PROCESSOR: 991 mpa = (MADT_PROCESSOR_APIC *) ap; 992 if (mpa->ProcessorEnabled) { 993 if (mpa->LocalApicId == local_ids[0]) 994 proc_ids[0] = mpa->ProcessorId; 995 else if (apic_nproc < NCPU) { 996 local_ids[index] = mpa->LocalApicId; 997 proc_ids[index] = mpa->ProcessorId; 998 index++; 999 apic_nproc++; 1000 } else 1001 cmn_err(CE_WARN, "pcplusmp: exceeded " 1002 "maximum no. 
of CPUs (= %d)", NCPU); 1003 } 1004 break; 1005 1006 case APIC_IO: 1007 mia = (MADT_IO_APIC *) ap; 1008 if (apic_io_max < MAX_IO_APIC) { 1009 apic_io_id[apic_io_max] = mia->IoApicId; 1010 apic_io_vectbase[apic_io_max] = 1011 mia->Interrupt; 1012 ioapic = apicioadr[apic_io_max] = 1013 (int32_t *)psm_map_phys( 1014 (uint32_t)mia->Address, 1015 APIC_IO_MEMLEN, PROT_READ | PROT_WRITE); 1016 if (!ioapic) 1017 goto cleanup; 1018 apic_io_max++; 1019 } 1020 break; 1021 1022 case APIC_XRUPT_OVERRIDE: 1023 mio = (MADT_INTERRUPT_OVERRIDE *) ap; 1024 if (acpi_isop == NULL) 1025 acpi_isop = mio; 1026 acpi_iso_cnt++; 1027 break; 1028 1029 case APIC_NMI: 1030 /* UNIMPLEMENTED */ 1031 mns = (MADT_NMI_SOURCE *) ap; 1032 if (acpi_nmi_sp == NULL) 1033 acpi_nmi_sp = mns; 1034 acpi_nmi_scnt++; 1035 1036 cmn_err(CE_NOTE, "!apic: nmi source: %d %d %d\n", 1037 mns->Interrupt, mns->Polarity, 1038 mns->TriggerMode); 1039 break; 1040 1041 case APIC_LOCAL_NMI: 1042 /* UNIMPLEMENTED */ 1043 mlan = (MADT_LOCAL_APIC_NMI *) ap; 1044 if (acpi_nmi_cp == NULL) 1045 acpi_nmi_cp = mlan; 1046 acpi_nmi_ccnt++; 1047 1048 cmn_err(CE_NOTE, "!apic: local nmi: %d %d %d %d\n", 1049 mlan->ProcessorId, mlan->Polarity, 1050 mlan->TriggerMode, mlan->Lint); 1051 break; 1052 1053 case APIC_ADDRESS_OVERRIDE: 1054 /* UNIMPLEMENTED */ 1055 mao = (MADT_ADDRESS_OVERRIDE *) ap; 1056 cmn_err(CE_NOTE, "!apic: address override: %lx\n", 1057 (long)mao->Address); 1058 break; 1059 1060 case APIC_IO_SAPIC: 1061 /* UNIMPLEMENTED */ 1062 misa = (MADT_IO_SAPIC *) ap; 1063 1064 cmn_err(CE_NOTE, "!apic: io sapic: %d %d %lx\n", 1065 misa->IoSapicId, misa->InterruptBase, 1066 (long)misa->Address); 1067 break; 1068 1069 case APIC_XRUPT_SOURCE: 1070 /* UNIMPLEMENTED */ 1071 mis = (MADT_INTERRUPT_SOURCE *) ap; 1072 1073 cmn_err(CE_NOTE, 1074 "!apic: irq source: %d %d %d %d %d %d %d\n", 1075 mis->ProcessorId, mis->ProcessorEid, 1076 mis->Interrupt, mis->Polarity, 1077 mis->TriggerMode, mis->InterruptType, 1078 mis->IoSapicVector); 1079 
break; 1080 case APIC_RESERVED: 1081 default: 1082 goto cleanup; 1083 } 1084 1085 /* advance to next entry */ 1086 madt_seen += ap->Length; 1087 ap = (APIC_HEADER *)(((char *)ap) + ap->Length); 1088 } 1089 1090 if ((apic_cpus = kmem_zalloc(sizeof (*apic_cpus) * apic_nproc, 1091 KM_NOSLEEP)) == NULL) 1092 goto cleanup; 1093 1094 apic_cpumask = (1 << apic_nproc) - 1; 1095 1096 /* 1097 * ACPI doesn't provide the local apic ver, get it directly from the 1098 * local apic 1099 */ 1100 ver = apicadr[APIC_VERS_REG]; 1101 for (i = 0; i < apic_nproc; i++) { 1102 apic_cpus[i].aci_local_id = local_ids[i]; 1103 apic_cpus[i].aci_local_ver = (uchar_t)(ver & 0xFF); 1104 } 1105 for (i = 0; i < apic_io_max; i++) { 1106 ioapic = apicioadr[i]; 1107 1108 /* 1109 * need to check Sitka on the following acpi problem 1110 * On the Sitka, the ioapic's apic_id field isn't reporting 1111 * the actual io apic id. We have reported this problem 1112 * to Intel. Until they fix the problem, we will get the 1113 * actual id directly from the ioapic. 
1114 */ 1115 ioapic[APIC_IO_REG] = APIC_ID_CMD; 1116 id = ioapic[APIC_IO_DATA]; 1117 hid = (uchar_t)(((uint_t)id) >> 24); 1118 1119 if (hid != apic_io_id[i]) { 1120 if (apic_io_id[i] == 0) 1121 apic_io_id[i] = hid; 1122 else { /* set ioapic id to whatever reported by ACPI */ 1123 id = ((int32_t)apic_io_id[i]) << 24; 1124 ioapic[APIC_IO_REG] = APIC_ID_CMD; 1125 ioapic[APIC_IO_DATA] = id; 1126 } 1127 } 1128 ioapic[APIC_IO_REG] = APIC_VERS_CMD; 1129 ver = ioapic[APIC_IO_DATA]; 1130 apic_io_ver[i] = (uchar_t)(ver & 0xff); 1131 intmax = (ver >> 16) & 0xff; 1132 apic_io_vectend[i] = apic_io_vectbase[i] + intmax; 1133 } 1134 1135 1136 /* 1137 * Process SCI configuration here 1138 * An error may be returned here if 1139 * acpi-user-options specifies legacy mode 1140 * (no SCI, no ACPI mode) 1141 */ 1142 if (acpica_get_sci(&sci, &sci_flags) != AE_OK) 1143 sci = -1; 1144 1145 /* 1146 * Now call acpi_init() to generate namespaces 1147 * If this fails, we don't attempt to use ACPI 1148 * even if we were able to get a MADT above 1149 */ 1150 if (acpica_init() != AE_OK) 1151 goto cleanup; 1152 1153 /* 1154 * Squirrel away the SCI and flags for later on 1155 * in apic_picinit() when we're ready 1156 */ 1157 apic_sci_vect = sci; 1158 apic_sci_flags = sci_flags; 1159 1160 if (apic_verbose & APIC_VERBOSE_IRQ_FLAG) 1161 acpi_verboseflags |= PSM_VERBOSE_IRQ_FLAG; 1162 1163 if (apic_verbose & APIC_VERBOSE_POWEROFF_FLAG) 1164 acpi_verboseflags |= PSM_VERBOSE_POWEROFF_FLAG; 1165 1166 if (apic_verbose & APIC_VERBOSE_POWEROFF_PAUSE_FLAG) 1167 acpi_verboseflags |= PSM_VERBOSE_POWEROFF_PAUSE_FLAG; 1168 1169 if (acpi_psm_init(apic_psm_info.p_mach_idstring, acpi_verboseflags) == 1170 ACPI_PSM_FAILURE) 1171 goto cleanup; 1172 1173 /* Enable ACPI APIC interrupt routing */ 1174 arglist.Count = 1; 1175 arglist.Pointer = &arg; 1176 arg.Type = ACPI_TYPE_INTEGER; 1177 arg.Integer.Value = ACPI_APIC_MODE; /* 1 */ 1178 rv = AcpiEvaluateObject(NULL, "\\_PIC", &arglist, NULL); 1179 if (rv == AE_OK) { 1180 
build_reserved_irqlist((uchar_t *)apic_reserved_irqlist); 1181 apic_enable_acpi = 1; 1182 if (apic_use_acpi_madt_only) { 1183 cmn_err(CE_CONT, 1184 "?Using ACPI for CPU/IOAPIC information ONLY\n"); 1185 } 1186 return (PSM_SUCCESS); 1187 } 1188 /* if setting APIC mode failed above, we fall through to cleanup */ 1189 1190 cleanup: 1191 if (apicadr != NULL) { 1192 psm_unmap_phys((caddr_t)apicadr, APIC_LOCAL_MEMLEN); 1193 apicadr = NULL; 1194 } 1195 apic_nproc = 0; 1196 for (i = 0; i < apic_io_max; i++) { 1197 psm_unmap_phys((caddr_t)apicioadr[i], APIC_IO_MEMLEN); 1198 apicioadr[i] = NULL; 1199 } 1200 apic_io_max = 0; 1201 acpi_isop = NULL; 1202 acpi_iso_cnt = 0; 1203 acpi_nmi_sp = NULL; 1204 acpi_nmi_scnt = 0; 1205 acpi_nmi_cp = NULL; 1206 acpi_nmi_ccnt = 0; 1207 return (PSM_FAILURE); 1208 } 1209 1210 /* 1211 * Handle default configuration. Fill in reqd global variables & tables 1212 * Fill all details as MP table does not give any more info 1213 */ 1214 static int 1215 apic_handle_defconf() 1216 { 1217 uint_t lid; 1218 1219 /*LINTED: pointer cast may result in improper alignment */ 1220 apicioadr[0] = (int32_t *)psm_map_phys(APIC_IO_ADDR, 1221 APIC_IO_MEMLEN, PROT_READ | PROT_WRITE); 1222 /*LINTED: pointer cast may result in improper alignment */ 1223 apicadr = (uint32_t *)psm_map_phys(APIC_LOCAL_ADDR, 1224 APIC_LOCAL_MEMLEN, PROT_READ | PROT_WRITE); 1225 apic_cpus = (apic_cpus_info_t *) 1226 kmem_zalloc(sizeof (*apic_cpus) * 2, KM_NOSLEEP); 1227 if ((!apicadr) || (!apicioadr[0]) || (!apic_cpus)) 1228 goto apic_handle_defconf_fail; 1229 apic_cpumask = 3; 1230 apic_nproc = 2; 1231 lid = apicadr[APIC_LID_REG]; 1232 apic_cpus[0].aci_local_id = (uchar_t)(lid >> APIC_ID_BIT_OFFSET); 1233 /* 1234 * According to the PC+MP spec 1.1, the local ids 1235 * for the default configuration has to be 0 or 1 1236 */ 1237 if (apic_cpus[0].aci_local_id == 1) 1238 apic_cpus[1].aci_local_id = 0; 1239 else if (apic_cpus[0].aci_local_id == 0) 1240 apic_cpus[1].aci_local_id = 1; 1241 else 
1242 goto apic_handle_defconf_fail; 1243 1244 apic_io_id[0] = 2; 1245 apic_io_max = 1; 1246 if (apic_defconf >= 5) { 1247 apic_cpus[0].aci_local_ver = APIC_INTEGRATED_VERS; 1248 apic_cpus[1].aci_local_ver = APIC_INTEGRATED_VERS; 1249 apic_io_ver[0] = APIC_INTEGRATED_VERS; 1250 } else { 1251 apic_cpus[0].aci_local_ver = 0; /* 82489 DX */ 1252 apic_cpus[1].aci_local_ver = 0; 1253 apic_io_ver[0] = 0; 1254 } 1255 if (apic_defconf == 2 || apic_defconf == 3 || apic_defconf == 6) 1256 eisa_level_intr_mask = (inb(EISA_LEVEL_CNTL + 1) << 8) | 1257 inb(EISA_LEVEL_CNTL) | ((uint_t)INT32_MAX + 1); 1258 return (PSM_SUCCESS); 1259 1260 apic_handle_defconf_fail: 1261 if (apic_cpus) 1262 kmem_free(apic_cpus, sizeof (*apic_cpus) * 2); 1263 if (apicadr) 1264 psm_unmap_phys((caddr_t)apicadr, APIC_LOCAL_MEMLEN); 1265 if (apicioadr[0]) 1266 psm_unmap_phys((caddr_t)apicioadr[0], APIC_IO_MEMLEN); 1267 return (PSM_FAILURE); 1268 } 1269 1270 /* Parse the entries in MP configuration table and collect info that we need */ 1271 static int 1272 apic_parse_mpct(caddr_t mpct, int bypass_cpus_and_ioapics) 1273 { 1274 struct apic_procent *procp; 1275 struct apic_bus *busp; 1276 struct apic_io_entry *ioapicp; 1277 struct apic_io_intr *intrp; 1278 volatile int32_t *ioapic; 1279 uint_t lid; 1280 int id; 1281 uchar_t hid; 1282 1283 /*LINTED: pointer cast may result in improper alignment */ 1284 procp = (struct apic_procent *)(mpct + sizeof (struct apic_mp_cnf_hdr)); 1285 1286 /* No need to count cpu entries if we won't use them */ 1287 if (!bypass_cpus_and_ioapics) { 1288 1289 /* Find max # of CPUS and allocate structure accordingly */ 1290 apic_nproc = 0; 1291 while (procp->proc_entry == APIC_CPU_ENTRY) { 1292 if (procp->proc_cpuflags & CPUFLAGS_EN) { 1293 apic_nproc++; 1294 } 1295 procp++; 1296 } 1297 if (apic_nproc > NCPU) 1298 cmn_err(CE_WARN, "pcplusmp: exceeded " 1299 "maximum no. 
of CPUs (= %d)", NCPU); 1300 if (!apic_nproc || !(apic_cpus = (apic_cpus_info_t *) 1301 kmem_zalloc(sizeof (*apic_cpus)*apic_nproc, KM_NOSLEEP))) 1302 return (PSM_FAILURE); 1303 } 1304 1305 /*LINTED: pointer cast may result in improper alignment */ 1306 procp = (struct apic_procent *)(mpct + sizeof (struct apic_mp_cnf_hdr)); 1307 1308 /* 1309 * start with index 1 as 0 needs to be filled in with Boot CPU, but 1310 * if we're bypassing this information, it has already been filled 1311 * in by acpi_probe(), so don't overwrite it. 1312 */ 1313 if (!bypass_cpus_and_ioapics) 1314 apic_nproc = 1; 1315 1316 while (procp->proc_entry == APIC_CPU_ENTRY) { 1317 /* check whether the cpu exists or not */ 1318 if (!bypass_cpus_and_ioapics && 1319 procp->proc_cpuflags & CPUFLAGS_EN) { 1320 if (procp->proc_cpuflags & CPUFLAGS_BP) { /* Boot CPU */ 1321 lid = apicadr[APIC_LID_REG]; 1322 apic_cpus[0].aci_local_id = procp->proc_apicid; 1323 if (apic_cpus[0].aci_local_id != 1324 (uchar_t)(lid >> APIC_ID_BIT_OFFSET)) { 1325 return (PSM_FAILURE); 1326 } 1327 apic_cpus[0].aci_local_ver = 1328 procp->proc_version; 1329 } else { 1330 1331 apic_cpus[apic_nproc].aci_local_id = 1332 procp->proc_apicid; 1333 apic_cpus[apic_nproc].aci_local_ver = 1334 procp->proc_version; 1335 apic_nproc++; 1336 1337 } 1338 } 1339 procp++; 1340 } 1341 1342 if (!bypass_cpus_and_ioapics) { 1343 /* convert the number of processors into a cpumask */ 1344 apic_cpumask = (1 << apic_nproc) - 1; 1345 } 1346 1347 /* 1348 * Save start of bus entries for later use. 1349 * Get EISA level cntrl if EISA bus is present. 
1350 * Also get the CPI bus id for single CPI bus case 1351 */ 1352 apic_busp = busp = (struct apic_bus *)procp; 1353 while (busp->bus_entry == APIC_BUS_ENTRY) { 1354 lid = apic_find_bus_type((char *)&busp->bus_str1); 1355 if (lid == BUS_EISA) { 1356 eisa_level_intr_mask = (inb(EISA_LEVEL_CNTL + 1) << 8) | 1357 inb(EISA_LEVEL_CNTL) | ((uint_t)INT32_MAX + 1); 1358 } else if (lid == BUS_PCI) { 1359 /* 1360 * apic_single_pci_busid will be used only if 1361 * apic_pic_bus_total is equal to 1 1362 */ 1363 apic_pci_bus_total++; 1364 apic_single_pci_busid = busp->bus_id; 1365 } 1366 busp++; 1367 } 1368 1369 ioapicp = (struct apic_io_entry *)busp; 1370 1371 if (!bypass_cpus_and_ioapics) 1372 apic_io_max = 0; 1373 do { 1374 if (!bypass_cpus_and_ioapics && apic_io_max < MAX_IO_APIC) { 1375 if (ioapicp->io_flags & IOAPIC_FLAGS_EN) { 1376 apic_io_id[apic_io_max] = ioapicp->io_apicid; 1377 apic_io_ver[apic_io_max] = ioapicp->io_version; 1378 /*LINTED: pointer cast may result in improper alignment */ 1379 apicioadr[apic_io_max] = 1380 (int32_t *)psm_map_phys( 1381 (uint32_t)ioapicp->io_apic_addr, 1382 APIC_IO_MEMLEN, PROT_READ | PROT_WRITE); 1383 1384 if (!apicioadr[apic_io_max]) 1385 return (PSM_FAILURE); 1386 1387 ioapic = apicioadr[apic_io_max]; 1388 ioapic[APIC_IO_REG] = APIC_ID_CMD; 1389 id = ioapic[APIC_IO_DATA]; 1390 hid = (uchar_t)(((uint_t)id) >> 24); 1391 1392 if (hid != apic_io_id[apic_io_max]) { 1393 if (apic_io_id[apic_io_max] == 0) 1394 apic_io_id[apic_io_max] = hid; 1395 else { 1396 /* 1397 * set ioapic id to whatever 1398 * reported by MPS 1399 * 1400 * may not need to set index 1401 * again ??? 
1402 * take it out and try 1403 */ 1404 1405 id = ((int32_t) 1406 apic_io_id[apic_io_max]) << 1407 24; 1408 1409 ioapic[APIC_IO_REG] = 1410 APIC_ID_CMD; 1411 1412 ioapic[APIC_IO_DATA] = id; 1413 1414 } 1415 } 1416 apic_io_max++; 1417 } 1418 } 1419 ioapicp++; 1420 } while (ioapicp->io_entry == APIC_IO_ENTRY); 1421 1422 apic_io_intrp = (struct apic_io_intr *)ioapicp; 1423 1424 intrp = apic_io_intrp; 1425 while (intrp->intr_entry == APIC_IO_INTR_ENTRY) { 1426 if ((intrp->intr_irq > APIC_MAX_ISA_IRQ) || 1427 (apic_find_bus(intrp->intr_busid) == BUS_PCI)) { 1428 apic_irq_translate = 1; 1429 break; 1430 } 1431 intrp++; 1432 } 1433 1434 return (PSM_SUCCESS); 1435 } 1436 1437 static struct apic_mpfps_hdr * 1438 apic_find_fps_sig(caddr_t cptr, int len) 1439 { 1440 int i; 1441 1442 /* Look for the pattern "_MP_" */ 1443 for (i = 0; i < len; i += 16) { 1444 if ((*(cptr+i) == '_') && 1445 (*(cptr+i+1) == 'M') && 1446 (*(cptr+i+2) == 'P') && 1447 (*(cptr+i+3) == '_')) 1448 /*LINTED: pointer cast may result in improper alignment */ 1449 return ((struct apic_mpfps_hdr *)(cptr + i)); 1450 } 1451 return (NULL); 1452 } 1453 1454 static int 1455 apic_checksum(caddr_t bptr, int len) 1456 { 1457 int i; 1458 uchar_t cksum; 1459 1460 cksum = 0; 1461 for (i = 0; i < len; i++) 1462 cksum += *bptr++; 1463 return ((int)cksum); 1464 } 1465 1466 1467 /* 1468 * Initialise vector->ipl and ipl->pri arrays. level_intr and irqtable 1469 * are also set to NULL. vector->irq is set to a value which cannot map 1470 * to a real irq to show that it is free. 
1471 */ 1472 void 1473 apic_init() 1474 { 1475 int i; 1476 int *iptr; 1477 1478 int j = 1; 1479 apic_ipltopri[0] = APIC_VECTOR_PER_IPL; /* leave 0 for idle */ 1480 for (i = 0; i < (APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL); i++) { 1481 if ((i < ((APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL) - 1)) && 1482 (apic_vectortoipl[i + 1] == apic_vectortoipl[i])) 1483 /* get to highest vector at the same ipl */ 1484 continue; 1485 for (; j <= apic_vectortoipl[i]; j++) { 1486 apic_ipltopri[j] = (i << APIC_IPL_SHIFT) + 1487 APIC_BASE_VECT; 1488 } 1489 } 1490 for (; j < MAXIPL + 1; j++) 1491 /* fill up any empty ipltopri slots */ 1492 apic_ipltopri[j] = (i << APIC_IPL_SHIFT) + APIC_BASE_VECT; 1493 1494 /* cpu 0 is always up */ 1495 apic_cpus[0].aci_status = APIC_CPU_ONLINE | APIC_CPU_INTR_ENABLE; 1496 1497 iptr = (int *)&apic_irq_table[0]; 1498 for (i = 0; i <= APIC_MAX_VECTOR; i++) { 1499 apic_level_intr[i] = 0; 1500 *iptr++ = NULL; 1501 apic_vector_to_irq[i] = APIC_RESV_IRQ; 1502 apic_reprogram_info[i].valid = 0; 1503 apic_reprogram_info[i].bindcpu = 0; 1504 apic_reprogram_info[i].timeouts = 0; 1505 } 1506 1507 /* 1508 * Allocate a dummy irq table entry for the reserved entry. 1509 * This takes care of the race between removing an irq and 1510 * clock detecting a CPU in that irq during interrupt load 1511 * sampling. 1512 */ 1513 apic_irq_table[APIC_RESV_IRQ] = 1514 kmem_zalloc(sizeof (apic_irq_t), KM_NOSLEEP); 1515 1516 mutex_init(&airq_mutex, NULL, MUTEX_DEFAULT, NULL); 1517 mutex_init(&apic_reprogram_timeout_mutex, NULL, MUTEX_DEFAULT, NULL); 1518 #if defined(__amd64) 1519 /* 1520 * Make cpu-specific interrupt info point to cr8pri vector 1521 */ 1522 for (i = 0; i <= MAXIPL; i++) 1523 apic_cr8pri[i] = apic_ipltopri[i] >> APIC_IPL_SHIFT; 1524 CPU->cpu_pri_data = apic_cr8pri; 1525 intpri_use_cr8 = 1; 1526 #endif /* __amd64 */ 1527 } 1528 1529 /* 1530 * handler for APIC Error interrupt. 
Just print a warning and continue 1531 */ 1532 static int 1533 apic_error_intr() 1534 { 1535 uint_t error0, error1, error; 1536 uint_t i; 1537 1538 /* 1539 * We need to write before read as per 7.4.17 of system prog manual. 1540 * We do both and or the results to be safe 1541 */ 1542 error0 = apicadr[APIC_ERROR_STATUS]; 1543 apicadr[APIC_ERROR_STATUS] = 0; 1544 error1 = apicadr[APIC_ERROR_STATUS]; 1545 error = error0 | error1; 1546 1547 /* 1548 * Prevent more than 1 CPU from handling error interrupt causing 1549 * double printing (interleave of characters from multiple 1550 * CPU's when using prom_printf) 1551 */ 1552 if (lock_try(&apic_error_lock) == 0) 1553 return (error ? DDI_INTR_CLAIMED : DDI_INTR_UNCLAIMED); 1554 if (error) { 1555 #if DEBUG 1556 if (apic_debug) 1557 debug_enter("pcplusmp: APIC Error interrupt received"); 1558 #endif /* DEBUG */ 1559 if (apic_panic_on_apic_error) 1560 cmn_err(CE_PANIC, 1561 "APIC Error interrupt on CPU %d. Status = %x\n", 1562 psm_get_cpu_id(), error); 1563 else { 1564 if ((error & ~APIC_CS_ERRORS) == 0) { 1565 /* cksum error only */ 1566 apic_error |= APIC_ERR_APIC_ERROR; 1567 apic_apic_error |= error; 1568 apic_num_apic_errors++; 1569 apic_num_cksum_errors++; 1570 } else { 1571 /* 1572 * prom_printf is the best shot we have of 1573 * something which is problem free from 1574 * high level/NMI type of interrupts 1575 */ 1576 prom_printf("APIC Error interrupt on CPU %d. 
" 1577 "Status 0 = %x, Status 1 = %x\n", 1578 psm_get_cpu_id(), error0, error1); 1579 apic_error |= APIC_ERR_APIC_ERROR; 1580 apic_apic_error |= error; 1581 apic_num_apic_errors++; 1582 for (i = 0; i < apic_error_display_delay; i++) { 1583 tenmicrosec(); 1584 } 1585 /* 1586 * provide more delay next time limited to 1587 * roughly 1 clock tick time 1588 */ 1589 if (apic_error_display_delay < 500) 1590 apic_error_display_delay *= 2; 1591 } 1592 } 1593 lock_clear(&apic_error_lock); 1594 return (DDI_INTR_CLAIMED); 1595 } else { 1596 lock_clear(&apic_error_lock); 1597 return (DDI_INTR_UNCLAIMED); 1598 } 1599 /* NOTREACHED */ 1600 } 1601 1602 /* 1603 * Turn off the mask bit in the performance counter Local Vector Table entry. 1604 */ 1605 static void 1606 apic_cpcovf_mask_clear(void) 1607 { 1608 apicadr[APIC_PCINT_VECT] &= ~APIC_LVT_MASK; 1609 } 1610 1611 static void 1612 apic_init_intr() 1613 { 1614 processorid_t cpun = psm_get_cpu_id(); 1615 1616 #if defined(__amd64) 1617 setcr8((ulong_t)(APIC_MASK_ALL >> APIC_IPL_SHIFT)); 1618 #else 1619 apicadr[APIC_TASK_REG] = APIC_MASK_ALL; 1620 #endif 1621 1622 if (apic_flat_model) 1623 apicadr[APIC_FORMAT_REG] = APIC_FLAT_MODEL; 1624 else 1625 apicadr[APIC_FORMAT_REG] = APIC_CLUSTER_MODEL; 1626 apicadr[APIC_DEST_REG] = AV_HIGH_ORDER >> cpun; 1627 1628 /* need to enable APIC before unmasking NMI */ 1629 apicadr[APIC_SPUR_INT_REG] = AV_UNIT_ENABLE | APIC_SPUR_INTR; 1630 1631 apicadr[APIC_LOCAL_TIMER] = AV_MASK; 1632 apicadr[APIC_INT_VECT0] = AV_MASK; /* local intr reg 0 */ 1633 apicadr[APIC_INT_VECT1] = AV_NMI; /* enable NMI */ 1634 1635 if (apic_cpus[cpun].aci_local_ver < APIC_INTEGRATED_VERS) 1636 return; 1637 1638 /* Enable performance counter overflow interrupt */ 1639 1640 if ((x86_feature & X86_MSR) != X86_MSR) 1641 apic_enable_cpcovf_intr = 0; 1642 if (apic_enable_cpcovf_intr) { 1643 if (apic_cpcovf_vect == 0) { 1644 int ipl = APIC_PCINT_IPL; 1645 int irq = apic_get_ipivect(ipl, -1); 1646 1647 ASSERT(irq != -1); 1648 
apic_cpcovf_vect = apic_irq_table[irq]->airq_vector; 1649 ASSERT(apic_cpcovf_vect); 1650 (void) add_avintr(NULL, ipl, 1651 (avfunc)kcpc_hw_overflow_intr, 1652 "apic pcint", irq, NULL, NULL, NULL); 1653 kcpc_hw_overflow_intr_installed = 1; 1654 kcpc_hw_enable_cpc_intr = apic_cpcovf_mask_clear; 1655 } 1656 apicadr[APIC_PCINT_VECT] = apic_cpcovf_vect; 1657 } 1658 1659 /* Enable error interrupt */ 1660 1661 if (apic_enable_error_intr) { 1662 if (apic_errvect == 0) { 1663 int ipl = 0xf; /* get highest priority intr */ 1664 int irq = apic_get_ipivect(ipl, -1); 1665 1666 ASSERT(irq != -1); 1667 apic_errvect = apic_irq_table[irq]->airq_vector; 1668 ASSERT(apic_errvect); 1669 /* 1670 * Not PSMI compliant, but we are going to merge 1671 * with ON anyway 1672 */ 1673 (void) add_avintr((void *)NULL, ipl, 1674 (avfunc)apic_error_intr, "apic error intr", 1675 irq, NULL, NULL, NULL); 1676 } 1677 apicadr[APIC_ERR_VECT] = apic_errvect; 1678 apicadr[APIC_ERROR_STATUS] = 0; 1679 apicadr[APIC_ERROR_STATUS] = 0; 1680 } 1681 } 1682 1683 static void 1684 apic_disable_local_apic() 1685 { 1686 apicadr[APIC_TASK_REG] = APIC_MASK_ALL; 1687 apicadr[APIC_LOCAL_TIMER] = AV_MASK; 1688 apicadr[APIC_INT_VECT0] = AV_MASK; /* local intr reg 0 */ 1689 apicadr[APIC_INT_VECT1] = AV_MASK; /* disable NMI */ 1690 apicadr[APIC_ERR_VECT] = AV_MASK; /* and error interrupt */ 1691 apicadr[APIC_PCINT_VECT] = AV_MASK; /* and perf counter intr */ 1692 apicadr[APIC_SPUR_INT_REG] = APIC_SPUR_INTR; 1693 } 1694 1695 static void 1696 apic_picinit(void) 1697 { 1698 int i, j; 1699 uint_t isr; 1700 volatile int32_t *ioapic; 1701 apic_irq_t *irqptr; 1702 1703 /* 1704 * On UniSys Model 6520, the BIOS leaves vector 0x20 isr 1705 * bit on without clearing it with EOI. Since softint 1706 * uses vector 0x20 to interrupt itself, so softint will 1707 * not work on this machine. In order to fix this problem 1708 * a check is made to verify all the isr bits are clear. 1709 * If not, EOIs are issued to clear the bits. 
1710 */ 1711 for (i = 7; i >= 1; i--) { 1712 if ((isr = apicadr[APIC_ISR_REG + (i * 4)]) != 0) 1713 for (j = 0; ((j < 32) && (isr != 0)); j++) 1714 if (isr & (1 << j)) { 1715 apicadr[APIC_EOI_REG] = 0; 1716 isr &= ~(1 << j); 1717 apic_error |= APIC_ERR_BOOT_EOI; 1718 } 1719 } 1720 1721 /* set a flag so we know we have run apic_picinit() */ 1722 apic_flag = 1; 1723 LOCK_INIT_CLEAR(&apic_gethrtime_lock); 1724 LOCK_INIT_CLEAR(&apic_ioapic_lock); 1725 LOCK_INIT_CLEAR(&apic_revector_lock); 1726 LOCK_INIT_CLEAR(&apic_ioapic_reprogram_lock); 1727 LOCK_INIT_CLEAR(&apic_error_lock); 1728 1729 picsetup(); /* initialise the 8259 */ 1730 1731 /* add nmi handler - least priority nmi handler */ 1732 LOCK_INIT_CLEAR(&apic_nmi_lock); 1733 1734 if (!psm_add_nmintr(0, (avfunc) apic_nmi_intr, 1735 "pcplusmp NMI handler", (caddr_t)NULL)) 1736 cmn_err(CE_WARN, "pcplusmp: Unable to add nmi handler"); 1737 1738 apic_init_intr(); 1739 1740 /* enable apic mode if imcr present */ 1741 if (apic_imcrp) { 1742 outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT); 1743 outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_APIC); 1744 } 1745 1746 /* mask interrupt vectors */ 1747 for (j = 0; j < apic_io_max; j++) { 1748 int intin_max; 1749 ioapic = apicioadr[j]; 1750 ioapic[APIC_IO_REG] = APIC_VERS_CMD; 1751 /* Bits 23-16 define the maximum redirection entries */ 1752 intin_max = (ioapic[APIC_IO_DATA] >> 16) & 0xff; 1753 for (i = 0; i < intin_max; i++) { 1754 ioapic[APIC_IO_REG] = APIC_RDT_CMD + 2 * i; 1755 ioapic[APIC_IO_DATA] = AV_MASK; 1756 } 1757 } 1758 1759 /* 1760 * Hack alert: deal with ACPI SCI interrupt chicken/egg here 1761 */ 1762 if (apic_sci_vect >= 0) { 1763 /* 1764 * acpica has already done add_avintr(); we just 1765 * to finish the job by mimicing translate_irq() 1766 */ 1767 if (apic_setup_sci_irq_table(apic_sci_vect, SCI_IPL, 1768 &apic_sci_flags) < 0) { 1769 cmn_err(CE_WARN, "!apic: SCI setup failed"); 1770 return; 1771 } 1772 irqptr = apic_irq_table[apic_sci_vect]; 1773 1774 /* Assert we're the 
sole entry in the list */ 1775 ASSERT(irqptr != NULL); 1776 ASSERT(irqptr->airq_next == NULL); 1777 1778 /* Program I/O APIC */ 1779 (void) apic_setup_io_intr(irqptr, apic_sci_vect); 1780 } 1781 } 1782 1783 1784 static void 1785 apic_cpu_start(processorid_t cpun, caddr_t rm_code) 1786 { 1787 int loop_count; 1788 uint32_t vector; 1789 uint_t cpu_id, iflag; 1790 1791 cpu_id = apic_cpus[cpun].aci_local_id; 1792 1793 apic_cmos_ssb_set = 1; 1794 1795 /* 1796 * Interrupts on BSP cpu will be disabled during these startup 1797 * steps in order to avoid unwanted side effects from 1798 * executing interrupt handlers on a problematic BIOS. 1799 */ 1800 1801 iflag = intr_clear(); 1802 outb(CMOS_ADDR, SSB); 1803 outb(CMOS_DATA, BIOS_SHUTDOWN); 1804 1805 while (get_apic_cmd1() & AV_PENDING) 1806 apic_ret(); 1807 1808 /* for integrated - make sure there is one INIT IPI in buffer */ 1809 /* for external - it will wake up the cpu */ 1810 apicadr[APIC_INT_CMD2] = cpu_id << APIC_ICR_ID_BIT_OFFSET; 1811 apicadr[APIC_INT_CMD1] = AV_ASSERT | AV_RESET; 1812 1813 /* If only 1 CPU is installed, PENDING bit will not go low */ 1814 for (loop_count = 0x1000; loop_count; loop_count--) 1815 if (get_apic_cmd1() & AV_PENDING) 1816 apic_ret(); 1817 else 1818 break; 1819 1820 apicadr[APIC_INT_CMD2] = cpu_id << APIC_ICR_ID_BIT_OFFSET; 1821 apicadr[APIC_INT_CMD1] = AV_DEASSERT | AV_RESET; 1822 1823 drv_usecwait(20000); /* 20 milli sec */ 1824 1825 if (apic_cpus[cpun].aci_local_ver >= APIC_INTEGRATED_VERS) { 1826 /* integrated apic */ 1827 1828 rm_code = (caddr_t)(uintptr_t)rm_platter_pa; 1829 vector = (rm_platter_pa >> MMU_PAGESHIFT) & 1830 (APIC_VECTOR_MASK | APIC_IPL_MASK); 1831 1832 /* to offset the INIT IPI queue up in the buffer */ 1833 apicadr[APIC_INT_CMD2] = cpu_id << APIC_ICR_ID_BIT_OFFSET; 1834 apicadr[APIC_INT_CMD1] = vector | AV_STARTUP; 1835 1836 drv_usecwait(200); /* 20 micro sec */ 1837 1838 apicadr[APIC_INT_CMD2] = cpu_id << APIC_ICR_ID_BIT_OFFSET; 1839 apicadr[APIC_INT_CMD1] = vector 
| AV_STARTUP; 1840 1841 drv_usecwait(200); /* 20 micro sec */ 1842 } 1843 intr_restore(iflag); 1844 } 1845 1846 1847 #ifdef DEBUG 1848 int apic_break_on_cpu = 9; 1849 int apic_stretch_interrupts = 0; 1850 int apic_stretch_ISR = 1 << 3; /* IPL of 3 matches nothing now */ 1851 1852 void 1853 apic_break() 1854 { 1855 } 1856 #endif /* DEBUG */ 1857 1858 /* 1859 * platform_intr_enter 1860 * 1861 * Called at the beginning of the interrupt service routine to 1862 * mask all level equal to and below the interrupt priority 1863 * of the interrupting vector. An EOI should be given to 1864 * the interrupt controller to enable other HW interrupts. 1865 * 1866 * Return -1 for spurious interrupts 1867 * 1868 */ 1869 /*ARGSUSED*/ 1870 static int 1871 apic_intr_enter(int ipl, int *vectorp) 1872 { 1873 uchar_t vector; 1874 int nipl; 1875 int irq, iflag; 1876 apic_cpus_info_t *cpu_infop; 1877 1878 /* 1879 * The real vector programmed in APIC is *vectorp + 0x20 1880 * But, cmnint code subtracts 0x20 before pushing it. 1881 * Hence APIC_BASE_VECT is 0x20. 
1882 */ 1883 1884 vector = (uchar_t)*vectorp; 1885 1886 /* if interrupted by the clock, increment apic_nsec_since_boot */ 1887 if (vector == apic_clkvect) { 1888 if (!apic_oneshot) { 1889 /* NOTE: this is not MT aware */ 1890 apic_hrtime_stamp++; 1891 apic_nsec_since_boot += apic_nsec_per_intr; 1892 apic_hrtime_stamp++; 1893 last_count_read = apic_hertz_count; 1894 apic_redistribute_compute(); 1895 } 1896 1897 /* We will avoid all the book keeping overhead for clock */ 1898 nipl = apic_vectortoipl[vector >> APIC_IPL_SHIFT]; 1899 #if defined(__amd64) 1900 setcr8((ulong_t)apic_cr8pri[nipl]); 1901 #else 1902 apicadr[APIC_TASK_REG] = apic_ipltopri[nipl]; 1903 #endif 1904 *vectorp = apic_vector_to_irq[vector + APIC_BASE_VECT]; 1905 apicadr[APIC_EOI_REG] = 0; 1906 return (nipl); 1907 } 1908 1909 cpu_infop = &apic_cpus[psm_get_cpu_id()]; 1910 1911 if (vector == (APIC_SPUR_INTR - APIC_BASE_VECT)) { 1912 cpu_infop->aci_spur_cnt++; 1913 return (APIC_INT_SPURIOUS); 1914 } 1915 1916 /* Check if the vector we got is really what we need */ 1917 if (apic_revector_pending) { 1918 /* 1919 * Disable interrupts for the duration of 1920 * the vector translation to prevent a self-race for 1921 * the apic_revector_lock. This cannot be done 1922 * in apic_xlate_vector because it is recursive and 1923 * we want the vector translation to be atomic with 1924 * respect to other (higher-priority) interrupts. 
1925 */ 1926 iflag = intr_clear(); 1927 vector = apic_xlate_vector(vector + APIC_BASE_VECT) - 1928 APIC_BASE_VECT; 1929 intr_restore(iflag); 1930 } 1931 1932 nipl = apic_vectortoipl[vector >> APIC_IPL_SHIFT]; 1933 *vectorp = irq = apic_vector_to_irq[vector + APIC_BASE_VECT]; 1934 1935 #if defined(__amd64) 1936 setcr8((ulong_t)apic_cr8pri[nipl]); 1937 #else 1938 apicadr[APIC_TASK_REG] = apic_ipltopri[nipl]; 1939 #endif 1940 1941 cpu_infop->aci_current[nipl] = (uchar_t)irq; 1942 cpu_infop->aci_curipl = (uchar_t)nipl; 1943 cpu_infop->aci_ISR_in_progress |= 1 << nipl; 1944 1945 /* 1946 * apic_level_intr could have been assimilated into the irq struct. 1947 * but, having it as a character array is more efficient in terms of 1948 * cache usage. So, we leave it as is. 1949 */ 1950 if (!apic_level_intr[irq]) 1951 apicadr[APIC_EOI_REG] = 0; 1952 1953 #ifdef DEBUG 1954 APIC_DEBUG_BUF_PUT(vector); 1955 APIC_DEBUG_BUF_PUT(irq); 1956 APIC_DEBUG_BUF_PUT(nipl); 1957 APIC_DEBUG_BUF_PUT(psm_get_cpu_id()); 1958 if ((apic_stretch_interrupts) && (apic_stretch_ISR & (1 << nipl))) 1959 drv_usecwait(apic_stretch_interrupts); 1960 1961 if (apic_break_on_cpu == psm_get_cpu_id()) 1962 apic_break(); 1963 #endif /* DEBUG */ 1964 return (nipl); 1965 } 1966 1967 static void 1968 apic_intr_exit(int prev_ipl, int irq) 1969 { 1970 apic_cpus_info_t *cpu_infop; 1971 1972 #if defined(__amd64) 1973 setcr8((ulong_t)apic_cr8pri[prev_ipl]); 1974 #else 1975 apicadr[APIC_TASK_REG] = apic_ipltopri[prev_ipl]; 1976 #endif 1977 1978 cpu_infop = &apic_cpus[psm_get_cpu_id()]; 1979 if (apic_level_intr[irq]) 1980 apicadr[APIC_EOI_REG] = 0; 1981 1982 cpu_infop->aci_curipl = (uchar_t)prev_ipl; 1983 /* ISR above current pri could not be in progress */ 1984 cpu_infop->aci_ISR_in_progress &= (2 << prev_ipl) - 1; 1985 } 1986 1987 /* 1988 * Mask all interrupts below or equal to the given IPL 1989 */ 1990 static void 1991 apic_setspl(int ipl) 1992 { 1993 1994 #if defined(__amd64) 1995 setcr8((ulong_t)apic_cr8pri[ipl]); 
1996 #else 1997 apicadr[APIC_TASK_REG] = apic_ipltopri[ipl]; 1998 #endif 1999 2000 /* interrupts at ipl above this cannot be in progress */ 2001 apic_cpus[psm_get_cpu_id()].aci_ISR_in_progress &= (2 << ipl) - 1; 2002 /* 2003 * this is a patch fix for the ALR QSMP P5 machine, so that interrupts 2004 * have enough time to come in before the priority is raised again 2005 * during the idle() loop. 2006 */ 2007 if (apic_setspl_delay) 2008 (void) get_apic_pri(); 2009 } 2010 2011 /* 2012 * trigger a software interrupt at the given IPL 2013 */ 2014 static void 2015 apic_set_softintr(int ipl) 2016 { 2017 int vector; 2018 uint_t flag; 2019 2020 vector = apic_resv_vector[ipl]; 2021 2022 flag = intr_clear(); 2023 2024 while (get_apic_cmd1() & AV_PENDING) 2025 apic_ret(); 2026 2027 /* generate interrupt at vector on itself only */ 2028 apicadr[APIC_INT_CMD1] = AV_SH_SELF | vector; 2029 2030 intr_restore(flag); 2031 } 2032 2033 /* 2034 * generates an interprocessor interrupt to another CPU 2035 */ 2036 static void 2037 apic_send_ipi(int cpun, int ipl) 2038 { 2039 int vector; 2040 uint_t flag; 2041 2042 vector = apic_resv_vector[ipl]; 2043 2044 flag = intr_clear(); 2045 2046 while (get_apic_cmd1() & AV_PENDING) 2047 apic_ret(); 2048 2049 apicadr[APIC_INT_CMD2] = 2050 apic_cpus[cpun].aci_local_id << APIC_ICR_ID_BIT_OFFSET; 2051 apicadr[APIC_INT_CMD1] = vector; 2052 2053 intr_restore(flag); 2054 } 2055 2056 2057 /*ARGSUSED*/ 2058 static void 2059 apic_set_idlecpu(processorid_t cpun) 2060 { 2061 } 2062 2063 /*ARGSUSED*/ 2064 static void 2065 apic_unset_idlecpu(processorid_t cpun) 2066 { 2067 } 2068 2069 2070 static void 2071 apic_ret() 2072 { 2073 } 2074 2075 static int 2076 get_apic_cmd1() 2077 { 2078 return (apicadr[APIC_INT_CMD1]); 2079 } 2080 2081 static int 2082 get_apic_pri() 2083 { 2084 #if defined(__amd64) 2085 return ((int)getcr8()); 2086 #else 2087 return (apicadr[APIC_TASK_REG]); 2088 #endif 2089 } 2090 2091 /* 2092 * If apic_coarse_time == 1, then apic_gettime() is used 
instead of
 * apic_gethrtime(). This is used for performance instead of accuracy.
 *
 * Returns apic_nsec_since_boot, re-reading it if apic_hrtime_stamp is odd
 * or changes while the value is being sampled.
 */

static hrtime_t
apic_gettime()
{
	int old_hrtime_stamp;
	hrtime_t temp;

	/*
	 * In one-shot mode, we do not keep time, so if anyone
	 * calls psm_gettime() directly, we vector over to
	 * gethrtime().
	 * one-shot mode MUST NOT be enabled if this psm is the source of
	 * hrtime.
	 */

	if (apic_oneshot)
		return (gethrtime());


gettime_again:
	/* spin while the stamp is odd, then remember the even value seen */
	while ((old_hrtime_stamp = apic_hrtime_stamp) & 1)
		apic_ret();

	temp = apic_nsec_since_boot;

	/* a changed stamp means the sample may be inconsistent; retry */
	if (apic_hrtime_stamp != old_hrtime_stamp) {	/* got an interrupt */
		goto gettime_again;
	}
	return (temp);
}

/*
 * Here we return the number of nanoseconds since booting.  Note every
 * clock interrupt increments apic_nsec_since_boot by the appropriate
 * amount.
 *
 * The result is apic_nsec_since_boot plus the ticks elapsed in the current
 * interval (apic_hertz_count - current count) scaled by apic_nsec_per_tick.
 * The value returned never goes below apic_last_hrtime.  The whole
 * computation runs with interrupts cleared and apic_gethrtime_lock held.
 */
static hrtime_t
apic_gethrtime()
{
	int curr_timeval, countval, elapsed_ticks, oflags;
	int old_hrtime_stamp, status;
	hrtime_t temp;
	uchar_t	cpun;


	/*
	 * In one-shot mode, we do not keep time, so if anyone
	 * calls psm_gethrtime() directly, we vector over to
	 * gethrtime().
	 * one-shot mode MUST NOT be enabled if this psm is the source of
	 * hrtime.
	 */

	if (apic_oneshot)
		return (gethrtime());

	oflags = intr_clear();	/* prevent migration */

	/* local APIC id of the CPU we are running on */
	cpun = (uchar_t)((uint_t)apicadr[APIC_LID_REG] >> APIC_ID_BIT_OFFSET);

	lock_set(&apic_gethrtime_lock);

gethrtime_again:
	/* spin while the stamp is odd, then remember the even value seen */
	while ((old_hrtime_stamp = apic_hrtime_stamp) & 1)
		apic_ret();

	/*
	 * Check to see which CPU we are on.  Note the time is kept on
	 * the local APIC of CPU 0.  If on CPU 0, simply read the current
	 * counter.  If on another CPU, issue a remote read command to CPU 0.
	 */
	if (cpun == apic_cpus[0].aci_local_id) {
		countval = apicadr[APIC_CURR_COUNT];
	} else {
		/* wait for any previous command to drain before issuing ours */
		while (get_apic_cmd1() & AV_PENDING)
			apic_ret();

		apicadr[APIC_INT_CMD2] =
		    apic_cpus[0].aci_local_id << APIC_ICR_ID_BIT_OFFSET;
		apicadr[APIC_INT_CMD1] = APIC_CURR_ADD|AV_REMOTE;

		/* wait for the remote read to complete */
		while ((status = get_apic_cmd1()) & AV_READ_PENDING)
			apic_ret();

		if (status & AV_REMOTE_STATUS)	/* 1 = valid */
			countval = apicadr[APIC_REMOTE_READ];
		else {	/* 0 = invalid */
			apic_remote_hrterr++;
			/*
			 * return last hrtime right now, will need more
			 * testing if change to retry
			 */
			temp = apic_last_hrtime;

			lock_clear(&apic_gethrtime_lock);

			intr_restore(oflags);

			return (temp);
		}
	}
	/*
	 * A count larger than the last one read is treated as 0; otherwise
	 * it becomes the new last_count_read.
	 */
	if (countval > last_count_read)
		countval = 0;
	else
		last_count_read = countval;

	elapsed_ticks = apic_hertz_count - countval;

	curr_timeval = elapsed_ticks * apic_nsec_per_tick;
	temp = apic_nsec_since_boot + curr_timeval;

	if (apic_hrtime_stamp != old_hrtime_stamp) {	/* got an interrupt */
		/* we might have clobbered last_count_read. Restore it */
		last_count_read = apic_hertz_count;
		goto gethrtime_again;
	}

	if (temp < apic_last_hrtime) {
		/* return last hrtime if error occurs */
		apic_hrtime_error++;
		temp = apic_last_hrtime;
	}
	else
		apic_last_hrtime = temp;

	lock_clear(&apic_gethrtime_lock);
	intr_restore(oflags);

	return (temp);
}

/*
 * apic NMI handler
 *
 * If apic_shutdown_processors is set, the local APIC is disabled and the
 * handler returns.  Otherwise, when apic_nmi_lock can be taken, the NMI
 * either enters kmdb / panics (apic_kmdb_on_nmi), panics
 * (apic_panic_on_nmi), or is logged and counted.  If the lock cannot be
 * taken the NMI is ignored.
 */
/*ARGSUSED*/
static void
apic_nmi_intr(caddr_t arg)
{
	if (apic_shutdown_processors) {
		apic_disable_local_apic();
		return;
	}

	if (lock_try(&apic_nmi_lock)) {
		if (apic_kmdb_on_nmi) {
			if (psm_debugger() == 0) {
				cmn_err(CE_PANIC,
				    "NMI detected, kmdb is not available.");
			} else {
				debug_enter("\nNMI detected, entering kmdb.\n");
			}
		} else {
			if (apic_panic_on_nmi) {
				/* Keep panic from entering kmdb. */
				nopanicdebug = 1;
				cmn_err(CE_PANIC, "pcplusmp: NMI received");
			} else {
				/*
				 * prom_printf is the best shot we have
				 * of something which is problem free from
				 * high level/NMI type of interrupts
				 */
				prom_printf("pcplusmp: NMI received\n");
				apic_error |= APIC_ERR_NMI;
				apic_num_nmis++;
			}
		}
		lock_clear(&apic_nmi_lock);
	}
}

/*
 * Add mask bits to disable interrupt vector from happening
 * at or above IPL. In addition, it should remove mask bits
 * to enable interrupt vectors below the given IPL.
 *
 * Both add and delspl are complicated by the fact that different interrupts
 * may share IRQs. This can happen in two ways.
 * 1. The same H/W line is shared by more than 1 device
 * 1a. with interrupts at different IPLs
 * 1b. with interrupts at same IPL
 * 2. We ran out of vectors at a given IPL and started sharing vectors.
2275 * 1b and 2 should be handled gracefully, except for the fact some ISRs 2276 * will get called often when no interrupt is pending for the device. 2277 * For 1a, we just hope that the machine blows up with the person who 2278 * set it up that way!. In the meantime, we handle it at the higher IPL. 2279 */ 2280 /*ARGSUSED*/ 2281 static int 2282 apic_addspl(int irqno, int ipl, int min_ipl, int max_ipl) 2283 { 2284 uchar_t vector; 2285 int iflag; 2286 apic_irq_t *irqptr, *irqheadptr; 2287 int irqindex; 2288 2289 ASSERT(max_ipl <= UCHAR_MAX); 2290 irqindex = IRQINDEX(irqno); 2291 2292 if ((irqindex == -1) || (!apic_irq_table[irqindex])) 2293 return (PSM_FAILURE); 2294 2295 irqptr = irqheadptr = apic_irq_table[irqindex]; 2296 2297 DDI_INTR_IMPLDBG((CE_CONT, "apic_addspl: dip=0x%p type=%d irqno=0x%x " 2298 "vector=0x%x\n", (void *)irqptr->airq_dip, 2299 irqptr->airq_mps_intr_index, irqno, irqptr->airq_vector)); 2300 2301 while (irqptr) { 2302 if (VIRTIRQ(irqindex, irqptr->airq_share_id) == irqno) 2303 break; 2304 irqptr = irqptr->airq_next; 2305 } 2306 irqptr->airq_share++; 2307 2308 /* return if it is not hardware interrupt */ 2309 if (irqptr->airq_mps_intr_index == RESERVE_INDEX) 2310 return (PSM_SUCCESS); 2311 2312 /* Or if there are more interupts at a higher IPL */ 2313 if (ipl != max_ipl) 2314 return (PSM_SUCCESS); 2315 2316 /* 2317 * if apic_picinit() has not been called yet, just return. 2318 * At the end of apic_picinit(), we will call setup_io_intr(). 2319 */ 2320 2321 if (!apic_flag) 2322 return (PSM_SUCCESS); 2323 2324 iflag = intr_clear(); 2325 2326 /* 2327 * Upgrade vector if max_ipl is not earlier ipl. If we cannot allocate, 2328 * return failure. Not very elegant, but then we hope the 2329 * machine will blow up with ... 
2330 */ 2331 if (irqptr->airq_ipl != max_ipl) { 2332 vector = apic_allocate_vector(max_ipl, irqindex, 1); 2333 if (vector == 0) { 2334 intr_restore(iflag); 2335 irqptr->airq_share--; 2336 return (PSM_FAILURE); 2337 } 2338 irqptr = irqheadptr; 2339 apic_mark_vector(irqptr->airq_vector, vector); 2340 while (irqptr) { 2341 irqptr->airq_vector = vector; 2342 irqptr->airq_ipl = (uchar_t)max_ipl; 2343 /* 2344 * reprogram irq being added and every one else 2345 * who is not in the UNINIT state 2346 */ 2347 if ((VIRTIRQ(irqindex, irqptr->airq_share_id) == 2348 irqno) || (irqptr->airq_temp_cpu != IRQ_UNINIT)) { 2349 apic_record_rdt_entry(irqptr, irqindex); 2350 (void) apic_setup_io_intr(irqptr, irqindex); 2351 } 2352 irqptr = irqptr->airq_next; 2353 } 2354 intr_restore(iflag); 2355 return (PSM_SUCCESS); 2356 } 2357 2358 ASSERT(irqptr); 2359 (void) apic_setup_io_intr(irqptr, irqindex); 2360 intr_restore(iflag); 2361 return (PSM_SUCCESS); 2362 } 2363 2364 /* 2365 * Recompute mask bits for the given interrupt vector. 2366 * If there is no interrupt servicing routine for this 2367 * vector, this function should disable interrupt vector 2368 * from happening at all IPLs. If there are still 2369 * handlers using the given vector, this function should 2370 * disable the given vector from happening below the lowest 2371 * IPL of the remaining hadlers. 
 */
/*ARGSUSED*/
static int
apic_delspl(int irqno, int ipl, int min_ipl, int max_ipl)
{
	uchar_t vector, bind_cpu;
	int iflag, intin, irqindex;
	volatile int32_t *ioapic;
	apic_irq_t *irqptr, *irqheadptr;

	/*
	 * NOTE(review): unlike apic_addspl(), irqindex is not checked for
	 * -1 and the table slot is not checked for NULL before use here.
	 */
	irqindex = IRQINDEX(irqno);
	irqptr = irqheadptr = apic_irq_table[irqindex];

	DDI_INTR_IMPLDBG((CE_CONT, "apic_delspl: dip=0x%p type=%d irqno=0x%x "
	    "vector=0x%x\n", (void *)irqptr->airq_dip,
	    irqptr->airq_mps_intr_index, irqno, irqptr->airq_vector));

	/* find the entry on this irq's share list that matches irqno */
	while (irqptr) {
		if (VIRTIRQ(irqindex, irqptr->airq_share_id) == irqno)
			break;
		irqptr = irqptr->airq_next;
	}
	ASSERT(irqptr);

	irqptr->airq_share--;

	/* nothing more to do while handlers remain at a higher IPL */
	if (ipl < max_ipl)
		return (PSM_SUCCESS);

	/* return if it is not hardware interrupt */
	if (irqptr->airq_mps_intr_index == RESERVE_INDEX)
		return (PSM_SUCCESS);

	if (!apic_flag) {
		/*
		 * Clear irq_struct. If two devices shared an intpt
		 * line & 1 unloaded before picinit, we are hosed. But, then
		 * we hope the machine will ...
		 */
		irqptr->airq_mps_intr_index = FREE_INDEX;
		irqptr->airq_temp_cpu = IRQ_UNINIT;
		apic_free_vector(irqptr->airq_vector);
		return (PSM_SUCCESS);
	}
	/*
	 * Downgrade vector to new max_ipl if needed. If we cannot allocate,
	 * use old IPL. Not very elegant, but then we hope ...
	 */
	if ((irqptr->airq_ipl != max_ipl) && (max_ipl != PSM_INVALID_IPL)) {
		apic_irq_t	*irqp;
		/*
		 * NOTE(review): apic_addspl() passes irqindex as the second
		 * argument of apic_allocate_vector(); irqno is passed here.
		 * Confirm which is intended.
		 */
		if (vector = apic_allocate_vector(max_ipl, irqno, 1)) {
			apic_mark_vector(irqheadptr->airq_vector, vector);
			irqp = irqheadptr;
			while (irqp) {
				irqp->airq_vector = vector;
				irqp->airq_ipl = (uchar_t)max_ipl;
				if (irqp->airq_temp_cpu != IRQ_UNINIT) {
					apic_record_rdt_entry(irqp, irqindex);
					(void) apic_setup_io_intr(irqp,
					    irqindex);
				}
				irqp = irqp->airq_next;
			}
		}
	}

	/* other users of this entry remain; leave the hardware alone */
	if (irqptr->airq_share)
		return (PSM_SUCCESS);

	/* last user is gone: mask the redirection entry in the I/O APIC */
	ioapic = apicioadr[irqptr->airq_ioapicindex];
	intin = irqptr->airq_intin_no;
	iflag = intr_clear();
	lock_set(&apic_ioapic_lock);
	ioapic[APIC_IO_REG] = APIC_RDT_CMD + 2 * intin;
	ioapic[APIC_IO_DATA] = AV_MASK;

	/* Disable the MSI/X vector */
	if (APIC_IS_MSI_OR_MSIX_INDEX(irqptr->airq_mps_intr_index)) {
		int type = (irqptr->airq_mps_intr_index == MSI_INDEX) ?
		    DDI_INTR_TYPE_MSI : DDI_INTR_TYPE_MSIX;

		/*
		 * Make sure we only disable on the last
		 * of the multi-MSI support
		 */
		if (i_ddi_intr_get_current_nintrs(irqptr->airq_dip) == 1) {
			(void) pci_msi_unconfigure(irqptr->airq_dip, type,
			    irqptr->airq_ioapicindex);

			(void) pci_msi_disable_mode(irqptr->airq_dip, type,
			    irqptr->airq_ioapicindex);
		}
	}

	if (max_ipl == PSM_INVALID_IPL) {
		/* drop the CPU binding count, then free the entry and vector */
		ASSERT(irqheadptr == irqptr);
		bind_cpu = irqptr->airq_temp_cpu;
		if (((uchar_t)bind_cpu != IRQ_UNBOUND) &&
		    ((uchar_t)bind_cpu != IRQ_UNINIT)) {
			ASSERT((bind_cpu & ~IRQ_USER_BOUND) < apic_nproc);
			if (bind_cpu & IRQ_USER_BOUND) {
				/* If hardbound, temp_cpu == cpu */
				bind_cpu &= ~IRQ_USER_BOUND;
				apic_cpus[bind_cpu].aci_bound--;
			} else
				apic_cpus[bind_cpu].aci_temp_bound--;
		}
		lock_clear(&apic_ioapic_lock);
		intr_restore(iflag);
		irqptr->airq_temp_cpu = IRQ_UNINIT;
		irqptr->airq_mps_intr_index = FREE_INDEX;
		apic_free_vector(irqptr->airq_vector);
		return (PSM_SUCCESS);
	}
	lock_clear(&apic_ioapic_lock);
	intr_restore(iflag);

	mutex_enter(&airq_mutex);
	if ((irqptr == apic_irq_table[irqindex])) {
		apic_irq_t	*oldirqptr;
		/* Move valid irq entry to the head */
		irqheadptr = oldirqptr = irqptr;
		irqptr = irqptr->airq_next;
		ASSERT(irqptr);
		while (irqptr) {
			if (irqptr->airq_mps_intr_index != FREE_INDEX)
				break;
			oldirqptr = irqptr;
			irqptr = irqptr->airq_next;
		}
		/* remove all invalid ones from the beginning */
		apic_irq_table[irqindex] = irqptr;
		/*
		 * and link them back after the head. The invalid ones
		 * begin with irqheadptr and end at oldirqptr
		 */
		oldirqptr->airq_next = irqptr->airq_next;
		irqptr->airq_next = irqheadptr;
	}
	mutex_exit(&airq_mutex);

	/*
	 * NOTE(review): when the branch above ran, irqptr now refers to the
	 * entry that was moved to the head, not the entry located by the
	 * search loop, so these stores mark the new head as free.  Confirm
	 * that this is intended.
	 */
	irqptr->airq_temp_cpu = IRQ_UNINIT;
	irqptr->airq_mps_intr_index = FREE_INDEX;
	return (PSM_SUCCESS);
}

/*
 * Return HW interrupt number corresponding to the given IPL
 */
/*ARGSUSED*/
static int
apic_softlvl_to_irq(int ipl)
{
	/*
	 * Do not use apic to trigger soft interrupt.
	 * It will cause the system to hang when 2 hardware interrupts
	 * at the same priority with the softint are already accepted
	 * by the apic.  Cause the AV_PENDING bit will not be cleared
	 * until one of the hardware interrupt is eoi'ed.  If we need
	 * to send an ipi at this time, we will end up looping forever
	 * to wait for the AV_PENDING bit to clear.
	 */
	return (PSM_SV_SOFTWARE);
}

/*
 * Per-CPU start-up hook: initialises the local APIC interrupt state, marks
 * the calling CPU online with interrupts enabled, and rebinds to it every
 * initialised irq whose airq_cpu names this CPU.  Always returns
 * PSM_SUCCESS.
 */
static int
apic_post_cpu_start()
{
	int i, cpun;
	apic_irq_t *irq_ptr;

	apic_init_intr();

	/*
	 * since some systems don't enable the internal cache on the non-boot
	 * cpus, so we have to enable them here
	 */
	setcr0(getcr0() & ~(0x60000000));

	while (get_apic_cmd1() & AV_PENDING)
		apic_ret();

	cpun = psm_get_cpu_id();
	apic_cpus[cpun].aci_status = APIC_CPU_ONLINE | APIC_CPU_INTR_ENABLE;

	for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) {
		irq_ptr = apic_irq_table[i];
		/* only the head entry's airq_cpu decides whether to rebind */
		if ((irq_ptr == NULL) ||
		    ((irq_ptr->airq_cpu & ~IRQ_USER_BOUND) != cpun))
			continue;

		while (irq_ptr) {
			if (irq_ptr->airq_temp_cpu != IRQ_UNINIT)
				(void) apic_rebind(irq_ptr, cpun, 1, IMMEDIATE);
			irq_ptr = irq_ptr->airq_next;
		}
	}

	return (PSM_SUCCESS);
}

/*
 * Iterate over the processor ids present in apic_cpumask.  A cpu_id of -1
 * starts the iteration and yields 0; otherwise the next id above cpu_id
 * whose bit is set is returned, or -1 when there is none below NCPU.
 */
processorid_t
apic_get_next_processorid(processorid_t cpu_id)
{

	int i;

	if (cpu_id == -1)
		return ((processorid_t)0);

	for (i = cpu_id + 1; i < NCPU; i++) {
		/*
		 * NOTE(review): (1 << i) is an int shift; verify NCPU cannot
		 * exceed the width of int / apic_cpumask.
		 */
		if (apic_cpumask & (1 << i))
			return (i);
	}

	return ((processorid_t)-1);
}


/*
 * type == -1 indicates it is an internal request. Do not change
 * resv_vector for these requests
 *
 * Allocates an irq and a vector at the given ipl, marks the irq entry as
 * RESERVE_INDEX and returns the irq.  On failure sets
 * APIC_ERR_GET_IPIVECT_FAIL in apic_error and returns -1.
 */
static int
apic_get_ipivect(int ipl, int type)
{
	uchar_t vector;
	int irq;

	if (irq = apic_allocate_irq(APIC_VECTOR(ipl))) {
		if (vector = apic_allocate_vector(ipl, irq, 1)) {
			apic_irq_table[irq]->airq_mps_intr_index =
			    RESERVE_INDEX;
			apic_irq_table[irq]->airq_vector = vector;
			if (type != -1) {
				apic_resv_vector[ipl] = vector;
			}
			return (irq);
		}
	}
	apic_error |= APIC_ERR_GET_IPIVECT_FAIL;
	return (-1);	/* shouldn't happen */
}

/*
 * Reserve an irq/vector for the clock at the given ipl and record the
 * vector (relative to APIC_BASE_VECT) in apic_clkvect.  Returns the irq,
 * or -1 if none could be reserved.
 */
static int
apic_getclkirq(int ipl)
{
	int	irq;

	if ((irq = apic_get_ipivect(ipl, -1)) == -1)
		return (-1);
	/*
	 * Note the vector in apic_clkvect for per clock handling.
	 */
	apic_clkvect = apic_irq_table[irq]->airq_vector - APIC_BASE_VECT;
	APIC_VERBOSE_IOAPIC((CE_NOTE, "get_clkirq: vector = %x\n",
	    apic_clkvect));
	return (irq);
}

/*
 * Return the number of APIC clock ticks elapsed for 8254 to decrement
 * (APIC_TIME_COUNT + pit_ticks_adj) ticks.
 */
static uint_t
apic_calibrate(volatile uint32_t *addr, uint16_t *pit_ticks_adj)
{
	uint8_t		pit_tick_lo;
	uint16_t	pit_tick, target_pit_tick;
	uint32_t	start_apic_tick, end_apic_tick;
	int		iflag;

	/* addr now points at the APIC current-count register */
	addr += APIC_CURR_COUNT;

	iflag = intr_clear();

	/*
	 * Read the 16-bit PIT counter (low byte first) until the value is
	 * at least APIC_TIME_MIN and its low byte lies strictly between
	 * APIC_LB_MIN and APIC_LB_MAX.
	 */
	do {
		pit_tick_lo = inb(PITCTR0_PORT);
		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
	} while (pit_tick < APIC_TIME_MIN ||
	    pit_tick_lo <= APIC_LB_MIN || pit_tick_lo >= APIC_LB_MAX);

	/*
	 * Wait for the 8254 to decrement by 5 ticks to ensure
	 * we didn't start in the middle of a tick.
	 * Compare with 0x10 for the wrap around case.
	 */
	target_pit_tick = pit_tick - 5;
	do {
		pit_tick_lo = inb(PITCTR0_PORT);
		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
	} while (pit_tick > target_pit_tick || pit_tick_lo < 0x10);

	start_apic_tick = *addr;

	/*
	 * Wait for the 8254 to decrement by
	 * (APIC_TIME_COUNT + pit_ticks_adj) ticks
	 */
	target_pit_tick = pit_tick - APIC_TIME_COUNT;
	do {
		pit_tick_lo = inb(PITCTR0_PORT);
		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
	} while (pit_tick > target_pit_tick || pit_tick_lo < 0x10);

	end_apic_tick = *addr;

	/* how far the PIT overshot the target before we noticed */
	*pit_ticks_adj = target_pit_tick - pit_tick;

	intr_restore(iflag);

	/* difference of the two samples: start minus end */
	return (start_apic_tick - end_apic_tick);
}

/*
 * Initialise the APIC timer on the local APIC of CPU 0 to the desired
 * frequency.  Note at this stage in the boot sequence, the boot processor
 * is the only active processor.
 * hertz value of 0 indicates a one-shot mode request.  In this case
 * the function returns the resolution (in nanoseconds) for the hardware
 * timer interrupt.  If one-shot mode capability is not available,
 * the return value will be 0. apic_oneshot_enable is a global switch
 * for disabling the functionality.
 * A non-zero positive value for hertz indicates a periodic mode request.
 * In this case the hardware will be programmed to generate clock interrupts
 * at hertz frequency and returns the resolution of interrupts in
 * nanosecond.
 */

static int
apic_clkinit(int hertz)
{

	uint_t		apic_ticks = 0;
	uint_t		pit_time;
	int		ret;
	uint16_t	pit_ticks_adj;
	static int	firsttime = 1;

	if (firsttime) {
		/* first time calibrate */

		apicadr[APIC_DIVIDE_REG] = 0x0;
		apicadr[APIC_INIT_COUNT] = APIC_MAXVAL;

		/* set periodic interrupt based on CLKIN */
		apicadr[APIC_LOCAL_TIMER] =
		    (apic_clkvect + APIC_BASE_VECT) | AV_TIME;
		tenmicrosec();

		apic_ticks = apic_calibrate(apicadr, &pit_ticks_adj);

		/* mask the timer again now that calibration is done */
		apicadr[APIC_LOCAL_TIMER] =
		    (apic_clkvect + APIC_BASE_VECT) | AV_MASK;
		/*
		 * pit time is the amount of real time (in nanoseconds ) it took
		 * the 8254 to decrement (APIC_TIME_COUNT + pit_ticks_adj) ticks
		 */
		pit_time = ((longlong_t)(APIC_TIME_COUNT +
		    pit_ticks_adj) * NANOSEC) / PIT_HZ;

		/*
		 * Determine the number of nanoseconds per APIC clock tick
		 * and then determine how many APIC ticks to interrupt at the
		 * desired frequency
		 *
		 * NOTE(review): apic_ticks is used as a divisor without a
		 * zero check; confirm apic_calibrate() cannot return 0.
		 */
		apic_nsec_per_tick = pit_time / apic_ticks;
		if (apic_nsec_per_tick == 0)
			apic_nsec_per_tick = 1;

		/* the interval timer initial count is 32 bit max */
		apic_nsec_max = (hrtime_t)apic_nsec_per_tick * APIC_MAXVAL;
		firsttime = 0;
	}

	if (hertz != 0) {
		/* periodic */
		apic_nsec_per_intr = NANOSEC / hertz;
		apic_hertz_count = (longlong_t)apic_nsec_per_intr /
		    apic_nsec_per_tick;
		apic_sample_factor_redistribution = hertz + 1;
	}

	/*
	 * NOTE(review): these three values are rescaled in place on every
	 * call, not only the first; verify that repeated calls are expected.
	 */
	apic_int_busy_mark = (apic_int_busy_mark *
	    apic_sample_factor_redistribution) / 100;
	apic_int_free_mark = (apic_int_free_mark *
	    apic_sample_factor_redistribution) / 100;
	apic_diff_for_redistribution = (apic_diff_for_redistribution *
	    apic_sample_factor_redistribution) / 100;

	if (hertz == 0) {
		/* requested one_shot */
		if (!apic_oneshot_enable)
			return (0);
		apic_oneshot = 1;
		ret = (int)apic_nsec_per_tick;
	} else {
		/* program the local APIC to interrupt at the given frequency */
		apicadr[APIC_INIT_COUNT] = apic_hertz_count;
		apicadr[APIC_LOCAL_TIMER] =
		    (apic_clkvect + APIC_BASE_VECT) | AV_TIME;
		apic_oneshot = 0;
		ret = NANOSEC / hertz;
	}

	return (ret);

}

/*
 * apic_preshutdown:
 * Called early in shutdown whilst we can still access filesystems to do
 * things like loading modules which will be required to complete shutdown
 * after filesystems are all unmounted.
 *
 * As written here it only emits a verbose trace; both paths return without
 * further work.
 */
static void
apic_preshutdown(int cmd, int fcn)
{
	APIC_VERBOSE_POWEROFF(("apic_preshutdown(%d,%d); m=%d a=%d\n",
	    cmd, fcn, apic_poweroff_method, apic_enable_acpi));

	if ((cmd != A_SHUTDOWN) || (fcn != AD_POWEROFF)) {
		return;
	}
}

/*
 * Quiesce the interrupt hardware: NMI the other CPUs so they disable their
 * local APICs, mask the I/O APIC redirection entries, switch the IMCR back
 * to PIC mode when present and disable the local APIC.  When the request is
 * A_SHUTDOWN/AD_POWEROFF, additionally attempt to power the machine off via
 * the method selected by apic_poweroff_method.
 */
static void
apic_shutdown(int cmd, int fcn)
{
	int iflag, restarts, attempts;
	int i, j;
	volatile int32_t *ioapic;
	uchar_t	byte;

	/* Send NMI to all CPUs except self to do per processor shutdown */
	iflag = intr_clear();
	while (get_apic_cmd1() & AV_PENDING)
		apic_ret();
	apic_shutdown_processors = 1;
	apicadr[APIC_INT_CMD1] = AV_NMI | AV_LEVEL | AV_SH_ALL_EXCSELF;

	/* restore cmos shutdown byte before reboot */
	if (apic_cmos_ssb_set) {
		outb(CMOS_ADDR, SSB);
		outb(CMOS_DATA, 0);
	}
	/* Disable the I/O APIC redirection entries */
	for (j = 0; j < apic_io_max; j++) {
		int intin_max;
		ioapic = apicioadr[j];
		ioapic[APIC_IO_REG] = APIC_VERS_CMD;
		/* Bits 23-16 define the maximum redirection entries */
		intin_max = (ioapic[APIC_IO_DATA] >> 16) & 0xff;
		/*
		 * NOTE(review): if the field holds the index of the highest
		 * entry rather than a count, this loop leaves the last entry
		 * unmasked; confirm against the I/O APIC specification.
		 */
		for (i = 0; i < intin_max; i++) {
			ioapic[APIC_IO_REG] = APIC_RDT_CMD + 2 * i;
			ioapic[APIC_IO_DATA] = AV_MASK;
		}
	}

	/*	disable apic mode if imcr present	*/
	if (apic_imcrp) {
		outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT);
		outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_PIC);
	}

	apic_disable_local_apic();

	intr_restore(iflag);

	/* anything other than a power-off request is finished here */
	if ((cmd != A_SHUTDOWN) || (fcn != AD_POWEROFF)) {
		return;
	}

	switch (apic_poweroff_method) {
		case APIC_POWEROFF_VIA_RTC:

			/* select the extended NVRAM bank in the RTC */
			outb(CMOS_ADDR, RTC_REGA);
			byte = inb(CMOS_DATA);
			outb(CMOS_DATA, (byte | EXT_BANK));

			outb(CMOS_ADDR, PFR_REG);

			/* for Predator must toggle the PAB bit */
			byte = inb(CMOS_DATA);

			/*
			 * clear power active bar, wakeup alarm and
			 * kickstart
			 */
			byte &= ~(PAB_CBIT | WF_FLAG | KS_FLAG);
			outb(CMOS_DATA, byte);

			/* delay before next write */
			drv_usecwait(1000);

			/* for S40 the following would suffice */
			byte = inb(CMOS_DATA);

			/* power active bar control bit */
			byte |= PAB_CBIT;
			outb(CMOS_DATA, byte);

			break;

		case APIC_POWEROFF_VIA_ASPEN_BMC:
			restarts = 0;
restart_aspen_bmc:
			/* give up once the restart counter reaches 3 */
			if (++restarts == 3)
				break;
			attempts = 0;
			/* poll the busy flag, restarting after repeated waits */
			do {
				byte = inb(MISMIC_FLAG_REGISTER);
				byte &= MISMIC_BUSY_MASK;
				if (byte != 0) {
					drv_usecwait(1000);
					if (attempts >= 3)
						goto restart_aspen_bmc;
					++attempts;
				}
			} while (byte != 0);
			outb(MISMIC_CNTL_REGISTER, CC_SMS_GET_STATUS);
			byte = inb(MISMIC_FLAG_REGISTER);
			byte |= 0x1;
			outb(MISMIC_FLAG_REGISTER, byte);
			i = 0;
			/* send each cntl/data pair from the aspen_bmc table */
			for (; i < (sizeof (aspen_bmc)/sizeof (aspen_bmc[0]));
			    i++) {
				attempts = 0;
				do {
					byte = inb(MISMIC_FLAG_REGISTER);
					byte &= MISMIC_BUSY_MASK;
					if (byte != 0) {
						drv_usecwait(1000);
						if (attempts >= 3)
							goto restart_aspen_bmc;
						++attempts;
					}
				} while (byte != 0);
				outb(MISMIC_CNTL_REGISTER, aspen_bmc[i].cntl);
				outb(MISMIC_DATA_REGISTER, aspen_bmc[i].data);
				byte = inb(MISMIC_FLAG_REGISTER);
				byte |= 0x1;
				outb(MISMIC_FLAG_REGISTER, byte);
			}
			break;

		case APIC_POWEROFF_VIA_SITKA_BMC:
			restarts = 0;
restart_sitka_bmc:
			/* give up once the restart counter reaches 3 */
			if (++restarts == 3)
				break;
			attempts = 0;
			/* wait until the SMS state is neither read nor write */
			do {
				byte = inb(SMS_STATUS_REGISTER);
				byte &= SMS_STATE_MASK;
				if ((byte == SMS_READ_STATE) ||
				    (byte == SMS_WRITE_STATE)) {
					drv_usecwait(1000);
					if (attempts >= 3)
						goto restart_sitka_bmc;
					++attempts;
				}
			} while ((byte == SMS_READ_STATE) ||
			    (byte == SMS_WRITE_STATE));
			outb(SMS_COMMAND_REGISTER, SMS_GET_STATUS);
			i = 0;
			/* send each port/data pair from the sitka_bmc table */
			for (; i < (sizeof (sitka_bmc)/sizeof (sitka_bmc[0]));
			    i++) {
				attempts = 0;
				do {
					byte = inb(SMS_STATUS_REGISTER);
					byte &= SMS_IBF_MASK;
					if (byte != 0) {
						drv_usecwait(1000);
						if (attempts >= 3)
							goto restart_sitka_bmc;
						++attempts;
					}
				} while (byte != 0);
				outb(sitka_bmc[i].port, sitka_bmc[i].data);
			}
			break;

		case APIC_POWEROFF_NONE:

			/* If no APIC direct method, we will try using ACPI */
			if (apic_enable_acpi) {
				if (acpi_poweroff() == 1)
					return;
			} else
				return;

			break;
	}
	/*
	 * Wait a limited time here for power to go off.
	 * If the power does not go off, then there was a
	 * problem and we should continue to the halt which
	 * prints a message for the user to press a key to
	 * reboot.
	 */
	drv_usecwait(7000000); /* wait seven seconds */

}

/*
 * Try and disable all interrupts. We just assign interrupts to other
 * processors based on policy. If any were bound by user request, we
 * let them continue and return failure.
We do not bother to check 2982 * for cache affinity while rebinding. 2983 */ 2984 2985 static int 2986 apic_disable_intr(processorid_t cpun) 2987 { 2988 int bind_cpu = 0, i, hardbound = 0, iflag; 2989 apic_irq_t *irq_ptr; 2990 2991 if (cpun == 0) 2992 return (PSM_FAILURE); 2993 2994 iflag = intr_clear(); 2995 lock_set(&apic_ioapic_lock); 2996 apic_cpus[cpun].aci_status &= ~APIC_CPU_INTR_ENABLE; 2997 lock_clear(&apic_ioapic_lock); 2998 intr_restore(iflag); 2999 apic_cpus[cpun].aci_curipl = 0; 3000 i = apic_min_device_irq; 3001 for (; i <= apic_max_device_irq; i++) { 3002 /* 3003 * If there are bound interrupts on this cpu, then 3004 * rebind them to other processors. 3005 */ 3006 if ((irq_ptr = apic_irq_table[i]) != NULL) { 3007 ASSERT((irq_ptr->airq_temp_cpu == IRQ_UNBOUND) || 3008 (irq_ptr->airq_temp_cpu == IRQ_UNINIT) || 3009 ((irq_ptr->airq_temp_cpu & ~IRQ_USER_BOUND) < 3010 apic_nproc)); 3011 3012 if (irq_ptr->airq_temp_cpu == (cpun | IRQ_USER_BOUND)) { 3013 hardbound = 1; 3014 continue; 3015 } 3016 3017 if (irq_ptr->airq_temp_cpu == cpun) { 3018 do { 3019 apic_next_bind_cpu += 2; 3020 bind_cpu = apic_next_bind_cpu / 2; 3021 if (bind_cpu >= apic_nproc) { 3022 apic_next_bind_cpu = 1; 3023 bind_cpu = 0; 3024 3025 } 3026 } while (apic_rebind_all(irq_ptr, bind_cpu, 1)); 3027 } 3028 } 3029 } 3030 if (hardbound) { 3031 cmn_err(CE_WARN, "Could not disable interrupts on %d" 3032 "due to user bound interrupts", cpun); 3033 return (PSM_FAILURE); 3034 } 3035 else 3036 return (PSM_SUCCESS); 3037 } 3038 3039 static void 3040 apic_enable_intr(processorid_t cpun) 3041 { 3042 int i, iflag; 3043 apic_irq_t *irq_ptr; 3044 3045 iflag = intr_clear(); 3046 lock_set(&apic_ioapic_lock); 3047 apic_cpus[cpun].aci_status |= APIC_CPU_INTR_ENABLE; 3048 lock_clear(&apic_ioapic_lock); 3049 intr_restore(iflag); 3050 3051 i = apic_min_device_irq; 3052 for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) { 3053 if ((irq_ptr = apic_irq_table[i]) != NULL) { 3054 if ((irq_ptr->airq_cpu & 
~IRQ_USER_BOUND) == cpun) { 3055 (void) apic_rebind_all(irq_ptr, 3056 irq_ptr->airq_cpu, 1); 3057 } 3058 } 3059 } 3060 } 3061 3062 /* 3063 * apic_introp_xlate() replaces apic_translate_irq() and is 3064 * called only from apic_intr_ops(). With the new ADII framework, 3065 * the priority can no longer be retrived through i_ddi_get_intrspec(). 3066 * It has to be passed in from the caller. 3067 */ 3068 int 3069 apic_introp_xlate(dev_info_t *dip, struct intrspec *ispec, int type) 3070 { 3071 char dev_type[16]; 3072 int dev_len, pci_irq, newirq, bustype, devid, busid, i; 3073 int irqno = ispec->intrspec_vec; 3074 ddi_acc_handle_t cfg_handle; 3075 uchar_t ipin; 3076 struct apic_io_intr *intrp; 3077 iflag_t intr_flag; 3078 APIC_HEADER *hp; 3079 MADT_INTERRUPT_OVERRIDE *isop; 3080 apic_irq_t *airqp; 3081 3082 DDI_INTR_IMPLDBG((CE_CONT, "apic_introp_xlate: dip=0x%p name=%s " 3083 "type=%d irqno=0x%x\n", (void *)dip, ddi_get_name(dip), type, 3084 irqno)); 3085 3086 if (DDI_INTR_IS_MSI_OR_MSIX(type)) { 3087 if ((airqp = apic_find_irq(dip, ispec, type)) != NULL) 3088 return (apic_vector_to_irq[airqp->airq_vector]); 3089 return (apic_setup_irq_table(dip, irqno, NULL, ispec, 3090 NULL, type)); 3091 } 3092 3093 bustype = 0; 3094 3095 /* check if we have already translated this irq */ 3096 mutex_enter(&airq_mutex); 3097 newirq = apic_min_device_irq; 3098 for (; newirq <= apic_max_device_irq; newirq++) { 3099 airqp = apic_irq_table[newirq]; 3100 while (airqp) { 3101 if ((airqp->airq_dip == dip) && 3102 (airqp->airq_origirq == irqno) && 3103 (airqp->airq_mps_intr_index != FREE_INDEX)) { 3104 3105 mutex_exit(&airq_mutex); 3106 return (VIRTIRQ(newirq, airqp->airq_share_id)); 3107 } 3108 airqp = airqp->airq_next; 3109 } 3110 } 3111 mutex_exit(&airq_mutex); 3112 3113 if (apic_defconf) 3114 goto defconf; 3115 3116 if ((dip == NULL) || (!apic_irq_translate && !apic_enable_acpi)) 3117 goto nonpci; 3118 3119 dev_len = sizeof (dev_type); 3120 if (ddi_getlongprop_buf(DDI_DEV_T_NONE, 
ddi_get_parent(dip), 3121 DDI_PROP_DONTPASS, "device_type", (caddr_t)dev_type, 3122 &dev_len) != DDI_PROP_SUCCESS) { 3123 goto nonpci; 3124 } 3125 3126 if (strcmp(dev_type, "pci") == 0) { 3127 /* pci device */ 3128 if (acpica_get_bdf(dip, &busid, &devid, NULL) != 0) 3129 goto nonpci; 3130 if (busid == 0 && apic_pci_bus_total == 1) 3131 busid = (int)apic_single_pci_busid; 3132 3133 if (pci_config_setup(dip, &cfg_handle) != DDI_SUCCESS) 3134 goto nonpci; 3135 ipin = pci_config_get8(cfg_handle, PCI_CONF_IPIN) - PCI_INTA; 3136 pci_config_teardown(&cfg_handle); 3137 if (apic_enable_acpi && !apic_use_acpi_madt_only) { 3138 if (apic_acpi_translate_pci_irq(dip, busid, devid, 3139 ipin, &pci_irq, &intr_flag) != ACPI_PSM_SUCCESS) 3140 goto nonpci; 3141 3142 intr_flag.bustype = BUS_PCI; 3143 if ((newirq = apic_setup_irq_table(dip, pci_irq, NULL, 3144 ispec, &intr_flag, type)) == -1) 3145 goto nonpci; 3146 return (newirq); 3147 } else { 3148 pci_irq = ((devid & 0x1f) << 2) | (ipin & 0x3); 3149 if ((intrp = apic_find_io_intr_w_busid(pci_irq, busid)) 3150 == NULL) { 3151 if ((pci_irq = apic_handle_pci_pci_bridge(dip, 3152 devid, ipin, &intrp)) == -1) 3153 goto nonpci; 3154 } 3155 if ((newirq = apic_setup_irq_table(dip, pci_irq, intrp, 3156 ispec, NULL, type)) == -1) 3157 goto nonpci; 3158 return (newirq); 3159 } 3160 } else if (strcmp(dev_type, "isa") == 0) 3161 bustype = BUS_ISA; 3162 else if (strcmp(dev_type, "eisa") == 0) 3163 bustype = BUS_EISA; 3164 3165 nonpci: 3166 if (apic_enable_acpi && !apic_use_acpi_madt_only) { 3167 /* search iso entries first */ 3168 if (acpi_iso_cnt != 0) { 3169 hp = (APIC_HEADER *)acpi_isop; 3170 i = 0; 3171 while (i < acpi_iso_cnt) { 3172 if (hp->Type == APIC_XRUPT_OVERRIDE) { 3173 isop = (MADT_INTERRUPT_OVERRIDE *)hp; 3174 if (isop->Bus == 0 && 3175 isop->Source == irqno) { 3176 newirq = isop->Interrupt; 3177 intr_flag.intr_po = 3178 isop->Polarity; 3179 intr_flag.intr_el = 3180 isop->TriggerMode; 3181 intr_flag.bustype = BUS_ISA; 3182 3183 
return (apic_setup_irq_table( 3184 dip, newirq, NULL, ispec, 3185 &intr_flag, type)); 3186 3187 } 3188 i++; 3189 } 3190 hp = (APIC_HEADER *)(((char *)hp) + 3191 hp->Length); 3192 } 3193 } 3194 intr_flag.intr_po = INTR_PO_ACTIVE_HIGH; 3195 intr_flag.intr_el = INTR_EL_EDGE; 3196 intr_flag.bustype = BUS_ISA; 3197 return (apic_setup_irq_table(dip, irqno, NULL, ispec, 3198 &intr_flag, type)); 3199 } else { 3200 if (bustype == 0) 3201 bustype = eisa_level_intr_mask ? BUS_EISA : BUS_ISA; 3202 for (i = 0; i < 2; i++) { 3203 if (((busid = apic_find_bus_id(bustype)) != -1) && 3204 ((intrp = apic_find_io_intr_w_busid(irqno, busid)) 3205 != NULL)) { 3206 if ((newirq = apic_setup_irq_table(dip, irqno, 3207 intrp, ispec, NULL, type)) != -1) { 3208 return (newirq); 3209 } 3210 goto defconf; 3211 } 3212 bustype = (bustype == BUS_EISA) ? BUS_ISA : BUS_EISA; 3213 } 3214 } 3215 3216 /* MPS default configuration */ 3217 defconf: 3218 newirq = apic_setup_irq_table(dip, irqno, NULL, ispec, NULL, type); 3219 if (newirq == -1) 3220 return (newirq); 3221 ASSERT(IRQINDEX(newirq) == irqno); 3222 ASSERT(apic_irq_table[irqno]); 3223 return (newirq); 3224 } 3225 3226 3227 3228 3229 3230 3231 /* 3232 * On machines with PCI-PCI bridges, a device behind a PCI-PCI bridge 3233 * needs special handling. We may need to chase up the device tree, 3234 * using the PCI-PCI Bridge specification's "rotating IPIN assumptions", 3235 * to find the IPIN at the root bus that relates to the IPIN on the 3236 * subsidiary bus (for ACPI or MP). We may, however, have an entry 3237 * in the MP table or the ACPI namespace for this device itself. 3238 * We handle both cases in the search below. 
3239 */ 3240 /* this is the non-acpi version */ 3241 static int 3242 apic_handle_pci_pci_bridge(dev_info_t *idip, int child_devno, int child_ipin, 3243 struct apic_io_intr **intrp) 3244 { 3245 dev_info_t *dipp, *dip; 3246 int pci_irq; 3247 ddi_acc_handle_t cfg_handle; 3248 int bridge_devno, bridge_bus; 3249 int ipin; 3250 3251 dip = idip; 3252 3253 /*CONSTCOND*/ 3254 while (1) { 3255 if ((dipp = ddi_get_parent(dip)) == (dev_info_t *)NULL) 3256 return (-1); 3257 if ((pci_config_setup(dipp, &cfg_handle) == DDI_SUCCESS) && 3258 (pci_config_get8(cfg_handle, PCI_CONF_BASCLASS) == 3259 PCI_CLASS_BRIDGE) && (pci_config_get8(cfg_handle, 3260 PCI_CONF_SUBCLASS) == PCI_BRIDGE_PCI)) { 3261 pci_config_teardown(&cfg_handle); 3262 if (acpica_get_bdf(dipp, &bridge_bus, &bridge_devno, 3263 NULL) != 0) 3264 return (-1); 3265 /* 3266 * This is the rotating scheme that Compaq is using 3267 * and documented in the pci to pci spec. Also, if 3268 * the pci to pci bridge is behind another pci to 3269 * pci bridge, then it need to keep transversing 3270 * up until an interrupt entry is found or reach 3271 * the top of the tree 3272 */ 3273 ipin = (child_devno + child_ipin) % PCI_INTD; 3274 if (bridge_bus == 0 && apic_pci_bus_total == 1) 3275 bridge_bus = (int)apic_single_pci_busid; 3276 pci_irq = ((bridge_devno & 0x1f) << 2) | 3277 (ipin & 0x3); 3278 if ((*intrp = apic_find_io_intr_w_busid(pci_irq, 3279 bridge_bus)) != NULL) { 3280 return (pci_irq); 3281 } 3282 dip = dipp; 3283 child_devno = bridge_devno; 3284 child_ipin = ipin; 3285 } else 3286 return (-1); 3287 } 3288 /*LINTED: function will not fall off the bottom */ 3289 } 3290 3291 3292 3293 3294 static uchar_t 3295 acpi_find_ioapic(int irq) 3296 { 3297 int i; 3298 3299 for (i = 0; i < apic_io_max; i++) { 3300 if (irq >= apic_io_vectbase[i] && irq <= apic_io_vectend[i]) 3301 return (i); 3302 } 3303 return (0xFF); /* shouldn't happen */ 3304 } 3305 3306 /* 3307 * See if two irqs are compatible for sharing a vector. 
3308 * Currently we only support sharing of PCI devices. 3309 */ 3310 static int 3311 acpi_intr_compatible(iflag_t iflag1, iflag_t iflag2) 3312 { 3313 uint_t level1, po1; 3314 uint_t level2, po2; 3315 3316 /* Assume active high by default */ 3317 po1 = 0; 3318 po2 = 0; 3319 3320 if (iflag1.bustype != iflag2.bustype || iflag1.bustype != BUS_PCI) 3321 return (0); 3322 3323 if (iflag1.intr_el == INTR_EL_CONFORM) 3324 level1 = AV_LEVEL; 3325 else 3326 level1 = (iflag1.intr_el == INTR_EL_LEVEL) ? AV_LEVEL : 0; 3327 3328 if (level1 && ((iflag1.intr_po == INTR_PO_ACTIVE_LOW) || 3329 (iflag1.intr_po == INTR_PO_CONFORM))) 3330 po1 = AV_ACTIVE_LOW; 3331 3332 if (iflag2.intr_el == INTR_EL_CONFORM) 3333 level2 = AV_LEVEL; 3334 else 3335 level2 = (iflag2.intr_el == INTR_EL_LEVEL) ? AV_LEVEL : 0; 3336 3337 if (level2 && ((iflag2.intr_po == INTR_PO_ACTIVE_LOW) || 3338 (iflag2.intr_po == INTR_PO_CONFORM))) 3339 po2 = AV_ACTIVE_LOW; 3340 3341 if ((level1 == level2) && (po1 == po2)) 3342 return (1); 3343 3344 return (0); 3345 } 3346 3347 /* 3348 * Attempt to share vector with someone else 3349 */ 3350 static int 3351 apic_share_vector(int irqno, iflag_t *intr_flagp, short intr_index, int ipl, 3352 uchar_t ioapicindex, uchar_t ipin, apic_irq_t **irqptrp) 3353 { 3354 #ifdef DEBUG 3355 apic_irq_t *tmpirqp = NULL; 3356 #endif /* DEBUG */ 3357 apic_irq_t *irqptr, dummyirq; 3358 int newirq, chosen_irq = -1, share = 127; 3359 int lowest, highest, i; 3360 uchar_t share_id; 3361 3362 DDI_INTR_IMPLDBG((CE_CONT, "apic_share_vector: irqno=0x%x " 3363 "intr_index=0x%x ipl=0x%x\n", irqno, intr_index, ipl)); 3364 3365 highest = apic_ipltopri[ipl] + APIC_VECTOR_MASK; 3366 lowest = apic_ipltopri[ipl-1] + APIC_VECTOR_PER_IPL; 3367 3368 if (highest < lowest) /* Both ipl and ipl-1 map to same pri */ 3369 lowest -= APIC_VECTOR_PER_IPL; 3370 dummyirq.airq_mps_intr_index = intr_index; 3371 dummyirq.airq_ioapicindex = ioapicindex; 3372 dummyirq.airq_intin_no = ipin; 3373 if (intr_flagp) 3374 
dummyirq.airq_iflag = *intr_flagp; 3375 apic_record_rdt_entry(&dummyirq, irqno); 3376 for (i = lowest; i <= highest; i++) { 3377 newirq = apic_vector_to_irq[i]; 3378 if (newirq == APIC_RESV_IRQ) 3379 continue; 3380 irqptr = apic_irq_table[newirq]; 3381 3382 /* don't share SCI */ 3383 if (irqptr->airq_mps_intr_index == SCI_INDEX) 3384 continue; 3385 3386 if ((dummyirq.airq_rdt_entry & 0xFF00) != 3387 (irqptr->airq_rdt_entry & 0xFF00)) 3388 /* not compatible */ 3389 continue; 3390 3391 if (irqptr->airq_share < share) { 3392 share = irqptr->airq_share; 3393 chosen_irq = newirq; 3394 } 3395 } 3396 if (chosen_irq != -1) { 3397 /* 3398 * Assign a share id which is free or which is larger 3399 * than the largest one. 3400 */ 3401 share_id = 1; 3402 mutex_enter(&airq_mutex); 3403 irqptr = apic_irq_table[chosen_irq]; 3404 while (irqptr) { 3405 if (irqptr->airq_mps_intr_index == FREE_INDEX) { 3406 share_id = irqptr->airq_share_id; 3407 break; 3408 } 3409 if (share_id <= irqptr->airq_share_id) 3410 share_id = irqptr->airq_share_id + 1; 3411 #ifdef DEBUG 3412 tmpirqp = irqptr; 3413 #endif /* DEBUG */ 3414 irqptr = irqptr->airq_next; 3415 } 3416 if (!irqptr) { 3417 irqptr = kmem_zalloc(sizeof (apic_irq_t), KM_SLEEP); 3418 irqptr->airq_temp_cpu = IRQ_UNINIT; 3419 irqptr->airq_next = 3420 apic_irq_table[chosen_irq]->airq_next; 3421 apic_irq_table[chosen_irq]->airq_next = irqptr; 3422 #ifdef DEBUG 3423 tmpirqp = apic_irq_table[chosen_irq]; 3424 #endif /* DEBUG */ 3425 } 3426 irqptr->airq_mps_intr_index = intr_index; 3427 irqptr->airq_ioapicindex = ioapicindex; 3428 irqptr->airq_intin_no = ipin; 3429 if (intr_flagp) 3430 irqptr->airq_iflag = *intr_flagp; 3431 irqptr->airq_vector = apic_irq_table[chosen_irq]->airq_vector; 3432 irqptr->airq_share_id = share_id; 3433 apic_record_rdt_entry(irqptr, irqno); 3434 *irqptrp = irqptr; 3435 #ifdef DEBUG 3436 /* shuffle the pointers to test apic_delspl path */ 3437 if (tmpirqp) { 3438 tmpirqp->airq_next = irqptr->airq_next; 3439 
irqptr->airq_next = apic_irq_table[chosen_irq]; 3440 apic_irq_table[chosen_irq] = irqptr; 3441 } 3442 #endif /* DEBUG */ 3443 mutex_exit(&airq_mutex); 3444 return (VIRTIRQ(chosen_irq, share_id)); 3445 } 3446 return (-1); 3447 } 3448 3449 /* 3450 * 3451 */ 3452 static int 3453 apic_setup_sci_irq_table(int irqno, uchar_t ipl, iflag_t *intr_flagp) 3454 { 3455 int intr_index; 3456 uchar_t ipin, ioapicindex, vector; 3457 apic_irq_t *irqptr; 3458 3459 ASSERT(intr_flagp != NULL); 3460 3461 intr_index = SCI_INDEX; 3462 ioapicindex = acpi_find_ioapic(irqno); 3463 ASSERT(ioapicindex != 0xFF); 3464 ipin = irqno - apic_io_vectbase[ioapicindex]; 3465 if (apic_irq_table[irqno] && 3466 apic_irq_table[irqno]->airq_mps_intr_index == SCI_INDEX) { 3467 ASSERT(apic_irq_table[irqno]->airq_intin_no == ipin && 3468 apic_irq_table[irqno]->airq_ioapicindex == 3469 ioapicindex); 3470 return (irqno); 3471 } 3472 3473 if ((vector = apic_allocate_vector(ipl, irqno, 0)) == 0) { 3474 cmn_err(CE_WARN, "!apic: failed to allocate vector for SCI"); 3475 return (-1); 3476 } 3477 mutex_enter(&airq_mutex); 3478 if (apic_irq_table[irqno] == NULL) { 3479 irqptr = kmem_zalloc(sizeof (apic_irq_t), KM_SLEEP); 3480 irqptr->airq_temp_cpu = IRQ_UNINIT; 3481 apic_irq_table[irqno] = irqptr; 3482 } else { 3483 /* 3484 * We assume that SCI is the first to attach this IRQ 3485 */ 3486 cmn_err(CE_WARN, "!acpi: apic_irq_t not empty for SCI"); 3487 return (-1); 3488 } 3489 3490 apic_max_device_irq = max(irqno, apic_max_device_irq); 3491 apic_min_device_irq = min(irqno, apic_min_device_irq); 3492 mutex_exit(&airq_mutex); 3493 irqptr->airq_ioapicindex = ioapicindex; 3494 irqptr->airq_intin_no = ipin; 3495 irqptr->airq_ipl = ipl; 3496 irqptr->airq_vector = vector; 3497 irqptr->airq_origirq = (uchar_t)irqno; 3498 irqptr->airq_share_id = 0; 3499 irqptr->airq_mps_intr_index = (short)intr_index; 3500 irqptr->airq_dip = NULL; 3501 irqptr->airq_major = 0; 3502 irqptr->airq_cpu = 0; /* SCI always on CPU 0 */ 3503 
irqptr->airq_iflag = *intr_flagp; 3504 apic_record_rdt_entry(irqptr, irqno); 3505 return (irqno); 3506 } 3507 3508 /* 3509 * 3510 */ 3511 static int 3512 apic_setup_irq_table(dev_info_t *dip, int irqno, struct apic_io_intr *intrp, 3513 struct intrspec *ispec, iflag_t *intr_flagp, int type) 3514 { 3515 int origirq = ispec->intrspec_vec; 3516 uchar_t ipl = ispec->intrspec_pri; 3517 int newirq, intr_index; 3518 uchar_t ipin, ioapic, ioapicindex, vector; 3519 apic_irq_t *irqptr; 3520 major_t major; 3521 dev_info_t *sdip; 3522 3523 DDI_INTR_IMPLDBG((CE_CONT, "apic_setup_irq_table: dip=0x%p type=%d " 3524 "irqno=0x%x origirq=0x%x\n", (void *)dip, type, irqno, origirq)); 3525 3526 ASSERT(ispec != NULL); 3527 3528 major = (dip != NULL) ? ddi_name_to_major(ddi_get_name(dip)) : 0; 3529 3530 if (DDI_INTR_IS_MSI_OR_MSIX(type)) { 3531 /* MSI/X doesn't need to setup ioapic stuffs */ 3532 ioapicindex = 0xff; 3533 ioapic = 0xff; 3534 ipin = (uchar_t)0xff; 3535 intr_index = (type == DDI_INTR_TYPE_MSI) ? MSI_INDEX : 3536 MSIX_INDEX; 3537 mutex_enter(&airq_mutex); 3538 if ((irqno = apic_allocate_irq(APIC_FIRST_FREE_IRQ)) == -1) { 3539 mutex_exit(&airq_mutex); 3540 /* need an irq for MSI/X to index into autovect[] */ 3541 cmn_err(CE_WARN, "No interrupt irq: %s instance %d", 3542 ddi_get_name(dip), ddi_get_instance(dip)); 3543 return (-1); 3544 } 3545 mutex_exit(&airq_mutex); 3546 3547 } else if (intrp != NULL) { 3548 intr_index = (int)(intrp - apic_io_intrp); 3549 ioapic = intrp->intr_destid; 3550 ipin = intrp->intr_destintin; 3551 /* Find ioapicindex. If destid was ALL, we will exit with 0. 
*/ 3552 for (ioapicindex = apic_io_max - 1; ioapicindex; ioapicindex--) 3553 if (apic_io_id[ioapicindex] == ioapic) 3554 break; 3555 ASSERT((ioapic == apic_io_id[ioapicindex]) || 3556 (ioapic == INTR_ALL_APIC)); 3557 3558 /* check whether this intin# has been used by another irqno */ 3559 if ((newirq = apic_find_intin(ioapicindex, ipin)) != -1) { 3560 return (newirq); 3561 } 3562 3563 } else if (intr_flagp != NULL) { 3564 /* ACPI case */ 3565 intr_index = ACPI_INDEX; 3566 ioapicindex = acpi_find_ioapic(irqno); 3567 ASSERT(ioapicindex != 0xFF); 3568 ioapic = apic_io_id[ioapicindex]; 3569 ipin = irqno - apic_io_vectbase[ioapicindex]; 3570 if (apic_irq_table[irqno] && 3571 apic_irq_table[irqno]->airq_mps_intr_index == ACPI_INDEX) { 3572 ASSERT(apic_irq_table[irqno]->airq_intin_no == ipin && 3573 apic_irq_table[irqno]->airq_ioapicindex == 3574 ioapicindex); 3575 return (irqno); 3576 } 3577 3578 } else { 3579 /* default configuration */ 3580 ioapicindex = 0; 3581 ioapic = apic_io_id[ioapicindex]; 3582 ipin = (uchar_t)irqno; 3583 intr_index = DEFAULT_INDEX; 3584 } 3585 3586 if (ispec == NULL) { 3587 APIC_VERBOSE_IOAPIC((CE_WARN, "No intrspec for irqno = %x\n", 3588 irqno)); 3589 } else if ((vector = apic_allocate_vector(ipl, irqno, 0)) == 0) { 3590 if ((newirq = apic_share_vector(irqno, intr_flagp, intr_index, 3591 ipl, ioapicindex, ipin, &irqptr)) != -1) { 3592 irqptr->airq_ipl = ipl; 3593 irqptr->airq_origirq = (uchar_t)origirq; 3594 irqptr->airq_dip = dip; 3595 irqptr->airq_major = major; 3596 sdip = apic_irq_table[IRQINDEX(newirq)]->airq_dip; 3597 if (sdip == NULL) { 3598 cmn_err(CE_WARN, "Sharing vectors: %s" 3599 " instance %d and SCI", 3600 ddi_get_name(dip), ddi_get_instance(dip)); 3601 } else { 3602 cmn_err(CE_WARN, "Sharing vectors: %s" 3603 " instance %d and %s instance %d", 3604 ddi_get_name(sdip), ddi_get_instance(sdip), 3605 ddi_get_name(dip), ddi_get_instance(dip)); 3606 } 3607 return (newirq); 3608 } 3609 /* try high priority allocation now that share has 
failed */ 3610 if ((vector = apic_allocate_vector(ipl, irqno, 1)) == 0) { 3611 cmn_err(CE_WARN, "No interrupt vector: %s instance %d", 3612 ddi_get_name(dip), ddi_get_instance(dip)); 3613 return (-1); 3614 } 3615 } 3616 3617 mutex_enter(&airq_mutex); 3618 if (apic_irq_table[irqno] == NULL) { 3619 irqptr = kmem_zalloc(sizeof (apic_irq_t), KM_SLEEP); 3620 irqptr->airq_temp_cpu = IRQ_UNINIT; 3621 apic_irq_table[irqno] = irqptr; 3622 } else { 3623 irqptr = apic_irq_table[irqno]; 3624 if (irqptr->airq_mps_intr_index != FREE_INDEX) { 3625 /* 3626 * The slot is used by another irqno, so allocate 3627 * a free irqno for this interrupt 3628 */ 3629 newirq = apic_allocate_irq(APIC_FIRST_FREE_IRQ); 3630 if (newirq == -1) { 3631 mutex_exit(&airq_mutex); 3632 return (-1); 3633 } 3634 irqno = newirq; 3635 irqptr = apic_irq_table[irqno]; 3636 if (irqptr == NULL) { 3637 irqptr = kmem_zalloc(sizeof (apic_irq_t), 3638 KM_SLEEP); 3639 irqptr->airq_temp_cpu = IRQ_UNINIT; 3640 apic_irq_table[irqno] = irqptr; 3641 } 3642 apic_modify_vector(vector, newirq); 3643 } 3644 } 3645 apic_max_device_irq = max(irqno, apic_max_device_irq); 3646 apic_min_device_irq = min(irqno, apic_min_device_irq); 3647 mutex_exit(&airq_mutex); 3648 irqptr->airq_ioapicindex = ioapicindex; 3649 irqptr->airq_intin_no = ipin; 3650 irqptr->airq_ipl = ipl; 3651 irqptr->airq_vector = vector; 3652 irqptr->airq_origirq = (uchar_t)origirq; 3653 irqptr->airq_share_id = 0; 3654 irqptr->airq_mps_intr_index = (short)intr_index; 3655 irqptr->airq_dip = dip; 3656 irqptr->airq_major = major; 3657 irqptr->airq_cpu = apic_bind_intr(dip, irqno, ioapic, ipin); 3658 if (intr_flagp) 3659 irqptr->airq_iflag = *intr_flagp; 3660 3661 if (!DDI_INTR_IS_MSI_OR_MSIX(type)) { 3662 /* setup I/O APIC entry for non-MSI/X interrupts */ 3663 apic_record_rdt_entry(irqptr, irqno); 3664 } 3665 return (irqno); 3666 } 3667 3668 /* 3669 * return the cpu to which this intr should be bound. 
3670 * Check properties or any other mechanism to see if user wants it 3671 * bound to a specific CPU. If so, return the cpu id with high bit set. 3672 * If not, use the policy to choose a cpu and return the id. 3673 */ 3674 uchar_t 3675 apic_bind_intr(dev_info_t *dip, int irq, uchar_t ioapicid, uchar_t intin) 3676 { 3677 int instance, instno, prop_len, bind_cpu, count; 3678 uint_t i, rc; 3679 uchar_t cpu; 3680 major_t major; 3681 char *name, *drv_name, *prop_val, *cptr; 3682 char prop_name[32]; 3683 3684 3685 if (apic_intr_policy == INTR_LOWEST_PRIORITY) 3686 return (IRQ_UNBOUND); 3687 3688 drv_name = NULL; 3689 rc = DDI_PROP_NOT_FOUND; 3690 major = (major_t)-1; 3691 if (dip != NULL) { 3692 name = ddi_get_name(dip); 3693 major = ddi_name_to_major(name); 3694 drv_name = ddi_major_to_name(major); 3695 instance = ddi_get_instance(dip); 3696 if (apic_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) { 3697 i = apic_min_device_irq; 3698 for (; i <= apic_max_device_irq; i++) { 3699 3700 if ((i == irq) || (apic_irq_table[i] == NULL) || 3701 (apic_irq_table[i]->airq_mps_intr_index 3702 == FREE_INDEX)) 3703 continue; 3704 3705 if ((apic_irq_table[i]->airq_major == major) && 3706 (!(apic_irq_table[i]->airq_cpu & 3707 IRQ_USER_BOUND))) { 3708 3709 cpu = apic_irq_table[i]->airq_cpu; 3710 3711 cmn_err(CE_CONT, 3712 "!pcplusmp: %s (%s) instance #%d " 3713 "vector 0x%x ioapic 0x%x " 3714 "intin 0x%x is bound to cpu %d\n", 3715 name, drv_name, instance, irq, 3716 ioapicid, intin, cpu); 3717 return (cpu); 3718 } 3719 } 3720 } 3721 /* 3722 * search for "drvname"_intpt_bind_cpus property first, the 3723 * syntax of the property should be "a[,b,c,...]" where 3724 * instance 0 binds to cpu a, instance 1 binds to cpu b, 3725 * instance 3 binds to cpu c... 3726 * ddi_getlongprop() will search /option first, then / 3727 * if "drvname"_intpt_bind_cpus doesn't exist, then find 3728 * intpt_bind_cpus property. 
The syntax is the same, and 3729 * it applies to all the devices if its "drvname" specific 3730 * property doesn't exist 3731 */ 3732 (void) strcpy(prop_name, drv_name); 3733 (void) strcat(prop_name, "_intpt_bind_cpus"); 3734 rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, 0, prop_name, 3735 (caddr_t)&prop_val, &prop_len); 3736 if (rc != DDI_PROP_SUCCESS) { 3737 rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, 0, 3738 "intpt_bind_cpus", (caddr_t)&prop_val, &prop_len); 3739 } 3740 } 3741 if (rc == DDI_PROP_SUCCESS) { 3742 for (i = count = 0; i < (prop_len - 1); i++) 3743 if (prop_val[i] == ',') 3744 count++; 3745 if (prop_val[i-1] != ',') 3746 count++; 3747 /* 3748 * if somehow the binding instances defined in the 3749 * property are not enough for this instno., then 3750 * reuse the pattern for the next instance until 3751 * it reaches the requested instno 3752 */ 3753 instno = instance % count; 3754 i = 0; 3755 cptr = prop_val; 3756 while (i < instno) 3757 if (*cptr++ == ',') 3758 i++; 3759 bind_cpu = stoi(&cptr); 3760 kmem_free(prop_val, prop_len); 3761 /* if specific cpu is bogus, then default to cpu 0 */ 3762 if (bind_cpu >= apic_nproc) { 3763 cmn_err(CE_WARN, "pcplusmp: %s=%s: CPU %d not present", 3764 prop_name, prop_val, bind_cpu); 3765 bind_cpu = 0; 3766 } else { 3767 /* indicate that we are bound at user request */ 3768 bind_cpu |= IRQ_USER_BOUND; 3769 } 3770 /* 3771 * no need to check apic_cpus[].aci_status, if specific cpu is 3772 * not up, then post_cpu_start will handle it. 
3773 */ 3774 } else { 3775 /* 3776 * We change bind_cpu only for every two calls 3777 * as most drivers still do 2 add_intrs for every 3778 * interrupt 3779 */ 3780 bind_cpu = (apic_next_bind_cpu++) / 2; 3781 if (bind_cpu >= apic_nproc) { 3782 apic_next_bind_cpu = 1; 3783 bind_cpu = 0; 3784 } 3785 } 3786 if (drv_name != NULL) 3787 cmn_err(CE_CONT, "!pcplusmp: %s (%s) instance %d " 3788 "vector 0x%x ioapic 0x%x intin 0x%x is bound to cpu %d\n", 3789 name, drv_name, instance, 3790 irq, ioapicid, intin, bind_cpu & ~IRQ_USER_BOUND); 3791 else 3792 cmn_err(CE_CONT, "!pcplusmp: " 3793 "vector 0x%x ioapic 0x%x intin 0x%x is bound to cpu %d\n", 3794 irq, ioapicid, intin, bind_cpu & ~IRQ_USER_BOUND); 3795 3796 return ((uchar_t)bind_cpu); 3797 } 3798 3799 static struct apic_io_intr * 3800 apic_find_io_intr_w_busid(int irqno, int busid) 3801 { 3802 struct apic_io_intr *intrp; 3803 3804 /* 3805 * It can have more than 1 entry with same source bus IRQ, 3806 * but unique with the source bus id 3807 */ 3808 intrp = apic_io_intrp; 3809 if (intrp != NULL) { 3810 while (intrp->intr_entry == APIC_IO_INTR_ENTRY) { 3811 if (intrp->intr_irq == irqno && 3812 intrp->intr_busid == busid && 3813 intrp->intr_type == IO_INTR_INT) 3814 return (intrp); 3815 intrp++; 3816 } 3817 } 3818 APIC_VERBOSE_IOAPIC((CE_NOTE, "Did not find io intr for irqno:" 3819 "busid %x:%x\n", irqno, busid)); 3820 return ((struct apic_io_intr *)NULL); 3821 } 3822 3823 3824 struct mps_bus_info { 3825 char *bus_name; 3826 int bus_id; 3827 } bus_info_array[] = { 3828 "ISA ", BUS_ISA, 3829 "PCI ", BUS_PCI, 3830 "EISA ", BUS_EISA, 3831 "XPRESS", BUS_XPRESS, 3832 "PCMCIA", BUS_PCMCIA, 3833 "VL ", BUS_VL, 3834 "CBUS ", BUS_CBUS, 3835 "CBUSII", BUS_CBUSII, 3836 "FUTURE", BUS_FUTURE, 3837 "INTERN", BUS_INTERN, 3838 "MBI ", BUS_MBI, 3839 "MBII ", BUS_MBII, 3840 "MPI ", BUS_MPI, 3841 "MPSA ", BUS_MPSA, 3842 "NUBUS ", BUS_NUBUS, 3843 "TC ", BUS_TC, 3844 "VME ", BUS_VME 3845 }; 3846 3847 static int 3848 apic_find_bus_type(char 
*bus) 3849 { 3850 int i = 0; 3851 3852 for (; i < sizeof (bus_info_array)/sizeof (struct mps_bus_info); i++) 3853 if (strncmp(bus, bus_info_array[i].bus_name, 3854 strlen(bus_info_array[i].bus_name)) == 0) 3855 return (bus_info_array[i].bus_id); 3856 APIC_VERBOSE_IOAPIC((CE_WARN, "Did not find bus type for bus %s", bus)); 3857 return (0); 3858 } 3859 3860 static int 3861 apic_find_bus(int busid) 3862 { 3863 struct apic_bus *busp; 3864 3865 busp = apic_busp; 3866 while (busp->bus_entry == APIC_BUS_ENTRY) { 3867 if (busp->bus_id == busid) 3868 return (apic_find_bus_type((char *)&busp->bus_str1)); 3869 busp++; 3870 } 3871 APIC_VERBOSE_IOAPIC((CE_WARN, "Did not find bus for bus id %x", busid)); 3872 return (0); 3873 } 3874 3875 static int 3876 apic_find_bus_id(int bustype) 3877 { 3878 struct apic_bus *busp; 3879 3880 busp = apic_busp; 3881 while (busp->bus_entry == APIC_BUS_ENTRY) { 3882 if (apic_find_bus_type((char *)&busp->bus_str1) == bustype) 3883 return (busp->bus_id); 3884 busp++; 3885 } 3886 APIC_VERBOSE_IOAPIC((CE_WARN, "Did not find bus id for bustype %x", 3887 bustype)); 3888 return (-1); 3889 } 3890 3891 /* 3892 * Check if a particular irq need to be reserved for any io_intr 3893 */ 3894 static struct apic_io_intr * 3895 apic_find_io_intr(int irqno) 3896 { 3897 struct apic_io_intr *intrp; 3898 3899 intrp = apic_io_intrp; 3900 if (intrp != NULL) { 3901 while (intrp->intr_entry == APIC_IO_INTR_ENTRY) { 3902 if (intrp->intr_irq == irqno && 3903 intrp->intr_type == IO_INTR_INT) 3904 return (intrp); 3905 intrp++; 3906 } 3907 } 3908 return ((struct apic_io_intr *)NULL); 3909 } 3910 3911 /* 3912 * Check if the given ioapicindex intin combination has already been assigned 3913 * an irq. If so return irqno. 
Else -1 3914 */ 3915 static int 3916 apic_find_intin(uchar_t ioapic, uchar_t intin) 3917 { 3918 apic_irq_t *irqptr; 3919 int i; 3920 3921 /* find ioapic and intin in the apic_irq_table[] and return the index */ 3922 for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) { 3923 irqptr = apic_irq_table[i]; 3924 while (irqptr) { 3925 if ((irqptr->airq_mps_intr_index >= 0) && 3926 (irqptr->airq_intin_no == intin) && 3927 (irqptr->airq_ioapicindex == ioapic)) { 3928 APIC_VERBOSE_IOAPIC((CE_NOTE, "!Found irq " 3929 "entry for ioapic:intin %x:%x " 3930 "shared interrupts ?", ioapic, intin)); 3931 return (i); 3932 } 3933 irqptr = irqptr->airq_next; 3934 } 3935 } 3936 return (-1); 3937 } 3938 3939 int 3940 apic_allocate_irq(int irq) 3941 { 3942 int freeirq, i; 3943 3944 if ((freeirq = apic_find_free_irq(irq, (APIC_RESV_IRQ - 1))) == -1) 3945 if ((freeirq = apic_find_free_irq(APIC_FIRST_FREE_IRQ, 3946 (irq - 1))) == -1) { 3947 /* 3948 * if BIOS really defines every single irq in the mps 3949 * table, then don't worry about conflicting with 3950 * them, just use any free slot in apic_irq_table 3951 */ 3952 for (i = APIC_FIRST_FREE_IRQ; i < APIC_RESV_IRQ; i++) { 3953 if ((apic_irq_table[i] == NULL) || 3954 apic_irq_table[i]->airq_mps_intr_index == 3955 FREE_INDEX) { 3956 freeirq = i; 3957 break; 3958 } 3959 } 3960 if (freeirq == -1) { 3961 /* This shouldn't happen, but just in case */ 3962 cmn_err(CE_WARN, "pcplusmp: NO available IRQ"); 3963 return (-1); 3964 } 3965 } 3966 if (apic_irq_table[freeirq] == NULL) { 3967 apic_irq_table[freeirq] = 3968 kmem_zalloc(sizeof (apic_irq_t), KM_NOSLEEP); 3969 if (apic_irq_table[freeirq] == NULL) { 3970 cmn_err(CE_WARN, "pcplusmp: NO memory to allocate IRQ"); 3971 return (-1); 3972 } 3973 apic_irq_table[freeirq]->airq_mps_intr_index = FREE_INDEX; 3974 } 3975 return (freeirq); 3976 } 3977 3978 static int 3979 apic_find_free_irq(int start, int end) 3980 { 3981 int i; 3982 3983 for (i = start; i <= end; i++) 3984 /* Check if any I/O 
entry needs this IRQ */ 3985 if (apic_find_io_intr(i) == NULL) { 3986 /* Then see if it is free */ 3987 if ((apic_irq_table[i] == NULL) || 3988 (apic_irq_table[i]->airq_mps_intr_index == 3989 FREE_INDEX)) { 3990 return (i); 3991 } 3992 } 3993 return (-1); 3994 } 3995 3996 /* 3997 * Allocate a free vector for irq at ipl. Takes care of merging of multiple 3998 * IPLs into a single APIC level as well as stretching some IPLs onto multiple 3999 * levels. APIC_HI_PRI_VECTS interrupts are reserved for high priority 4000 * requests and allocated only when pri is set. 4001 */ 4002 static uchar_t 4003 apic_allocate_vector(int ipl, int irq, int pri) 4004 { 4005 int lowest, highest, i; 4006 4007 highest = apic_ipltopri[ipl] + APIC_VECTOR_MASK; 4008 lowest = apic_ipltopri[ipl - 1] + APIC_VECTOR_PER_IPL; 4009 4010 if (highest < lowest) /* Both ipl and ipl - 1 map to same pri */ 4011 lowest -= APIC_VECTOR_PER_IPL; 4012 4013 #ifdef DEBUG 4014 if (apic_restrict_vector) /* for testing shared interrupt logic */ 4015 highest = lowest + apic_restrict_vector + APIC_HI_PRI_VECTS; 4016 #endif /* DEBUG */ 4017 if (pri == 0) 4018 highest -= APIC_HI_PRI_VECTS; 4019 4020 for (i = lowest; i < highest; i++) { 4021 if ((i == T_FASTTRAP) || (i == APIC_SPUR_INTR) || 4022 (i == T_SYSCALLINT) || (i == T_DTRACE_PROBE) || 4023 (i == T_DTRACE_RET)) 4024 continue; 4025 if (apic_vector_to_irq[i] == APIC_RESV_IRQ) { 4026 apic_vector_to_irq[i] = (uchar_t)irq; 4027 return (i); 4028 } 4029 } 4030 4031 return (0); 4032 } 4033 4034 static void 4035 apic_modify_vector(uchar_t vector, int irq) 4036 { 4037 apic_vector_to_irq[vector] = (uchar_t)irq; 4038 } 4039 4040 /* 4041 * Mark vector as being in the process of being deleted. Interrupts 4042 * may still come in on some CPU. The moment an interrupt comes with 4043 * the new vector, we know we can free the old one. Called only from 4044 * addspl and delspl with interrupts disabled. 
Because an interrupt 4045 * can be shared, but no interrupt from either device may come in, 4046 * we also use a timeout mechanism, which we arbitrarily set to 4047 * apic_revector_timeout microseconds. 4048 */ 4049 static void 4050 apic_mark_vector(uchar_t oldvector, uchar_t newvector) 4051 { 4052 int iflag = intr_clear(); 4053 lock_set(&apic_revector_lock); 4054 if (!apic_oldvec_to_newvec) { 4055 apic_oldvec_to_newvec = 4056 kmem_zalloc(sizeof (newvector) * APIC_MAX_VECTOR * 2, 4057 KM_NOSLEEP); 4058 4059 if (!apic_oldvec_to_newvec) { 4060 /* 4061 * This failure is not catastrophic. 4062 * But, the oldvec will never be freed. 4063 */ 4064 apic_error |= APIC_ERR_MARK_VECTOR_FAIL; 4065 lock_clear(&apic_revector_lock); 4066 intr_restore(iflag); 4067 return; 4068 } 4069 apic_newvec_to_oldvec = &apic_oldvec_to_newvec[APIC_MAX_VECTOR]; 4070 } 4071 4072 /* See if we already did this for drivers which do double addintrs */ 4073 if (apic_oldvec_to_newvec[oldvector] != newvector) { 4074 apic_oldvec_to_newvec[oldvector] = newvector; 4075 apic_newvec_to_oldvec[newvector] = oldvector; 4076 apic_revector_pending++; 4077 } 4078 lock_clear(&apic_revector_lock); 4079 intr_restore(iflag); 4080 (void) timeout(apic_xlate_vector_free_timeout_handler, 4081 (void *)(uintptr_t)oldvector, drv_usectohz(apic_revector_timeout)); 4082 } 4083 4084 /* 4085 * xlate_vector is called from intr_enter if revector_pending is set. 4086 * It will xlate it if needed and mark the old vector as free. 4087 */ 4088 static uchar_t 4089 apic_xlate_vector(uchar_t vector) 4090 { 4091 uchar_t newvector, oldvector = 0; 4092 4093 lock_set(&apic_revector_lock); 4094 /* Do we really need to do this ? */ 4095 if (!apic_revector_pending) { 4096 lock_clear(&apic_revector_lock); 4097 return (vector); 4098 } 4099 if ((newvector = apic_oldvec_to_newvec[vector]) != 0) 4100 oldvector = vector; 4101 else { 4102 /* 4103 * The incoming vector is new . 
See if a stale entry is 4104 * remaining 4105 */ 4106 if ((oldvector = apic_newvec_to_oldvec[vector]) != 0) 4107 newvector = vector; 4108 } 4109 4110 if (oldvector) { 4111 apic_revector_pending--; 4112 apic_oldvec_to_newvec[oldvector] = 0; 4113 apic_newvec_to_oldvec[newvector] = 0; 4114 apic_free_vector(oldvector); 4115 lock_clear(&apic_revector_lock); 4116 /* There could have been more than one reprogramming! */ 4117 return (apic_xlate_vector(newvector)); 4118 } 4119 lock_clear(&apic_revector_lock); 4120 return (vector); 4121 } 4122 4123 void 4124 apic_xlate_vector_free_timeout_handler(void *arg) 4125 { 4126 int iflag; 4127 uchar_t oldvector, newvector; 4128 4129 oldvector = (uchar_t)(uintptr_t)arg; 4130 iflag = intr_clear(); 4131 lock_set(&apic_revector_lock); 4132 if ((newvector = apic_oldvec_to_newvec[oldvector]) != 0) { 4133 apic_free_vector(oldvector); 4134 apic_oldvec_to_newvec[oldvector] = 0; 4135 apic_newvec_to_oldvec[newvector] = 0; 4136 apic_revector_pending--; 4137 } 4138 4139 lock_clear(&apic_revector_lock); 4140 intr_restore(iflag); 4141 } 4142 4143 4144 /* Mark vector as not being used by any irq */ 4145 static void 4146 apic_free_vector(uchar_t vector) 4147 { 4148 apic_vector_to_irq[vector] = APIC_RESV_IRQ; 4149 } 4150 4151 /* 4152 * compute the polarity, trigger mode and vector for programming into 4153 * the I/O apic and record in airq_rdt_entry. 
4154 */ 4155 static void 4156 apic_record_rdt_entry(apic_irq_t *irqptr, int irq) 4157 { 4158 int ioapicindex, bus_type, vector; 4159 short intr_index; 4160 uint_t level, po, io_po; 4161 struct apic_io_intr *iointrp; 4162 4163 intr_index = irqptr->airq_mps_intr_index; 4164 DDI_INTR_IMPLDBG((CE_CONT, "apic_record_rdt_entry: intr_index=%d " 4165 "irq = 0x%x dip = 0x%p vector = 0x%x\n", intr_index, irq, 4166 (void *)irqptr->airq_dip, irqptr->airq_vector)); 4167 4168 if (intr_index == RESERVE_INDEX) { 4169 apic_error |= APIC_ERR_INVALID_INDEX; 4170 return; 4171 } else if (APIC_IS_MSI_OR_MSIX_INDEX(intr_index)) { 4172 return; 4173 } 4174 4175 vector = irqptr->airq_vector; 4176 ioapicindex = irqptr->airq_ioapicindex; 4177 /* Assume edge triggered by default */ 4178 level = 0; 4179 /* Assume active high by default */ 4180 po = 0; 4181 4182 if (intr_index == DEFAULT_INDEX || intr_index == FREE_INDEX) { 4183 ASSERT(irq < 16); 4184 if (eisa_level_intr_mask & (1 << irq)) 4185 level = AV_LEVEL; 4186 if (intr_index == FREE_INDEX && apic_defconf == 0) 4187 apic_error |= APIC_ERR_INVALID_INDEX; 4188 } else if (intr_index == ACPI_INDEX || intr_index == SCI_INDEX) { 4189 bus_type = irqptr->airq_iflag.bustype; 4190 if (irqptr->airq_iflag.intr_el == INTR_EL_CONFORM) { 4191 if (bus_type == BUS_PCI) 4192 level = AV_LEVEL; 4193 } else 4194 level = (irqptr->airq_iflag.intr_el == INTR_EL_LEVEL) ? 4195 AV_LEVEL : 0; 4196 if (level && 4197 ((irqptr->airq_iflag.intr_po == INTR_PO_ACTIVE_LOW) || 4198 (irqptr->airq_iflag.intr_po == INTR_PO_CONFORM && 4199 bus_type == BUS_PCI))) 4200 po = AV_ACTIVE_LOW; 4201 } else { 4202 iointrp = apic_io_intrp + intr_index; 4203 bus_type = apic_find_bus(iointrp->intr_busid); 4204 if (iointrp->intr_el == INTR_EL_CONFORM) { 4205 if ((irq < 16) && (eisa_level_intr_mask & (1 << irq))) 4206 level = AV_LEVEL; 4207 else if (bus_type == BUS_PCI) 4208 level = AV_LEVEL; 4209 } else 4210 level = (iointrp->intr_el == INTR_EL_LEVEL) ? 
4211 AV_LEVEL : 0; 4212 if (level && ((iointrp->intr_po == INTR_PO_ACTIVE_LOW) || 4213 (iointrp->intr_po == INTR_PO_CONFORM && 4214 bus_type == BUS_PCI))) 4215 po = AV_ACTIVE_LOW; 4216 } 4217 if (level) 4218 apic_level_intr[irq] = 1; 4219 /* 4220 * The 82489DX External APIC cannot do active low polarity interrupts. 4221 */ 4222 if (po && (apic_io_ver[ioapicindex] != IOAPIC_VER_82489DX)) 4223 io_po = po; 4224 else 4225 io_po = 0; 4226 4227 if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) 4228 printf("setio: ioapic=%x intin=%x level=%x po=%x vector=%x\n", 4229 ioapicindex, irqptr->airq_intin_no, level, io_po, vector); 4230 4231 irqptr->airq_rdt_entry = level|io_po|vector; 4232 } 4233 4234 /* 4235 * Call rebind to do the actual programming. 4236 */ 4237 static int 4238 apic_setup_io_intr(apic_irq_t *irqptr, int irq) 4239 { 4240 int rv; 4241 4242 if (rv = apic_rebind(irqptr, apic_irq_table[irq]->airq_cpu, 1, 4243 IMMEDIATE)) 4244 /* CPU is not up or interrupt is disabled. Fall back to 0 */ 4245 rv = apic_rebind(irqptr, 0, 1, IMMEDIATE); 4246 4247 return (rv); 4248 } 4249 4250 /* 4251 * Deferred reprogramming: Call apic_rebind to do the real work. 4252 */ 4253 static int 4254 apic_setup_io_intr_deferred(apic_irq_t *irqptr, int irq) 4255 { 4256 int rv; 4257 4258 if (rv = apic_rebind(irqptr, apic_irq_table[irq]->airq_cpu, 1, 4259 DEFERRED)) 4260 /* CPU is not up or interrupt is disabled. Fall back to 0 */ 4261 rv = apic_rebind(irqptr, 0, 1, DEFERRED); 4262 4263 return (rv); 4264 } 4265 4266 /* 4267 * Bind interrupt corresponding to irq_ptr to bind_cpu. acquire_lock 4268 * if false (0) means lock is already held (e.g: in rebind_all). 
4269 */ 4270 static int 4271 apic_rebind(apic_irq_t *irq_ptr, int bind_cpu, int acquire_lock, int when) 4272 { 4273 int intin_no; 4274 volatile int32_t *ioapic; 4275 uchar_t airq_temp_cpu; 4276 apic_cpus_info_t *cpu_infop; 4277 int iflag; 4278 int which_irq = apic_vector_to_irq[irq_ptr->airq_vector]; 4279 4280 intin_no = irq_ptr->airq_intin_no; 4281 ioapic = apicioadr[irq_ptr->airq_ioapicindex]; 4282 airq_temp_cpu = irq_ptr->airq_temp_cpu; 4283 if (airq_temp_cpu != IRQ_UNINIT && airq_temp_cpu != IRQ_UNBOUND) { 4284 if (airq_temp_cpu & IRQ_USER_BOUND) 4285 /* Mask off high bit so it can be used as array index */ 4286 airq_temp_cpu &= ~IRQ_USER_BOUND; 4287 4288 ASSERT(airq_temp_cpu < apic_nproc); 4289 } 4290 4291 iflag = intr_clear(); 4292 4293 if (acquire_lock) 4294 lock_set(&apic_ioapic_lock); 4295 4296 /* 4297 * Can't bind to a CPU that's not online: 4298 */ 4299 cpu_infop = &apic_cpus[bind_cpu & ~IRQ_USER_BOUND]; 4300 if (!(cpu_infop->aci_status & APIC_CPU_INTR_ENABLE)) { 4301 4302 if (acquire_lock) 4303 lock_clear(&apic_ioapic_lock); 4304 4305 intr_restore(iflag); 4306 return (1); 4307 } 4308 4309 /* 4310 * If this is a deferred reprogramming attempt, ensure we have 4311 * not been passed stale data: 4312 */ 4313 if ((when == DEFERRED) && 4314 (apic_reprogram_info[which_irq].valid == 0)) { 4315 /* stale info, so just return */ 4316 if (acquire_lock) 4317 lock_clear(&apic_ioapic_lock); 4318 4319 intr_restore(iflag); 4320 return (0); 4321 } 4322 4323 /* 4324 * If this interrupt has been delivered to a CPU and that CPU 4325 * has not handled it yet, we cannot reprogram the IOAPIC now: 4326 */ 4327 if (!APIC_IS_MSI_OR_MSIX_INDEX(irq_ptr->airq_mps_intr_index) && 4328 apic_check_stuck_interrupt(irq_ptr, airq_temp_cpu, bind_cpu, 4329 ioapic, intin_no, which_irq) != 0) { 4330 4331 if (acquire_lock) 4332 lock_clear(&apic_ioapic_lock); 4333 4334 intr_restore(iflag); 4335 return (0); 4336 } 4337 4338 /* 4339 * NOTE: We do not unmask the RDT here, as an interrupt MAY still 
4340 * come in before we have a chance to reprogram it below. The 4341 * reprogramming below will simultaneously change and unmask the 4342 * RDT entry. 4343 */ 4344 4345 if ((uchar_t)bind_cpu == IRQ_UNBOUND) { 4346 /* Write the RDT entry -- no specific CPU binding */ 4347 WRITE_IOAPIC_RDT_ENTRY_HIGH_DWORD(ioapic, intin_no, AV_TOALL); 4348 4349 if (airq_temp_cpu != IRQ_UNINIT && airq_temp_cpu != IRQ_UNBOUND) 4350 apic_cpus[airq_temp_cpu].aci_temp_bound--; 4351 4352 /* Write the vector, trigger, and polarity portion of the RDT */ 4353 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no, 4354 AV_LDEST | AV_LOPRI | irq_ptr->airq_rdt_entry); 4355 if (acquire_lock) 4356 lock_clear(&apic_ioapic_lock); 4357 irq_ptr->airq_temp_cpu = IRQ_UNBOUND; 4358 intr_restore(iflag); 4359 return (0); 4360 } 4361 4362 if (bind_cpu & IRQ_USER_BOUND) { 4363 cpu_infop->aci_bound++; 4364 } else { 4365 cpu_infop->aci_temp_bound++; 4366 } 4367 ASSERT((bind_cpu & ~IRQ_USER_BOUND) < apic_nproc); 4368 if (!APIC_IS_MSI_OR_MSIX_INDEX(irq_ptr->airq_mps_intr_index)) { 4369 /* Write the RDT entry -- bind to a specific CPU: */ 4370 WRITE_IOAPIC_RDT_ENTRY_HIGH_DWORD(ioapic, intin_no, 4371 cpu_infop->aci_local_id << APIC_ID_BIT_OFFSET); 4372 } 4373 if ((airq_temp_cpu != IRQ_UNBOUND) && (airq_temp_cpu != IRQ_UNINIT)) { 4374 apic_cpus[airq_temp_cpu].aci_temp_bound--; 4375 } 4376 if (!APIC_IS_MSI_OR_MSIX_INDEX(irq_ptr->airq_mps_intr_index)) { 4377 /* Write the vector, trigger, and polarity portion of the RDT */ 4378 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no, 4379 AV_PDEST | AV_FIXED | irq_ptr->airq_rdt_entry); 4380 } else { 4381 if (irq_ptr->airq_ioapicindex == irq_ptr->airq_origirq) { 4382 /* first one */ 4383 DDI_INTR_IMPLDBG((CE_CONT, "apic_rebind: call " 4384 "apic_pci_msi_enable_vector\n")); 4385 if (apic_pci_msi_enable_vector(irq_ptr->airq_dip, 4386 (irq_ptr->airq_mps_intr_index == MSI_INDEX) ? 
4387 DDI_INTR_TYPE_MSI : DDI_INTR_TYPE_MSIX, which_irq, 4388 irq_ptr->airq_vector, irq_ptr->airq_intin_no, 4389 cpu_infop->aci_local_id) != PSM_SUCCESS) { 4390 cmn_err(CE_WARN, "pcplusmp: " 4391 "apic_pci_msi_enable_vector " 4392 "returned PSM_FAILURE"); 4393 } 4394 } 4395 if ((irq_ptr->airq_ioapicindex + irq_ptr->airq_intin_no - 1) == 4396 irq_ptr->airq_origirq) { /* last one */ 4397 DDI_INTR_IMPLDBG((CE_CONT, "apic_rebind: call " 4398 "pci_msi_enable_mode\n")); 4399 if (pci_msi_enable_mode(irq_ptr->airq_dip, 4400 (irq_ptr->airq_mps_intr_index == MSI_INDEX) ? 4401 DDI_INTR_TYPE_MSI : DDI_INTR_TYPE_MSIX, 4402 which_irq) != DDI_SUCCESS) { 4403 DDI_INTR_IMPLDBG((CE_CONT, "pcplusmp: " 4404 "pci_msi_enable failed\n")); 4405 (void) pci_msi_unconfigure(irq_ptr->airq_dip, 4406 (irq_ptr->airq_mps_intr_index == MSI_INDEX) ? 4407 DDI_INTR_TYPE_MSI : DDI_INTR_TYPE_MSIX, 4408 which_irq); 4409 } 4410 } 4411 } 4412 if (acquire_lock) 4413 lock_clear(&apic_ioapic_lock); 4414 irq_ptr->airq_temp_cpu = (uchar_t)bind_cpu; 4415 apic_redist_cpu_skip &= ~(1 << (bind_cpu & ~IRQ_USER_BOUND)); 4416 intr_restore(iflag); 4417 return (0); 4418 } 4419 4420 /* 4421 * Checks to see if the IOAPIC interrupt entry specified has its Remote IRR 4422 * bit set. Sets up a timeout to perform the reprogramming at a later time 4423 * if it cannot wait for the Remote IRR bit to clear (or if waiting did not 4424 * result in the bit's clearing). 4425 * 4426 * This function will mask the RDT entry if the Remote IRR bit is set. 4427 * 4428 * Returns non-zero if the caller should defer IOAPIC reprogramming. 
4429 */ 4430 static int 4431 apic_check_stuck_interrupt(apic_irq_t *irq_ptr, int old_bind_cpu, 4432 int new_bind_cpu, volatile int32_t *ioapic, int intin_no, int which_irq) 4433 { 4434 int32_t rdt_entry; 4435 int waited; 4436 4437 /* Mask the RDT entry, but only if it's a level-triggered interrupt */ 4438 rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no); 4439 if ((rdt_entry & (AV_LEVEL|AV_MASK)) == AV_LEVEL) { 4440 4441 /* Mask it */ 4442 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no, 4443 AV_MASK | rdt_entry); 4444 } 4445 4446 /* 4447 * Wait for the delivery pending bit to clear. 4448 */ 4449 if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no) & 4450 (AV_LEVEL|AV_PENDING)) == (AV_LEVEL|AV_PENDING)) { 4451 4452 /* 4453 * If we're still waiting on the delivery of this interrupt, 4454 * continue to wait here until it is delivered (this should be 4455 * a very small amount of time, but include a timeout just in 4456 * case). 4457 */ 4458 for (waited = 0; waited < apic_max_usecs_clear_pending; 4459 waited += APIC_USECS_PER_WAIT_INTERVAL) { 4460 if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no) 4461 & AV_PENDING) == 0) { 4462 break; 4463 } 4464 drv_usecwait(APIC_USECS_PER_WAIT_INTERVAL); 4465 } 4466 4467 if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no) & 4468 AV_PENDING) != 0) { 4469 cmn_err(CE_WARN, "!IOAPIC %d intin %d: Could not " 4470 "deliver interrupt to local APIC within " 4471 "%d usecs.", irq_ptr->airq_ioapicindex, 4472 irq_ptr->airq_intin_no, 4473 apic_max_usecs_clear_pending); 4474 } 4475 } 4476 4477 /* 4478 * If the remote IRR bit is set, then the interrupt has been sent 4479 * to a CPU for processing. We have no choice but to wait for 4480 * that CPU to process the interrupt, at which point the remote IRR 4481 * bit will be cleared. 
4482 */ 4483 if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no) & 4484 (AV_LEVEL|AV_REMOTE_IRR)) == (AV_LEVEL|AV_REMOTE_IRR)) { 4485 4486 /* 4487 * If the CPU that this RDT is bound to is NOT the current 4488 * CPU, wait until that CPU handles the interrupt and ACKs 4489 * it. If this interrupt is not bound to any CPU (that is, 4490 * if it's bound to the logical destination of "anyone"), it 4491 * may have been delivered to the current CPU so handle that 4492 * case by deferring the reprogramming (below). 4493 */ 4494 kpreempt_disable(); 4495 if ((old_bind_cpu != IRQ_UNBOUND) && 4496 (old_bind_cpu != IRQ_UNINIT) && 4497 (old_bind_cpu != psm_get_cpu_id())) { 4498 for (waited = 0; waited < apic_max_usecs_clear_pending; 4499 waited += APIC_USECS_PER_WAIT_INTERVAL) { 4500 if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, 4501 intin_no) & AV_REMOTE_IRR) == 0) { 4502 4503 /* Clear the reprogramming state: */ 4504 lock_set(&apic_ioapic_reprogram_lock); 4505 4506 apic_reprogram_info[which_irq].valid 4507 = 0; 4508 apic_reprogram_info[which_irq].bindcpu 4509 = 0; 4510 apic_reprogram_info[which_irq].timeouts 4511 = 0; 4512 4513 lock_clear(&apic_ioapic_reprogram_lock); 4514 4515 /* Remote IRR has cleared! */ 4516 kpreempt_enable(); 4517 return (0); 4518 } 4519 drv_usecwait(APIC_USECS_PER_WAIT_INTERVAL); 4520 } 4521 } 4522 kpreempt_enable(); 4523 4524 /* 4525 * If we waited and the Remote IRR bit is still not cleared, 4526 * AND if we've invoked the timeout APIC_REPROGRAM_MAX_TIMEOUTS 4527 * times for this interrupt, try the last-ditch workarounds: 4528 */ 4529 if (apic_reprogram_info[which_irq].timeouts >= 4530 APIC_REPROGRAM_MAX_TIMEOUTS) { 4531 4532 if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no) 4533 & AV_REMOTE_IRR) != 0) { 4534 /* 4535 * Trying to clear the bit through normal 4536 * channels has failed. So as a last-ditch 4537 * effort, try to set the trigger mode to 4538 * edge, then to level. This has been 4539 * observed to work on many systems. 
4540 */ 4541 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, 4542 intin_no, 4543 READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, 4544 intin_no) & ~AV_LEVEL); 4545 4546 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, 4547 intin_no, 4548 READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, 4549 intin_no) | AV_LEVEL); 4550 4551 /* 4552 * If the bit's STILL set, declare total and 4553 * utter failure 4554 */ 4555 if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, 4556 intin_no) & AV_REMOTE_IRR) != 0) { 4557 cmn_err(CE_WARN, "!IOAPIC %d intin %d: " 4558 "Remote IRR failed to reset " 4559 "within %d usecs. Interrupts to " 4560 "this pin may cease to function.", 4561 irq_ptr->airq_ioapicindex, 4562 irq_ptr->airq_intin_no, 4563 apic_max_usecs_clear_pending); 4564 } 4565 } 4566 /* Clear the reprogramming state: */ 4567 lock_set(&apic_ioapic_reprogram_lock); 4568 4569 apic_reprogram_info[which_irq].valid = 0; 4570 apic_reprogram_info[which_irq].bindcpu = 0; 4571 apic_reprogram_info[which_irq].timeouts = 0; 4572 4573 lock_clear(&apic_ioapic_reprogram_lock); 4574 } else { 4575 #ifdef DEBUG 4576 cmn_err(CE_WARN, "Deferring reprogramming of irq %d", 4577 which_irq); 4578 #endif /* DEBUG */ 4579 /* 4580 * If waiting for the Remote IRR bit (above) didn't 4581 * allow it to clear, defer the reprogramming: 4582 */ 4583 lock_set(&apic_ioapic_reprogram_lock); 4584 4585 apic_reprogram_info[which_irq].valid = 1; 4586 apic_reprogram_info[which_irq].bindcpu = new_bind_cpu; 4587 apic_reprogram_info[which_irq].timeouts++; 4588 4589 lock_clear(&apic_ioapic_reprogram_lock); 4590 4591 /* Fire up a timeout to handle this later */ 4592 (void) timeout(apic_reprogram_timeout_handler, 4593 (void *) 0, 4594 drv_usectohz(APIC_REPROGRAM_TIMEOUT_DELAY)); 4595 4596 /* Inform caller to defer IOAPIC programming: */ 4597 return (1); 4598 } 4599 } 4600 return (0); 4601 } 4602 4603 /* 4604 * Timeout handler that performs the APIC reprogramming 4605 */ 4606 /*ARGSUSED*/ 4607 static void 4608 apic_reprogram_timeout_handler(void *arg) 4609 { 4610 
/*LINTED: set but not used in function*/ 4611 int i, result; 4612 4613 /* Serialize access to this function */ 4614 mutex_enter(&apic_reprogram_timeout_mutex); 4615 4616 /* 4617 * For each entry in the reprogramming state that's valid, 4618 * try the reprogramming again: 4619 */ 4620 for (i = 0; i < APIC_MAX_VECTOR; i++) { 4621 if (apic_reprogram_info[i].valid == 0) 4622 continue; 4623 /* 4624 * Though we can't really do anything about errors 4625 * at this point, keep track of them for reporting. 4626 * Note that it is very possible for apic_setup_io_intr 4627 * to re-register this very timeout if the Remote IRR bit 4628 * has not yet cleared. 4629 */ 4630 result = apic_setup_io_intr_deferred(apic_irq_table[i], i); 4631 4632 #ifdef DEBUG 4633 if (result) 4634 cmn_err(CE_WARN, "apic_reprogram_timeout: " 4635 "apic_setup_io_intr returned nonzero for " 4636 "irq=%d!", i); 4637 #endif /* DEBUG */ 4638 } 4639 4640 mutex_exit(&apic_reprogram_timeout_mutex); 4641 } 4642 4643 4644 /* 4645 * Called to migrate all interrupts at an irq to another cpu. safe 4646 * if true means we are not being called from an interrupt 4647 * context and hence it is safe to do a lock_set. 
If false 4648 * do only a lock_try and return failure ( non 0 ) if we cannot get it 4649 */ 4650 static int 4651 apic_rebind_all(apic_irq_t *irq_ptr, int bind_cpu, int safe) 4652 { 4653 apic_irq_t *irqptr = irq_ptr; 4654 int retval = 0; 4655 int iflag; 4656 4657 iflag = intr_clear(); 4658 if (!safe) { 4659 if (lock_try(&apic_ioapic_lock) == 0) { 4660 intr_restore(iflag); 4661 return (1); 4662 } 4663 } else 4664 lock_set(&apic_ioapic_lock); 4665 4666 while (irqptr) { 4667 if (irqptr->airq_temp_cpu != IRQ_UNINIT) 4668 retval |= apic_rebind(irqptr, bind_cpu, 0, IMMEDIATE); 4669 irqptr = irqptr->airq_next; 4670 } 4671 lock_clear(&apic_ioapic_lock); 4672 intr_restore(iflag); 4673 return (retval); 4674 } 4675 4676 /* 4677 * apic_intr_redistribute does all the messy computations for identifying 4678 * which interrupt to move to which CPU. Currently we do just one interrupt 4679 * at a time. This reduces the time we spent doing all this within clock 4680 * interrupt. When it is done in idle, we could do more than 1. 4681 * First we find the most busy and the most free CPU (time in ISR only) 4682 * skipping those CPUs that has been identified as being ineligible (cpu_skip) 4683 * Then we look for IRQs which are closest to the difference between the 4684 * most busy CPU and the average ISR load. We try to find one whose load 4685 * is less than difference.If none exists, then we chose one larger than the 4686 * difference, provided it does not make the most idle CPU worse than the 4687 * most busy one. In the end, we clear all the busy fields for CPUs. For 4688 * IRQs, they are cleared as they are scanned. 
4689 */ 4690 static void 4691 apic_intr_redistribute() 4692 { 4693 int busiest_cpu, most_free_cpu; 4694 int cpu_free, cpu_busy, max_busy, min_busy; 4695 int min_free, diff; 4696 int average_busy, cpus_online; 4697 int i, busy; 4698 apic_cpus_info_t *cpu_infop; 4699 apic_irq_t *min_busy_irq = NULL; 4700 apic_irq_t *max_busy_irq = NULL; 4701 4702 busiest_cpu = most_free_cpu = -1; 4703 cpu_free = cpu_busy = max_busy = average_busy = 0; 4704 min_free = apic_sample_factor_redistribution; 4705 cpus_online = 0; 4706 /* 4707 * Below we will check for CPU_INTR_ENABLE, bound, temp_bound, temp_cpu 4708 * without ioapic_lock. That is OK as we are just doing statistical 4709 * sampling anyway and any inaccuracy now will get corrected next time 4710 * The call to rebind which actually changes things will make sure 4711 * we are consistent. 4712 */ 4713 for (i = 0; i < apic_nproc; i++) { 4714 if (!(apic_redist_cpu_skip & (1 << i)) && 4715 (apic_cpus[i].aci_status & APIC_CPU_INTR_ENABLE)) { 4716 4717 cpu_infop = &apic_cpus[i]; 4718 /* 4719 * If no unbound interrupts or only 1 total on this 4720 * CPU, skip 4721 */ 4722 if (!cpu_infop->aci_temp_bound || 4723 (cpu_infop->aci_bound + cpu_infop->aci_temp_bound) 4724 == 1) { 4725 apic_redist_cpu_skip |= 1 << i; 4726 continue; 4727 } 4728 4729 busy = cpu_infop->aci_busy; 4730 average_busy += busy; 4731 cpus_online++; 4732 if (max_busy < busy) { 4733 max_busy = busy; 4734 busiest_cpu = i; 4735 } 4736 if (min_free > busy) { 4737 min_free = busy; 4738 most_free_cpu = i; 4739 } 4740 if (busy > apic_int_busy_mark) { 4741 cpu_busy |= 1 << i; 4742 } else { 4743 if (busy < apic_int_free_mark) 4744 cpu_free |= 1 << i; 4745 } 4746 } 4747 } 4748 if ((cpu_busy && cpu_free) || 4749 (max_busy >= (min_free + apic_diff_for_redistribution))) { 4750 4751 apic_num_imbalance++; 4752 #ifdef DEBUG 4753 if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) { 4754 prom_printf( 4755 "redistribute busy=%x free=%x max=%x min=%x", 4756 cpu_busy, cpu_free, max_busy, 
min_free); 4757 } 4758 #endif /* DEBUG */ 4759 4760 4761 average_busy /= cpus_online; 4762 4763 diff = max_busy - average_busy; 4764 min_busy = max_busy; /* start with the max possible value */ 4765 max_busy = 0; 4766 min_busy_irq = max_busy_irq = NULL; 4767 i = apic_min_device_irq; 4768 for (; i < apic_max_device_irq; i++) { 4769 apic_irq_t *irq_ptr; 4770 /* Change to linked list per CPU ? */ 4771 if ((irq_ptr = apic_irq_table[i]) == NULL) 4772 continue; 4773 /* Check for irq_busy & decide which one to move */ 4774 /* Also zero them for next round */ 4775 if ((irq_ptr->airq_temp_cpu == busiest_cpu) && 4776 irq_ptr->airq_busy) { 4777 if (irq_ptr->airq_busy < diff) { 4778 /* 4779 * Check for least busy CPU, 4780 * best fit or what ? 4781 */ 4782 if (max_busy < irq_ptr->airq_busy) { 4783 /* 4784 * Most busy within the 4785 * required differential 4786 */ 4787 max_busy = irq_ptr->airq_busy; 4788 max_busy_irq = irq_ptr; 4789 } 4790 } else { 4791 if (min_busy > irq_ptr->airq_busy) { 4792 /* 4793 * least busy, but more than 4794 * the reqd diff 4795 */ 4796 if (min_busy < 4797 (diff + average_busy - 4798 min_free)) { 4799 /* 4800 * Making sure new cpu 4801 * will not end up 4802 * worse 4803 */ 4804 min_busy = 4805 irq_ptr->airq_busy; 4806 4807 min_busy_irq = irq_ptr; 4808 } 4809 } 4810 } 4811 } 4812 irq_ptr->airq_busy = 0; 4813 } 4814 4815 if (max_busy_irq != NULL) { 4816 #ifdef DEBUG 4817 if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) { 4818 prom_printf("rebinding %x to %x", 4819 max_busy_irq->airq_vector, most_free_cpu); 4820 } 4821 #endif /* DEBUG */ 4822 if (apic_rebind_all(max_busy_irq, most_free_cpu, 0) 4823 == 0) 4824 /* Make change permenant */ 4825 max_busy_irq->airq_cpu = (uchar_t)most_free_cpu; 4826 } else if (min_busy_irq != NULL) { 4827 #ifdef DEBUG 4828 if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) { 4829 prom_printf("rebinding %x to %x", 4830 min_busy_irq->airq_vector, most_free_cpu); 4831 } 4832 #endif /* DEBUG */ 4833 4834 if (apic_rebind_all(min_busy_irq, 
most_free_cpu, 0) == 4835 0) 4836 /* Make change permenant */ 4837 min_busy_irq->airq_cpu = (uchar_t)most_free_cpu; 4838 } else { 4839 if (cpu_busy != (1 << busiest_cpu)) { 4840 apic_redist_cpu_skip |= 1 << busiest_cpu; 4841 /* 4842 * We leave cpu_skip set so that next time we 4843 * can choose another cpu 4844 */ 4845 } 4846 } 4847 apic_num_rebind++; 4848 } else { 4849 /* 4850 * found nothing. Could be that we skipped over valid CPUs 4851 * or we have balanced everything. If we had a variable 4852 * ticks_for_redistribution, it could be increased here. 4853 * apic_int_busy, int_free etc would also need to be 4854 * changed. 4855 */ 4856 if (apic_redist_cpu_skip) 4857 apic_redist_cpu_skip = 0; 4858 } 4859 for (i = 0; i < apic_nproc; i++) { 4860 apic_cpus[i].aci_busy = 0; 4861 } 4862 } 4863 4864 static void 4865 apic_cleanup_busy() 4866 { 4867 int i; 4868 apic_irq_t *irq_ptr; 4869 4870 for (i = 0; i < apic_nproc; i++) { 4871 apic_cpus[i].aci_busy = 0; 4872 } 4873 4874 for (i = apic_min_device_irq; i < apic_max_device_irq; i++) { 4875 if ((irq_ptr = apic_irq_table[i]) != NULL) 4876 irq_ptr->airq_busy = 0; 4877 } 4878 apic_skipped_redistribute = 0; 4879 } 4880 4881 4882 /* 4883 * This function will reprogram the timer. 4884 * 4885 * When in oneshot mode the argument is the absolute time in future to 4886 * generate the interrupt at. 4887 * 4888 * When in periodic mode, the argument is the interval at which the 4889 * interrupts should be generated. There is no need to support the periodic 4890 * mode timer change at this time. 4891 */ 4892 static void 4893 apic_timer_reprogram(hrtime_t time) 4894 { 4895 hrtime_t now; 4896 uint_t ticks; 4897 4898 /* 4899 * We should be called from high PIL context (CBE_HIGH_PIL), 4900 * so kpreempt is disabled. 
4901 */ 4902 4903 if (!apic_oneshot) { 4904 /* time is the interval for periodic mode */ 4905 ticks = (uint_t)((time) / apic_nsec_per_tick); 4906 } else { 4907 /* one shot mode */ 4908 4909 now = gethrtime(); 4910 4911 if (time <= now) { 4912 /* 4913 * requested to generate an interrupt in the past 4914 * generate an interrupt as soon as possible 4915 */ 4916 ticks = apic_min_timer_ticks; 4917 } else if ((time - now) > apic_nsec_max) { 4918 /* 4919 * requested to generate an interrupt at a time 4920 * further than what we are capable of. Set to max 4921 * the hardware can handle 4922 */ 4923 4924 ticks = APIC_MAXVAL; 4925 #ifdef DEBUG 4926 cmn_err(CE_CONT, "apic_timer_reprogram, request at" 4927 " %lld too far in future, current time" 4928 " %lld \n", time, now); 4929 #endif /* DEBUG */ 4930 } else 4931 ticks = (uint_t)((time - now) / apic_nsec_per_tick); 4932 } 4933 4934 if (ticks < apic_min_timer_ticks) 4935 ticks = apic_min_timer_ticks; 4936 4937 apicadr[APIC_INIT_COUNT] = ticks; 4938 4939 } 4940 4941 /* 4942 * This function will enable timer interrupts. 4943 */ 4944 static void 4945 apic_timer_enable(void) 4946 { 4947 /* 4948 * We should be Called from high PIL context (CBE_HIGH_PIL), 4949 * so kpreempt is disabled. 4950 */ 4951 4952 if (!apic_oneshot) 4953 apicadr[APIC_LOCAL_TIMER] = 4954 (apic_clkvect + APIC_BASE_VECT) | AV_TIME; 4955 else { 4956 /* one shot */ 4957 apicadr[APIC_LOCAL_TIMER] = (apic_clkvect + APIC_BASE_VECT); 4958 } 4959 } 4960 4961 /* 4962 * This function will disable timer interrupts. 4963 */ 4964 static void 4965 apic_timer_disable(void) 4966 { 4967 /* 4968 * We should be Called from high PIL context (CBE_HIGH_PIL), 4969 * so kpreempt is disabled. 
4970 */ 4971 4972 apicadr[APIC_LOCAL_TIMER] = (apic_clkvect + APIC_BASE_VECT) | AV_MASK; 4973 } 4974 4975 4976 cyclic_id_t apic_cyclic_id; 4977 4978 /* 4979 * If this module needs to be a consumer of cyclic subsystem, they 4980 * can be added here, since at this time kernel cyclic subsystem is initialized 4981 * argument is not currently used, and is reserved for future. 4982 */ 4983 static void 4984 apic_post_cyclic_setup(void *arg) 4985 { 4986 _NOTE(ARGUNUSED(arg)) 4987 cyc_handler_t hdlr; 4988 cyc_time_t when; 4989 4990 /* cpu_lock is held */ 4991 4992 /* set up cyclics for intr redistribution */ 4993 4994 /* 4995 * In peridoc mode intr redistribution processing is done in 4996 * apic_intr_enter during clk intr processing 4997 */ 4998 if (!apic_oneshot) 4999 return; 5000 5001 hdlr.cyh_level = CY_LOW_LEVEL; 5002 hdlr.cyh_func = (cyc_func_t)apic_redistribute_compute; 5003 hdlr.cyh_arg = NULL; 5004 5005 when.cyt_when = 0; 5006 when.cyt_interval = apic_redistribute_sample_interval; 5007 apic_cyclic_id = cyclic_add(&hdlr, &when); 5008 5009 5010 } 5011 5012 static void 5013 apic_redistribute_compute(void) 5014 { 5015 int i, j, max_busy; 5016 5017 if (apic_enable_dynamic_migration) { 5018 if (++apic_nticks == apic_sample_factor_redistribution) { 5019 /* 5020 * Time to call apic_intr_redistribute(). 5021 * reset apic_nticks. 
This will cause max_busy 5022 * to be calculated below and if it is more than 5023 * apic_int_busy, we will do the whole thing 5024 */ 5025 apic_nticks = 0; 5026 } 5027 max_busy = 0; 5028 for (i = 0; i < apic_nproc; i++) { 5029 5030 /* 5031 * Check if curipl is non zero & if ISR is in 5032 * progress 5033 */ 5034 if (((j = apic_cpus[i].aci_curipl) != 0) && 5035 (apic_cpus[i].aci_ISR_in_progress & (1 << j))) { 5036 5037 int irq; 5038 apic_cpus[i].aci_busy++; 5039 irq = apic_cpus[i].aci_current[j]; 5040 apic_irq_table[irq]->airq_busy++; 5041 } 5042 5043 if (!apic_nticks && 5044 (apic_cpus[i].aci_busy > max_busy)) 5045 max_busy = apic_cpus[i].aci_busy; 5046 } 5047 if (!apic_nticks) { 5048 if (max_busy > apic_int_busy_mark) { 5049 /* 5050 * We could make the following check be 5051 * skipped > 1 in which case, we get a 5052 * redistribution at half the busy mark (due to 5053 * double interval). Need to be able to collect 5054 * more empirical data to decide if that is a 5055 * good strategy. Punt for now. 
5056 */ 5057 if (apic_skipped_redistribute) 5058 apic_cleanup_busy(); 5059 else 5060 apic_intr_redistribute(); 5061 } else 5062 apic_skipped_redistribute++; 5063 } 5064 } 5065 } 5066 5067 5068 static int 5069 apic_acpi_translate_pci_irq(dev_info_t *dip, int busid, int devid, 5070 int ipin, int *pci_irqp, iflag_t *intr_flagp) 5071 { 5072 5073 int status; 5074 acpi_psm_lnk_t acpipsmlnk; 5075 5076 if ((status = acpi_get_irq_cache_ent(busid, devid, ipin, pci_irqp, 5077 intr_flagp)) == ACPI_PSM_SUCCESS) { 5078 APIC_VERBOSE_IRQ((CE_CONT, "!pcplusmp: Found irqno %d " 5079 "from cache for device %s, instance #%d\n", *pci_irqp, 5080 ddi_get_name(dip), ddi_get_instance(dip))); 5081 return (status); 5082 } 5083 5084 bzero(&acpipsmlnk, sizeof (acpi_psm_lnk_t)); 5085 5086 if ((status = acpi_translate_pci_irq(dip, ipin, pci_irqp, intr_flagp, 5087 &acpipsmlnk)) == ACPI_PSM_FAILURE) { 5088 APIC_VERBOSE_IRQ((CE_WARN, "pcplusmp: " 5089 " acpi_translate_pci_irq failed for device %s, instance" 5090 " #%d", ddi_get_name(dip), ddi_get_instance(dip))); 5091 return (status); 5092 } 5093 5094 if (status == ACPI_PSM_PARTIAL && acpipsmlnk.lnkobj != NULL) { 5095 status = apic_acpi_irq_configure(&acpipsmlnk, dip, pci_irqp, 5096 intr_flagp); 5097 if (status != ACPI_PSM_SUCCESS) { 5098 status = acpi_get_current_irq_resource(&acpipsmlnk, 5099 pci_irqp, intr_flagp); 5100 } 5101 } 5102 5103 if (status == ACPI_PSM_SUCCESS) { 5104 acpi_new_irq_cache_ent(busid, devid, ipin, *pci_irqp, 5105 intr_flagp, &acpipsmlnk); 5106 5107 APIC_VERBOSE_IRQ((CE_CONT, "pcplusmp: [ACPI] " 5108 "new irq %d for device %s, instance #%d\n", 5109 *pci_irqp, ddi_get_name(dip), ddi_get_instance(dip))); 5110 } 5111 5112 return (status); 5113 } 5114 5115 /* 5116 * Configures the irq for the interrupt link device identified by 5117 * acpipsmlnkp. 5118 * 5119 * Gets the current and the list of possible irq settings for the 5120 * device. 
* If apic_unconditional_srs is not set, and the current
 * resource setting is in the list of possible irq settings,
 * the current irq resource setting is passed to the caller.
 *
 * Otherwise, picks an irq number from the list of possible irq
 * settings, and sets the irq of the device to this value.
 * If apic_prefer_crs is set, among a set of irq numbers in the list that
 * have the least number of devices sharing the interrupt, we pick the
 * current irq resource setting if it is a member of this set.
 *
 * Passes the irq number in the value pointed to by pci_irqp, and
 * polarity and sensitivity in the structure pointed to by dipintr_flagp
 * to the caller.
 *
 * Note that if setting the irq resource failed, but the current irq
 * resource settings were successfully obtained, passes the current irq
 * resources and considers it a success.
 *
 * Returns:
 * ACPI_PSM_SUCCESS on success.
 *
 * ACPI_PSM_FAILURE if an error occurred during the configuration or
 * if a suitable irq was not found for this device, or if setting the
 * irq resource and obtaining the current resource fails.
5144 * 5145 */ 5146 static int 5147 apic_acpi_irq_configure(acpi_psm_lnk_t *acpipsmlnkp, dev_info_t *dip, 5148 int *pci_irqp, iflag_t *dipintr_flagp) 5149 { 5150 5151 int i, min_share, foundnow, done = 0; 5152 int32_t irq; 5153 int32_t share_irq = -1; 5154 int32_t chosen_irq = -1; 5155 int cur_irq = -1; 5156 acpi_irqlist_t *irqlistp; 5157 acpi_irqlist_t *irqlistent; 5158 5159 if ((acpi_get_possible_irq_resources(acpipsmlnkp, &irqlistp)) 5160 == ACPI_PSM_FAILURE) { 5161 APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: Unable to determine " 5162 "or assign IRQ for device %s, instance #%d: The system was " 5163 "unable to get the list of potential IRQs from ACPI.", 5164 ddi_get_name(dip), ddi_get_instance(dip))); 5165 5166 return (ACPI_PSM_FAILURE); 5167 } 5168 5169 if ((acpi_get_current_irq_resource(acpipsmlnkp, &cur_irq, 5170 dipintr_flagp) == ACPI_PSM_SUCCESS) && (!apic_unconditional_srs) && 5171 (cur_irq > 0)) { 5172 /* 5173 * If an IRQ is set in CRS and that IRQ exists in the set 5174 * returned from _PRS, return that IRQ, otherwise print 5175 * a warning 5176 */ 5177 5178 if (acpi_irqlist_find_irq(irqlistp, cur_irq, NULL) 5179 == ACPI_PSM_SUCCESS) { 5180 5181 acpi_free_irqlist(irqlistp); 5182 ASSERT(pci_irqp != NULL); 5183 *pci_irqp = cur_irq; 5184 return (ACPI_PSM_SUCCESS); 5185 } 5186 5187 APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: Could not find the " 5188 "current irq %d for device %s, instance #%d in ACPI's " 5189 "list of possible irqs for this device. 
Picking one from " 5190 " the latter list.", cur_irq, ddi_get_name(dip), 5191 ddi_get_instance(dip))); 5192 } 5193 5194 irqlistent = irqlistp; 5195 min_share = 255; 5196 5197 while (irqlistent != NULL) { 5198 irqlistent->intr_flags.bustype = BUS_PCI; 5199 5200 for (foundnow = 0, i = 0; i < irqlistent->num_irqs; i++) { 5201 5202 irq = irqlistent->irqs[i]; 5203 5204 if ((irq < 16) && (apic_reserved_irqlist[irq])) 5205 continue; 5206 5207 if (irq == 0) { 5208 /* invalid irq number */ 5209 continue; 5210 } 5211 5212 if ((apic_irq_table[irq] == NULL) || 5213 (apic_irq_table[irq]->airq_dip == dip)) { 5214 chosen_irq = irq; 5215 foundnow = 1; 5216 /* 5217 * If we do not prefer current irq from crs 5218 * or if we do and this irq is the same as 5219 * current irq from crs, this is the one 5220 * to pick. 5221 */ 5222 if (!(apic_prefer_crs) || (irq == cur_irq)) { 5223 done = 1; 5224 break; 5225 } 5226 continue; 5227 } 5228 5229 if (irqlistent->intr_flags.intr_el == INTR_EL_EDGE) 5230 continue; 5231 5232 if (!acpi_intr_compatible(irqlistent->intr_flags, 5233 apic_irq_table[irq]->airq_iflag)) 5234 continue; 5235 5236 if ((apic_irq_table[irq]->airq_share < min_share) || 5237 ((apic_irq_table[irq]->airq_share == min_share) && 5238 (cur_irq == irq) && (apic_prefer_crs))) { 5239 min_share = apic_irq_table[irq]->airq_share; 5240 share_irq = irq; 5241 foundnow = 1; 5242 } 5243 } 5244 5245 /* 5246 * If we found an IRQ in the inner loop this time, save the 5247 * details from the irqlist for later use. 5248 */ 5249 if (foundnow && ((chosen_irq != -1) || (share_irq != -1))) { 5250 /* 5251 * Copy the acpi_prs_private_t and flags from this 5252 * irq list entry, since we found an irq from this 5253 * entry. 
5254 */ 5255 acpipsmlnkp->acpi_prs_prv = irqlistent->acpi_prs_prv; 5256 *dipintr_flagp = irqlistent->intr_flags; 5257 } 5258 5259 if (done) 5260 break; 5261 5262 /* Go to the next irqlist entry */ 5263 irqlistent = irqlistent->next; 5264 } 5265 5266 5267 acpi_free_irqlist(irqlistp); 5268 if (chosen_irq != -1) 5269 irq = chosen_irq; 5270 else if (share_irq != -1) 5271 irq = share_irq; 5272 else { 5273 APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: Could not find a " 5274 "suitable irq from the list of possible irqs for device " 5275 "%s, instance #%d in ACPI's list of possible irqs", 5276 ddi_get_name(dip), ddi_get_instance(dip))); 5277 return (ACPI_PSM_FAILURE); 5278 } 5279 5280 APIC_VERBOSE_IRQ((CE_CONT, "!pcplusmp: Setting irq %d for device %s " 5281 "instance #%d\n", irq, ddi_get_name(dip), ddi_get_instance(dip))); 5282 5283 if ((acpi_set_irq_resource(acpipsmlnkp, irq)) == ACPI_PSM_SUCCESS) { 5284 /* 5285 * setting irq was successful, check to make sure CRS 5286 * reflects that. If CRS does not agree with what we 5287 * set, return the irq that was set. 5288 */ 5289 5290 if (acpi_get_current_irq_resource(acpipsmlnkp, &cur_irq, 5291 dipintr_flagp) == ACPI_PSM_SUCCESS) { 5292 5293 if (cur_irq != irq) 5294 APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: " 5295 "IRQ resource set (irqno %d) for device %s " 5296 "instance #%d, differs from current " 5297 "setting irqno %d", 5298 irq, ddi_get_name(dip), 5299 ddi_get_instance(dip), cur_irq)); 5300 } 5301 5302 /* 5303 * return the irq that was set, and not what CRS reports, 5304 * since CRS has been seen to be bogus on some systems 5305 */ 5306 cur_irq = irq; 5307 } else { 5308 APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: set resource irq %d " 5309 "failed for device %s instance #%d", 5310 irq, ddi_get_name(dip), ddi_get_instance(dip))); 5311 5312 if (cur_irq == -1) 5313 return (ACPI_PSM_FAILURE); 5314 } 5315 5316 ASSERT(pci_irqp != NULL); 5317 *pci_irqp = cur_irq; 5318 return (ACPI_PSM_SUCCESS); 5319 } 5320