1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * PSMI 1.1 extensions are supported only in 2.6 and later versions. 30 * PSMI 1.2 extensions are supported only in 2.7 and later versions. 31 * PSMI 1.3 and 1.4 extensions are supported in Solaris 10. 32 * PSMI 1.5 extensions are supported in Solaris Nevada. 
33 */ 34 #define PSMI_1_5 35 36 #include <sys/processor.h> 37 #include <sys/time.h> 38 #include <sys/psm.h> 39 #include <sys/smp_impldefs.h> 40 #include <sys/cram.h> 41 #include <sys/acpi/acpi.h> 42 #include <sys/acpica.h> 43 #include <sys/psm_common.h> 44 #include "apic.h" 45 #include <sys/pit.h> 46 #include <sys/ddi.h> 47 #include <sys/sunddi.h> 48 #include <sys/ddi_impldefs.h> 49 #include <sys/pci.h> 50 #include <sys/promif.h> 51 #include <sys/x86_archext.h> 52 #include <sys/cpc_impl.h> 53 #include <sys/uadmin.h> 54 #include <sys/panic.h> 55 #include <sys/debug.h> 56 #include <sys/archsystm.h> 57 #include <sys/trap.h> 58 #include <sys/machsystm.h> 59 #include <sys/cpuvar.h> 60 #include <sys/rm_platter.h> 61 #include <sys/privregs.h> 62 #include <sys/cyclic.h> 63 #include <sys/note.h> 64 #include <sys/pci_intr_lib.h> 65 66 /* 67 * Local Function Prototypes 68 */ 69 static void apic_init_intr(); 70 static void apic_ret(); 71 static int apic_handle_defconf(); 72 static int apic_parse_mpct(caddr_t mpct, int bypass); 73 static struct apic_mpfps_hdr *apic_find_fps_sig(caddr_t fptr, int size); 74 static int apic_checksum(caddr_t bptr, int len); 75 static int get_apic_cmd1(); 76 static int get_apic_pri(); 77 static int apic_find_bus_type(char *bus); 78 static int apic_find_bus(int busid); 79 static int apic_find_bus_id(int bustype); 80 static struct apic_io_intr *apic_find_io_intr(int irqno); 81 int apic_allocate_irq(int irq); 82 static int apic_find_free_irq(int start, int end); 83 static uchar_t apic_allocate_vector(int ipl, int irq, int pri); 84 static void apic_modify_vector(uchar_t vector, int irq); 85 static void apic_mark_vector(uchar_t oldvector, uchar_t newvector); 86 static uchar_t apic_xlate_vector(uchar_t oldvector); 87 static void apic_xlate_vector_free_timeout_handler(void *arg); 88 static void apic_free_vector(uchar_t vector); 89 static void apic_reprogram_timeout_handler(void *arg); 90 static int apic_check_stuck_interrupt(apic_irq_t *irq_ptr, int 
old_bind_cpu, 91 int new_bind_cpu, volatile int32_t *ioapic, int intin_no, int which_irq); 92 static int apic_setup_io_intr(apic_irq_t *irqptr, int irq); 93 static int apic_setup_io_intr_deferred(apic_irq_t *irqptr, int irq); 94 static void apic_record_rdt_entry(apic_irq_t *irqptr, int irq); 95 static struct apic_io_intr *apic_find_io_intr_w_busid(int irqno, int busid); 96 static int apic_find_intin(uchar_t ioapic, uchar_t intin); 97 static int apic_handle_pci_pci_bridge(dev_info_t *idip, int child_devno, 98 int child_ipin, struct apic_io_intr **intrp); 99 static int apic_setup_irq_table(dev_info_t *dip, int irqno, 100 struct apic_io_intr *intrp, struct intrspec *ispec, iflag_t *intr_flagp, 101 int type); 102 static int apic_setup_sci_irq_table(int irqno, uchar_t ipl, 103 iflag_t *intr_flagp); 104 static void apic_nmi_intr(caddr_t arg); 105 uchar_t apic_bind_intr(dev_info_t *dip, int irq, uchar_t ioapicid, 106 uchar_t intin); 107 static int apic_rebind(apic_irq_t *irq_ptr, int bind_cpu, int acquire_lock, 108 int when); 109 int apic_rebind_all(apic_irq_t *irq_ptr, int bind_cpu, int safe); 110 static void apic_intr_redistribute(); 111 static void apic_cleanup_busy(); 112 static void apic_set_pwroff_method_from_mpcnfhdr(struct apic_mp_cnf_hdr *hdrp); 113 int apic_introp_xlate(dev_info_t *dip, struct intrspec *ispec, int type); 114 115 /* ACPI support routines */ 116 static int acpi_probe(void); 117 static int apic_acpi_irq_configure(acpi_psm_lnk_t *acpipsmlnkp, dev_info_t *dip, 118 int *pci_irqp, iflag_t *intr_flagp); 119 120 static int apic_acpi_translate_pci_irq(dev_info_t *dip, int busid, int devid, 121 int ipin, int *pci_irqp, iflag_t *intr_flagp); 122 static uchar_t acpi_find_ioapic(int irq); 123 static int acpi_intr_compatible(iflag_t iflag1, iflag_t iflag2); 124 125 /* 126 * standard MP entries 127 */ 128 static int apic_probe(); 129 static int apic_clkinit(); 130 static int apic_getclkirq(int ipl); 131 static uint_t apic_calibrate(volatile uint32_t *addr, 132 
uint16_t *pit_ticks_adj); 133 static hrtime_t apic_gettime(); 134 static hrtime_t apic_gethrtime(); 135 static void apic_init(); 136 static void apic_picinit(void); 137 static void apic_cpu_start(processorid_t cpun, caddr_t rm_code); 138 static int apic_post_cpu_start(void); 139 static void apic_send_ipi(int cpun, int ipl); 140 static void apic_set_softintr(int softintr); 141 static void apic_set_idlecpu(processorid_t cpun); 142 static void apic_unset_idlecpu(processorid_t cpun); 143 static int apic_softlvl_to_irq(int ipl); 144 static int apic_intr_enter(int ipl, int *vect); 145 static void apic_intr_exit(int ipl, int vect); 146 static void apic_setspl(int ipl); 147 static int apic_addspl(int ipl, int vector, int min_ipl, int max_ipl); 148 static int apic_delspl(int ipl, int vector, int min_ipl, int max_ipl); 149 static void apic_shutdown(int cmd, int fcn); 150 static void apic_preshutdown(int cmd, int fcn); 151 static int apic_disable_intr(processorid_t cpun); 152 static void apic_enable_intr(processorid_t cpun); 153 static processorid_t apic_get_next_processorid(processorid_t cpun); 154 static int apic_get_ipivect(int ipl, int type); 155 static void apic_timer_reprogram(hrtime_t time); 156 static void apic_timer_enable(void); 157 static void apic_timer_disable(void); 158 static void apic_post_cyclic_setup(void *arg); 159 extern int apic_intr_ops(dev_info_t *, ddi_intr_handle_impl_t *, 160 psm_intr_op_t, int *); 161 162 static int apic_oneshot = 0; 163 int apic_oneshot_enable = 1; /* to allow disabling one-shot capability */ 164 165 /* 166 * These variables are frequently accessed in apic_intr_enter(), 167 * apic_intr_exit and apic_setspl, so group them together 168 */ 169 volatile uint32_t *apicadr = NULL; /* virtual addr of local APIC */ 170 int apic_setspl_delay = 1; /* apic_setspl - delay enable */ 171 int apic_clkvect; 172 173 /* ACPI SCI interrupt configuration; -1 if SCI not used */ 174 int apic_sci_vect = -1; 175 iflag_t apic_sci_flags; 176 177 /* vector 
at which error interrupts come in */ 178 int apic_errvect; 179 int apic_enable_error_intr = 1; 180 int apic_error_display_delay = 100; 181 182 /* vector at which performance counter overflow interrupts come in */ 183 int apic_cpcovf_vect; 184 int apic_enable_cpcovf_intr = 1; 185 186 /* Max wait time (in microsecs) for flags to clear in an RDT entry. */ 187 static int apic_max_usecs_clear_pending = 1000; 188 189 /* Amt of usecs to wait before checking if RDT flags have reset. */ 190 #define APIC_USECS_PER_WAIT_INTERVAL 100 191 192 /* Maximum number of times to retry reprogramming via the timeout */ 193 #define APIC_REPROGRAM_MAX_TIMEOUTS 10 194 195 /* timeout delay for IOAPIC delayed reprogramming */ 196 #define APIC_REPROGRAM_TIMEOUT_DELAY 5 /* microseconds */ 197 198 /* Parameter to apic_rebind(): Should reprogramming be done now or later? */ 199 #define DEFERRED 1 200 #define IMMEDIATE 0 201 202 /* 203 * number of bits per byte, from <sys/param.h> 204 */ 205 #define UCHAR_MAX ((1 << NBBY) - 1) 206 207 uchar_t apic_reserved_irqlist[MAX_ISA_IRQ + 1]; 208 209 /* 210 * The following vector assignments influence the value of ipltopri and 211 * vectortoipl. Note that vectors 0 - 0x1f are not used. We can program 212 * idle to 0 and IPL 0 to 0x10 to differentiate idle in case 213 * we care to do so in future. Note some IPLs which are rarely used 214 * will share the vector ranges and heavily used IPLs (5 and 6) have 215 * a wide range. 216 * IPL Vector range. as passed to intr_enter 217 * 0 none. 218 * 1,2,3 0x20-0x2f 0x0-0xf 219 * 4 0x30-0x3f 0x10-0x1f 220 * 5 0x40-0x5f 0x20-0x3f 221 * 6 0x60-0x7f 0x40-0x5f 222 * 7,8,9 0x80-0x8f 0x60-0x6f 223 * 10 0x90-0x9f 0x70-0x7f 224 * 11 0xa0-0xaf 0x80-0x8f 225 * ... ... 
226 * 16 0xf0-0xff 0xd0-0xdf 227 */ 228 uchar_t apic_vectortoipl[APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL] = { 229 3, 4, 5, 5, 6, 6, 9, 10, 11, 12, 13, 14, 15, 16 230 }; 231 /* 232 * The ipl of an ISR at vector X is apic_vectortoipl[X>>4], i.e. the table has one entry per 16-vector range. 233 * NOTE that this is vector as passed into intr_enter which is 234 * programmed vector - 0x20 (APIC_BASE_VECT) 235 */ 236 237 uchar_t apic_ipltopri[MAXIPL + 1]; /* unix ipl to apic pri */ 238 /* The taskpri to be programmed into apic to mask given ipl */ 239 240 #if defined(__amd64) 241 uchar_t apic_cr8pri[MAXIPL + 1]; /* unix ipl to cr8 pri */ 242 #endif 243 244 /* 245 * Patchable global variables. 246 */ 247 int apic_forceload = 0; 248 249 #define INTR_ROUND_ROBIN_WITH_AFFINITY 0 250 #define INTR_ROUND_ROBIN 1 251 #define INTR_LOWEST_PRIORITY 2 252 253 int apic_intr_policy = INTR_ROUND_ROBIN_WITH_AFFINITY; 254 255 static int apic_next_bind_cpu = 2; /* For round robin assignment */ 256 /* start with cpu 1 */ 257 258 int apic_coarse_hrtime = 1; /* 0 - use accurate slow gethrtime() */ 259 /* 1 - use gettime() for performance */ 260 int apic_flat_model = 0; /* 0 - clustered. 
1 - flat */ 261 int apic_enable_hwsoftint = 0; /* 0 - disable, 1 - enable */ 262 int apic_enable_bind_log = 1; /* 1 - display interrupt binding log */ 263 int apic_panic_on_nmi = 0; 264 int apic_panic_on_apic_error = 0; 265 266 int apic_verbose = 0; 267 268 /* Flag definitions for apic_verbose */ 269 #define APIC_VERBOSE_IOAPIC_FLAG 0x00000001 270 #define APIC_VERBOSE_IRQ_FLAG 0x00000002 271 #define APIC_VERBOSE_POWEROFF_FLAG 0x00000004 272 #define APIC_VERBOSE_POWEROFF_PAUSE_FLAG 0x00000008 273 274 275 #define APIC_VERBOSE_IOAPIC(fmt) \ 276 if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) \ 277 cmn_err fmt; 278 279 #define APIC_VERBOSE_IRQ(fmt) \ 280 if (apic_verbose & APIC_VERBOSE_IRQ_FLAG) \ 281 cmn_err fmt; 282 283 #define APIC_VERBOSE_POWEROFF(fmt) \ 284 if (apic_verbose & APIC_VERBOSE_POWEROFF_FLAG) \ 285 prom_printf fmt; 286 287 288 /* Now the ones for Dynamic Interrupt distribution */ 289 int apic_enable_dynamic_migration = 0; 290 291 /* 292 * If enabled, the distribution works as follows: 293 * On every interrupt entry, the current ipl for the CPU is set in cpu_info 294 * and the irq corresponding to the ipl is also set in the aci_current array. 295 * interrupt exit and setspl (due to soft interrupts) will cause the current 296 * ipl to be changed. This is cache friendly as these frequently used 297 * paths write into a per cpu structure. 298 * 299 * Sampling is done by checking the structures for all CPUs and incrementing 300 * the busy field of the irq (if any) executing on each CPU and the busy field 301 * of the corresponding CPU. 302 * In periodic mode this is done on every clock interrupt. 303 * In one-shot mode, this is done thru a cyclic with an interval of 304 * apic_redistribute_sample_interval (default 10 milli sec). 305 * 306 * Every apic_sample_factor_redistribution times we sample, we do computations 307 * to decide which interrupt needs to be migrated (see comments 308 * before apic_intr_redistribute()). 
309 */ 310 311 /* 312 * Following 3 variables start as % and can be patched or set using an 313 * API to be defined in future. They will be scaled to 314 * sample_factor_redistribution which is in turn set to hertz+1 (in periodic 315 * mode), or 101 in one-shot mode to stagger it away from one sec processing 316 */ 317 318 int apic_int_busy_mark = 60; 319 int apic_int_free_mark = 20; 320 int apic_diff_for_redistribution = 10; 321 322 /* sampling interval for interrupt redistribution for dynamic migration */ 323 int apic_redistribute_sample_interval = NANOSEC / 100; /* 10 millisec */ 324 325 /* 326 * number of times we sample before deciding to redistribute interrupts 327 * for dynamic migration 328 */ 329 int apic_sample_factor_redistribution = 101; 330 331 /* timeout for xlate_vector, mark_vector */ 332 int apic_revector_timeout = 16 * 10000; /* 160 millisec */ 333 334 int apic_redist_cpu_skip = 0; 335 int apic_num_imbalance = 0; 336 int apic_num_rebind = 0; 337 338 int apic_nproc = 0; 339 int apic_defconf = 0; 340 int apic_irq_translate = 0; 341 int apic_spec_rev = 0; 342 int apic_imcrp = 0; 343 344 int apic_use_acpi = 1; /* 1 = use ACPI, 0 = don't use ACPI */ 345 int apic_use_acpi_madt_only = 0; /* 1=ONLY use MADT from ACPI */ 346 347 /* 348 * For interrupt link devices, if apic_unconditional_srs is set, an irq resource 349 * will be assigned (via _SRS). If it is not set, use the current 350 * irq setting (via _CRS), but only if that irq is in the set of possible 351 * irqs (returned by _PRS) for the device. 352 */ 353 int apic_unconditional_srs = 1; 354 355 /* 356 * For interrupt link devices, if apic_prefer_crs is set when we are 357 * assigning an IRQ resource to a device, prefer the current IRQ setting 358 * over other possible irq settings under same conditions. 
359 */ 360 361 int apic_prefer_crs = 1; 362 363 364 /* minimum number of timer ticks to program to */ 365 int apic_min_timer_ticks = 1; 366 /* 367 * Local static data 368 */ 369 static struct psm_ops apic_ops = { 370 apic_probe, 371 372 apic_init, 373 apic_picinit, 374 apic_intr_enter, 375 apic_intr_exit, 376 apic_setspl, 377 apic_addspl, 378 apic_delspl, 379 apic_disable_intr, 380 apic_enable_intr, 381 apic_softlvl_to_irq, 382 apic_set_softintr, 383 384 apic_set_idlecpu, 385 apic_unset_idlecpu, 386 387 apic_clkinit, 388 apic_getclkirq, 389 (void (*)(void))NULL, /* psm_hrtimeinit */ 390 apic_gethrtime, 391 392 apic_get_next_processorid, 393 apic_cpu_start, 394 apic_post_cpu_start, 395 apic_shutdown, 396 apic_get_ipivect, 397 apic_send_ipi, 398 399 (int (*)(dev_info_t *, int))NULL, /* psm_translate_irq */ 400 (int (*)(todinfo_t *))NULL, /* psm_tod_get */ 401 (int (*)(todinfo_t *))NULL, /* psm_tod_set */ 402 (void (*)(int, char *))NULL, /* psm_notify_error */ 403 (void (*)(int))NULL, /* psm_notify_func */ 404 apic_timer_reprogram, 405 apic_timer_enable, 406 apic_timer_disable, 407 apic_post_cyclic_setup, 408 apic_preshutdown, 409 apic_intr_ops /* Advanced DDI Interrupt framework */ 410 }; 411 412 413 static struct psm_info apic_psm_info = { 414 PSM_INFO_VER01_5, /* version */ 415 PSM_OWN_EXCLUSIVE, /* ownership */ 416 (struct psm_ops *)&apic_ops, /* operation */ 417 "pcplusmp", /* machine name */ 418 "pcplusmp v1.4 compatible %I%", 419 }; 420 421 static void *apic_hdlp; 422 423 #ifdef DEBUG 424 #define DENT 0x0001 425 int apic_debug = 0; 426 /* 427 * set apic_restrict_vector to the # of vectors we want to allow per range 428 * useful in testing shared interrupt logic by setting it to 2 or 3 429 */ 430 int apic_restrict_vector = 0; 431 432 #define APIC_DEBUG_MSGBUFSIZE 2048 433 int apic_debug_msgbuf[APIC_DEBUG_MSGBUFSIZE]; 434 int apic_debug_msgbufindex = 0; 435 436 /* 437 * Put "int" info into debug buffer. No MP consistency, but light weight. 
438 * Good enough for most debugging. 439 */ 440 #define APIC_DEBUG_BUF_PUT(x) \ 441 apic_debug_msgbuf[apic_debug_msgbufindex++] = x; \ 442 if (apic_debug_msgbufindex >= (APIC_DEBUG_MSGBUFSIZE - NCPU)) \ 443 apic_debug_msgbufindex = 0; 444 445 #endif /* DEBUG */ 446 447 apic_cpus_info_t *apic_cpus; 448 449 static uint_t apic_cpumask = 0; 450 static uint_t apic_flag; 451 452 /* Flag to indicate that we need to shut down all processors */ 453 static uint_t apic_shutdown_processors; 454 455 uint_t apic_nsec_per_intr = 0; 456 457 /* 458 * apic_let_idle_redistribute can have the following values: 459 * 0 - If clock decremented it from 1 to 0, clock has to call redistribute. 460 * apic_redistribute_lock prevents multiple idle cpus from redistributing 461 */ 462 int apic_num_idle_redistributions = 0; 463 static int apic_let_idle_redistribute = 0; 464 static uint_t apic_nticks = 0; 465 static uint_t apic_skipped_redistribute = 0; 466 467 /* to gather intr data and redistribute */ 468 static void apic_redistribute_compute(void); 469 470 static uint_t last_count_read = 0; 471 static lock_t apic_gethrtime_lock; 472 volatile int apic_hrtime_stamp = 0; 473 volatile hrtime_t apic_nsec_since_boot = 0; 474 static uint_t apic_hertz_count, apic_nsec_per_tick; 475 static hrtime_t apic_nsec_max; 476 477 static hrtime_t apic_last_hrtime = 0; 478 int apic_hrtime_error = 0; 479 int apic_remote_hrterr = 0; 480 int apic_num_nmis = 0; 481 int apic_apic_error = 0; 482 int apic_num_apic_errors = 0; 483 int apic_num_cksum_errors = 0; 484 485 static uchar_t apic_io_id[MAX_IO_APIC]; 486 static uchar_t apic_io_ver[MAX_IO_APIC]; 487 static uchar_t apic_io_vectbase[MAX_IO_APIC]; 488 static uchar_t apic_io_vectend[MAX_IO_APIC]; 489 volatile int32_t *apicioadr[MAX_IO_APIC]; 490 491 /* 492 * First available slot to be used as IRQ index into the apic_irq_table 493 * for those interrupts (like MSI/X) that don't have a physical IRQ. 
494 */ 495 int apic_first_avail_irq = APIC_FIRST_FREE_IRQ; 496 497 /* 498 * apic_ioapic_lock protects the ioapics (reg select), the status, temp_bound 499 * and bound elements of cpus_info and the temp_cpu element of irq_struct 500 */ 501 lock_t apic_ioapic_lock; 502 503 /* 504 * apic_ioapic_reprogram_lock prevents a CPU from exiting 505 * apic_intr_exit before IOAPIC reprogramming information 506 * is collected. 507 */ 508 static lock_t apic_ioapic_reprogram_lock; 509 static int apic_io_max = 0; /* no. of i/o apics enabled */ 510 511 static struct apic_io_intr *apic_io_intrp = 0; 512 static struct apic_bus *apic_busp; 513 514 uchar_t apic_vector_to_irq[APIC_MAX_VECTOR+1]; 515 static uchar_t apic_resv_vector[MAXIPL+1]; 516 517 static char apic_level_intr[APIC_MAX_VECTOR+1]; 518 static int apic_error = 0; 519 /* values which apic_error can take. Not catastrophic, but may help debug */ 520 #define APIC_ERR_BOOT_EOI 0x1 521 #define APIC_ERR_GET_IPIVECT_FAIL 0x2 522 #define APIC_ERR_INVALID_INDEX 0x4 523 #define APIC_ERR_MARK_VECTOR_FAIL 0x8 524 #define APIC_ERR_APIC_ERROR 0x40000000 525 #define APIC_ERR_NMI 0x80000000 526 527 static int apic_cmos_ssb_set = 0; 528 529 static uint32_t eisa_level_intr_mask = 0; 530 /* At least MSB will be set if EISA bus */ 531 532 static int apic_pci_bus_total = 0; 533 static uchar_t apic_single_pci_busid = 0; 534 535 536 /* 537 * airq_mutex protects additions to the apic_irq_table - the first 538 * pointer and any airq_nexts off of that one. It also protects 539 * apic_max_device_irq & apic_min_device_irq. It also guarantees 540 * that share_id is unique as new ids are generated only when new 541 * irq_t structs are linked in. Once linked in the structs are never 542 * deleted. temp_cpu & mps_intr_index field indicate if it is programmed 543 * or allocated. Note that there is a slight gap between allocating in 544 * apic_introp_xlate and programming in addspl. 
545 */ 546 kmutex_t airq_mutex; 547 apic_irq_t *apic_irq_table[APIC_MAX_VECTOR+1]; 548 int apic_max_device_irq = 0; 549 int apic_min_device_irq = APIC_MAX_VECTOR; 550 551 /* use to make sure only one cpu handles the nmi */ 552 static lock_t apic_nmi_lock; 553 /* use to make sure only one cpu handles the error interrupt */ 554 static lock_t apic_error_lock; 555 556 /* 557 * Following declarations are for revectoring; used when ISRs at different 558 * IPLs share an irq. 559 */ 560 static lock_t apic_revector_lock; 561 static int apic_revector_pending = 0; 562 static uchar_t *apic_oldvec_to_newvec; 563 static uchar_t *apic_newvec_to_oldvec; 564 565 /* Ensures that the IOAPIC-reprogramming timeout is not reentrant */ 566 static kmutex_t apic_reprogram_timeout_mutex; 567 568 static struct ioapic_reprogram_data { 569 int valid; /* This entry is valid */ 570 int bindcpu; /* The CPU to which the int will be bound */ 571 unsigned timeouts; /* # times the reprogram timeout was called */ 572 } apic_reprogram_info[APIC_MAX_VECTOR+1]; 573 /* 574 * APIC_MAX_VECTOR + 1 is the maximum # of IRQs as well. apic_reprogram_info 575 * is indexed by IRQ number, NOT by vector number. 576 */ 577 578 579 /* 580 * The following added to identify a software poweroff method if available. 581 */ 582 583 static struct { 584 int poweroff_method; 585 char oem_id[APIC_MPS_OEM_ID_LEN + 1]; /* MAX + 1 for NULL */ 586 char prod_id[APIC_MPS_PROD_ID_LEN + 1]; /* MAX + 1 for NULL */ 587 } apic_mps_ids[] = { 588 { APIC_POWEROFF_VIA_RTC, "INTEL", "ALDER" }, /* 4300 */ 589 { APIC_POWEROFF_VIA_RTC, "NCR", "AMC" }, /* 4300 */ 590 { APIC_POWEROFF_VIA_ASPEN_BMC, "INTEL", "A450NX" }, /* 4400? */ 591 { APIC_POWEROFF_VIA_ASPEN_BMC, "INTEL", "AD450NX" }, /* 4400 */ 592 { APIC_POWEROFF_VIA_ASPEN_BMC, "INTEL", "AC450NX" }, /* 4400R */ 593 { APIC_POWEROFF_VIA_SITKA_BMC, "INTEL", "S450NX" }, /* S50 */ 594 { APIC_POWEROFF_VIA_SITKA_BMC, "INTEL", "SC450NX" } /* S50? 
*/ 595 }; 596 597 int apic_poweroff_method = APIC_POWEROFF_NONE; 598 599 static struct { 600 uchar_t cntl; 601 uchar_t data; 602 } aspen_bmc[] = { 603 { CC_SMS_WR_START, 0x18 }, /* NetFn/LUN */ 604 { CC_SMS_WR_NEXT, 0x24 }, /* Cmd SET_WATCHDOG_TIMER */ 605 { CC_SMS_WR_NEXT, 0x84 }, /* DataByte 1: SMS/OS no log */ 606 { CC_SMS_WR_NEXT, 0x2 }, /* DataByte 2: Power Down */ 607 { CC_SMS_WR_NEXT, 0x0 }, /* DataByte 3: no pre-timeout */ 608 { CC_SMS_WR_NEXT, 0x0 }, /* DataByte 4: timer expir. */ 609 { CC_SMS_WR_NEXT, 0xa }, /* DataByte 5: init countdown */ 610 { CC_SMS_WR_END, 0x0 }, /* DataByte 6: init countdown */ 611 612 { CC_SMS_WR_START, 0x18 }, /* NetFn/LUN */ 613 { CC_SMS_WR_END, 0x22 } /* Cmd RESET_WATCHDOG_TIMER */ 614 }; 615 616 static struct { 617 int port; 618 uchar_t data; 619 } sitka_bmc[] = { 620 { SMS_COMMAND_REGISTER, SMS_WRITE_START }, 621 { SMS_DATA_REGISTER, 0x18 }, /* NetFn/LUN */ 622 { SMS_DATA_REGISTER, 0x24 }, /* Cmd SET_WATCHDOG_TIMER */ 623 { SMS_DATA_REGISTER, 0x84 }, /* DataByte 1: SMS/OS no log */ 624 { SMS_DATA_REGISTER, 0x2 }, /* DataByte 2: Power Down */ 625 { SMS_DATA_REGISTER, 0x0 }, /* DataByte 3: no pre-timeout */ 626 { SMS_DATA_REGISTER, 0x0 }, /* DataByte 4: timer expir. */ 627 { SMS_DATA_REGISTER, 0xa }, /* DataByte 5: init countdown */ 628 { SMS_COMMAND_REGISTER, SMS_WRITE_END }, 629 { SMS_DATA_REGISTER, 0x0 }, /* DataByte 6: init countdown */ 630 631 { SMS_COMMAND_REGISTER, SMS_WRITE_START }, 632 { SMS_DATA_REGISTER, 0x18 }, /* NetFn/LUN */ 633 { SMS_COMMAND_REGISTER, SMS_WRITE_END }, 634 { SMS_DATA_REGISTER, 0x22 } /* Cmd RESET_WATCHDOG_TIMER */ 635 }; 636 637 638 /* Patchable global variables. 
*/ 639 int apic_kmdb_on_nmi = 0; /* 0 - no, 1 - yes enter kmdb */ 640 int apic_debug_mps_id = 0; /* 1 - print MPS ID strings */ 641 642 /* 643 * ACPI definitions 644 */ 645 /* _PIC method arguments */ 646 #define ACPI_PIC_MODE 0 647 #define ACPI_APIC_MODE 1 648 649 /* APIC error flags we care about */ 650 #define APIC_SEND_CS_ERROR 0x01 651 #define APIC_RECV_CS_ERROR 0x02 652 #define APIC_CS_ERRORS (APIC_SEND_CS_ERROR|APIC_RECV_CS_ERROR) 653 654 /* 655 * ACPI variables 656 */ 657 /* 1 = acpi is enabled & working, 0 = acpi is not enabled or not there */ 658 static int apic_enable_acpi = 0; 659 660 /* ACPI Multiple APIC Description Table ptr */ 661 static MULTIPLE_APIC_TABLE *acpi_mapic_dtp = NULL; 662 663 /* ACPI Interrupt Source Override Structure ptr */ 664 static MADT_INTERRUPT_OVERRIDE *acpi_isop = NULL; 665 static int acpi_iso_cnt = 0; 666 667 /* ACPI Non-maskable Interrupt Sources ptr */ 668 static MADT_NMI_SOURCE *acpi_nmi_sp = NULL; 669 static int acpi_nmi_scnt = 0; 670 static MADT_LOCAL_APIC_NMI *acpi_nmi_cp = NULL; 671 static int acpi_nmi_ccnt = 0; 672 673 /* 674 * extern declarations 675 */ 676 extern int intr_clear(void); 677 extern void intr_restore(uint_t); 678 #if defined(__amd64) 679 extern int intpri_use_cr8; 680 #endif /* __amd64 */ 681 682 extern int apic_pci_msi_enable_vector(dev_info_t *, int, int, 683 int, int, int); 684 extern apic_irq_t *apic_find_irq(dev_info_t *, struct intrspec *, int); 685 686 /* 687 * This is the loadable module wrapper 688 */ 689 690 int 691 _init(void) 692 { 693 if (apic_coarse_hrtime) 694 apic_ops.psm_gethrtime = &apic_gettime; 695 return (psm_mod_init(&apic_hdlp, &apic_psm_info)); 696 } 697 698 int 699 _fini(void) 700 { 701 return (psm_mod_fini(&apic_hdlp, &apic_psm_info)); 702 } 703 704 int 705 _info(struct modinfo *modinfop) 706 { 707 return (psm_mod_info(&apic_hdlp, &apic_psm_info, modinfop)); 708 } 709 710 /* 711 * Auto-configuration routines 712 */ 713 714 /* 715 * Look at MPSpec 1.4 (Intel Order # 242016-005) 
for details of what we do here 716 * May work with 1.1 - but not guaranteed. 717 * According to the MP Spec, the MP floating pointer structure 718 * will be searched in the order described below: 719 * 1. In the first kilobyte of Extended BIOS Data Area (EBDA) 720 * 2. Within the last kilobyte of system base memory 721 * 3. In the BIOS ROM address space between 0F0000h and 0FFFFh 722 * Once we find the right signature with proper checksum, we call 723 * either handle_defconf or parse_mpct to get all info necessary for 724 * subsequent operations. 725 */ 726 static int 727 apic_probe() 728 { 729 uint32_t mpct_addr, ebda_start = 0, base_mem_end; 730 caddr_t biosdatap; 731 caddr_t mpct; 732 caddr_t fptr; 733 int i, mpct_size, mapsize, retval = PSM_FAILURE; 734 ushort_t ebda_seg, base_mem_size; 735 struct apic_mpfps_hdr *fpsp; 736 struct apic_mp_cnf_hdr *hdrp; 737 int bypass_cpu_and_ioapics_in_mptables; 738 int acpi_user_options; 739 740 if (apic_forceload < 0) 741 return (retval); 742 743 /* Allow override for MADT-only mode */ 744 acpi_user_options = ddi_prop_get_int(DDI_DEV_T_ANY, ddi_root_node(), 0, 745 "acpi-user-options", 0); 746 apic_use_acpi_madt_only = ((acpi_user_options & ACPI_OUSER_MADT) != 0); 747 748 /* Allow apic_use_acpi to override MADT-only mode */ 749 if (!apic_use_acpi) 750 apic_use_acpi_madt_only = 0; 751 752 retval = acpi_probe(); 753 754 /* 755 * mapin the bios data area 40:0 756 * 40:13h - two-byte location reports the base memory size 757 * 40:0Eh - two-byte location for the exact starting address of 758 * the EBDA segment for EISA 759 */ 760 biosdatap = psm_map_phys(0x400, 0x20, PROT_READ); 761 if (!biosdatap) 762 return (retval); 763 fpsp = (struct apic_mpfps_hdr *)NULL; 764 mapsize = MPFPS_RAM_WIN_LEN; 765 /*LINTED: pointer cast may result in improper alignment */ 766 ebda_seg = *((ushort_t *)(biosdatap+0xe)); 767 /* check the 1k of EBDA */ 768 if (ebda_seg) { 769 ebda_start = ((uint32_t)ebda_seg) << 4; 770 fptr = psm_map_phys(ebda_start, 
MPFPS_RAM_WIN_LEN, PROT_READ); 771 if (fptr) { 772 if (!(fpsp = 773 apic_find_fps_sig(fptr, MPFPS_RAM_WIN_LEN))) 774 psm_unmap_phys(fptr, MPFPS_RAM_WIN_LEN); 775 } 776 } 777 /* If not in EBDA, check the last k of system base memory */ 778 if (!fpsp) { 779 /*LINTED: pointer cast may result in improper alignment */ 780 base_mem_size = *((ushort_t *)(biosdatap + 0x13)); 781 782 if (base_mem_size > 512) 783 base_mem_end = 639 * 1024; 784 else 785 base_mem_end = 511 * 1024; 786 /* if ebda == last k of base mem, skip to check BIOS ROM */ 787 if (base_mem_end != ebda_start) { 788 789 fptr = psm_map_phys(base_mem_end, MPFPS_RAM_WIN_LEN, 790 PROT_READ); 791 792 if (fptr) { 793 if (!(fpsp = apic_find_fps_sig(fptr, 794 MPFPS_RAM_WIN_LEN))) 795 psm_unmap_phys(fptr, MPFPS_RAM_WIN_LEN); 796 } 797 } 798 } 799 psm_unmap_phys(biosdatap, 0x20); 800 801 /* If still cannot find it, check the BIOS ROM space */ 802 if (!fpsp) { 803 mapsize = MPFPS_ROM_WIN_LEN; 804 fptr = psm_map_phys(MPFPS_ROM_WIN_START, 805 MPFPS_ROM_WIN_LEN, PROT_READ); 806 if (fptr) { 807 if (!(fpsp = 808 apic_find_fps_sig(fptr, MPFPS_ROM_WIN_LEN))) { 809 psm_unmap_phys(fptr, MPFPS_ROM_WIN_LEN); 810 return (retval); 811 } 812 } 813 } 814 815 if (apic_checksum((caddr_t)fpsp, fpsp->mpfps_length * 16) != 0) { 816 psm_unmap_phys(fptr, MPFPS_ROM_WIN_LEN); 817 return (retval); 818 } 819 820 apic_spec_rev = fpsp->mpfps_spec_rev; 821 if ((apic_spec_rev != 04) && (apic_spec_rev != 01)) { 822 psm_unmap_phys(fptr, MPFPS_ROM_WIN_LEN); 823 return (retval); 824 } 825 826 /* check IMCR is present or not */ 827 apic_imcrp = fpsp->mpfps_featinfo2 & MPFPS_FEATINFO2_IMCRP; 828 829 /* check default configuration (dual CPUs) */ 830 if ((apic_defconf = fpsp->mpfps_featinfo1) != 0) { 831 psm_unmap_phys(fptr, mapsize); 832 return (apic_handle_defconf()); 833 } 834 835 /* MP Configuration Table */ 836 mpct_addr = (uint32_t)(fpsp->mpfps_mpct_paddr); 837 838 psm_unmap_phys(fptr, mapsize); /* unmap floating ptr struct */ 839 840 /* 841 * Map in 
enough memory for the MP Configuration Table Header. 842 * Use this table to read the total length of the BIOS data and 843 * map in all the info 844 */ 845 /*LINTED: pointer cast may result in improper alignment */ 846 hdrp = (struct apic_mp_cnf_hdr *)psm_map_phys(mpct_addr, 847 sizeof (struct apic_mp_cnf_hdr), PROT_READ); 848 if (!hdrp) 849 return (retval); 850 851 /* check mp configuration table signature PCMP */ 852 if (hdrp->mpcnf_sig != 0x504d4350) { 853 psm_unmap_phys((caddr_t)hdrp, sizeof (struct apic_mp_cnf_hdr)); 854 return (retval); 855 } 856 mpct_size = (int)hdrp->mpcnf_tbl_length; 857 858 apic_set_pwroff_method_from_mpcnfhdr(hdrp); 859 860 psm_unmap_phys((caddr_t)hdrp, sizeof (struct apic_mp_cnf_hdr)); 861 862 if ((retval == PSM_SUCCESS) && !apic_use_acpi_madt_only) { 863 /* This is an ACPI machine No need for further checks */ 864 return (retval); 865 } 866 867 /* 868 * Map in the entries for this machine, ie. Processor 869 * Entry Tables, Bus Entry Tables, etc. 870 * They are in fixed order following one another 871 */ 872 mpct = psm_map_phys(mpct_addr, mpct_size, PROT_READ); 873 if (!mpct) 874 return (retval); 875 876 if (apic_checksum(mpct, mpct_size) != 0) 877 goto apic_fail1; 878 879 880 /*LINTED: pointer cast may result in improper alignment */ 881 hdrp = (struct apic_mp_cnf_hdr *)mpct; 882 /*LINTED: pointer cast may result in improper alignment */ 883 apicadr = (uint32_t *)psm_map_phys((uint32_t)hdrp->mpcnf_local_apic, 884 APIC_LOCAL_MEMLEN, PROT_READ | PROT_WRITE); 885 if (!apicadr) 886 goto apic_fail1; 887 888 /* Parse all information in the tables */ 889 bypass_cpu_and_ioapics_in_mptables = (retval == PSM_SUCCESS); 890 if (apic_parse_mpct(mpct, bypass_cpu_and_ioapics_in_mptables) == 891 PSM_SUCCESS) 892 return (PSM_SUCCESS); 893 894 for (i = 0; i < apic_io_max; i++) 895 psm_unmap_phys((caddr_t)apicioadr[i], APIC_IO_MEMLEN); 896 if (apic_cpus) 897 kmem_free(apic_cpus, sizeof (*apic_cpus) * apic_nproc); 898 if (apicadr) 899 
psm_unmap_phys((caddr_t)apicadr, APIC_LOCAL_MEMLEN); 900 apic_fail1: 901 psm_unmap_phys(mpct, mpct_size); 902 return (retval); 903 } 904 905 static void 906 apic_set_pwroff_method_from_mpcnfhdr(struct apic_mp_cnf_hdr *hdrp) 907 { 908 int i; 909 910 for (i = 0; i < (sizeof (apic_mps_ids) / sizeof (apic_mps_ids[0])); 911 i++) { 912 if ((strncmp(hdrp->mpcnf_oem_str, apic_mps_ids[i].oem_id, 913 strlen(apic_mps_ids[i].oem_id)) == 0) && 914 (strncmp(hdrp->mpcnf_prod_str, apic_mps_ids[i].prod_id, 915 strlen(apic_mps_ids[i].prod_id)) == 0)) { 916 917 apic_poweroff_method = apic_mps_ids[i].poweroff_method; 918 break; 919 } 920 } 921 922 if (apic_debug_mps_id != 0) { 923 cmn_err(CE_CONT, "pcplusmp: MPS OEM ID = '%c%c%c%c%c%c%c%c'" 924 "Product ID = '%c%c%c%c%c%c%c%c%c%c%c%c'\n", 925 hdrp->mpcnf_oem_str[0], 926 hdrp->mpcnf_oem_str[1], 927 hdrp->mpcnf_oem_str[2], 928 hdrp->mpcnf_oem_str[3], 929 hdrp->mpcnf_oem_str[4], 930 hdrp->mpcnf_oem_str[5], 931 hdrp->mpcnf_oem_str[6], 932 hdrp->mpcnf_oem_str[7], 933 hdrp->mpcnf_prod_str[0], 934 hdrp->mpcnf_prod_str[1], 935 hdrp->mpcnf_prod_str[2], 936 hdrp->mpcnf_prod_str[3], 937 hdrp->mpcnf_prod_str[4], 938 hdrp->mpcnf_prod_str[5], 939 hdrp->mpcnf_prod_str[6], 940 hdrp->mpcnf_prod_str[7], 941 hdrp->mpcnf_prod_str[8], 942 hdrp->mpcnf_prod_str[9], 943 hdrp->mpcnf_prod_str[10], 944 hdrp->mpcnf_prod_str[11]); 945 } 946 } 947 948 static int 949 acpi_probe(void) 950 { 951 int i, id, intmax, ver, index, rv; 952 int acpi_verboseflags = 0; 953 int madt_seen, madt_size; 954 APIC_HEADER *ap; 955 MADT_PROCESSOR_APIC *mpa; 956 MADT_IO_APIC *mia; 957 MADT_IO_SAPIC *misa; 958 MADT_INTERRUPT_OVERRIDE *mio; 959 MADT_NMI_SOURCE *mns; 960 MADT_INTERRUPT_SOURCE *mis; 961 MADT_LOCAL_APIC_NMI *mlan; 962 MADT_ADDRESS_OVERRIDE *mao; 963 ACPI_OBJECT_LIST arglist; 964 ACPI_OBJECT arg; 965 int sci; 966 iflag_t sci_flags; 967 volatile int32_t *ioapic; 968 char local_ids[NCPU]; 969 char proc_ids[NCPU]; 970 uchar_t hid; 971 972 if (!apic_use_acpi) 973 return 
(PSM_FAILURE); 974 975 if (AcpiGetFirmwareTable(APIC_SIG, 1, ACPI_LOGICAL_ADDRESSING, 976 (ACPI_TABLE_HEADER **) &acpi_mapic_dtp) != AE_OK) 977 return (PSM_FAILURE); 978 979 apicadr = (uint32_t *)psm_map_phys( 980 (uint32_t)acpi_mapic_dtp->LocalApicAddress, 981 APIC_LOCAL_MEMLEN, PROT_READ | PROT_WRITE); 982 if (!apicadr) 983 return (PSM_FAILURE); 984 985 id = apicadr[APIC_LID_REG]; 986 local_ids[0] = (uchar_t)(((uint_t)id) >> 24); 987 apic_nproc = index = 1; 988 apic_io_max = 0; 989 990 ap = (APIC_HEADER *) (acpi_mapic_dtp + 1); 991 madt_size = acpi_mapic_dtp->Length; 992 madt_seen = sizeof (*acpi_mapic_dtp); 993 994 while (madt_seen < madt_size) { 995 switch (ap->Type) { 996 case APIC_PROCESSOR: 997 mpa = (MADT_PROCESSOR_APIC *) ap; 998 if (mpa->ProcessorEnabled) { 999 if (mpa->LocalApicId == local_ids[0]) 1000 proc_ids[0] = mpa->ProcessorId; 1001 else if (apic_nproc < NCPU) { 1002 local_ids[index] = mpa->LocalApicId; 1003 proc_ids[index] = mpa->ProcessorId; 1004 index++; 1005 apic_nproc++; 1006 } else 1007 cmn_err(CE_WARN, "pcplusmp: exceeded " 1008 "maximum no. 
of CPUs (= %d)", NCPU); 1009 } 1010 break; 1011 1012 case APIC_IO: 1013 mia = (MADT_IO_APIC *) ap; 1014 if (apic_io_max < MAX_IO_APIC) { 1015 apic_io_id[apic_io_max] = mia->IoApicId; 1016 apic_io_vectbase[apic_io_max] = 1017 mia->Interrupt; 1018 ioapic = apicioadr[apic_io_max] = 1019 (int32_t *)psm_map_phys( 1020 (uint32_t)mia->Address, 1021 APIC_IO_MEMLEN, PROT_READ | PROT_WRITE); 1022 if (!ioapic) 1023 goto cleanup; 1024 apic_io_max++; 1025 } 1026 break; 1027 1028 case APIC_XRUPT_OVERRIDE: 1029 mio = (MADT_INTERRUPT_OVERRIDE *) ap; 1030 if (acpi_isop == NULL) 1031 acpi_isop = mio; 1032 acpi_iso_cnt++; 1033 break; 1034 1035 case APIC_NMI: 1036 /* UNIMPLEMENTED */ 1037 mns = (MADT_NMI_SOURCE *) ap; 1038 if (acpi_nmi_sp == NULL) 1039 acpi_nmi_sp = mns; 1040 acpi_nmi_scnt++; 1041 1042 cmn_err(CE_NOTE, "!apic: nmi source: %d %d %d\n", 1043 mns->Interrupt, mns->Polarity, 1044 mns->TriggerMode); 1045 break; 1046 1047 case APIC_LOCAL_NMI: 1048 /* UNIMPLEMENTED */ 1049 mlan = (MADT_LOCAL_APIC_NMI *) ap; 1050 if (acpi_nmi_cp == NULL) 1051 acpi_nmi_cp = mlan; 1052 acpi_nmi_ccnt++; 1053 1054 cmn_err(CE_NOTE, "!apic: local nmi: %d %d %d %d\n", 1055 mlan->ProcessorId, mlan->Polarity, 1056 mlan->TriggerMode, mlan->Lint); 1057 break; 1058 1059 case APIC_ADDRESS_OVERRIDE: 1060 /* UNIMPLEMENTED */ 1061 mao = (MADT_ADDRESS_OVERRIDE *) ap; 1062 cmn_err(CE_NOTE, "!apic: address override: %lx\n", 1063 (long)mao->Address); 1064 break; 1065 1066 case APIC_IO_SAPIC: 1067 /* UNIMPLEMENTED */ 1068 misa = (MADT_IO_SAPIC *) ap; 1069 1070 cmn_err(CE_NOTE, "!apic: io sapic: %d %d %lx\n", 1071 misa->IoSapicId, misa->InterruptBase, 1072 (long)misa->Address); 1073 break; 1074 1075 case APIC_XRUPT_SOURCE: 1076 /* UNIMPLEMENTED */ 1077 mis = (MADT_INTERRUPT_SOURCE *) ap; 1078 1079 cmn_err(CE_NOTE, 1080 "!apic: irq source: %d %d %d %d %d %d %d\n", 1081 mis->ProcessorId, mis->ProcessorEid, 1082 mis->Interrupt, mis->Polarity, 1083 mis->TriggerMode, mis->InterruptType, 1084 mis->IoSapicVector); 1085 
break; 1086 case APIC_RESERVED: 1087 default: 1088 goto cleanup; 1089 } 1090 1091 /* advance to next entry */ 1092 madt_seen += ap->Length; 1093 ap = (APIC_HEADER *)(((char *)ap) + ap->Length); 1094 } 1095 1096 if ((apic_cpus = kmem_zalloc(sizeof (*apic_cpus) * apic_nproc, 1097 KM_NOSLEEP)) == NULL) 1098 goto cleanup; 1099 1100 apic_cpumask = (1 << apic_nproc) - 1; 1101 1102 /* 1103 * ACPI doesn't provide the local apic ver, get it directly from the 1104 * local apic 1105 */ 1106 ver = apicadr[APIC_VERS_REG]; 1107 for (i = 0; i < apic_nproc; i++) { 1108 apic_cpus[i].aci_local_id = local_ids[i]; 1109 apic_cpus[i].aci_local_ver = (uchar_t)(ver & 0xFF); 1110 } 1111 for (i = 0; i < apic_io_max; i++) { 1112 ioapic = apicioadr[i]; 1113 1114 /* 1115 * need to check Sitka on the following acpi problem 1116 * On the Sitka, the ioapic's apic_id field isn't reporting 1117 * the actual io apic id. We have reported this problem 1118 * to Intel. Until they fix the problem, we will get the 1119 * actual id directly from the ioapic. 
1120 */ 1121 ioapic[APIC_IO_REG] = APIC_ID_CMD; 1122 id = ioapic[APIC_IO_DATA]; 1123 hid = (uchar_t)(((uint_t)id) >> 24); 1124 1125 if (hid != apic_io_id[i]) { 1126 if (apic_io_id[i] == 0) 1127 apic_io_id[i] = hid; 1128 else { /* set ioapic id to whatever reported by ACPI */ 1129 id = ((int32_t)apic_io_id[i]) << 24; 1130 ioapic[APIC_IO_REG] = APIC_ID_CMD; 1131 ioapic[APIC_IO_DATA] = id; 1132 } 1133 } 1134 ioapic[APIC_IO_REG] = APIC_VERS_CMD; 1135 ver = ioapic[APIC_IO_DATA]; 1136 apic_io_ver[i] = (uchar_t)(ver & 0xff); 1137 intmax = (ver >> 16) & 0xff; 1138 apic_io_vectend[i] = apic_io_vectbase[i] + intmax; 1139 if (apic_first_avail_irq <= apic_io_vectend[i]) 1140 apic_first_avail_irq = apic_io_vectend[i] + 1; 1141 } 1142 1143 1144 /* 1145 * Process SCI configuration here 1146 * An error may be returned here if 1147 * acpi-user-options specifies legacy mode 1148 * (no SCI, no ACPI mode) 1149 */ 1150 if (acpica_get_sci(&sci, &sci_flags) != AE_OK) 1151 sci = -1; 1152 1153 /* 1154 * Now call acpi_init() to generate namespaces 1155 * If this fails, we don't attempt to use ACPI 1156 * even if we were able to get a MADT above 1157 */ 1158 if (acpica_init() != AE_OK) 1159 goto cleanup; 1160 1161 /* 1162 * Squirrel away the SCI and flags for later on 1163 * in apic_picinit() when we're ready 1164 */ 1165 apic_sci_vect = sci; 1166 apic_sci_flags = sci_flags; 1167 1168 if (apic_verbose & APIC_VERBOSE_IRQ_FLAG) 1169 acpi_verboseflags |= PSM_VERBOSE_IRQ_FLAG; 1170 1171 if (apic_verbose & APIC_VERBOSE_POWEROFF_FLAG) 1172 acpi_verboseflags |= PSM_VERBOSE_POWEROFF_FLAG; 1173 1174 if (apic_verbose & APIC_VERBOSE_POWEROFF_PAUSE_FLAG) 1175 acpi_verboseflags |= PSM_VERBOSE_POWEROFF_PAUSE_FLAG; 1176 1177 if (acpi_psm_init(apic_psm_info.p_mach_idstring, acpi_verboseflags) == 1178 ACPI_PSM_FAILURE) 1179 goto cleanup; 1180 1181 /* Enable ACPI APIC interrupt routing */ 1182 arglist.Count = 1; 1183 arglist.Pointer = &arg; 1184 arg.Type = ACPI_TYPE_INTEGER; 1185 arg.Integer.Value = 
ACPI_APIC_MODE; /* 1 */ 1186 rv = AcpiEvaluateObject(NULL, "\\_PIC", &arglist, NULL); 1187 if (rv == AE_OK) { 1188 build_reserved_irqlist((uchar_t *)apic_reserved_irqlist); 1189 apic_enable_acpi = 1; 1190 if (apic_use_acpi_madt_only) { 1191 cmn_err(CE_CONT, 1192 "?Using ACPI for CPU/IOAPIC information ONLY\n"); 1193 } 1194 return (PSM_SUCCESS); 1195 } 1196 /* if setting APIC mode failed above, we fall through to cleanup */ 1197 1198 cleanup: 1199 if (apicadr != NULL) { 1200 psm_unmap_phys((caddr_t)apicadr, APIC_LOCAL_MEMLEN); 1201 apicadr = NULL; 1202 } 1203 apic_nproc = 0; 1204 for (i = 0; i < apic_io_max; i++) { 1205 psm_unmap_phys((caddr_t)apicioadr[i], APIC_IO_MEMLEN); 1206 apicioadr[i] = NULL; 1207 } 1208 apic_io_max = 0; 1209 acpi_isop = NULL; 1210 acpi_iso_cnt = 0; 1211 acpi_nmi_sp = NULL; 1212 acpi_nmi_scnt = 0; 1213 acpi_nmi_cp = NULL; 1214 acpi_nmi_ccnt = 0; 1215 return (PSM_FAILURE); 1216 } 1217 1218 /* 1219 * Handle default configuration. Fill in reqd global variables & tables 1220 * Fill all details as MP table does not give any more info 1221 */ 1222 static int 1223 apic_handle_defconf() 1224 { 1225 uint_t lid; 1226 1227 /*LINTED: pointer cast may result in improper alignment */ 1228 apicioadr[0] = (int32_t *)psm_map_phys(APIC_IO_ADDR, 1229 APIC_IO_MEMLEN, PROT_READ | PROT_WRITE); 1230 /*LINTED: pointer cast may result in improper alignment */ 1231 apicadr = (uint32_t *)psm_map_phys(APIC_LOCAL_ADDR, 1232 APIC_LOCAL_MEMLEN, PROT_READ | PROT_WRITE); 1233 apic_cpus = (apic_cpus_info_t *) 1234 kmem_zalloc(sizeof (*apic_cpus) * 2, KM_NOSLEEP); 1235 if ((!apicadr) || (!apicioadr[0]) || (!apic_cpus)) 1236 goto apic_handle_defconf_fail; 1237 apic_cpumask = 3; 1238 apic_nproc = 2; 1239 lid = apicadr[APIC_LID_REG]; 1240 apic_cpus[0].aci_local_id = (uchar_t)(lid >> APIC_ID_BIT_OFFSET); 1241 /* 1242 * According to the PC+MP spec 1.1, the local ids 1243 * for the default configuration has to be 0 or 1 1244 */ 1245 if (apic_cpus[0].aci_local_id == 1) 1246 
apic_cpus[1].aci_local_id = 0; 1247 else if (apic_cpus[0].aci_local_id == 0) 1248 apic_cpus[1].aci_local_id = 1; 1249 else 1250 goto apic_handle_defconf_fail; 1251 1252 apic_io_id[0] = 2; 1253 apic_io_max = 1; 1254 if (apic_defconf >= 5) { 1255 apic_cpus[0].aci_local_ver = APIC_INTEGRATED_VERS; 1256 apic_cpus[1].aci_local_ver = APIC_INTEGRATED_VERS; 1257 apic_io_ver[0] = APIC_INTEGRATED_VERS; 1258 } else { 1259 apic_cpus[0].aci_local_ver = 0; /* 82489 DX */ 1260 apic_cpus[1].aci_local_ver = 0; 1261 apic_io_ver[0] = 0; 1262 } 1263 if (apic_defconf == 2 || apic_defconf == 3 || apic_defconf == 6) 1264 eisa_level_intr_mask = (inb(EISA_LEVEL_CNTL + 1) << 8) | 1265 inb(EISA_LEVEL_CNTL) | ((uint_t)INT32_MAX + 1); 1266 return (PSM_SUCCESS); 1267 1268 apic_handle_defconf_fail: 1269 if (apic_cpus) 1270 kmem_free(apic_cpus, sizeof (*apic_cpus) * 2); 1271 if (apicadr) 1272 psm_unmap_phys((caddr_t)apicadr, APIC_LOCAL_MEMLEN); 1273 if (apicioadr[0]) 1274 psm_unmap_phys((caddr_t)apicioadr[0], APIC_IO_MEMLEN); 1275 return (PSM_FAILURE); 1276 } 1277 1278 /* Parse the entries in MP configuration table and collect info that we need */ 1279 static int 1280 apic_parse_mpct(caddr_t mpct, int bypass_cpus_and_ioapics) 1281 { 1282 struct apic_procent *procp; 1283 struct apic_bus *busp; 1284 struct apic_io_entry *ioapicp; 1285 struct apic_io_intr *intrp; 1286 volatile int32_t *ioapic; 1287 uint_t lid; 1288 int id; 1289 uchar_t hid; 1290 1291 /*LINTED: pointer cast may result in improper alignment */ 1292 procp = (struct apic_procent *)(mpct + sizeof (struct apic_mp_cnf_hdr)); 1293 1294 /* No need to count cpu entries if we won't use them */ 1295 if (!bypass_cpus_and_ioapics) { 1296 1297 /* Find max # of CPUS and allocate structure accordingly */ 1298 apic_nproc = 0; 1299 while (procp->proc_entry == APIC_CPU_ENTRY) { 1300 if (procp->proc_cpuflags & CPUFLAGS_EN) { 1301 apic_nproc++; 1302 } 1303 procp++; 1304 } 1305 if (apic_nproc > NCPU) 1306 cmn_err(CE_WARN, "pcplusmp: exceeded " 1307 
"maximum no. of CPUs (= %d)", NCPU); 1308 if (!apic_nproc || !(apic_cpus = (apic_cpus_info_t *) 1309 kmem_zalloc(sizeof (*apic_cpus)*apic_nproc, KM_NOSLEEP))) 1310 return (PSM_FAILURE); 1311 } 1312 1313 /*LINTED: pointer cast may result in improper alignment */ 1314 procp = (struct apic_procent *)(mpct + sizeof (struct apic_mp_cnf_hdr)); 1315 1316 /* 1317 * start with index 1 as 0 needs to be filled in with Boot CPU, but 1318 * if we're bypassing this information, it has already been filled 1319 * in by acpi_probe(), so don't overwrite it. 1320 */ 1321 if (!bypass_cpus_and_ioapics) 1322 apic_nproc = 1; 1323 1324 while (procp->proc_entry == APIC_CPU_ENTRY) { 1325 /* check whether the cpu exists or not */ 1326 if (!bypass_cpus_and_ioapics && 1327 procp->proc_cpuflags & CPUFLAGS_EN) { 1328 if (procp->proc_cpuflags & CPUFLAGS_BP) { /* Boot CPU */ 1329 lid = apicadr[APIC_LID_REG]; 1330 apic_cpus[0].aci_local_id = procp->proc_apicid; 1331 if (apic_cpus[0].aci_local_id != 1332 (uchar_t)(lid >> APIC_ID_BIT_OFFSET)) { 1333 return (PSM_FAILURE); 1334 } 1335 apic_cpus[0].aci_local_ver = 1336 procp->proc_version; 1337 } else { 1338 1339 apic_cpus[apic_nproc].aci_local_id = 1340 procp->proc_apicid; 1341 apic_cpus[apic_nproc].aci_local_ver = 1342 procp->proc_version; 1343 apic_nproc++; 1344 1345 } 1346 } 1347 procp++; 1348 } 1349 1350 if (!bypass_cpus_and_ioapics) { 1351 /* convert the number of processors into a cpumask */ 1352 apic_cpumask = (1 << apic_nproc) - 1; 1353 } 1354 1355 /* 1356 * Save start of bus entries for later use. 1357 * Get EISA level cntrl if EISA bus is present. 
1358 * Also get the CPI bus id for single CPI bus case 1359 */ 1360 apic_busp = busp = (struct apic_bus *)procp; 1361 while (busp->bus_entry == APIC_BUS_ENTRY) { 1362 lid = apic_find_bus_type((char *)&busp->bus_str1); 1363 if (lid == BUS_EISA) { 1364 eisa_level_intr_mask = (inb(EISA_LEVEL_CNTL + 1) << 8) | 1365 inb(EISA_LEVEL_CNTL) | ((uint_t)INT32_MAX + 1); 1366 } else if (lid == BUS_PCI) { 1367 /* 1368 * apic_single_pci_busid will be used only if 1369 * apic_pic_bus_total is equal to 1 1370 */ 1371 apic_pci_bus_total++; 1372 apic_single_pci_busid = busp->bus_id; 1373 } 1374 busp++; 1375 } 1376 1377 ioapicp = (struct apic_io_entry *)busp; 1378 1379 if (!bypass_cpus_and_ioapics) 1380 apic_io_max = 0; 1381 do { 1382 if (!bypass_cpus_and_ioapics && apic_io_max < MAX_IO_APIC) { 1383 if (ioapicp->io_flags & IOAPIC_FLAGS_EN) { 1384 apic_io_id[apic_io_max] = ioapicp->io_apicid; 1385 apic_io_ver[apic_io_max] = ioapicp->io_version; 1386 /*LINTED: pointer cast may result in improper alignment */ 1387 apicioadr[apic_io_max] = 1388 (int32_t *)psm_map_phys( 1389 (uint32_t)ioapicp->io_apic_addr, 1390 APIC_IO_MEMLEN, PROT_READ | PROT_WRITE); 1391 1392 if (!apicioadr[apic_io_max]) 1393 return (PSM_FAILURE); 1394 1395 ioapic = apicioadr[apic_io_max]; 1396 ioapic[APIC_IO_REG] = APIC_ID_CMD; 1397 id = ioapic[APIC_IO_DATA]; 1398 hid = (uchar_t)(((uint_t)id) >> 24); 1399 1400 if (hid != apic_io_id[apic_io_max]) { 1401 if (apic_io_id[apic_io_max] == 0) 1402 apic_io_id[apic_io_max] = hid; 1403 else { 1404 /* 1405 * set ioapic id to whatever 1406 * reported by MPS 1407 * 1408 * may not need to set index 1409 * again ??? 
1410 * take it out and try 1411 */ 1412 1413 id = ((int32_t) 1414 apic_io_id[apic_io_max]) << 1415 24; 1416 1417 ioapic[APIC_IO_REG] = 1418 APIC_ID_CMD; 1419 1420 ioapic[APIC_IO_DATA] = id; 1421 1422 } 1423 } 1424 apic_io_max++; 1425 } 1426 } 1427 ioapicp++; 1428 } while (ioapicp->io_entry == APIC_IO_ENTRY); 1429 1430 apic_io_intrp = (struct apic_io_intr *)ioapicp; 1431 1432 intrp = apic_io_intrp; 1433 while (intrp->intr_entry == APIC_IO_INTR_ENTRY) { 1434 if ((intrp->intr_irq > APIC_MAX_ISA_IRQ) || 1435 (apic_find_bus(intrp->intr_busid) == BUS_PCI)) { 1436 apic_irq_translate = 1; 1437 break; 1438 } 1439 intrp++; 1440 } 1441 1442 return (PSM_SUCCESS); 1443 } 1444 1445 boolean_t 1446 apic_cpu_in_range(int cpu) 1447 { 1448 return ((cpu & ~IRQ_USER_BOUND) < apic_nproc); 1449 } 1450 1451 static struct apic_mpfps_hdr * 1452 apic_find_fps_sig(caddr_t cptr, int len) 1453 { 1454 int i; 1455 1456 /* Look for the pattern "_MP_" */ 1457 for (i = 0; i < len; i += 16) { 1458 if ((*(cptr+i) == '_') && 1459 (*(cptr+i+1) == 'M') && 1460 (*(cptr+i+2) == 'P') && 1461 (*(cptr+i+3) == '_')) 1462 /*LINTED: pointer cast may result in improper alignment */ 1463 return ((struct apic_mpfps_hdr *)(cptr + i)); 1464 } 1465 return (NULL); 1466 } 1467 1468 static int 1469 apic_checksum(caddr_t bptr, int len) 1470 { 1471 int i; 1472 uchar_t cksum; 1473 1474 cksum = 0; 1475 for (i = 0; i < len; i++) 1476 cksum += *bptr++; 1477 return ((int)cksum); 1478 } 1479 1480 1481 /* 1482 * Initialise vector->ipl and ipl->pri arrays. level_intr and irqtable 1483 * are also set to NULL. vector->irq is set to a value which cannot map 1484 * to a real irq to show that it is free. 
1485 */ 1486 void 1487 apic_init() 1488 { 1489 int i; 1490 int *iptr; 1491 1492 int j = 1; 1493 apic_ipltopri[0] = APIC_VECTOR_PER_IPL; /* leave 0 for idle */ 1494 for (i = 0; i < (APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL); i++) { 1495 if ((i < ((APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL) - 1)) && 1496 (apic_vectortoipl[i + 1] == apic_vectortoipl[i])) 1497 /* get to highest vector at the same ipl */ 1498 continue; 1499 for (; j <= apic_vectortoipl[i]; j++) { 1500 apic_ipltopri[j] = (i << APIC_IPL_SHIFT) + 1501 APIC_BASE_VECT; 1502 } 1503 } 1504 for (; j < MAXIPL + 1; j++) 1505 /* fill up any empty ipltopri slots */ 1506 apic_ipltopri[j] = (i << APIC_IPL_SHIFT) + APIC_BASE_VECT; 1507 1508 /* cpu 0 is always up */ 1509 apic_cpus[0].aci_status = APIC_CPU_ONLINE | APIC_CPU_INTR_ENABLE; 1510 1511 iptr = (int *)&apic_irq_table[0]; 1512 for (i = 0; i <= APIC_MAX_VECTOR; i++) { 1513 apic_level_intr[i] = 0; 1514 *iptr++ = NULL; 1515 apic_vector_to_irq[i] = APIC_RESV_IRQ; 1516 apic_reprogram_info[i].valid = 0; 1517 apic_reprogram_info[i].bindcpu = 0; 1518 apic_reprogram_info[i].timeouts = 0; 1519 } 1520 1521 /* 1522 * Allocate a dummy irq table entry for the reserved entry. 1523 * This takes care of the race between removing an irq and 1524 * clock detecting a CPU in that irq during interrupt load 1525 * sampling. 1526 */ 1527 apic_irq_table[APIC_RESV_IRQ] = 1528 kmem_zalloc(sizeof (apic_irq_t), KM_NOSLEEP); 1529 1530 mutex_init(&airq_mutex, NULL, MUTEX_DEFAULT, NULL); 1531 mutex_init(&apic_reprogram_timeout_mutex, NULL, MUTEX_DEFAULT, NULL); 1532 #if defined(__amd64) 1533 /* 1534 * Make cpu-specific interrupt info point to cr8pri vector 1535 */ 1536 for (i = 0; i <= MAXIPL; i++) 1537 apic_cr8pri[i] = apic_ipltopri[i] >> APIC_IPL_SHIFT; 1538 CPU->cpu_pri_data = apic_cr8pri; 1539 intpri_use_cr8 = 1; 1540 #endif /* __amd64 */ 1541 } 1542 1543 /* 1544 * handler for APIC Error interrupt. 
Just print a warning and continue 1545 */ 1546 static int 1547 apic_error_intr() 1548 { 1549 uint_t error0, error1, error; 1550 uint_t i; 1551 1552 /* 1553 * We need to write before read as per 7.4.17 of system prog manual. 1554 * We do both and or the results to be safe 1555 */ 1556 error0 = apicadr[APIC_ERROR_STATUS]; 1557 apicadr[APIC_ERROR_STATUS] = 0; 1558 error1 = apicadr[APIC_ERROR_STATUS]; 1559 error = error0 | error1; 1560 1561 /* 1562 * Clear the APIC error status (do this on all cpus that enter here) 1563 * (two writes are required due to the semantics of accessing the 1564 * error status register.) 1565 */ 1566 apicadr[APIC_ERROR_STATUS] = 0; 1567 apicadr[APIC_ERROR_STATUS] = 0; 1568 1569 /* 1570 * Prevent more than 1 CPU from handling error interrupt causing 1571 * double printing (interleave of characters from multiple 1572 * CPU's when using prom_printf) 1573 */ 1574 if (lock_try(&apic_error_lock) == 0) 1575 return (error ? DDI_INTR_CLAIMED : DDI_INTR_UNCLAIMED); 1576 if (error) { 1577 #if DEBUG 1578 if (apic_debug) 1579 debug_enter("pcplusmp: APIC Error interrupt received"); 1580 #endif /* DEBUG */ 1581 if (apic_panic_on_apic_error) 1582 cmn_err(CE_PANIC, 1583 "APIC Error interrupt on CPU %d. Status = %x\n", 1584 psm_get_cpu_id(), error); 1585 else { 1586 if ((error & ~APIC_CS_ERRORS) == 0) { 1587 /* cksum error only */ 1588 apic_error |= APIC_ERR_APIC_ERROR; 1589 apic_apic_error |= error; 1590 apic_num_apic_errors++; 1591 apic_num_cksum_errors++; 1592 } else { 1593 /* 1594 * prom_printf is the best shot we have of 1595 * something which is problem free from 1596 * high level/NMI type of interrupts 1597 */ 1598 prom_printf("APIC Error interrupt on CPU %d. 
" 1599 "Status 0 = %x, Status 1 = %x\n", 1600 psm_get_cpu_id(), error0, error1); 1601 apic_error |= APIC_ERR_APIC_ERROR; 1602 apic_apic_error |= error; 1603 apic_num_apic_errors++; 1604 for (i = 0; i < apic_error_display_delay; i++) { 1605 tenmicrosec(); 1606 } 1607 /* 1608 * provide more delay next time limited to 1609 * roughly 1 clock tick time 1610 */ 1611 if (apic_error_display_delay < 500) 1612 apic_error_display_delay *= 2; 1613 } 1614 } 1615 lock_clear(&apic_error_lock); 1616 return (DDI_INTR_CLAIMED); 1617 } else { 1618 lock_clear(&apic_error_lock); 1619 return (DDI_INTR_UNCLAIMED); 1620 } 1621 /* NOTREACHED */ 1622 } 1623 1624 /* 1625 * Turn off the mask bit in the performance counter Local Vector Table entry. 1626 */ 1627 static void 1628 apic_cpcovf_mask_clear(void) 1629 { 1630 apicadr[APIC_PCINT_VECT] &= ~APIC_LVT_MASK; 1631 } 1632 1633 static void 1634 apic_init_intr() 1635 { 1636 processorid_t cpun = psm_get_cpu_id(); 1637 1638 #if defined(__amd64) 1639 setcr8((ulong_t)(APIC_MASK_ALL >> APIC_IPL_SHIFT)); 1640 #else 1641 apicadr[APIC_TASK_REG] = APIC_MASK_ALL; 1642 #endif 1643 1644 if (apic_flat_model) 1645 apicadr[APIC_FORMAT_REG] = APIC_FLAT_MODEL; 1646 else 1647 apicadr[APIC_FORMAT_REG] = APIC_CLUSTER_MODEL; 1648 apicadr[APIC_DEST_REG] = AV_HIGH_ORDER >> cpun; 1649 1650 /* need to enable APIC before unmasking NMI */ 1651 apicadr[APIC_SPUR_INT_REG] = AV_UNIT_ENABLE | APIC_SPUR_INTR; 1652 1653 apicadr[APIC_LOCAL_TIMER] = AV_MASK; 1654 apicadr[APIC_INT_VECT0] = AV_MASK; /* local intr reg 0 */ 1655 apicadr[APIC_INT_VECT1] = AV_NMI; /* enable NMI */ 1656 1657 if (apic_cpus[cpun].aci_local_ver < APIC_INTEGRATED_VERS) 1658 return; 1659 1660 /* Enable performance counter overflow interrupt */ 1661 1662 if ((x86_feature & X86_MSR) != X86_MSR) 1663 apic_enable_cpcovf_intr = 0; 1664 if (apic_enable_cpcovf_intr) { 1665 if (apic_cpcovf_vect == 0) { 1666 int ipl = APIC_PCINT_IPL; 1667 int irq = apic_get_ipivect(ipl, -1); 1668 1669 ASSERT(irq != -1); 1670 
apic_cpcovf_vect = apic_irq_table[irq]->airq_vector; 1671 ASSERT(apic_cpcovf_vect); 1672 (void) add_avintr(NULL, ipl, 1673 (avfunc)kcpc_hw_overflow_intr, 1674 "apic pcint", irq, NULL, NULL, NULL, NULL); 1675 kcpc_hw_overflow_intr_installed = 1; 1676 kcpc_hw_enable_cpc_intr = apic_cpcovf_mask_clear; 1677 } 1678 apicadr[APIC_PCINT_VECT] = apic_cpcovf_vect; 1679 } 1680 1681 /* Enable error interrupt */ 1682 1683 if (apic_enable_error_intr) { 1684 if (apic_errvect == 0) { 1685 int ipl = 0xf; /* get highest priority intr */ 1686 int irq = apic_get_ipivect(ipl, -1); 1687 1688 ASSERT(irq != -1); 1689 apic_errvect = apic_irq_table[irq]->airq_vector; 1690 ASSERT(apic_errvect); 1691 /* 1692 * Not PSMI compliant, but we are going to merge 1693 * with ON anyway 1694 */ 1695 (void) add_avintr((void *)NULL, ipl, 1696 (avfunc)apic_error_intr, "apic error intr", 1697 irq, NULL, NULL, NULL, NULL); 1698 } 1699 apicadr[APIC_ERR_VECT] = apic_errvect; 1700 apicadr[APIC_ERROR_STATUS] = 0; 1701 apicadr[APIC_ERROR_STATUS] = 0; 1702 } 1703 } 1704 1705 static void 1706 apic_disable_local_apic() 1707 { 1708 apicadr[APIC_TASK_REG] = APIC_MASK_ALL; 1709 apicadr[APIC_LOCAL_TIMER] = AV_MASK; 1710 apicadr[APIC_INT_VECT0] = AV_MASK; /* local intr reg 0 */ 1711 apicadr[APIC_INT_VECT1] = AV_MASK; /* disable NMI */ 1712 apicadr[APIC_ERR_VECT] = AV_MASK; /* and error interrupt */ 1713 apicadr[APIC_PCINT_VECT] = AV_MASK; /* and perf counter intr */ 1714 apicadr[APIC_SPUR_INT_REG] = APIC_SPUR_INTR; 1715 } 1716 1717 static void 1718 apic_picinit(void) 1719 { 1720 int i, j; 1721 uint_t isr; 1722 volatile int32_t *ioapic; 1723 apic_irq_t *irqptr; 1724 struct intrspec ispec; 1725 1726 /* 1727 * On UniSys Model 6520, the BIOS leaves vector 0x20 isr 1728 * bit on without clearing it with EOI. Since softint 1729 * uses vector 0x20 to interrupt itself, so softint will 1730 * not work on this machine. In order to fix this problem 1731 * a check is made to verify all the isr bits are clear. 
1732 * If not, EOIs are issued to clear the bits. 1733 */ 1734 for (i = 7; i >= 1; i--) { 1735 if ((isr = apicadr[APIC_ISR_REG + (i * 4)]) != 0) 1736 for (j = 0; ((j < 32) && (isr != 0)); j++) 1737 if (isr & (1 << j)) { 1738 apicadr[APIC_EOI_REG] = 0; 1739 isr &= ~(1 << j); 1740 apic_error |= APIC_ERR_BOOT_EOI; 1741 } 1742 } 1743 1744 /* set a flag so we know we have run apic_picinit() */ 1745 apic_flag = 1; 1746 LOCK_INIT_CLEAR(&apic_gethrtime_lock); 1747 LOCK_INIT_CLEAR(&apic_ioapic_lock); 1748 LOCK_INIT_CLEAR(&apic_revector_lock); 1749 LOCK_INIT_CLEAR(&apic_ioapic_reprogram_lock); 1750 LOCK_INIT_CLEAR(&apic_error_lock); 1751 1752 picsetup(); /* initialise the 8259 */ 1753 1754 /* add nmi handler - least priority nmi handler */ 1755 LOCK_INIT_CLEAR(&apic_nmi_lock); 1756 1757 if (!psm_add_nmintr(0, (avfunc) apic_nmi_intr, 1758 "pcplusmp NMI handler", (caddr_t)NULL)) 1759 cmn_err(CE_WARN, "pcplusmp: Unable to add nmi handler"); 1760 1761 apic_init_intr(); 1762 1763 /* enable apic mode if imcr present */ 1764 if (apic_imcrp) { 1765 outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT); 1766 outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_APIC); 1767 } 1768 1769 /* mask interrupt vectors */ 1770 for (j = 0; j < apic_io_max; j++) { 1771 int intin_max; 1772 ioapic = apicioadr[j]; 1773 ioapic[APIC_IO_REG] = APIC_VERS_CMD; 1774 /* Bits 23-16 define the maximum redirection entries */ 1775 intin_max = (ioapic[APIC_IO_DATA] >> 16) & 0xff; 1776 for (i = 0; i < intin_max; i++) { 1777 ioapic[APIC_IO_REG] = APIC_RDT_CMD + 2 * i; 1778 ioapic[APIC_IO_DATA] = AV_MASK; 1779 } 1780 } 1781 1782 /* 1783 * Hack alert: deal with ACPI SCI interrupt chicken/egg here 1784 */ 1785 if (apic_sci_vect > 0) { 1786 /* 1787 * acpica has already done add_avintr(); we just 1788 * to finish the job by mimicing translate_irq() 1789 * 1790 * Fake up an intrspec and setup the tables 1791 */ 1792 ispec.intrspec_vec = apic_sci_vect; 1793 ispec.intrspec_pri = SCI_IPL; 1794 1795 if (apic_setup_irq_table(NULL, apic_sci_vect, 
NULL, 1796 &ispec, &apic_sci_flags, DDI_INTR_TYPE_FIXED) < 0) { 1797 cmn_err(CE_WARN, "!apic: SCI setup failed"); 1798 return; 1799 } 1800 irqptr = apic_irq_table[apic_sci_vect]; 1801 1802 /* Program I/O APIC */ 1803 (void) apic_setup_io_intr(irqptr, apic_sci_vect); 1804 1805 irqptr->airq_share++; 1806 } 1807 } 1808 1809 1810 static void 1811 apic_cpu_start(processorid_t cpun, caddr_t rm_code) 1812 { 1813 int loop_count; 1814 uint32_t vector; 1815 uint_t cpu_id, iflag; 1816 1817 cpu_id = apic_cpus[cpun].aci_local_id; 1818 1819 apic_cmos_ssb_set = 1; 1820 1821 /* 1822 * Interrupts on BSP cpu will be disabled during these startup 1823 * steps in order to avoid unwanted side effects from 1824 * executing interrupt handlers on a problematic BIOS. 1825 */ 1826 1827 iflag = intr_clear(); 1828 outb(CMOS_ADDR, SSB); 1829 outb(CMOS_DATA, BIOS_SHUTDOWN); 1830 1831 while (get_apic_cmd1() & AV_PENDING) 1832 apic_ret(); 1833 1834 /* for integrated - make sure there is one INIT IPI in buffer */ 1835 /* for external - it will wake up the cpu */ 1836 apicadr[APIC_INT_CMD2] = cpu_id << APIC_ICR_ID_BIT_OFFSET; 1837 apicadr[APIC_INT_CMD1] = AV_ASSERT | AV_RESET; 1838 1839 /* If only 1 CPU is installed, PENDING bit will not go low */ 1840 for (loop_count = 0x1000; loop_count; loop_count--) 1841 if (get_apic_cmd1() & AV_PENDING) 1842 apic_ret(); 1843 else 1844 break; 1845 1846 apicadr[APIC_INT_CMD2] = cpu_id << APIC_ICR_ID_BIT_OFFSET; 1847 apicadr[APIC_INT_CMD1] = AV_DEASSERT | AV_RESET; 1848 1849 drv_usecwait(20000); /* 20 milli sec */ 1850 1851 if (apic_cpus[cpun].aci_local_ver >= APIC_INTEGRATED_VERS) { 1852 /* integrated apic */ 1853 1854 rm_code = (caddr_t)(uintptr_t)rm_platter_pa; 1855 vector = (rm_platter_pa >> MMU_PAGESHIFT) & 1856 (APIC_VECTOR_MASK | APIC_IPL_MASK); 1857 1858 /* to offset the INIT IPI queue up in the buffer */ 1859 apicadr[APIC_INT_CMD2] = cpu_id << APIC_ICR_ID_BIT_OFFSET; 1860 apicadr[APIC_INT_CMD1] = vector | AV_STARTUP; 1861 1862 drv_usecwait(200); /* 20 
micro sec */ 1863 1864 apicadr[APIC_INT_CMD2] = cpu_id << APIC_ICR_ID_BIT_OFFSET; 1865 apicadr[APIC_INT_CMD1] = vector | AV_STARTUP; 1866 1867 drv_usecwait(200); /* 20 micro sec */ 1868 } 1869 intr_restore(iflag); 1870 } 1871 1872 1873 #ifdef DEBUG 1874 int apic_break_on_cpu = 9; 1875 int apic_stretch_interrupts = 0; 1876 int apic_stretch_ISR = 1 << 3; /* IPL of 3 matches nothing now */ 1877 1878 void 1879 apic_break() 1880 { 1881 } 1882 #endif /* DEBUG */ 1883 1884 /* 1885 * platform_intr_enter 1886 * 1887 * Called at the beginning of the interrupt service routine to 1888 * mask all level equal to and below the interrupt priority 1889 * of the interrupting vector. An EOI should be given to 1890 * the interrupt controller to enable other HW interrupts. 1891 * 1892 * Return -1 for spurious interrupts 1893 * 1894 */ 1895 /*ARGSUSED*/ 1896 static int 1897 apic_intr_enter(int ipl, int *vectorp) 1898 { 1899 uchar_t vector; 1900 int nipl; 1901 int irq, iflag; 1902 apic_cpus_info_t *cpu_infop; 1903 1904 /* 1905 * The real vector programmed in APIC is *vectorp + 0x20 1906 * But, cmnint code subtracts 0x20 before pushing it. 1907 * Hence APIC_BASE_VECT is 0x20. 
1908 */ 1909 1910 vector = (uchar_t)*vectorp; 1911 1912 /* if interrupted by the clock, increment apic_nsec_since_boot */ 1913 if (vector == apic_clkvect) { 1914 if (!apic_oneshot) { 1915 /* NOTE: this is not MT aware */ 1916 apic_hrtime_stamp++; 1917 apic_nsec_since_boot += apic_nsec_per_intr; 1918 apic_hrtime_stamp++; 1919 last_count_read = apic_hertz_count; 1920 apic_redistribute_compute(); 1921 } 1922 1923 /* We will avoid all the book keeping overhead for clock */ 1924 nipl = apic_vectortoipl[vector >> APIC_IPL_SHIFT]; 1925 #if defined(__amd64) 1926 setcr8((ulong_t)apic_cr8pri[nipl]); 1927 #else 1928 apicadr[APIC_TASK_REG] = apic_ipltopri[nipl]; 1929 #endif 1930 *vectorp = apic_vector_to_irq[vector + APIC_BASE_VECT]; 1931 apicadr[APIC_EOI_REG] = 0; 1932 return (nipl); 1933 } 1934 1935 cpu_infop = &apic_cpus[psm_get_cpu_id()]; 1936 1937 if (vector == (APIC_SPUR_INTR - APIC_BASE_VECT)) { 1938 cpu_infop->aci_spur_cnt++; 1939 return (APIC_INT_SPURIOUS); 1940 } 1941 1942 /* Check if the vector we got is really what we need */ 1943 if (apic_revector_pending) { 1944 /* 1945 * Disable interrupts for the duration of 1946 * the vector translation to prevent a self-race for 1947 * the apic_revector_lock. This cannot be done 1948 * in apic_xlate_vector because it is recursive and 1949 * we want the vector translation to be atomic with 1950 * respect to other (higher-priority) interrupts. 
1951 */ 1952 iflag = intr_clear(); 1953 vector = apic_xlate_vector(vector + APIC_BASE_VECT) - 1954 APIC_BASE_VECT; 1955 intr_restore(iflag); 1956 } 1957 1958 nipl = apic_vectortoipl[vector >> APIC_IPL_SHIFT]; 1959 *vectorp = irq = apic_vector_to_irq[vector + APIC_BASE_VECT]; 1960 1961 #if defined(__amd64) 1962 setcr8((ulong_t)apic_cr8pri[nipl]); 1963 #else 1964 apicadr[APIC_TASK_REG] = apic_ipltopri[nipl]; 1965 #endif 1966 1967 cpu_infop->aci_current[nipl] = (uchar_t)irq; 1968 cpu_infop->aci_curipl = (uchar_t)nipl; 1969 cpu_infop->aci_ISR_in_progress |= 1 << nipl; 1970 1971 /* 1972 * apic_level_intr could have been assimilated into the irq struct. 1973 * but, having it as a character array is more efficient in terms of 1974 * cache usage. So, we leave it as is. 1975 */ 1976 if (!apic_level_intr[irq]) 1977 apicadr[APIC_EOI_REG] = 0; 1978 1979 #ifdef DEBUG 1980 APIC_DEBUG_BUF_PUT(vector); 1981 APIC_DEBUG_BUF_PUT(irq); 1982 APIC_DEBUG_BUF_PUT(nipl); 1983 APIC_DEBUG_BUF_PUT(psm_get_cpu_id()); 1984 if ((apic_stretch_interrupts) && (apic_stretch_ISR & (1 << nipl))) 1985 drv_usecwait(apic_stretch_interrupts); 1986 1987 if (apic_break_on_cpu == psm_get_cpu_id()) 1988 apic_break(); 1989 #endif /* DEBUG */ 1990 return (nipl); 1991 } 1992 1993 static void 1994 apic_intr_exit(int prev_ipl, int irq) 1995 { 1996 apic_cpus_info_t *cpu_infop; 1997 1998 #if defined(__amd64) 1999 setcr8((ulong_t)apic_cr8pri[prev_ipl]); 2000 #else 2001 apicadr[APIC_TASK_REG] = apic_ipltopri[prev_ipl]; 2002 #endif 2003 2004 cpu_infop = &apic_cpus[psm_get_cpu_id()]; 2005 if (apic_level_intr[irq]) 2006 apicadr[APIC_EOI_REG] = 0; 2007 2008 cpu_infop->aci_curipl = (uchar_t)prev_ipl; 2009 /* ISR above current pri could not be in progress */ 2010 cpu_infop->aci_ISR_in_progress &= (2 << prev_ipl) - 1; 2011 } 2012 2013 /* 2014 * Mask all interrupts below or equal to the given IPL 2015 */ 2016 static void 2017 apic_setspl(int ipl) 2018 { 2019 2020 #if defined(__amd64) 2021 setcr8((ulong_t)apic_cr8pri[ipl]); 
2022 #else 2023 apicadr[APIC_TASK_REG] = apic_ipltopri[ipl]; 2024 #endif 2025 2026 /* interrupts at ipl above this cannot be in progress */ 2027 apic_cpus[psm_get_cpu_id()].aci_ISR_in_progress &= (2 << ipl) - 1; 2028 /* 2029 * this is a patch fix for the ALR QSMP P5 machine, so that interrupts 2030 * have enough time to come in before the priority is raised again 2031 * during the idle() loop. 2032 */ 2033 if (apic_setspl_delay) 2034 (void) get_apic_pri(); 2035 } 2036 2037 /* 2038 * trigger a software interrupt at the given IPL 2039 */ 2040 static void 2041 apic_set_softintr(int ipl) 2042 { 2043 int vector; 2044 uint_t flag; 2045 2046 vector = apic_resv_vector[ipl]; 2047 2048 flag = intr_clear(); 2049 2050 while (get_apic_cmd1() & AV_PENDING) 2051 apic_ret(); 2052 2053 /* generate interrupt at vector on itself only */ 2054 apicadr[APIC_INT_CMD1] = AV_SH_SELF | vector; 2055 2056 intr_restore(flag); 2057 } 2058 2059 /* 2060 * generates an interprocessor interrupt to another CPU 2061 */ 2062 static void 2063 apic_send_ipi(int cpun, int ipl) 2064 { 2065 int vector; 2066 uint_t flag; 2067 2068 vector = apic_resv_vector[ipl]; 2069 2070 flag = intr_clear(); 2071 2072 while (get_apic_cmd1() & AV_PENDING) 2073 apic_ret(); 2074 2075 apicadr[APIC_INT_CMD2] = 2076 apic_cpus[cpun].aci_local_id << APIC_ICR_ID_BIT_OFFSET; 2077 apicadr[APIC_INT_CMD1] = vector; 2078 2079 intr_restore(flag); 2080 } 2081 2082 2083 /*ARGSUSED*/ 2084 static void 2085 apic_set_idlecpu(processorid_t cpun) 2086 { 2087 } 2088 2089 /*ARGSUSED*/ 2090 static void 2091 apic_unset_idlecpu(processorid_t cpun) 2092 { 2093 } 2094 2095 2096 static void 2097 apic_ret() 2098 { 2099 } 2100 2101 static int 2102 get_apic_cmd1() 2103 { 2104 return (apicadr[APIC_INT_CMD1]); 2105 } 2106 2107 static int 2108 get_apic_pri() 2109 { 2110 #if defined(__amd64) 2111 return ((int)getcr8()); 2112 #else 2113 return (apicadr[APIC_TASK_REG]); 2114 #endif 2115 } 2116 2117 /* 2118 * If apic_coarse_time == 1, then apic_gettime() is used 
instead of 2119 * apic_gethrtime(). This is used for performance instead of accuracy. 2120 */ 2121 2122 static hrtime_t 2123 apic_gettime() 2124 { 2125 int old_hrtime_stamp; 2126 hrtime_t temp; 2127 2128 /* 2129 * In one-shot mode, we do not keep time, so if anyone 2130 * calls psm_gettime() directly, we vector over to 2131 * gethrtime(). 2132 * one-shot mode MUST NOT be enabled if this psm is the source of 2133 * hrtime. 2134 */ 2135 2136 if (apic_oneshot) 2137 return (gethrtime()); 2138 2139 2140 gettime_again: 2141 while ((old_hrtime_stamp = apic_hrtime_stamp) & 1) 2142 apic_ret(); 2143 2144 temp = apic_nsec_since_boot; 2145 2146 if (apic_hrtime_stamp != old_hrtime_stamp) { /* got an interrupt */ 2147 goto gettime_again; 2148 } 2149 return (temp); 2150 } 2151 2152 /* 2153 * Here we return the number of nanoseconds since booting. Note every 2154 * clock interrupt increments apic_nsec_since_boot by the appropriate 2155 * amount. 2156 */ 2157 static hrtime_t 2158 apic_gethrtime() 2159 { 2160 int curr_timeval, countval, elapsed_ticks, oflags; 2161 int old_hrtime_stamp, status; 2162 hrtime_t temp; 2163 uchar_t cpun; 2164 2165 2166 /* 2167 * In one-shot mode, we do not keep time, so if anyone 2168 * calls psm_gethrtime() directly, we vector over to 2169 * gethrtime(). 2170 * one-shot mode MUST NOT be enabled if this psm is the source of 2171 * hrtime. 2172 */ 2173 2174 if (apic_oneshot) 2175 return (gethrtime()); 2176 2177 oflags = intr_clear(); /* prevent migration */ 2178 2179 cpun = (uchar_t)((uint_t)apicadr[APIC_LID_REG] >> APIC_ID_BIT_OFFSET); 2180 2181 lock_set(&apic_gethrtime_lock); 2182 2183 gethrtime_again: 2184 while ((old_hrtime_stamp = apic_hrtime_stamp) & 1) 2185 apic_ret(); 2186 2187 /* 2188 * Check to see which CPU we are on. Note the time is kept on 2189 * the local APIC of CPU 0. If on CPU 0, simply read the current 2190 * counter. If on another CPU, issue a remote read command to CPU 0. 
2191 */ 2192 if (cpun == apic_cpus[0].aci_local_id) { 2193 countval = apicadr[APIC_CURR_COUNT]; 2194 } else { 2195 while (get_apic_cmd1() & AV_PENDING) 2196 apic_ret(); 2197 2198 apicadr[APIC_INT_CMD2] = 2199 apic_cpus[0].aci_local_id << APIC_ICR_ID_BIT_OFFSET; 2200 apicadr[APIC_INT_CMD1] = APIC_CURR_ADD|AV_REMOTE; 2201 2202 while ((status = get_apic_cmd1()) & AV_READ_PENDING) 2203 apic_ret(); 2204 2205 if (status & AV_REMOTE_STATUS) /* 1 = valid */ 2206 countval = apicadr[APIC_REMOTE_READ]; 2207 else { /* 0 = invalid */ 2208 apic_remote_hrterr++; 2209 /* 2210 * return last hrtime right now, will need more 2211 * testing if change to retry 2212 */ 2213 temp = apic_last_hrtime; 2214 2215 lock_clear(&apic_gethrtime_lock); 2216 2217 intr_restore(oflags); 2218 2219 return (temp); 2220 } 2221 } 2222 if (countval > last_count_read) 2223 countval = 0; 2224 else 2225 last_count_read = countval; 2226 2227 elapsed_ticks = apic_hertz_count - countval; 2228 2229 curr_timeval = elapsed_ticks * apic_nsec_per_tick; 2230 temp = apic_nsec_since_boot + curr_timeval; 2231 2232 if (apic_hrtime_stamp != old_hrtime_stamp) { /* got an interrupt */ 2233 /* we might have clobbered last_count_read. 
Restore it */ 2234 last_count_read = apic_hertz_count; 2235 goto gethrtime_again; 2236 } 2237 2238 if (temp < apic_last_hrtime) { 2239 /* return last hrtime if error occurs */ 2240 apic_hrtime_error++; 2241 temp = apic_last_hrtime; 2242 } 2243 else 2244 apic_last_hrtime = temp; 2245 2246 lock_clear(&apic_gethrtime_lock); 2247 intr_restore(oflags); 2248 2249 return (temp); 2250 } 2251 2252 /* apic NMI handler */ 2253 /*ARGSUSED*/ 2254 static void 2255 apic_nmi_intr(caddr_t arg) 2256 { 2257 if (apic_shutdown_processors) { 2258 apic_disable_local_apic(); 2259 return; 2260 } 2261 2262 if (lock_try(&apic_nmi_lock)) { 2263 if (apic_kmdb_on_nmi) { 2264 if (psm_debugger() == 0) { 2265 cmn_err(CE_PANIC, 2266 "NMI detected, kmdb is not available."); 2267 } else { 2268 debug_enter("\nNMI detected, entering kmdb.\n"); 2269 } 2270 } else { 2271 if (apic_panic_on_nmi) { 2272 /* Keep panic from entering kmdb. */ 2273 nopanicdebug = 1; 2274 cmn_err(CE_PANIC, "pcplusmp: NMI received"); 2275 } else { 2276 /* 2277 * prom_printf is the best shot we have 2278 * of something which is problem free from 2279 * high level/NMI type of interrupts 2280 */ 2281 prom_printf("pcplusmp: NMI received\n"); 2282 apic_error |= APIC_ERR_NMI; 2283 apic_num_nmis++; 2284 } 2285 } 2286 lock_clear(&apic_nmi_lock); 2287 } 2288 } 2289 2290 /* 2291 * Add mask bits to disable interrupt vector from happening 2292 * at or above IPL. In addition, it should remove mask bits 2293 * to enable interrupt vectors below the given IPL. 2294 * 2295 * Both add and delspl are complicated by the fact that different interrupts 2296 * may share IRQs. This can happen in two ways. 2297 * 1. The same H/W line is shared by more than 1 device 2298 * 1a. with interrupts at different IPLs 2299 * 1b. with interrupts at same IPL 2300 * 2. We ran out of vectors at a given IPL and started sharing vectors. 
2301 * 1b and 2 should be handled gracefully, except for the fact some ISRs 2302 * will get called often when no interrupt is pending for the device. 2303 * For 1a, we just hope that the machine blows up with the person who 2304 * set it up that way!. In the meantime, we handle it at the higher IPL. 2305 */ 2306 /*ARGSUSED*/ 2307 static int 2308 apic_addspl(int irqno, int ipl, int min_ipl, int max_ipl) 2309 { 2310 uchar_t vector; 2311 int iflag; 2312 apic_irq_t *irqptr, *irqheadptr; 2313 int irqindex; 2314 2315 ASSERT(max_ipl <= UCHAR_MAX); 2316 irqindex = IRQINDEX(irqno); 2317 2318 if ((irqindex == -1) || (!apic_irq_table[irqindex])) 2319 return (PSM_FAILURE); 2320 2321 irqptr = irqheadptr = apic_irq_table[irqindex]; 2322 2323 DDI_INTR_IMPLDBG((CE_CONT, "apic_addspl: dip=0x%p type=%d irqno=0x%x " 2324 "vector=0x%x\n", (void *)irqptr->airq_dip, 2325 irqptr->airq_mps_intr_index, irqno, irqptr->airq_vector)); 2326 2327 while (irqptr) { 2328 if (VIRTIRQ(irqindex, irqptr->airq_share_id) == irqno) 2329 break; 2330 irqptr = irqptr->airq_next; 2331 } 2332 irqptr->airq_share++; 2333 2334 /* return if it is not hardware interrupt */ 2335 if (irqptr->airq_mps_intr_index == RESERVE_INDEX) 2336 return (PSM_SUCCESS); 2337 2338 /* Or if there are more interupts at a higher IPL */ 2339 if (ipl != max_ipl) 2340 return (PSM_SUCCESS); 2341 2342 /* 2343 * if apic_picinit() has not been called yet, just return. 2344 * At the end of apic_picinit(), we will call setup_io_intr(). 2345 */ 2346 2347 if (!apic_flag) 2348 return (PSM_SUCCESS); 2349 2350 iflag = intr_clear(); 2351 2352 /* 2353 * Upgrade vector if max_ipl is not earlier ipl. If we cannot allocate, 2354 * return failure. Not very elegant, but then we hope the 2355 * machine will blow up with ... 
2356 */ 2357 if (irqptr->airq_ipl != max_ipl) { 2358 vector = apic_allocate_vector(max_ipl, irqindex, 1); 2359 if (vector == 0) { 2360 intr_restore(iflag); 2361 irqptr->airq_share--; 2362 return (PSM_FAILURE); 2363 } 2364 irqptr = irqheadptr; 2365 apic_mark_vector(irqptr->airq_vector, vector); 2366 while (irqptr) { 2367 irqptr->airq_vector = vector; 2368 irqptr->airq_ipl = (uchar_t)max_ipl; 2369 /* 2370 * reprogram irq being added and every one else 2371 * who is not in the UNINIT state 2372 */ 2373 if ((VIRTIRQ(irqindex, irqptr->airq_share_id) == 2374 irqno) || (irqptr->airq_temp_cpu != IRQ_UNINIT)) { 2375 apic_record_rdt_entry(irqptr, irqindex); 2376 (void) apic_setup_io_intr(irqptr, irqindex); 2377 } 2378 irqptr = irqptr->airq_next; 2379 } 2380 intr_restore(iflag); 2381 return (PSM_SUCCESS); 2382 } 2383 2384 ASSERT(irqptr); 2385 (void) apic_setup_io_intr(irqptr, irqindex); 2386 intr_restore(iflag); 2387 return (PSM_SUCCESS); 2388 } 2389 2390 /* 2391 * Recompute mask bits for the given interrupt vector. 2392 * If there is no interrupt servicing routine for this 2393 * vector, this function should disable interrupt vector 2394 * from happening at all IPLs. If there are still 2395 * handlers using the given vector, this function should 2396 * disable the given vector from happening below the lowest 2397 * IPL of the remaining hadlers. 
2398 */ 2399 /*ARGSUSED*/ 2400 static int 2401 apic_delspl(int irqno, int ipl, int min_ipl, int max_ipl) 2402 { 2403 uchar_t vector, bind_cpu; 2404 int iflag, intin, irqindex; 2405 volatile int32_t *ioapic; 2406 apic_irq_t *irqptr, *irqheadptr; 2407 2408 irqindex = IRQINDEX(irqno); 2409 irqptr = irqheadptr = apic_irq_table[irqindex]; 2410 2411 DDI_INTR_IMPLDBG((CE_CONT, "apic_delspl: dip=0x%p type=%d irqno=0x%x " 2412 "vector=0x%x\n", (void *)irqptr->airq_dip, 2413 irqptr->airq_mps_intr_index, irqno, irqptr->airq_vector)); 2414 2415 while (irqptr) { 2416 if (VIRTIRQ(irqindex, irqptr->airq_share_id) == irqno) 2417 break; 2418 irqptr = irqptr->airq_next; 2419 } 2420 ASSERT(irqptr); 2421 2422 irqptr->airq_share--; 2423 2424 if (ipl < max_ipl) 2425 return (PSM_SUCCESS); 2426 2427 /* return if it is not hardware interrupt */ 2428 if (irqptr->airq_mps_intr_index == RESERVE_INDEX) 2429 return (PSM_SUCCESS); 2430 2431 if (!apic_flag) { 2432 /* 2433 * Clear irq_struct. If two devices shared an intpt 2434 * line & 1 unloaded before picinit, we are hosed. But, then 2435 * we hope the machine will ... 2436 */ 2437 irqptr->airq_mps_intr_index = FREE_INDEX; 2438 irqptr->airq_temp_cpu = IRQ_UNINIT; 2439 apic_free_vector(irqptr->airq_vector); 2440 return (PSM_SUCCESS); 2441 } 2442 /* 2443 * Downgrade vector to new max_ipl if needed.If we cannot allocate, 2444 * use old IPL. Not very elegant, but then we hope ... 
2445 */ 2446 if ((irqptr->airq_ipl != max_ipl) && (max_ipl != PSM_INVALID_IPL)) { 2447 apic_irq_t *irqp; 2448 if (vector = apic_allocate_vector(max_ipl, irqno, 1)) { 2449 apic_mark_vector(irqheadptr->airq_vector, vector); 2450 irqp = irqheadptr; 2451 while (irqp) { 2452 irqp->airq_vector = vector; 2453 irqp->airq_ipl = (uchar_t)max_ipl; 2454 if (irqp->airq_temp_cpu != IRQ_UNINIT) { 2455 apic_record_rdt_entry(irqp, irqindex); 2456 (void) apic_setup_io_intr(irqp, 2457 irqindex); 2458 } 2459 irqp = irqp->airq_next; 2460 } 2461 } 2462 } 2463 2464 if (irqptr->airq_share) 2465 return (PSM_SUCCESS); 2466 2467 ioapic = apicioadr[irqptr->airq_ioapicindex]; 2468 intin = irqptr->airq_intin_no; 2469 iflag = intr_clear(); 2470 lock_set(&apic_ioapic_lock); 2471 ioapic[APIC_IO_REG] = APIC_RDT_CMD + 2 * intin; 2472 ioapic[APIC_IO_DATA] = AV_MASK; 2473 2474 /* Disable the MSI/X vector */ 2475 if (APIC_IS_MSI_OR_MSIX_INDEX(irqptr->airq_mps_intr_index)) { 2476 int type = (irqptr->airq_mps_intr_index == MSI_INDEX) ? 
2477 DDI_INTR_TYPE_MSI : DDI_INTR_TYPE_MSIX; 2478 2479 /* 2480 * Make sure we only disable on the last 2481 * of the multi-MSI support 2482 */ 2483 if (i_ddi_intr_get_current_nintrs(irqptr->airq_dip) == 1) { 2484 (void) pci_msi_unconfigure(irqptr->airq_dip, type, 2485 irqptr->airq_ioapicindex); 2486 2487 (void) pci_msi_disable_mode(irqptr->airq_dip, type, 2488 irqptr->airq_ioapicindex); 2489 } 2490 } 2491 2492 if (max_ipl == PSM_INVALID_IPL) { 2493 ASSERT(irqheadptr == irqptr); 2494 bind_cpu = irqptr->airq_temp_cpu; 2495 if (((uchar_t)bind_cpu != IRQ_UNBOUND) && 2496 ((uchar_t)bind_cpu != IRQ_UNINIT)) { 2497 ASSERT((bind_cpu & ~IRQ_USER_BOUND) < apic_nproc); 2498 if (bind_cpu & IRQ_USER_BOUND) { 2499 /* If hardbound, temp_cpu == cpu */ 2500 bind_cpu &= ~IRQ_USER_BOUND; 2501 apic_cpus[bind_cpu].aci_bound--; 2502 } else 2503 apic_cpus[bind_cpu].aci_temp_bound--; 2504 } 2505 lock_clear(&apic_ioapic_lock); 2506 intr_restore(iflag); 2507 irqptr->airq_temp_cpu = IRQ_UNINIT; 2508 irqptr->airq_mps_intr_index = FREE_INDEX; 2509 apic_free_vector(irqptr->airq_vector); 2510 return (PSM_SUCCESS); 2511 } 2512 lock_clear(&apic_ioapic_lock); 2513 intr_restore(iflag); 2514 2515 mutex_enter(&airq_mutex); 2516 if ((irqptr == apic_irq_table[irqindex])) { 2517 apic_irq_t *oldirqptr; 2518 /* Move valid irq entry to the head */ 2519 irqheadptr = oldirqptr = irqptr; 2520 irqptr = irqptr->airq_next; 2521 ASSERT(irqptr); 2522 while (irqptr) { 2523 if (irqptr->airq_mps_intr_index != FREE_INDEX) 2524 break; 2525 oldirqptr = irqptr; 2526 irqptr = irqptr->airq_next; 2527 } 2528 /* remove all invalid ones from the beginning */ 2529 apic_irq_table[irqindex] = irqptr; 2530 /* 2531 * and link them back after the head. 
The invalid ones 2532 * begin with irqheadptr and end at oldirqptr 2533 */ 2534 oldirqptr->airq_next = irqptr->airq_next; 2535 irqptr->airq_next = irqheadptr; 2536 } 2537 mutex_exit(&airq_mutex); 2538 2539 irqptr->airq_temp_cpu = IRQ_UNINIT; 2540 irqptr->airq_mps_intr_index = FREE_INDEX; 2541 return (PSM_SUCCESS); 2542 } 2543 2544 /* 2545 * Return HW interrupt number corresponding to the given IPL 2546 */ 2547 /*ARGSUSED*/ 2548 static int 2549 apic_softlvl_to_irq(int ipl) 2550 { 2551 /* 2552 * Do not use apic to trigger soft interrupt. 2553 * It will cause the system to hang when 2 hardware interrupts 2554 * at the same priority with the softint are already accepted 2555 * by the apic. Cause the AV_PENDING bit will not be cleared 2556 * until one of the hardware interrupt is eoi'ed. If we need 2557 * to send an ipi at this time, we will end up looping forever 2558 * to wait for the AV_PENDING bit to clear. 2559 */ 2560 return (PSM_SV_SOFTWARE); 2561 } 2562 2563 static int 2564 apic_post_cpu_start() 2565 { 2566 int i, cpun; 2567 apic_irq_t *irq_ptr; 2568 2569 apic_init_intr(); 2570 2571 /* 2572 * since some systems don't enable the internal cache on the non-boot 2573 * cpus, so we have to enable them here 2574 */ 2575 setcr0(getcr0() & ~(0x60000000)); 2576 2577 while (get_apic_cmd1() & AV_PENDING) 2578 apic_ret(); 2579 2580 cpun = psm_get_cpu_id(); 2581 apic_cpus[cpun].aci_status = APIC_CPU_ONLINE | APIC_CPU_INTR_ENABLE; 2582 2583 for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) { 2584 irq_ptr = apic_irq_table[i]; 2585 if ((irq_ptr == NULL) || 2586 ((irq_ptr->airq_cpu & ~IRQ_USER_BOUND) != cpun)) 2587 continue; 2588 2589 while (irq_ptr) { 2590 if (irq_ptr->airq_temp_cpu != IRQ_UNINIT) 2591 (void) apic_rebind(irq_ptr, cpun, 1, IMMEDIATE); 2592 irq_ptr = irq_ptr->airq_next; 2593 } 2594 } 2595 2596 return (PSM_SUCCESS); 2597 } 2598 2599 processorid_t 2600 apic_get_next_processorid(processorid_t cpu_id) 2601 { 2602 2603 int i; 2604 2605 if (cpu_id == -1) 
2606 return ((processorid_t)0); 2607 2608 for (i = cpu_id + 1; i < NCPU; i++) { 2609 if (apic_cpumask & (1 << i)) 2610 return (i); 2611 } 2612 2613 return ((processorid_t)-1); 2614 } 2615 2616 2617 /* 2618 * type == -1 indicates it is an internal request. Do not change 2619 * resv_vector for these requests 2620 */ 2621 static int 2622 apic_get_ipivect(int ipl, int type) 2623 { 2624 uchar_t vector; 2625 int irq; 2626 2627 if (irq = apic_allocate_irq(APIC_VECTOR(ipl))) { 2628 if (vector = apic_allocate_vector(ipl, irq, 1)) { 2629 apic_irq_table[irq]->airq_mps_intr_index = 2630 RESERVE_INDEX; 2631 apic_irq_table[irq]->airq_vector = vector; 2632 if (type != -1) { 2633 apic_resv_vector[ipl] = vector; 2634 } 2635 return (irq); 2636 } 2637 } 2638 apic_error |= APIC_ERR_GET_IPIVECT_FAIL; 2639 return (-1); /* shouldn't happen */ 2640 } 2641 2642 static int 2643 apic_getclkirq(int ipl) 2644 { 2645 int irq; 2646 2647 if ((irq = apic_get_ipivect(ipl, -1)) == -1) 2648 return (-1); 2649 /* 2650 * Note the vector in apic_clkvect for per clock handling. 2651 */ 2652 apic_clkvect = apic_irq_table[irq]->airq_vector - APIC_BASE_VECT; 2653 APIC_VERBOSE_IOAPIC((CE_NOTE, "get_clkirq: vector = %x\n", 2654 apic_clkvect)); 2655 return (irq); 2656 } 2657 2658 /* 2659 * Return the number of APIC clock ticks elapsed for 8245 to decrement 2660 * (APIC_TIME_COUNT + pit_ticks_adj) ticks. 
2661 */ 2662 static uint_t 2663 apic_calibrate(volatile uint32_t *addr, uint16_t *pit_ticks_adj) 2664 { 2665 uint8_t pit_tick_lo; 2666 uint16_t pit_tick, target_pit_tick; 2667 uint32_t start_apic_tick, end_apic_tick; 2668 int iflag; 2669 2670 addr += APIC_CURR_COUNT; 2671 2672 iflag = intr_clear(); 2673 2674 do { 2675 pit_tick_lo = inb(PITCTR0_PORT); 2676 pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo; 2677 } while (pit_tick < APIC_TIME_MIN || 2678 pit_tick_lo <= APIC_LB_MIN || pit_tick_lo >= APIC_LB_MAX); 2679 2680 /* 2681 * Wait for the 8254 to decrement by 5 ticks to ensure 2682 * we didn't start in the middle of a tick. 2683 * Compare with 0x10 for the wrap around case. 2684 */ 2685 target_pit_tick = pit_tick - 5; 2686 do { 2687 pit_tick_lo = inb(PITCTR0_PORT); 2688 pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo; 2689 } while (pit_tick > target_pit_tick || pit_tick_lo < 0x10); 2690 2691 start_apic_tick = *addr; 2692 2693 /* 2694 * Wait for the 8254 to decrement by 2695 * (APIC_TIME_COUNT + pit_ticks_adj) ticks 2696 */ 2697 target_pit_tick = pit_tick - APIC_TIME_COUNT; 2698 do { 2699 pit_tick_lo = inb(PITCTR0_PORT); 2700 pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo; 2701 } while (pit_tick > target_pit_tick || pit_tick_lo < 0x10); 2702 2703 end_apic_tick = *addr; 2704 2705 *pit_ticks_adj = target_pit_tick - pit_tick; 2706 2707 intr_restore(iflag); 2708 2709 return (start_apic_tick - end_apic_tick); 2710 } 2711 2712 /* 2713 * Initialise the APIC timer on the local APIC of CPU 0 to the desired 2714 * frequency. Note at this stage in the boot sequence, the boot processor 2715 * is the only active processor. 2716 * hertz value of 0 indicates a one-shot mode request. In this case 2717 * the function returns the resolution (in nanoseconds) for the hardware 2718 * timer interrupt. If one-shot mode capability is not available, 2719 * the return value will be 0. apic_enable_oneshot is a global switch 2720 * for disabling the functionality. 
2721 * A non-zero positive value for hertz indicates a periodic mode request. 2722 * In this case the hardware will be programmed to generate clock interrupts 2723 * at hertz frequency and returns the resolution of interrupts in 2724 * nanosecond. 2725 */ 2726 2727 static int 2728 apic_clkinit(int hertz) 2729 { 2730 2731 uint_t apic_ticks = 0; 2732 uint_t pit_time; 2733 int ret; 2734 uint16_t pit_ticks_adj; 2735 static int firsttime = 1; 2736 2737 if (firsttime) { 2738 /* first time calibrate */ 2739 2740 apicadr[APIC_DIVIDE_REG] = 0x0; 2741 apicadr[APIC_INIT_COUNT] = APIC_MAXVAL; 2742 2743 /* set periodic interrupt based on CLKIN */ 2744 apicadr[APIC_LOCAL_TIMER] = 2745 (apic_clkvect + APIC_BASE_VECT) | AV_TIME; 2746 tenmicrosec(); 2747 2748 apic_ticks = apic_calibrate(apicadr, &pit_ticks_adj); 2749 2750 apicadr[APIC_LOCAL_TIMER] = 2751 (apic_clkvect + APIC_BASE_VECT) | AV_MASK; 2752 /* 2753 * pit time is the amount of real time (in nanoseconds ) it took 2754 * the 8254 to decrement (APIC_TIME_COUNT + pit_ticks_adj) ticks 2755 */ 2756 pit_time = ((longlong_t)(APIC_TIME_COUNT + 2757 pit_ticks_adj) * NANOSEC) / PIT_HZ; 2758 2759 /* 2760 * Determine the number of nanoseconds per APIC clock tick 2761 * and then determine how many APIC ticks to interrupt at the 2762 * desired frequency 2763 */ 2764 apic_nsec_per_tick = pit_time / apic_ticks; 2765 if (apic_nsec_per_tick == 0) 2766 apic_nsec_per_tick = 1; 2767 2768 /* the interval timer initial count is 32 bit max */ 2769 apic_nsec_max = (hrtime_t)apic_nsec_per_tick * APIC_MAXVAL; 2770 firsttime = 0; 2771 } 2772 2773 if (hertz != 0) { 2774 /* periodic */ 2775 apic_nsec_per_intr = NANOSEC / hertz; 2776 apic_hertz_count = (longlong_t)apic_nsec_per_intr / 2777 apic_nsec_per_tick; 2778 apic_sample_factor_redistribution = hertz + 1; 2779 } 2780 2781 apic_int_busy_mark = (apic_int_busy_mark * 2782 apic_sample_factor_redistribution) / 100; 2783 apic_int_free_mark = (apic_int_free_mark * 2784 apic_sample_factor_redistribution) / 
100; 2785 apic_diff_for_redistribution = (apic_diff_for_redistribution * 2786 apic_sample_factor_redistribution) / 100; 2787 2788 if (hertz == 0) { 2789 /* requested one_shot */ 2790 if (!apic_oneshot_enable) 2791 return (0); 2792 apic_oneshot = 1; 2793 ret = (int)apic_nsec_per_tick; 2794 } else { 2795 /* program the local APIC to interrupt at the given frequency */ 2796 apicadr[APIC_INIT_COUNT] = apic_hertz_count; 2797 apicadr[APIC_LOCAL_TIMER] = 2798 (apic_clkvect + APIC_BASE_VECT) | AV_TIME; 2799 apic_oneshot = 0; 2800 ret = NANOSEC / hertz; 2801 } 2802 2803 return (ret); 2804 2805 } 2806 2807 /* 2808 * apic_preshutdown: 2809 * Called early in shutdown whilst we can still access filesystems to do 2810 * things like loading modules which will be required to complete shutdown 2811 * after filesystems are all unmounted. 2812 */ 2813 static void 2814 apic_preshutdown(int cmd, int fcn) 2815 { 2816 APIC_VERBOSE_POWEROFF(("apic_preshutdown(%d,%d); m=%d a=%d\n", 2817 cmd, fcn, apic_poweroff_method, apic_enable_acpi)); 2818 2819 if ((cmd != A_SHUTDOWN) || (fcn != AD_POWEROFF)) { 2820 return; 2821 } 2822 } 2823 2824 static void 2825 apic_shutdown(int cmd, int fcn) 2826 { 2827 int iflag, restarts, attempts; 2828 int i, j; 2829 volatile int32_t *ioapic; 2830 uchar_t byte; 2831 2832 /* Send NMI to all CPUs except self to do per processor shutdown */ 2833 iflag = intr_clear(); 2834 while (get_apic_cmd1() & AV_PENDING) 2835 apic_ret(); 2836 apic_shutdown_processors = 1; 2837 apicadr[APIC_INT_CMD1] = AV_NMI | AV_LEVEL | AV_SH_ALL_EXCSELF; 2838 2839 /* restore cmos shutdown byte before reboot */ 2840 if (apic_cmos_ssb_set) { 2841 outb(CMOS_ADDR, SSB); 2842 outb(CMOS_DATA, 0); 2843 } 2844 /* Disable the I/O APIC redirection entries */ 2845 for (j = 0; j < apic_io_max; j++) { 2846 int intin_max; 2847 ioapic = apicioadr[j]; 2848 ioapic[APIC_IO_REG] = APIC_VERS_CMD; 2849 /* Bits 23-16 define the maximum redirection entries */ 2850 intin_max = (ioapic[APIC_IO_DATA] >> 16) & 0xff; 
2851 for (i = 0; i < intin_max; i++) { 2852 ioapic[APIC_IO_REG] = APIC_RDT_CMD + 2 * i; 2853 ioapic[APIC_IO_DATA] = AV_MASK; 2854 } 2855 } 2856 2857 /* disable apic mode if imcr present */ 2858 if (apic_imcrp) { 2859 outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT); 2860 outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_PIC); 2861 } 2862 2863 apic_disable_local_apic(); 2864 2865 intr_restore(iflag); 2866 2867 if ((cmd != A_SHUTDOWN) || (fcn != AD_POWEROFF)) { 2868 return; 2869 } 2870 2871 switch (apic_poweroff_method) { 2872 case APIC_POWEROFF_VIA_RTC: 2873 2874 /* select the extended NVRAM bank in the RTC */ 2875 outb(CMOS_ADDR, RTC_REGA); 2876 byte = inb(CMOS_DATA); 2877 outb(CMOS_DATA, (byte | EXT_BANK)); 2878 2879 outb(CMOS_ADDR, PFR_REG); 2880 2881 /* for Predator must toggle the PAB bit */ 2882 byte = inb(CMOS_DATA); 2883 2884 /* 2885 * clear power active bar, wakeup alarm and 2886 * kickstart 2887 */ 2888 byte &= ~(PAB_CBIT | WF_FLAG | KS_FLAG); 2889 outb(CMOS_DATA, byte); 2890 2891 /* delay before next write */ 2892 drv_usecwait(1000); 2893 2894 /* for S40 the following would suffice */ 2895 byte = inb(CMOS_DATA); 2896 2897 /* power active bar control bit */ 2898 byte |= PAB_CBIT; 2899 outb(CMOS_DATA, byte); 2900 2901 break; 2902 2903 case APIC_POWEROFF_VIA_ASPEN_BMC: 2904 restarts = 0; 2905 restart_aspen_bmc: 2906 if (++restarts == 3) 2907 break; 2908 attempts = 0; 2909 do { 2910 byte = inb(MISMIC_FLAG_REGISTER); 2911 byte &= MISMIC_BUSY_MASK; 2912 if (byte != 0) { 2913 drv_usecwait(1000); 2914 if (attempts >= 3) 2915 goto restart_aspen_bmc; 2916 ++attempts; 2917 } 2918 } while (byte != 0); 2919 outb(MISMIC_CNTL_REGISTER, CC_SMS_GET_STATUS); 2920 byte = inb(MISMIC_FLAG_REGISTER); 2921 byte |= 0x1; 2922 outb(MISMIC_FLAG_REGISTER, byte); 2923 i = 0; 2924 for (; i < (sizeof (aspen_bmc)/sizeof (aspen_bmc[0])); 2925 i++) { 2926 attempts = 0; 2927 do { 2928 byte = inb(MISMIC_FLAG_REGISTER); 2929 byte &= MISMIC_BUSY_MASK; 2930 if (byte != 0) { 2931 drv_usecwait(1000); 2932 if 
(attempts >= 3) 2933 goto restart_aspen_bmc; 2934 ++attempts; 2935 } 2936 } while (byte != 0); 2937 outb(MISMIC_CNTL_REGISTER, aspen_bmc[i].cntl); 2938 outb(MISMIC_DATA_REGISTER, aspen_bmc[i].data); 2939 byte = inb(MISMIC_FLAG_REGISTER); 2940 byte |= 0x1; 2941 outb(MISMIC_FLAG_REGISTER, byte); 2942 } 2943 break; 2944 2945 case APIC_POWEROFF_VIA_SITKA_BMC: 2946 restarts = 0; 2947 restart_sitka_bmc: 2948 if (++restarts == 3) 2949 break; 2950 attempts = 0; 2951 do { 2952 byte = inb(SMS_STATUS_REGISTER); 2953 byte &= SMS_STATE_MASK; 2954 if ((byte == SMS_READ_STATE) || 2955 (byte == SMS_WRITE_STATE)) { 2956 drv_usecwait(1000); 2957 if (attempts >= 3) 2958 goto restart_sitka_bmc; 2959 ++attempts; 2960 } 2961 } while ((byte == SMS_READ_STATE) || 2962 (byte == SMS_WRITE_STATE)); 2963 outb(SMS_COMMAND_REGISTER, SMS_GET_STATUS); 2964 i = 0; 2965 for (; i < (sizeof (sitka_bmc)/sizeof (sitka_bmc[0])); 2966 i++) { 2967 attempts = 0; 2968 do { 2969 byte = inb(SMS_STATUS_REGISTER); 2970 byte &= SMS_IBF_MASK; 2971 if (byte != 0) { 2972 drv_usecwait(1000); 2973 if (attempts >= 3) 2974 goto restart_sitka_bmc; 2975 ++attempts; 2976 } 2977 } while (byte != 0); 2978 outb(sitka_bmc[i].port, sitka_bmc[i].data); 2979 } 2980 break; 2981 2982 case APIC_POWEROFF_NONE: 2983 2984 /* If no APIC direct method, we will try using ACPI */ 2985 if (apic_enable_acpi) { 2986 if (acpi_poweroff() == 1) 2987 return; 2988 } else 2989 return; 2990 2991 break; 2992 } 2993 /* 2994 * Wait a limited time here for power to go off. 2995 * If the power does not go off, then there was a 2996 * problem and we should continue to the halt which 2997 * prints a message for the user to press a key to 2998 * reboot. 2999 */ 3000 drv_usecwait(7000000); /* wait seven seconds */ 3001 3002 } 3003 3004 /* 3005 * Try and disable all interrupts. We just assign interrupts to other 3006 * processors based on policy. If any were bound by user request, we 3007 * let them continue and return failure. 
We do not bother to check 3008 * for cache affinity while rebinding. 3009 */ 3010 3011 static int 3012 apic_disable_intr(processorid_t cpun) 3013 { 3014 int bind_cpu = 0, i, hardbound = 0, iflag; 3015 apic_irq_t *irq_ptr; 3016 3017 iflag = intr_clear(); 3018 lock_set(&apic_ioapic_lock); 3019 apic_cpus[cpun].aci_status &= ~APIC_CPU_INTR_ENABLE; 3020 lock_clear(&apic_ioapic_lock); 3021 intr_restore(iflag); 3022 apic_cpus[cpun].aci_curipl = 0; 3023 i = apic_min_device_irq; 3024 for (; i <= apic_max_device_irq; i++) { 3025 /* 3026 * If there are bound interrupts on this cpu, then 3027 * rebind them to other processors. 3028 */ 3029 if ((irq_ptr = apic_irq_table[i]) != NULL) { 3030 ASSERT((irq_ptr->airq_temp_cpu == IRQ_UNBOUND) || 3031 (irq_ptr->airq_temp_cpu == IRQ_UNINIT) || 3032 ((irq_ptr->airq_temp_cpu & ~IRQ_USER_BOUND) < 3033 apic_nproc)); 3034 3035 if (irq_ptr->airq_temp_cpu == (cpun | IRQ_USER_BOUND)) { 3036 hardbound = 1; 3037 continue; 3038 } 3039 3040 if (irq_ptr->airq_temp_cpu == cpun) { 3041 do { 3042 apic_next_bind_cpu += 2; 3043 bind_cpu = apic_next_bind_cpu / 2; 3044 if (bind_cpu >= apic_nproc) { 3045 apic_next_bind_cpu = 1; 3046 bind_cpu = 0; 3047 3048 } 3049 } while (apic_rebind_all(irq_ptr, bind_cpu, 1)); 3050 } 3051 } 3052 } 3053 if (hardbound) { 3054 cmn_err(CE_WARN, "Could not disable interrupts on %d" 3055 "due to user bound interrupts", cpun); 3056 return (PSM_FAILURE); 3057 } 3058 else 3059 return (PSM_SUCCESS); 3060 } 3061 3062 static void 3063 apic_enable_intr(processorid_t cpun) 3064 { 3065 int i, iflag; 3066 apic_irq_t *irq_ptr; 3067 3068 iflag = intr_clear(); 3069 lock_set(&apic_ioapic_lock); 3070 apic_cpus[cpun].aci_status |= APIC_CPU_INTR_ENABLE; 3071 lock_clear(&apic_ioapic_lock); 3072 intr_restore(iflag); 3073 3074 i = apic_min_device_irq; 3075 for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) { 3076 if ((irq_ptr = apic_irq_table[i]) != NULL) { 3077 if ((irq_ptr->airq_cpu & ~IRQ_USER_BOUND) == cpun) { 3078 (void) 
apic_rebind_all(irq_ptr, 3079 irq_ptr->airq_cpu, 1); 3080 } 3081 } 3082 } 3083 } 3084 3085 /* 3086 * apic_introp_xlate() replaces apic_translate_irq() and is 3087 * called only from apic_intr_ops(). With the new ADII framework, 3088 * the priority can no longer be retrived through i_ddi_get_intrspec(). 3089 * It has to be passed in from the caller. 3090 */ 3091 int 3092 apic_introp_xlate(dev_info_t *dip, struct intrspec *ispec, int type) 3093 { 3094 char dev_type[16]; 3095 int dev_len, pci_irq, newirq, bustype, devid, busid, i; 3096 int irqno = ispec->intrspec_vec; 3097 ddi_acc_handle_t cfg_handle; 3098 uchar_t ipin; 3099 struct apic_io_intr *intrp; 3100 iflag_t intr_flag; 3101 APIC_HEADER *hp; 3102 MADT_INTERRUPT_OVERRIDE *isop; 3103 apic_irq_t *airqp; 3104 3105 DDI_INTR_IMPLDBG((CE_CONT, "apic_introp_xlate: dip=0x%p name=%s " 3106 "type=%d irqno=0x%x\n", (void *)dip, ddi_get_name(dip), type, 3107 irqno)); 3108 3109 if (DDI_INTR_IS_MSI_OR_MSIX(type)) { 3110 if ((airqp = apic_find_irq(dip, ispec, type)) != NULL) 3111 return (apic_vector_to_irq[airqp->airq_vector]); 3112 return (apic_setup_irq_table(dip, irqno, NULL, ispec, 3113 NULL, type)); 3114 } 3115 3116 bustype = 0; 3117 3118 /* check if we have already translated this irq */ 3119 mutex_enter(&airq_mutex); 3120 newirq = apic_min_device_irq; 3121 for (; newirq <= apic_max_device_irq; newirq++) { 3122 airqp = apic_irq_table[newirq]; 3123 while (airqp) { 3124 if ((airqp->airq_dip == dip) && 3125 (airqp->airq_origirq == irqno) && 3126 (airqp->airq_mps_intr_index != FREE_INDEX)) { 3127 3128 mutex_exit(&airq_mutex); 3129 return (VIRTIRQ(newirq, airqp->airq_share_id)); 3130 } 3131 airqp = airqp->airq_next; 3132 } 3133 } 3134 mutex_exit(&airq_mutex); 3135 3136 if (apic_defconf) 3137 goto defconf; 3138 3139 if ((dip == NULL) || (!apic_irq_translate && !apic_enable_acpi)) 3140 goto nonpci; 3141 3142 dev_len = sizeof (dev_type); 3143 if (ddi_getlongprop_buf(DDI_DEV_T_ANY, ddi_get_parent(dip), 3144 DDI_PROP_DONTPASS, 
"device_type", (caddr_t)dev_type, 3145 &dev_len) != DDI_PROP_SUCCESS) { 3146 goto nonpci; 3147 } 3148 3149 if ((strcmp(dev_type, "pci") == 0) || 3150 (strcmp(dev_type, "pciex") == 0)) { 3151 /* pci device */ 3152 if (acpica_get_bdf(dip, &busid, &devid, NULL) != 0) 3153 goto nonpci; 3154 if (busid == 0 && apic_pci_bus_total == 1) 3155 busid = (int)apic_single_pci_busid; 3156 3157 if (pci_config_setup(dip, &cfg_handle) != DDI_SUCCESS) 3158 goto nonpci; 3159 ipin = pci_config_get8(cfg_handle, PCI_CONF_IPIN) - PCI_INTA; 3160 pci_config_teardown(&cfg_handle); 3161 if (apic_enable_acpi && !apic_use_acpi_madt_only) { 3162 if (apic_acpi_translate_pci_irq(dip, busid, devid, 3163 ipin, &pci_irq, &intr_flag) != ACPI_PSM_SUCCESS) 3164 goto nonpci; 3165 3166 intr_flag.bustype = BUS_PCI; 3167 if ((newirq = apic_setup_irq_table(dip, pci_irq, NULL, 3168 ispec, &intr_flag, type)) == -1) 3169 goto nonpci; 3170 return (newirq); 3171 } else { 3172 pci_irq = ((devid & 0x1f) << 2) | (ipin & 0x3); 3173 if ((intrp = apic_find_io_intr_w_busid(pci_irq, busid)) 3174 == NULL) { 3175 if ((pci_irq = apic_handle_pci_pci_bridge(dip, 3176 devid, ipin, &intrp)) == -1) 3177 goto nonpci; 3178 } 3179 if ((newirq = apic_setup_irq_table(dip, pci_irq, intrp, 3180 ispec, NULL, type)) == -1) 3181 goto nonpci; 3182 return (newirq); 3183 } 3184 } else if (strcmp(dev_type, "isa") == 0) 3185 bustype = BUS_ISA; 3186 else if (strcmp(dev_type, "eisa") == 0) 3187 bustype = BUS_EISA; 3188 3189 nonpci: 3190 if (apic_enable_acpi && !apic_use_acpi_madt_only) { 3191 /* search iso entries first */ 3192 if (acpi_iso_cnt != 0) { 3193 hp = (APIC_HEADER *)acpi_isop; 3194 i = 0; 3195 while (i < acpi_iso_cnt) { 3196 if (hp->Type == APIC_XRUPT_OVERRIDE) { 3197 isop = (MADT_INTERRUPT_OVERRIDE *)hp; 3198 if (isop->Bus == 0 && 3199 isop->Source == irqno) { 3200 newirq = isop->Interrupt; 3201 intr_flag.intr_po = 3202 isop->Polarity; 3203 intr_flag.intr_el = 3204 isop->TriggerMode; 3205 intr_flag.bustype = BUS_ISA; 3206 3207 return 
(apic_setup_irq_table( 3208 dip, newirq, NULL, ispec, 3209 &intr_flag, type)); 3210 3211 } 3212 i++; 3213 } 3214 hp = (APIC_HEADER *)(((char *)hp) + 3215 hp->Length); 3216 } 3217 } 3218 intr_flag.intr_po = INTR_PO_ACTIVE_HIGH; 3219 intr_flag.intr_el = INTR_EL_EDGE; 3220 intr_flag.bustype = BUS_ISA; 3221 return (apic_setup_irq_table(dip, irqno, NULL, ispec, 3222 &intr_flag, type)); 3223 } else { 3224 if (bustype == 0) 3225 bustype = eisa_level_intr_mask ? BUS_EISA : BUS_ISA; 3226 for (i = 0; i < 2; i++) { 3227 if (((busid = apic_find_bus_id(bustype)) != -1) && 3228 ((intrp = apic_find_io_intr_w_busid(irqno, busid)) 3229 != NULL)) { 3230 if ((newirq = apic_setup_irq_table(dip, irqno, 3231 intrp, ispec, NULL, type)) != -1) { 3232 return (newirq); 3233 } 3234 goto defconf; 3235 } 3236 bustype = (bustype == BUS_EISA) ? BUS_ISA : BUS_EISA; 3237 } 3238 } 3239 3240 /* MPS default configuration */ 3241 defconf: 3242 newirq = apic_setup_irq_table(dip, irqno, NULL, ispec, NULL, type); 3243 if (newirq == -1) 3244 return (newirq); 3245 ASSERT(IRQINDEX(newirq) == irqno); 3246 ASSERT(apic_irq_table[irqno]); 3247 return (newirq); 3248 } 3249 3250 3251 3252 3253 3254 3255 /* 3256 * On machines with PCI-PCI bridges, a device behind a PCI-PCI bridge 3257 * needs special handling. We may need to chase up the device tree, 3258 * using the PCI-PCI Bridge specification's "rotating IPIN assumptions", 3259 * to find the IPIN at the root bus that relates to the IPIN on the 3260 * subsidiary bus (for ACPI or MP). We may, however, have an entry 3261 * in the MP table or the ACPI namespace for this device itself. 3262 * We handle both cases in the search below. 
3263 */ 3264 /* this is the non-acpi version */ 3265 static int 3266 apic_handle_pci_pci_bridge(dev_info_t *idip, int child_devno, int child_ipin, 3267 struct apic_io_intr **intrp) 3268 { 3269 dev_info_t *dipp, *dip; 3270 int pci_irq; 3271 ddi_acc_handle_t cfg_handle; 3272 int bridge_devno, bridge_bus; 3273 int ipin; 3274 3275 dip = idip; 3276 3277 /*CONSTCOND*/ 3278 while (1) { 3279 if ((dipp = ddi_get_parent(dip)) == (dev_info_t *)NULL) 3280 return (-1); 3281 if ((pci_config_setup(dipp, &cfg_handle) == DDI_SUCCESS) && 3282 (pci_config_get8(cfg_handle, PCI_CONF_BASCLASS) == 3283 PCI_CLASS_BRIDGE) && (pci_config_get8(cfg_handle, 3284 PCI_CONF_SUBCLASS) == PCI_BRIDGE_PCI)) { 3285 pci_config_teardown(&cfg_handle); 3286 if (acpica_get_bdf(dipp, &bridge_bus, &bridge_devno, 3287 NULL) != 0) 3288 return (-1); 3289 /* 3290 * This is the rotating scheme that Compaq is using 3291 * and documented in the pci to pci spec. Also, if 3292 * the pci to pci bridge is behind another pci to 3293 * pci bridge, then it need to keep transversing 3294 * up until an interrupt entry is found or reach 3295 * the top of the tree 3296 */ 3297 ipin = (child_devno + child_ipin) % PCI_INTD; 3298 if (bridge_bus == 0 && apic_pci_bus_total == 1) 3299 bridge_bus = (int)apic_single_pci_busid; 3300 pci_irq = ((bridge_devno & 0x1f) << 2) | 3301 (ipin & 0x3); 3302 if ((*intrp = apic_find_io_intr_w_busid(pci_irq, 3303 bridge_bus)) != NULL) { 3304 return (pci_irq); 3305 } 3306 dip = dipp; 3307 child_devno = bridge_devno; 3308 child_ipin = ipin; 3309 } else 3310 return (-1); 3311 } 3312 /*LINTED: function will not fall off the bottom */ 3313 } 3314 3315 3316 3317 3318 static uchar_t 3319 acpi_find_ioapic(int irq) 3320 { 3321 int i; 3322 3323 for (i = 0; i < apic_io_max; i++) { 3324 if (irq >= apic_io_vectbase[i] && irq <= apic_io_vectend[i]) 3325 return (i); 3326 } 3327 return (0xFF); /* shouldn't happen */ 3328 } 3329 3330 /* 3331 * See if two irqs are compatible for sharing a vector. 
3332 * Currently we only support sharing of PCI devices. 3333 */ 3334 static int 3335 acpi_intr_compatible(iflag_t iflag1, iflag_t iflag2) 3336 { 3337 uint_t level1, po1; 3338 uint_t level2, po2; 3339 3340 /* Assume active high by default */ 3341 po1 = 0; 3342 po2 = 0; 3343 3344 if (iflag1.bustype != iflag2.bustype || iflag1.bustype != BUS_PCI) 3345 return (0); 3346 3347 if (iflag1.intr_el == INTR_EL_CONFORM) 3348 level1 = AV_LEVEL; 3349 else 3350 level1 = (iflag1.intr_el == INTR_EL_LEVEL) ? AV_LEVEL : 0; 3351 3352 if (level1 && ((iflag1.intr_po == INTR_PO_ACTIVE_LOW) || 3353 (iflag1.intr_po == INTR_PO_CONFORM))) 3354 po1 = AV_ACTIVE_LOW; 3355 3356 if (iflag2.intr_el == INTR_EL_CONFORM) 3357 level2 = AV_LEVEL; 3358 else 3359 level2 = (iflag2.intr_el == INTR_EL_LEVEL) ? AV_LEVEL : 0; 3360 3361 if (level2 && ((iflag2.intr_po == INTR_PO_ACTIVE_LOW) || 3362 (iflag2.intr_po == INTR_PO_CONFORM))) 3363 po2 = AV_ACTIVE_LOW; 3364 3365 if ((level1 == level2) && (po1 == po2)) 3366 return (1); 3367 3368 return (0); 3369 } 3370 3371 /* 3372 * Attempt to share vector with someone else 3373 */ 3374 static int 3375 apic_share_vector(int irqno, iflag_t *intr_flagp, short intr_index, int ipl, 3376 uchar_t ioapicindex, uchar_t ipin, apic_irq_t **irqptrp) 3377 { 3378 #ifdef DEBUG 3379 apic_irq_t *tmpirqp = NULL; 3380 #endif /* DEBUG */ 3381 apic_irq_t *irqptr, dummyirq; 3382 int newirq, chosen_irq = -1, share = 127; 3383 int lowest, highest, i; 3384 uchar_t share_id; 3385 3386 DDI_INTR_IMPLDBG((CE_CONT, "apic_share_vector: irqno=0x%x " 3387 "intr_index=0x%x ipl=0x%x\n", irqno, intr_index, ipl)); 3388 3389 highest = apic_ipltopri[ipl] + APIC_VECTOR_MASK; 3390 lowest = apic_ipltopri[ipl-1] + APIC_VECTOR_PER_IPL; 3391 3392 if (highest < lowest) /* Both ipl and ipl-1 map to same pri */ 3393 lowest -= APIC_VECTOR_PER_IPL; 3394 dummyirq.airq_mps_intr_index = intr_index; 3395 dummyirq.airq_ioapicindex = ioapicindex; 3396 dummyirq.airq_intin_no = ipin; 3397 if (intr_flagp) 3398 
dummyirq.airq_iflag = *intr_flagp; 3399 apic_record_rdt_entry(&dummyirq, irqno); 3400 for (i = lowest; i <= highest; i++) { 3401 newirq = apic_vector_to_irq[i]; 3402 if (newirq == APIC_RESV_IRQ) 3403 continue; 3404 irqptr = apic_irq_table[newirq]; 3405 3406 if ((dummyirq.airq_rdt_entry & 0xFF00) != 3407 (irqptr->airq_rdt_entry & 0xFF00)) 3408 /* not compatible */ 3409 continue; 3410 3411 if (irqptr->airq_share < share) { 3412 share = irqptr->airq_share; 3413 chosen_irq = newirq; 3414 } 3415 } 3416 if (chosen_irq != -1) { 3417 /* 3418 * Assign a share id which is free or which is larger 3419 * than the largest one. 3420 */ 3421 share_id = 1; 3422 mutex_enter(&airq_mutex); 3423 irqptr = apic_irq_table[chosen_irq]; 3424 while (irqptr) { 3425 if (irqptr->airq_mps_intr_index == FREE_INDEX) { 3426 share_id = irqptr->airq_share_id; 3427 break; 3428 } 3429 if (share_id <= irqptr->airq_share_id) 3430 share_id = irqptr->airq_share_id + 1; 3431 #ifdef DEBUG 3432 tmpirqp = irqptr; 3433 #endif /* DEBUG */ 3434 irqptr = irqptr->airq_next; 3435 } 3436 if (!irqptr) { 3437 irqptr = kmem_zalloc(sizeof (apic_irq_t), KM_SLEEP); 3438 irqptr->airq_temp_cpu = IRQ_UNINIT; 3439 irqptr->airq_next = 3440 apic_irq_table[chosen_irq]->airq_next; 3441 apic_irq_table[chosen_irq]->airq_next = irqptr; 3442 #ifdef DEBUG 3443 tmpirqp = apic_irq_table[chosen_irq]; 3444 #endif /* DEBUG */ 3445 } 3446 irqptr->airq_mps_intr_index = intr_index; 3447 irqptr->airq_ioapicindex = ioapicindex; 3448 irqptr->airq_intin_no = ipin; 3449 if (intr_flagp) 3450 irqptr->airq_iflag = *intr_flagp; 3451 irqptr->airq_vector = apic_irq_table[chosen_irq]->airq_vector; 3452 irqptr->airq_share_id = share_id; 3453 apic_record_rdt_entry(irqptr, irqno); 3454 *irqptrp = irqptr; 3455 #ifdef DEBUG 3456 /* shuffle the pointers to test apic_delspl path */ 3457 if (tmpirqp) { 3458 tmpirqp->airq_next = irqptr->airq_next; 3459 irqptr->airq_next = apic_irq_table[chosen_irq]; 3460 apic_irq_table[chosen_irq] = irqptr; 3461 } 3462 #endif /* 
DEBUG */ 3463 mutex_exit(&airq_mutex); 3464 return (VIRTIRQ(chosen_irq, share_id)); 3465 } 3466 return (-1); 3467 } 3468 3469 /* 3470 * 3471 */ 3472 static int 3473 apic_setup_irq_table(dev_info_t *dip, int irqno, struct apic_io_intr *intrp, 3474 struct intrspec *ispec, iflag_t *intr_flagp, int type) 3475 { 3476 int origirq = ispec->intrspec_vec; 3477 uchar_t ipl = ispec->intrspec_pri; 3478 int newirq, intr_index; 3479 uchar_t ipin, ioapic, ioapicindex, vector; 3480 apic_irq_t *irqptr; 3481 major_t major; 3482 dev_info_t *sdip; 3483 3484 DDI_INTR_IMPLDBG((CE_CONT, "apic_setup_irq_table: dip=0x%p type=%d " 3485 "irqno=0x%x origirq=0x%x\n", (void *)dip, type, irqno, origirq)); 3486 3487 ASSERT(ispec != NULL); 3488 3489 major = (dip != NULL) ? ddi_name_to_major(ddi_get_name(dip)) : 0; 3490 3491 if (DDI_INTR_IS_MSI_OR_MSIX(type)) { 3492 /* MSI/X doesn't need to setup ioapic stuffs */ 3493 ioapicindex = 0xff; 3494 ioapic = 0xff; 3495 ipin = (uchar_t)0xff; 3496 intr_index = (type == DDI_INTR_TYPE_MSI) ? MSI_INDEX : 3497 MSIX_INDEX; 3498 mutex_enter(&airq_mutex); 3499 if ((irqno = apic_allocate_irq(apic_first_avail_irq)) == -1) { 3500 mutex_exit(&airq_mutex); 3501 /* need an irq for MSI/X to index into autovect[] */ 3502 cmn_err(CE_WARN, "No interrupt irq: %s instance %d", 3503 ddi_get_name(dip), ddi_get_instance(dip)); 3504 return (-1); 3505 } 3506 mutex_exit(&airq_mutex); 3507 3508 } else if (intrp != NULL) { 3509 intr_index = (int)(intrp - apic_io_intrp); 3510 ioapic = intrp->intr_destid; 3511 ipin = intrp->intr_destintin; 3512 /* Find ioapicindex. If destid was ALL, we will exit with 0. 
*/ 3513 for (ioapicindex = apic_io_max - 1; ioapicindex; ioapicindex--) 3514 if (apic_io_id[ioapicindex] == ioapic) 3515 break; 3516 ASSERT((ioapic == apic_io_id[ioapicindex]) || 3517 (ioapic == INTR_ALL_APIC)); 3518 3519 /* check whether this intin# has been used by another irqno */ 3520 if ((newirq = apic_find_intin(ioapicindex, ipin)) != -1) { 3521 return (newirq); 3522 } 3523 3524 } else if (intr_flagp != NULL) { 3525 /* ACPI case */ 3526 intr_index = ACPI_INDEX; 3527 ioapicindex = acpi_find_ioapic(irqno); 3528 ASSERT(ioapicindex != 0xFF); 3529 ioapic = apic_io_id[ioapicindex]; 3530 ipin = irqno - apic_io_vectbase[ioapicindex]; 3531 if (apic_irq_table[irqno] && 3532 apic_irq_table[irqno]->airq_mps_intr_index == ACPI_INDEX) { 3533 ASSERT(apic_irq_table[irqno]->airq_intin_no == ipin && 3534 apic_irq_table[irqno]->airq_ioapicindex == 3535 ioapicindex); 3536 return (irqno); 3537 } 3538 3539 } else { 3540 /* default configuration */ 3541 ioapicindex = 0; 3542 ioapic = apic_io_id[ioapicindex]; 3543 ipin = (uchar_t)irqno; 3544 intr_index = DEFAULT_INDEX; 3545 } 3546 3547 if (ispec == NULL) { 3548 APIC_VERBOSE_IOAPIC((CE_WARN, "No intrspec for irqno = %x\n", 3549 irqno)); 3550 } else if ((vector = apic_allocate_vector(ipl, irqno, 0)) == 0) { 3551 if ((newirq = apic_share_vector(irqno, intr_flagp, intr_index, 3552 ipl, ioapicindex, ipin, &irqptr)) != -1) { 3553 irqptr->airq_ipl = ipl; 3554 irqptr->airq_origirq = (uchar_t)origirq; 3555 irqptr->airq_dip = dip; 3556 irqptr->airq_major = major; 3557 sdip = apic_irq_table[IRQINDEX(newirq)]->airq_dip; 3558 /* This is OK to do really */ 3559 if (sdip == NULL) { 3560 cmn_err(CE_WARN, "Sharing vectors: %s" 3561 " instance %d and SCI", 3562 ddi_get_name(dip), ddi_get_instance(dip)); 3563 } else { 3564 cmn_err(CE_WARN, "Sharing vectors: %s" 3565 " instance %d and %s instance %d", 3566 ddi_get_name(sdip), ddi_get_instance(sdip), 3567 ddi_get_name(dip), ddi_get_instance(dip)); 3568 } 3569 return (newirq); 3570 } 3571 /* try high 
priority allocation now that share has failed */ 3572 if ((vector = apic_allocate_vector(ipl, irqno, 1)) == 0) { 3573 cmn_err(CE_WARN, "No interrupt vector: %s instance %d", 3574 ddi_get_name(dip), ddi_get_instance(dip)); 3575 return (-1); 3576 } 3577 } 3578 3579 mutex_enter(&airq_mutex); 3580 if (apic_irq_table[irqno] == NULL) { 3581 irqptr = kmem_zalloc(sizeof (apic_irq_t), KM_SLEEP); 3582 irqptr->airq_temp_cpu = IRQ_UNINIT; 3583 apic_irq_table[irqno] = irqptr; 3584 } else { 3585 irqptr = apic_irq_table[irqno]; 3586 if (irqptr->airq_mps_intr_index != FREE_INDEX) { 3587 /* 3588 * The slot is used by another irqno, so allocate 3589 * a free irqno for this interrupt 3590 */ 3591 newirq = apic_allocate_irq(apic_first_avail_irq); 3592 if (newirq == -1) { 3593 mutex_exit(&airq_mutex); 3594 return (-1); 3595 } 3596 irqno = newirq; 3597 irqptr = apic_irq_table[irqno]; 3598 if (irqptr == NULL) { 3599 irqptr = kmem_zalloc(sizeof (apic_irq_t), 3600 KM_SLEEP); 3601 irqptr->airq_temp_cpu = IRQ_UNINIT; 3602 apic_irq_table[irqno] = irqptr; 3603 } 3604 apic_modify_vector(vector, newirq); 3605 } 3606 } 3607 apic_max_device_irq = max(irqno, apic_max_device_irq); 3608 apic_min_device_irq = min(irqno, apic_min_device_irq); 3609 mutex_exit(&airq_mutex); 3610 irqptr->airq_ioapicindex = ioapicindex; 3611 irqptr->airq_intin_no = ipin; 3612 irqptr->airq_ipl = ipl; 3613 irqptr->airq_vector = vector; 3614 irqptr->airq_origirq = (uchar_t)origirq; 3615 irqptr->airq_share_id = 0; 3616 irqptr->airq_mps_intr_index = (short)intr_index; 3617 irqptr->airq_dip = dip; 3618 irqptr->airq_major = major; 3619 irqptr->airq_cpu = apic_bind_intr(dip, irqno, ioapic, ipin); 3620 if (intr_flagp) 3621 irqptr->airq_iflag = *intr_flagp; 3622 3623 if (!DDI_INTR_IS_MSI_OR_MSIX(type)) { 3624 /* setup I/O APIC entry for non-MSI/X interrupts */ 3625 apic_record_rdt_entry(irqptr, irqno); 3626 } 3627 return (irqno); 3628 } 3629 3630 /* 3631 * return the cpu to which this intr should be bound. 
3632 * Check properties or any other mechanism to see if user wants it 3633 * bound to a specific CPU. If so, return the cpu id with high bit set. 3634 * If not, use the policy to choose a cpu and return the id. 3635 */ 3636 uchar_t 3637 apic_bind_intr(dev_info_t *dip, int irq, uchar_t ioapicid, uchar_t intin) 3638 { 3639 int instance, instno, prop_len, bind_cpu, count; 3640 uint_t i, rc; 3641 uchar_t cpu; 3642 major_t major; 3643 char *name, *drv_name, *prop_val, *cptr; 3644 char prop_name[32]; 3645 3646 3647 if (apic_intr_policy == INTR_LOWEST_PRIORITY) 3648 return (IRQ_UNBOUND); 3649 3650 drv_name = NULL; 3651 rc = DDI_PROP_NOT_FOUND; 3652 major = (major_t)-1; 3653 if (dip != NULL) { 3654 name = ddi_get_name(dip); 3655 major = ddi_name_to_major(name); 3656 drv_name = ddi_major_to_name(major); 3657 instance = ddi_get_instance(dip); 3658 if (apic_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) { 3659 i = apic_min_device_irq; 3660 for (; i <= apic_max_device_irq; i++) { 3661 3662 if ((i == irq) || (apic_irq_table[i] == NULL) || 3663 (apic_irq_table[i]->airq_mps_intr_index 3664 == FREE_INDEX)) 3665 continue; 3666 3667 if ((apic_irq_table[i]->airq_major == major) && 3668 (!(apic_irq_table[i]->airq_cpu & 3669 IRQ_USER_BOUND))) { 3670 3671 cpu = apic_irq_table[i]->airq_cpu; 3672 3673 cmn_err(CE_CONT, 3674 "!pcplusmp: %s (%s) instance #%d " 3675 "vector 0x%x ioapic 0x%x " 3676 "intin 0x%x is bound to cpu %d\n", 3677 name, drv_name, instance, irq, 3678 ioapicid, intin, cpu); 3679 return (cpu); 3680 } 3681 } 3682 } 3683 /* 3684 * search for "drvname"_intpt_bind_cpus property first, the 3685 * syntax of the property should be "a[,b,c,...]" where 3686 * instance 0 binds to cpu a, instance 1 binds to cpu b, 3687 * instance 3 binds to cpu c... 3688 * ddi_getlongprop() will search /option first, then / 3689 * if "drvname"_intpt_bind_cpus doesn't exist, then find 3690 * intpt_bind_cpus property. 
The syntax is the same, and 3691 * it applies to all the devices if its "drvname" specific 3692 * property doesn't exist 3693 */ 3694 (void) strcpy(prop_name, drv_name); 3695 (void) strcat(prop_name, "_intpt_bind_cpus"); 3696 rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, 0, prop_name, 3697 (caddr_t)&prop_val, &prop_len); 3698 if (rc != DDI_PROP_SUCCESS) { 3699 rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, 0, 3700 "intpt_bind_cpus", (caddr_t)&prop_val, &prop_len); 3701 } 3702 } 3703 if (rc == DDI_PROP_SUCCESS) { 3704 for (i = count = 0; i < (prop_len - 1); i++) 3705 if (prop_val[i] == ',') 3706 count++; 3707 if (prop_val[i-1] != ',') 3708 count++; 3709 /* 3710 * if somehow the binding instances defined in the 3711 * property are not enough for this instno., then 3712 * reuse the pattern for the next instance until 3713 * it reaches the requested instno 3714 */ 3715 instno = instance % count; 3716 i = 0; 3717 cptr = prop_val; 3718 while (i < instno) 3719 if (*cptr++ == ',') 3720 i++; 3721 bind_cpu = stoi(&cptr); 3722 kmem_free(prop_val, prop_len); 3723 /* if specific cpu is bogus, then default to cpu 0 */ 3724 if (bind_cpu >= apic_nproc) { 3725 cmn_err(CE_WARN, "pcplusmp: %s=%s: CPU %d not present", 3726 prop_name, prop_val, bind_cpu); 3727 bind_cpu = 0; 3728 } else { 3729 /* indicate that we are bound at user request */ 3730 bind_cpu |= IRQ_USER_BOUND; 3731 } 3732 /* 3733 * no need to check apic_cpus[].aci_status, if specific cpu is 3734 * not up, then post_cpu_start will handle it. 
3735 */ 3736 } else { 3737 /* 3738 * We change bind_cpu only for every two calls 3739 * as most drivers still do 2 add_intrs for every 3740 * interrupt 3741 */ 3742 bind_cpu = (apic_next_bind_cpu++) / 2; 3743 if (bind_cpu >= apic_nproc) { 3744 apic_next_bind_cpu = 1; 3745 bind_cpu = 0; 3746 } 3747 } 3748 if (drv_name != NULL) 3749 cmn_err(CE_CONT, "!pcplusmp: %s (%s) instance %d " 3750 "vector 0x%x ioapic 0x%x intin 0x%x is bound to cpu %d\n", 3751 name, drv_name, instance, 3752 irq, ioapicid, intin, bind_cpu & ~IRQ_USER_BOUND); 3753 else 3754 cmn_err(CE_CONT, "!pcplusmp: " 3755 "vector 0x%x ioapic 0x%x intin 0x%x is bound to cpu %d\n", 3756 irq, ioapicid, intin, bind_cpu & ~IRQ_USER_BOUND); 3757 3758 return ((uchar_t)bind_cpu); 3759 } 3760 3761 static struct apic_io_intr * 3762 apic_find_io_intr_w_busid(int irqno, int busid) 3763 { 3764 struct apic_io_intr *intrp; 3765 3766 /* 3767 * It can have more than 1 entry with same source bus IRQ, 3768 * but unique with the source bus id 3769 */ 3770 intrp = apic_io_intrp; 3771 if (intrp != NULL) { 3772 while (intrp->intr_entry == APIC_IO_INTR_ENTRY) { 3773 if (intrp->intr_irq == irqno && 3774 intrp->intr_busid == busid && 3775 intrp->intr_type == IO_INTR_INT) 3776 return (intrp); 3777 intrp++; 3778 } 3779 } 3780 APIC_VERBOSE_IOAPIC((CE_NOTE, "Did not find io intr for irqno:" 3781 "busid %x:%x\n", irqno, busid)); 3782 return ((struct apic_io_intr *)NULL); 3783 } 3784 3785 3786 struct mps_bus_info { 3787 char *bus_name; 3788 int bus_id; 3789 } bus_info_array[] = { 3790 "ISA ", BUS_ISA, 3791 "PCI ", BUS_PCI, 3792 "EISA ", BUS_EISA, 3793 "XPRESS", BUS_XPRESS, 3794 "PCMCIA", BUS_PCMCIA, 3795 "VL ", BUS_VL, 3796 "CBUS ", BUS_CBUS, 3797 "CBUSII", BUS_CBUSII, 3798 "FUTURE", BUS_FUTURE, 3799 "INTERN", BUS_INTERN, 3800 "MBI ", BUS_MBI, 3801 "MBII ", BUS_MBII, 3802 "MPI ", BUS_MPI, 3803 "MPSA ", BUS_MPSA, 3804 "NUBUS ", BUS_NUBUS, 3805 "TC ", BUS_TC, 3806 "VME ", BUS_VME 3807 }; 3808 3809 static int 3810 apic_find_bus_type(char 
*bus) 3811 { 3812 int i = 0; 3813 3814 for (; i < sizeof (bus_info_array)/sizeof (struct mps_bus_info); i++) 3815 if (strncmp(bus, bus_info_array[i].bus_name, 3816 strlen(bus_info_array[i].bus_name)) == 0) 3817 return (bus_info_array[i].bus_id); 3818 APIC_VERBOSE_IOAPIC((CE_WARN, "Did not find bus type for bus %s", bus)); 3819 return (0); 3820 } 3821 3822 static int 3823 apic_find_bus(int busid) 3824 { 3825 struct apic_bus *busp; 3826 3827 busp = apic_busp; 3828 while (busp->bus_entry == APIC_BUS_ENTRY) { 3829 if (busp->bus_id == busid) 3830 return (apic_find_bus_type((char *)&busp->bus_str1)); 3831 busp++; 3832 } 3833 APIC_VERBOSE_IOAPIC((CE_WARN, "Did not find bus for bus id %x", busid)); 3834 return (0); 3835 } 3836 3837 static int 3838 apic_find_bus_id(int bustype) 3839 { 3840 struct apic_bus *busp; 3841 3842 busp = apic_busp; 3843 while (busp->bus_entry == APIC_BUS_ENTRY) { 3844 if (apic_find_bus_type((char *)&busp->bus_str1) == bustype) 3845 return (busp->bus_id); 3846 busp++; 3847 } 3848 APIC_VERBOSE_IOAPIC((CE_WARN, "Did not find bus id for bustype %x", 3849 bustype)); 3850 return (-1); 3851 } 3852 3853 /* 3854 * Check if a particular irq need to be reserved for any io_intr 3855 */ 3856 static struct apic_io_intr * 3857 apic_find_io_intr(int irqno) 3858 { 3859 struct apic_io_intr *intrp; 3860 3861 intrp = apic_io_intrp; 3862 if (intrp != NULL) { 3863 while (intrp->intr_entry == APIC_IO_INTR_ENTRY) { 3864 if (intrp->intr_irq == irqno && 3865 intrp->intr_type == IO_INTR_INT) 3866 return (intrp); 3867 intrp++; 3868 } 3869 } 3870 return ((struct apic_io_intr *)NULL); 3871 } 3872 3873 /* 3874 * Check if the given ioapicindex intin combination has already been assigned 3875 * an irq. If so return irqno. 
Else -1 3876 */ 3877 static int 3878 apic_find_intin(uchar_t ioapic, uchar_t intin) 3879 { 3880 apic_irq_t *irqptr; 3881 int i; 3882 3883 /* find ioapic and intin in the apic_irq_table[] and return the index */ 3884 for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) { 3885 irqptr = apic_irq_table[i]; 3886 while (irqptr) { 3887 if ((irqptr->airq_mps_intr_index >= 0) && 3888 (irqptr->airq_intin_no == intin) && 3889 (irqptr->airq_ioapicindex == ioapic)) { 3890 APIC_VERBOSE_IOAPIC((CE_NOTE, "!Found irq " 3891 "entry for ioapic:intin %x:%x " 3892 "shared interrupts ?", ioapic, intin)); 3893 return (i); 3894 } 3895 irqptr = irqptr->airq_next; 3896 } 3897 } 3898 return (-1); 3899 } 3900 3901 int 3902 apic_allocate_irq(int irq) 3903 { 3904 int freeirq, i; 3905 3906 if ((freeirq = apic_find_free_irq(irq, (APIC_RESV_IRQ - 1))) == -1) 3907 if ((freeirq = apic_find_free_irq(APIC_FIRST_FREE_IRQ, 3908 (irq - 1))) == -1) { 3909 /* 3910 * if BIOS really defines every single irq in the mps 3911 * table, then don't worry about conflicting with 3912 * them, just use any free slot in apic_irq_table 3913 */ 3914 for (i = APIC_FIRST_FREE_IRQ; i < APIC_RESV_IRQ; i++) { 3915 if ((apic_irq_table[i] == NULL) || 3916 apic_irq_table[i]->airq_mps_intr_index == 3917 FREE_INDEX) { 3918 freeirq = i; 3919 break; 3920 } 3921 } 3922 if (freeirq == -1) { 3923 /* This shouldn't happen, but just in case */ 3924 cmn_err(CE_WARN, "pcplusmp: NO available IRQ"); 3925 return (-1); 3926 } 3927 } 3928 if (apic_irq_table[freeirq] == NULL) { 3929 apic_irq_table[freeirq] = 3930 kmem_zalloc(sizeof (apic_irq_t), KM_NOSLEEP); 3931 if (apic_irq_table[freeirq] == NULL) { 3932 cmn_err(CE_WARN, "pcplusmp: NO memory to allocate IRQ"); 3933 return (-1); 3934 } 3935 apic_irq_table[freeirq]->airq_mps_intr_index = FREE_INDEX; 3936 } 3937 return (freeirq); 3938 } 3939 3940 static int 3941 apic_find_free_irq(int start, int end) 3942 { 3943 int i; 3944 3945 for (i = start; i <= end; i++) 3946 /* Check if any I/O 
entry needs this IRQ */ 3947 if (apic_find_io_intr(i) == NULL) { 3948 /* Then see if it is free */ 3949 if ((apic_irq_table[i] == NULL) || 3950 (apic_irq_table[i]->airq_mps_intr_index == 3951 FREE_INDEX)) { 3952 return (i); 3953 } 3954 } 3955 return (-1); 3956 } 3957 3958 /* 3959 * Allocate a free vector for irq at ipl. Takes care of merging of multiple 3960 * IPLs into a single APIC level as well as stretching some IPLs onto multiple 3961 * levels. APIC_HI_PRI_VECTS interrupts are reserved for high priority 3962 * requests and allocated only when pri is set. 3963 */ 3964 static uchar_t 3965 apic_allocate_vector(int ipl, int irq, int pri) 3966 { 3967 int lowest, highest, i; 3968 3969 highest = apic_ipltopri[ipl] + APIC_VECTOR_MASK; 3970 lowest = apic_ipltopri[ipl - 1] + APIC_VECTOR_PER_IPL; 3971 3972 if (highest < lowest) /* Both ipl and ipl - 1 map to same pri */ 3973 lowest -= APIC_VECTOR_PER_IPL; 3974 3975 #ifdef DEBUG 3976 if (apic_restrict_vector) /* for testing shared interrupt logic */ 3977 highest = lowest + apic_restrict_vector + APIC_HI_PRI_VECTS; 3978 #endif /* DEBUG */ 3979 if (pri == 0) 3980 highest -= APIC_HI_PRI_VECTS; 3981 3982 for (i = lowest; i < highest; i++) { 3983 if ((i == T_FASTTRAP) || (i == APIC_SPUR_INTR) || 3984 (i == T_SYSCALLINT) || (i == T_DTRACE_PROBE) || 3985 (i == T_DTRACE_RET)) 3986 continue; 3987 if (apic_vector_to_irq[i] == APIC_RESV_IRQ) { 3988 apic_vector_to_irq[i] = (uchar_t)irq; 3989 return (i); 3990 } 3991 } 3992 3993 return (0); 3994 } 3995 3996 static void 3997 apic_modify_vector(uchar_t vector, int irq) 3998 { 3999 apic_vector_to_irq[vector] = (uchar_t)irq; 4000 } 4001 4002 /* 4003 * Mark vector as being in the process of being deleted. Interrupts 4004 * may still come in on some CPU. The moment an interrupt comes with 4005 * the new vector, we know we can free the old one. Called only from 4006 * addspl and delspl with interrupts disabled. 
Because an interrupt 4007 * can be shared, but no interrupt from either device may come in, 4008 * we also use a timeout mechanism, which we arbitrarily set to 4009 * apic_revector_timeout microseconds. 4010 */ 4011 static void 4012 apic_mark_vector(uchar_t oldvector, uchar_t newvector) 4013 { 4014 int iflag = intr_clear(); 4015 lock_set(&apic_revector_lock); 4016 if (!apic_oldvec_to_newvec) { 4017 apic_oldvec_to_newvec = 4018 kmem_zalloc(sizeof (newvector) * APIC_MAX_VECTOR * 2, 4019 KM_NOSLEEP); 4020 4021 if (!apic_oldvec_to_newvec) { 4022 /* 4023 * This failure is not catastrophic. 4024 * But, the oldvec will never be freed. 4025 */ 4026 apic_error |= APIC_ERR_MARK_VECTOR_FAIL; 4027 lock_clear(&apic_revector_lock); 4028 intr_restore(iflag); 4029 return; 4030 } 4031 apic_newvec_to_oldvec = &apic_oldvec_to_newvec[APIC_MAX_VECTOR]; 4032 } 4033 4034 /* See if we already did this for drivers which do double addintrs */ 4035 if (apic_oldvec_to_newvec[oldvector] != newvector) { 4036 apic_oldvec_to_newvec[oldvector] = newvector; 4037 apic_newvec_to_oldvec[newvector] = oldvector; 4038 apic_revector_pending++; 4039 } 4040 lock_clear(&apic_revector_lock); 4041 intr_restore(iflag); 4042 (void) timeout(apic_xlate_vector_free_timeout_handler, 4043 (void *)(uintptr_t)oldvector, drv_usectohz(apic_revector_timeout)); 4044 } 4045 4046 /* 4047 * xlate_vector is called from intr_enter if revector_pending is set. 4048 * It will xlate it if needed and mark the old vector as free. 4049 */ 4050 static uchar_t 4051 apic_xlate_vector(uchar_t vector) 4052 { 4053 uchar_t newvector, oldvector = 0; 4054 4055 lock_set(&apic_revector_lock); 4056 /* Do we really need to do this ? */ 4057 if (!apic_revector_pending) { 4058 lock_clear(&apic_revector_lock); 4059 return (vector); 4060 } 4061 if ((newvector = apic_oldvec_to_newvec[vector]) != 0) 4062 oldvector = vector; 4063 else { 4064 /* 4065 * The incoming vector is new . 
See if a stale entry is 4066 * remaining 4067 */ 4068 if ((oldvector = apic_newvec_to_oldvec[vector]) != 0) 4069 newvector = vector; 4070 } 4071 4072 if (oldvector) { 4073 apic_revector_pending--; 4074 apic_oldvec_to_newvec[oldvector] = 0; 4075 apic_newvec_to_oldvec[newvector] = 0; 4076 apic_free_vector(oldvector); 4077 lock_clear(&apic_revector_lock); 4078 /* There could have been more than one reprogramming! */ 4079 return (apic_xlate_vector(newvector)); 4080 } 4081 lock_clear(&apic_revector_lock); 4082 return (vector); 4083 } 4084 4085 void 4086 apic_xlate_vector_free_timeout_handler(void *arg) 4087 { 4088 int iflag; 4089 uchar_t oldvector, newvector; 4090 4091 oldvector = (uchar_t)(uintptr_t)arg; 4092 iflag = intr_clear(); 4093 lock_set(&apic_revector_lock); 4094 if ((newvector = apic_oldvec_to_newvec[oldvector]) != 0) { 4095 apic_free_vector(oldvector); 4096 apic_oldvec_to_newvec[oldvector] = 0; 4097 apic_newvec_to_oldvec[newvector] = 0; 4098 apic_revector_pending--; 4099 } 4100 4101 lock_clear(&apic_revector_lock); 4102 intr_restore(iflag); 4103 } 4104 4105 4106 /* Mark vector as not being used by any irq */ 4107 static void 4108 apic_free_vector(uchar_t vector) 4109 { 4110 apic_vector_to_irq[vector] = APIC_RESV_IRQ; 4111 } 4112 4113 /* 4114 * compute the polarity, trigger mode and vector for programming into 4115 * the I/O apic and record in airq_rdt_entry. 
4116 */ 4117 static void 4118 apic_record_rdt_entry(apic_irq_t *irqptr, int irq) 4119 { 4120 int ioapicindex, bus_type, vector; 4121 short intr_index; 4122 uint_t level, po, io_po; 4123 struct apic_io_intr *iointrp; 4124 4125 intr_index = irqptr->airq_mps_intr_index; 4126 DDI_INTR_IMPLDBG((CE_CONT, "apic_record_rdt_entry: intr_index=%d " 4127 "irq = 0x%x dip = 0x%p vector = 0x%x\n", intr_index, irq, 4128 (void *)irqptr->airq_dip, irqptr->airq_vector)); 4129 4130 if (intr_index == RESERVE_INDEX) { 4131 apic_error |= APIC_ERR_INVALID_INDEX; 4132 return; 4133 } else if (APIC_IS_MSI_OR_MSIX_INDEX(intr_index)) { 4134 return; 4135 } 4136 4137 vector = irqptr->airq_vector; 4138 ioapicindex = irqptr->airq_ioapicindex; 4139 /* Assume edge triggered by default */ 4140 level = 0; 4141 /* Assume active high by default */ 4142 po = 0; 4143 4144 if (intr_index == DEFAULT_INDEX || intr_index == FREE_INDEX) { 4145 ASSERT(irq < 16); 4146 if (eisa_level_intr_mask & (1 << irq)) 4147 level = AV_LEVEL; 4148 if (intr_index == FREE_INDEX && apic_defconf == 0) 4149 apic_error |= APIC_ERR_INVALID_INDEX; 4150 } else if (intr_index == ACPI_INDEX) { 4151 bus_type = irqptr->airq_iflag.bustype; 4152 if (irqptr->airq_iflag.intr_el == INTR_EL_CONFORM) { 4153 if (bus_type == BUS_PCI) 4154 level = AV_LEVEL; 4155 } else 4156 level = (irqptr->airq_iflag.intr_el == INTR_EL_LEVEL) ? 4157 AV_LEVEL : 0; 4158 if (level && 4159 ((irqptr->airq_iflag.intr_po == INTR_PO_ACTIVE_LOW) || 4160 (irqptr->airq_iflag.intr_po == INTR_PO_CONFORM && 4161 bus_type == BUS_PCI))) 4162 po = AV_ACTIVE_LOW; 4163 } else { 4164 iointrp = apic_io_intrp + intr_index; 4165 bus_type = apic_find_bus(iointrp->intr_busid); 4166 if (iointrp->intr_el == INTR_EL_CONFORM) { 4167 if ((irq < 16) && (eisa_level_intr_mask & (1 << irq))) 4168 level = AV_LEVEL; 4169 else if (bus_type == BUS_PCI) 4170 level = AV_LEVEL; 4171 } else 4172 level = (iointrp->intr_el == INTR_EL_LEVEL) ? 
4173 AV_LEVEL : 0; 4174 if (level && ((iointrp->intr_po == INTR_PO_ACTIVE_LOW) || 4175 (iointrp->intr_po == INTR_PO_CONFORM && 4176 bus_type == BUS_PCI))) 4177 po = AV_ACTIVE_LOW; 4178 } 4179 if (level) 4180 apic_level_intr[irq] = 1; 4181 /* 4182 * The 82489DX External APIC cannot do active low polarity interrupts. 4183 */ 4184 if (po && (apic_io_ver[ioapicindex] != IOAPIC_VER_82489DX)) 4185 io_po = po; 4186 else 4187 io_po = 0; 4188 4189 if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) 4190 printf("setio: ioapic=%x intin=%x level=%x po=%x vector=%x\n", 4191 ioapicindex, irqptr->airq_intin_no, level, io_po, vector); 4192 4193 irqptr->airq_rdt_entry = level|io_po|vector; 4194 } 4195 4196 /* 4197 * Call rebind to do the actual programming. 4198 */ 4199 static int 4200 apic_setup_io_intr(apic_irq_t *irqptr, int irq) 4201 { 4202 int rv; 4203 4204 if (rv = apic_rebind(irqptr, apic_irq_table[irq]->airq_cpu, 1, 4205 IMMEDIATE)) 4206 /* CPU is not up or interrupt is disabled. Fall back to 0 */ 4207 rv = apic_rebind(irqptr, 0, 1, IMMEDIATE); 4208 4209 return (rv); 4210 } 4211 4212 /* 4213 * Deferred reprogramming: Call apic_rebind to do the real work. 4214 */ 4215 static int 4216 apic_setup_io_intr_deferred(apic_irq_t *irqptr, int irq) 4217 { 4218 int rv; 4219 4220 if (rv = apic_rebind(irqptr, apic_irq_table[irq]->airq_cpu, 1, 4221 DEFERRED)) 4222 /* CPU is not up or interrupt is disabled. Fall back to 0 */ 4223 rv = apic_rebind(irqptr, 0, 1, DEFERRED); 4224 4225 return (rv); 4226 } 4227 4228 /* 4229 * Bind interrupt corresponding to irq_ptr to bind_cpu. acquire_lock 4230 * if false (0) means lock is already held (e.g: in rebind_all). 
4231 */ 4232 static int 4233 apic_rebind(apic_irq_t *irq_ptr, int bind_cpu, int acquire_lock, int when) 4234 { 4235 int intin_no; 4236 volatile int32_t *ioapic; 4237 uchar_t airq_temp_cpu; 4238 apic_cpus_info_t *cpu_infop; 4239 int iflag; 4240 int which_irq = apic_vector_to_irq[irq_ptr->airq_vector]; 4241 4242 intin_no = irq_ptr->airq_intin_no; 4243 ioapic = apicioadr[irq_ptr->airq_ioapicindex]; 4244 airq_temp_cpu = irq_ptr->airq_temp_cpu; 4245 if (airq_temp_cpu != IRQ_UNINIT && airq_temp_cpu != IRQ_UNBOUND) { 4246 if (airq_temp_cpu & IRQ_USER_BOUND) 4247 /* Mask off high bit so it can be used as array index */ 4248 airq_temp_cpu &= ~IRQ_USER_BOUND; 4249 4250 ASSERT(airq_temp_cpu < apic_nproc); 4251 } 4252 4253 iflag = intr_clear(); 4254 4255 if (acquire_lock) 4256 lock_set(&apic_ioapic_lock); 4257 4258 /* 4259 * Can't bind to a CPU that's not online: 4260 */ 4261 cpu_infop = &apic_cpus[bind_cpu & ~IRQ_USER_BOUND]; 4262 if (!(cpu_infop->aci_status & APIC_CPU_INTR_ENABLE)) { 4263 4264 if (acquire_lock) 4265 lock_clear(&apic_ioapic_lock); 4266 4267 intr_restore(iflag); 4268 return (1); 4269 } 4270 4271 /* 4272 * If this is a deferred reprogramming attempt, ensure we have 4273 * not been passed stale data: 4274 */ 4275 if ((when == DEFERRED) && 4276 (apic_reprogram_info[which_irq].valid == 0)) { 4277 /* stale info, so just return */ 4278 if (acquire_lock) 4279 lock_clear(&apic_ioapic_lock); 4280 4281 intr_restore(iflag); 4282 return (0); 4283 } 4284 4285 /* 4286 * If this interrupt has been delivered to a CPU and that CPU 4287 * has not handled it yet, we cannot reprogram the IOAPIC now: 4288 */ 4289 if (!APIC_IS_MSI_OR_MSIX_INDEX(irq_ptr->airq_mps_intr_index) && 4290 apic_check_stuck_interrupt(irq_ptr, airq_temp_cpu, bind_cpu, 4291 ioapic, intin_no, which_irq) != 0) { 4292 4293 if (acquire_lock) 4294 lock_clear(&apic_ioapic_lock); 4295 4296 intr_restore(iflag); 4297 return (0); 4298 } 4299 4300 /* 4301 * NOTE: We do not unmask the RDT here, as an interrupt MAY still 
4302 * come in before we have a chance to reprogram it below. The 4303 * reprogramming below will simultaneously change and unmask the 4304 * RDT entry. 4305 */ 4306 4307 if ((uchar_t)bind_cpu == IRQ_UNBOUND) { 4308 /* Write the RDT entry -- no specific CPU binding */ 4309 WRITE_IOAPIC_RDT_ENTRY_HIGH_DWORD(ioapic, intin_no, AV_TOALL); 4310 4311 if (airq_temp_cpu != IRQ_UNINIT && airq_temp_cpu != IRQ_UNBOUND) 4312 apic_cpus[airq_temp_cpu].aci_temp_bound--; 4313 4314 /* Write the vector, trigger, and polarity portion of the RDT */ 4315 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no, 4316 AV_LDEST | AV_LOPRI | irq_ptr->airq_rdt_entry); 4317 if (acquire_lock) 4318 lock_clear(&apic_ioapic_lock); 4319 irq_ptr->airq_temp_cpu = IRQ_UNBOUND; 4320 intr_restore(iflag); 4321 return (0); 4322 } 4323 4324 if (bind_cpu & IRQ_USER_BOUND) { 4325 cpu_infop->aci_bound++; 4326 } else { 4327 cpu_infop->aci_temp_bound++; 4328 } 4329 ASSERT((bind_cpu & ~IRQ_USER_BOUND) < apic_nproc); 4330 if (!APIC_IS_MSI_OR_MSIX_INDEX(irq_ptr->airq_mps_intr_index)) { 4331 /* Write the RDT entry -- bind to a specific CPU: */ 4332 WRITE_IOAPIC_RDT_ENTRY_HIGH_DWORD(ioapic, intin_no, 4333 cpu_infop->aci_local_id << APIC_ID_BIT_OFFSET); 4334 } 4335 if ((airq_temp_cpu != IRQ_UNBOUND) && (airq_temp_cpu != IRQ_UNINIT)) { 4336 apic_cpus[airq_temp_cpu].aci_temp_bound--; 4337 } 4338 if (!APIC_IS_MSI_OR_MSIX_INDEX(irq_ptr->airq_mps_intr_index)) { 4339 /* Write the vector, trigger, and polarity portion of the RDT */ 4340 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no, 4341 AV_PDEST | AV_FIXED | irq_ptr->airq_rdt_entry); 4342 } else { 4343 int type = (irq_ptr->airq_mps_intr_index == MSI_INDEX) ? 
4344 DDI_INTR_TYPE_MSI : DDI_INTR_TYPE_MSIX; 4345 (void) pci_msi_disable_mode(irq_ptr->airq_dip, type, 4346 irq_ptr->airq_ioapicindex); 4347 if (irq_ptr->airq_ioapicindex == irq_ptr->airq_origirq) { 4348 /* first one */ 4349 DDI_INTR_IMPLDBG((CE_CONT, "apic_rebind: call " 4350 "apic_pci_msi_enable_vector\n")); 4351 if (apic_pci_msi_enable_vector(irq_ptr->airq_dip, type, 4352 which_irq, irq_ptr->airq_vector, 4353 irq_ptr->airq_intin_no, 4354 cpu_infop->aci_local_id) != PSM_SUCCESS) { 4355 cmn_err(CE_WARN, "pcplusmp: " 4356 "apic_pci_msi_enable_vector " 4357 "returned PSM_FAILURE"); 4358 } 4359 } 4360 if ((irq_ptr->airq_ioapicindex + irq_ptr->airq_intin_no - 1) == 4361 irq_ptr->airq_origirq) { /* last one */ 4362 DDI_INTR_IMPLDBG((CE_CONT, "apic_rebind: call " 4363 "pci_msi_enable_mode\n")); 4364 if (pci_msi_enable_mode(irq_ptr->airq_dip, type, 4365 which_irq) != DDI_SUCCESS) { 4366 DDI_INTR_IMPLDBG((CE_CONT, "pcplusmp: " 4367 "pci_msi_enable failed\n")); 4368 (void) pci_msi_unconfigure(irq_ptr->airq_dip, 4369 (irq_ptr->airq_mps_intr_index == MSI_INDEX) ? 4370 DDI_INTR_TYPE_MSI : DDI_INTR_TYPE_MSIX, 4371 which_irq); 4372 } 4373 } 4374 } 4375 if (acquire_lock) 4376 lock_clear(&apic_ioapic_lock); 4377 irq_ptr->airq_temp_cpu = (uchar_t)bind_cpu; 4378 apic_redist_cpu_skip &= ~(1 << (bind_cpu & ~IRQ_USER_BOUND)); 4379 intr_restore(iflag); 4380 return (0); 4381 } 4382 4383 /* 4384 * Checks to see if the IOAPIC interrupt entry specified has its Remote IRR 4385 * bit set. Sets up a timeout to perform the reprogramming at a later time 4386 * if it cannot wait for the Remote IRR bit to clear (or if waiting did not 4387 * result in the bit's clearing). 4388 * 4389 * This function will mask the RDT entry if the Remote IRR bit is set. 4390 * 4391 * Returns non-zero if the caller should defer IOAPIC reprogramming. 
4392 */ 4393 static int 4394 apic_check_stuck_interrupt(apic_irq_t *irq_ptr, int old_bind_cpu, 4395 int new_bind_cpu, volatile int32_t *ioapic, int intin_no, int which_irq) 4396 { 4397 int32_t rdt_entry; 4398 int waited; 4399 4400 /* Mask the RDT entry, but only if it's a level-triggered interrupt */ 4401 rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no); 4402 if ((rdt_entry & (AV_LEVEL|AV_MASK)) == AV_LEVEL) { 4403 4404 /* Mask it */ 4405 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no, 4406 AV_MASK | rdt_entry); 4407 } 4408 4409 /* 4410 * Wait for the delivery pending bit to clear. 4411 */ 4412 if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no) & 4413 (AV_LEVEL|AV_PENDING)) == (AV_LEVEL|AV_PENDING)) { 4414 4415 /* 4416 * If we're still waiting on the delivery of this interrupt, 4417 * continue to wait here until it is delivered (this should be 4418 * a very small amount of time, but include a timeout just in 4419 * case). 4420 */ 4421 for (waited = 0; waited < apic_max_usecs_clear_pending; 4422 waited += APIC_USECS_PER_WAIT_INTERVAL) { 4423 if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no) 4424 & AV_PENDING) == 0) { 4425 break; 4426 } 4427 drv_usecwait(APIC_USECS_PER_WAIT_INTERVAL); 4428 } 4429 4430 if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no) & 4431 AV_PENDING) != 0) { 4432 cmn_err(CE_WARN, "!IOAPIC %d intin %d: Could not " 4433 "deliver interrupt to local APIC within " 4434 "%d usecs.", irq_ptr->airq_ioapicindex, 4435 irq_ptr->airq_intin_no, 4436 apic_max_usecs_clear_pending); 4437 } 4438 } 4439 4440 /* 4441 * If the remote IRR bit is set, then the interrupt has been sent 4442 * to a CPU for processing. We have no choice but to wait for 4443 * that CPU to process the interrupt, at which point the remote IRR 4444 * bit will be cleared. 
4445 */ 4446 if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no) & 4447 (AV_LEVEL|AV_REMOTE_IRR)) == (AV_LEVEL|AV_REMOTE_IRR)) { 4448 4449 /* 4450 * If the CPU that this RDT is bound to is NOT the current 4451 * CPU, wait until that CPU handles the interrupt and ACKs 4452 * it. If this interrupt is not bound to any CPU (that is, 4453 * if it's bound to the logical destination of "anyone"), it 4454 * may have been delivered to the current CPU so handle that 4455 * case by deferring the reprogramming (below). 4456 */ 4457 kpreempt_disable(); 4458 if ((old_bind_cpu != IRQ_UNBOUND) && 4459 (old_bind_cpu != IRQ_UNINIT) && 4460 (old_bind_cpu != psm_get_cpu_id())) { 4461 for (waited = 0; waited < apic_max_usecs_clear_pending; 4462 waited += APIC_USECS_PER_WAIT_INTERVAL) { 4463 if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, 4464 intin_no) & AV_REMOTE_IRR) == 0) { 4465 4466 /* Clear the reprogramming state: */ 4467 lock_set(&apic_ioapic_reprogram_lock); 4468 4469 apic_reprogram_info[which_irq].valid 4470 = 0; 4471 apic_reprogram_info[which_irq].bindcpu 4472 = 0; 4473 apic_reprogram_info[which_irq].timeouts 4474 = 0; 4475 4476 lock_clear(&apic_ioapic_reprogram_lock); 4477 4478 /* Remote IRR has cleared! */ 4479 kpreempt_enable(); 4480 return (0); 4481 } 4482 drv_usecwait(APIC_USECS_PER_WAIT_INTERVAL); 4483 } 4484 } 4485 kpreempt_enable(); 4486 4487 /* 4488 * If we waited and the Remote IRR bit is still not cleared, 4489 * AND if we've invoked the timeout APIC_REPROGRAM_MAX_TIMEOUTS 4490 * times for this interrupt, try the last-ditch workarounds: 4491 */ 4492 if (apic_reprogram_info[which_irq].timeouts >= 4493 APIC_REPROGRAM_MAX_TIMEOUTS) { 4494 4495 if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no) 4496 & AV_REMOTE_IRR) != 0) { 4497 /* 4498 * Trying to clear the bit through normal 4499 * channels has failed. So as a last-ditch 4500 * effort, try to set the trigger mode to 4501 * edge, then to level. This has been 4502 * observed to work on many systems. 
4503 */ 4504 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, 4505 intin_no, 4506 READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, 4507 intin_no) & ~AV_LEVEL); 4508 4509 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, 4510 intin_no, 4511 READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, 4512 intin_no) | AV_LEVEL); 4513 4514 /* 4515 * If the bit's STILL set, declare total and 4516 * utter failure 4517 */ 4518 if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, 4519 intin_no) & AV_REMOTE_IRR) != 0) { 4520 cmn_err(CE_WARN, "!IOAPIC %d intin %d: " 4521 "Remote IRR failed to reset " 4522 "within %d usecs. Interrupts to " 4523 "this pin may cease to function.", 4524 irq_ptr->airq_ioapicindex, 4525 irq_ptr->airq_intin_no, 4526 apic_max_usecs_clear_pending); 4527 } 4528 } 4529 /* Clear the reprogramming state: */ 4530 lock_set(&apic_ioapic_reprogram_lock); 4531 4532 apic_reprogram_info[which_irq].valid = 0; 4533 apic_reprogram_info[which_irq].bindcpu = 0; 4534 apic_reprogram_info[which_irq].timeouts = 0; 4535 4536 lock_clear(&apic_ioapic_reprogram_lock); 4537 } else { 4538 #ifdef DEBUG 4539 cmn_err(CE_WARN, "Deferring reprogramming of irq %d", 4540 which_irq); 4541 #endif /* DEBUG */ 4542 /* 4543 * If waiting for the Remote IRR bit (above) didn't 4544 * allow it to clear, defer the reprogramming: 4545 */ 4546 lock_set(&apic_ioapic_reprogram_lock); 4547 4548 apic_reprogram_info[which_irq].valid = 1; 4549 apic_reprogram_info[which_irq].bindcpu = new_bind_cpu; 4550 apic_reprogram_info[which_irq].timeouts++; 4551 4552 lock_clear(&apic_ioapic_reprogram_lock); 4553 4554 /* Fire up a timeout to handle this later */ 4555 (void) timeout(apic_reprogram_timeout_handler, 4556 (void *) 0, 4557 drv_usectohz(APIC_REPROGRAM_TIMEOUT_DELAY)); 4558 4559 /* Inform caller to defer IOAPIC programming: */ 4560 return (1); 4561 } 4562 } 4563 return (0); 4564 } 4565 4566 /* 4567 * Timeout handler that performs the APIC reprogramming 4568 */ 4569 /*ARGSUSED*/ 4570 static void 4571 apic_reprogram_timeout_handler(void *arg) 4572 { 4573 
/*LINTED: set but not used in function*/ 4574 int i, result; 4575 4576 /* Serialize access to this function */ 4577 mutex_enter(&apic_reprogram_timeout_mutex); 4578 4579 /* 4580 * For each entry in the reprogramming state that's valid, 4581 * try the reprogramming again: 4582 */ 4583 for (i = 0; i < APIC_MAX_VECTOR; i++) { 4584 if (apic_reprogram_info[i].valid == 0) 4585 continue; 4586 /* 4587 * Though we can't really do anything about errors 4588 * at this point, keep track of them for reporting. 4589 * Note that it is very possible for apic_setup_io_intr 4590 * to re-register this very timeout if the Remote IRR bit 4591 * has not yet cleared. 4592 */ 4593 result = apic_setup_io_intr_deferred(apic_irq_table[i], i); 4594 4595 #ifdef DEBUG 4596 if (result) 4597 cmn_err(CE_WARN, "apic_reprogram_timeout: " 4598 "apic_setup_io_intr returned nonzero for " 4599 "irq=%d!", i); 4600 #endif /* DEBUG */ 4601 } 4602 4603 mutex_exit(&apic_reprogram_timeout_mutex); 4604 } 4605 4606 4607 /* 4608 * Called to migrate all interrupts at an irq to another cpu. safe 4609 * if true means we are not being called from an interrupt 4610 * context and hence it is safe to do a lock_set. 
If false 4611 * do only a lock_try and return failure ( non 0 ) if we cannot get it 4612 */ 4613 int 4614 apic_rebind_all(apic_irq_t *irq_ptr, int bind_cpu, int safe) 4615 { 4616 apic_irq_t *irqptr = irq_ptr; 4617 int retval = 0; 4618 int iflag; 4619 4620 iflag = intr_clear(); 4621 if (!safe) { 4622 if (lock_try(&apic_ioapic_lock) == 0) { 4623 intr_restore(iflag); 4624 return (1); 4625 } 4626 } else 4627 lock_set(&apic_ioapic_lock); 4628 4629 while (irqptr) { 4630 if (irqptr->airq_temp_cpu != IRQ_UNINIT) 4631 retval |= apic_rebind(irqptr, bind_cpu, 0, IMMEDIATE); 4632 irqptr = irqptr->airq_next; 4633 } 4634 lock_clear(&apic_ioapic_lock); 4635 intr_restore(iflag); 4636 return (retval); 4637 } 4638 4639 /* 4640 * apic_intr_redistribute does all the messy computations for identifying 4641 * which interrupt to move to which CPU. Currently we do just one interrupt 4642 * at a time. This reduces the time we spent doing all this within clock 4643 * interrupt. When it is done in idle, we could do more than 1. 4644 * First we find the most busy and the most free CPU (time in ISR only) 4645 * skipping those CPUs that has been identified as being ineligible (cpu_skip) 4646 * Then we look for IRQs which are closest to the difference between the 4647 * most busy CPU and the average ISR load. We try to find one whose load 4648 * is less than difference.If none exists, then we chose one larger than the 4649 * difference, provided it does not make the most idle CPU worse than the 4650 * most busy one. In the end, we clear all the busy fields for CPUs. For 4651 * IRQs, they are cleared as they are scanned. 
4652 */ 4653 static void 4654 apic_intr_redistribute() 4655 { 4656 int busiest_cpu, most_free_cpu; 4657 int cpu_free, cpu_busy, max_busy, min_busy; 4658 int min_free, diff; 4659 int average_busy, cpus_online; 4660 int i, busy; 4661 apic_cpus_info_t *cpu_infop; 4662 apic_irq_t *min_busy_irq = NULL; 4663 apic_irq_t *max_busy_irq = NULL; 4664 4665 busiest_cpu = most_free_cpu = -1; 4666 cpu_free = cpu_busy = max_busy = average_busy = 0; 4667 min_free = apic_sample_factor_redistribution; 4668 cpus_online = 0; 4669 /* 4670 * Below we will check for CPU_INTR_ENABLE, bound, temp_bound, temp_cpu 4671 * without ioapic_lock. That is OK as we are just doing statistical 4672 * sampling anyway and any inaccuracy now will get corrected next time 4673 * The call to rebind which actually changes things will make sure 4674 * we are consistent. 4675 */ 4676 for (i = 0; i < apic_nproc; i++) { 4677 if (!(apic_redist_cpu_skip & (1 << i)) && 4678 (apic_cpus[i].aci_status & APIC_CPU_INTR_ENABLE)) { 4679 4680 cpu_infop = &apic_cpus[i]; 4681 /* 4682 * If no unbound interrupts or only 1 total on this 4683 * CPU, skip 4684 */ 4685 if (!cpu_infop->aci_temp_bound || 4686 (cpu_infop->aci_bound + cpu_infop->aci_temp_bound) 4687 == 1) { 4688 apic_redist_cpu_skip |= 1 << i; 4689 continue; 4690 } 4691 4692 busy = cpu_infop->aci_busy; 4693 average_busy += busy; 4694 cpus_online++; 4695 if (max_busy < busy) { 4696 max_busy = busy; 4697 busiest_cpu = i; 4698 } 4699 if (min_free > busy) { 4700 min_free = busy; 4701 most_free_cpu = i; 4702 } 4703 if (busy > apic_int_busy_mark) { 4704 cpu_busy |= 1 << i; 4705 } else { 4706 if (busy < apic_int_free_mark) 4707 cpu_free |= 1 << i; 4708 } 4709 } 4710 } 4711 if ((cpu_busy && cpu_free) || 4712 (max_busy >= (min_free + apic_diff_for_redistribution))) { 4713 4714 apic_num_imbalance++; 4715 #ifdef DEBUG 4716 if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) { 4717 prom_printf( 4718 "redistribute busy=%x free=%x max=%x min=%x", 4719 cpu_busy, cpu_free, max_busy, 
min_free); 4720 } 4721 #endif /* DEBUG */ 4722 4723 4724 average_busy /= cpus_online; 4725 4726 diff = max_busy - average_busy; 4727 min_busy = max_busy; /* start with the max possible value */ 4728 max_busy = 0; 4729 min_busy_irq = max_busy_irq = NULL; 4730 i = apic_min_device_irq; 4731 for (; i < apic_max_device_irq; i++) { 4732 apic_irq_t *irq_ptr; 4733 /* Change to linked list per CPU ? */ 4734 if ((irq_ptr = apic_irq_table[i]) == NULL) 4735 continue; 4736 /* Check for irq_busy & decide which one to move */ 4737 /* Also zero them for next round */ 4738 if ((irq_ptr->airq_temp_cpu == busiest_cpu) && 4739 irq_ptr->airq_busy) { 4740 if (irq_ptr->airq_busy < diff) { 4741 /* 4742 * Check for least busy CPU, 4743 * best fit or what ? 4744 */ 4745 if (max_busy < irq_ptr->airq_busy) { 4746 /* 4747 * Most busy within the 4748 * required differential 4749 */ 4750 max_busy = irq_ptr->airq_busy; 4751 max_busy_irq = irq_ptr; 4752 } 4753 } else { 4754 if (min_busy > irq_ptr->airq_busy) { 4755 /* 4756 * least busy, but more than 4757 * the reqd diff 4758 */ 4759 if (min_busy < 4760 (diff + average_busy - 4761 min_free)) { 4762 /* 4763 * Making sure new cpu 4764 * will not end up 4765 * worse 4766 */ 4767 min_busy = 4768 irq_ptr->airq_busy; 4769 4770 min_busy_irq = irq_ptr; 4771 } 4772 } 4773 } 4774 } 4775 irq_ptr->airq_busy = 0; 4776 } 4777 4778 if (max_busy_irq != NULL) { 4779 #ifdef DEBUG 4780 if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) { 4781 prom_printf("rebinding %x to %x", 4782 max_busy_irq->airq_vector, most_free_cpu); 4783 } 4784 #endif /* DEBUG */ 4785 if (apic_rebind_all(max_busy_irq, most_free_cpu, 0) 4786 == 0) 4787 /* Make change permenant */ 4788 max_busy_irq->airq_cpu = (uchar_t)most_free_cpu; 4789 } else if (min_busy_irq != NULL) { 4790 #ifdef DEBUG 4791 if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) { 4792 prom_printf("rebinding %x to %x", 4793 min_busy_irq->airq_vector, most_free_cpu); 4794 } 4795 #endif /* DEBUG */ 4796 4797 if (apic_rebind_all(min_busy_irq, 
most_free_cpu, 0) == 4798 0) 4799 /* Make change permenant */ 4800 min_busy_irq->airq_cpu = (uchar_t)most_free_cpu; 4801 } else { 4802 if (cpu_busy != (1 << busiest_cpu)) { 4803 apic_redist_cpu_skip |= 1 << busiest_cpu; 4804 /* 4805 * We leave cpu_skip set so that next time we 4806 * can choose another cpu 4807 */ 4808 } 4809 } 4810 apic_num_rebind++; 4811 } else { 4812 /* 4813 * found nothing. Could be that we skipped over valid CPUs 4814 * or we have balanced everything. If we had a variable 4815 * ticks_for_redistribution, it could be increased here. 4816 * apic_int_busy, int_free etc would also need to be 4817 * changed. 4818 */ 4819 if (apic_redist_cpu_skip) 4820 apic_redist_cpu_skip = 0; 4821 } 4822 for (i = 0; i < apic_nproc; i++) { 4823 apic_cpus[i].aci_busy = 0; 4824 } 4825 } 4826 4827 static void 4828 apic_cleanup_busy() 4829 { 4830 int i; 4831 apic_irq_t *irq_ptr; 4832 4833 for (i = 0; i < apic_nproc; i++) { 4834 apic_cpus[i].aci_busy = 0; 4835 } 4836 4837 for (i = apic_min_device_irq; i < apic_max_device_irq; i++) { 4838 if ((irq_ptr = apic_irq_table[i]) != NULL) 4839 irq_ptr->airq_busy = 0; 4840 } 4841 apic_skipped_redistribute = 0; 4842 } 4843 4844 4845 /* 4846 * This function will reprogram the timer. 4847 * 4848 * When in oneshot mode the argument is the absolute time in future to 4849 * generate the interrupt at. 4850 * 4851 * When in periodic mode, the argument is the interval at which the 4852 * interrupts should be generated. There is no need to support the periodic 4853 * mode timer change at this time. 4854 */ 4855 static void 4856 apic_timer_reprogram(hrtime_t time) 4857 { 4858 hrtime_t now; 4859 uint_t ticks; 4860 4861 /* 4862 * We should be called from high PIL context (CBE_HIGH_PIL), 4863 * so kpreempt is disabled. 
4864 */ 4865 4866 if (!apic_oneshot) { 4867 /* time is the interval for periodic mode */ 4868 ticks = (uint_t)((time) / apic_nsec_per_tick); 4869 } else { 4870 /* one shot mode */ 4871 4872 now = gethrtime(); 4873 4874 if (time <= now) { 4875 /* 4876 * requested to generate an interrupt in the past 4877 * generate an interrupt as soon as possible 4878 */ 4879 ticks = apic_min_timer_ticks; 4880 } else if ((time - now) > apic_nsec_max) { 4881 /* 4882 * requested to generate an interrupt at a time 4883 * further than what we are capable of. Set to max 4884 * the hardware can handle 4885 */ 4886 4887 ticks = APIC_MAXVAL; 4888 #ifdef DEBUG 4889 cmn_err(CE_CONT, "apic_timer_reprogram, request at" 4890 " %lld too far in future, current time" 4891 " %lld \n", time, now); 4892 #endif /* DEBUG */ 4893 } else 4894 ticks = (uint_t)((time - now) / apic_nsec_per_tick); 4895 } 4896 4897 if (ticks < apic_min_timer_ticks) 4898 ticks = apic_min_timer_ticks; 4899 4900 apicadr[APIC_INIT_COUNT] = ticks; 4901 4902 } 4903 4904 /* 4905 * This function will enable timer interrupts. 4906 */ 4907 static void 4908 apic_timer_enable(void) 4909 { 4910 /* 4911 * We should be Called from high PIL context (CBE_HIGH_PIL), 4912 * so kpreempt is disabled. 4913 */ 4914 4915 if (!apic_oneshot) 4916 apicadr[APIC_LOCAL_TIMER] = 4917 (apic_clkvect + APIC_BASE_VECT) | AV_TIME; 4918 else { 4919 /* one shot */ 4920 apicadr[APIC_LOCAL_TIMER] = (apic_clkvect + APIC_BASE_VECT); 4921 } 4922 } 4923 4924 /* 4925 * This function will disable timer interrupts. 4926 */ 4927 static void 4928 apic_timer_disable(void) 4929 { 4930 /* 4931 * We should be Called from high PIL context (CBE_HIGH_PIL), 4932 * so kpreempt is disabled. 
4933 */ 4934 4935 apicadr[APIC_LOCAL_TIMER] = (apic_clkvect + APIC_BASE_VECT) | AV_MASK; 4936 } 4937 4938 4939 cyclic_id_t apic_cyclic_id; 4940 4941 /* 4942 * If this module needs to be a consumer of cyclic subsystem, they 4943 * can be added here, since at this time kernel cyclic subsystem is initialized 4944 * argument is not currently used, and is reserved for future. 4945 */ 4946 static void 4947 apic_post_cyclic_setup(void *arg) 4948 { 4949 _NOTE(ARGUNUSED(arg)) 4950 cyc_handler_t hdlr; 4951 cyc_time_t when; 4952 4953 /* cpu_lock is held */ 4954 4955 /* set up cyclics for intr redistribution */ 4956 4957 /* 4958 * In peridoc mode intr redistribution processing is done in 4959 * apic_intr_enter during clk intr processing 4960 */ 4961 if (!apic_oneshot) 4962 return; 4963 4964 hdlr.cyh_level = CY_LOW_LEVEL; 4965 hdlr.cyh_func = (cyc_func_t)apic_redistribute_compute; 4966 hdlr.cyh_arg = NULL; 4967 4968 when.cyt_when = 0; 4969 when.cyt_interval = apic_redistribute_sample_interval; 4970 apic_cyclic_id = cyclic_add(&hdlr, &when); 4971 4972 4973 } 4974 4975 static void 4976 apic_redistribute_compute(void) 4977 { 4978 int i, j, max_busy; 4979 4980 if (apic_enable_dynamic_migration) { 4981 if (++apic_nticks == apic_sample_factor_redistribution) { 4982 /* 4983 * Time to call apic_intr_redistribute(). 4984 * reset apic_nticks. 
This will cause max_busy 4985 * to be calculated below and if it is more than 4986 * apic_int_busy, we will do the whole thing 4987 */ 4988 apic_nticks = 0; 4989 } 4990 max_busy = 0; 4991 for (i = 0; i < apic_nproc; i++) { 4992 4993 /* 4994 * Check if curipl is non zero & if ISR is in 4995 * progress 4996 */ 4997 if (((j = apic_cpus[i].aci_curipl) != 0) && 4998 (apic_cpus[i].aci_ISR_in_progress & (1 << j))) { 4999 5000 int irq; 5001 apic_cpus[i].aci_busy++; 5002 irq = apic_cpus[i].aci_current[j]; 5003 apic_irq_table[irq]->airq_busy++; 5004 } 5005 5006 if (!apic_nticks && 5007 (apic_cpus[i].aci_busy > max_busy)) 5008 max_busy = apic_cpus[i].aci_busy; 5009 } 5010 if (!apic_nticks) { 5011 if (max_busy > apic_int_busy_mark) { 5012 /* 5013 * We could make the following check be 5014 * skipped > 1 in which case, we get a 5015 * redistribution at half the busy mark (due to 5016 * double interval). Need to be able to collect 5017 * more empirical data to decide if that is a 5018 * good strategy. Punt for now. 
5019 */ 5020 if (apic_skipped_redistribute) 5021 apic_cleanup_busy(); 5022 else 5023 apic_intr_redistribute(); 5024 } else 5025 apic_skipped_redistribute++; 5026 } 5027 } 5028 } 5029 5030 5031 static int 5032 apic_acpi_translate_pci_irq(dev_info_t *dip, int busid, int devid, 5033 int ipin, int *pci_irqp, iflag_t *intr_flagp) 5034 { 5035 5036 int status; 5037 acpi_psm_lnk_t acpipsmlnk; 5038 5039 if ((status = acpi_get_irq_cache_ent(busid, devid, ipin, pci_irqp, 5040 intr_flagp)) == ACPI_PSM_SUCCESS) { 5041 APIC_VERBOSE_IRQ((CE_CONT, "!pcplusmp: Found irqno %d " 5042 "from cache for device %s, instance #%d\n", *pci_irqp, 5043 ddi_get_name(dip), ddi_get_instance(dip))); 5044 return (status); 5045 } 5046 5047 bzero(&acpipsmlnk, sizeof (acpi_psm_lnk_t)); 5048 5049 if ((status = acpi_translate_pci_irq(dip, ipin, pci_irqp, intr_flagp, 5050 &acpipsmlnk)) == ACPI_PSM_FAILURE) { 5051 APIC_VERBOSE_IRQ((CE_WARN, "pcplusmp: " 5052 " acpi_translate_pci_irq failed for device %s, instance" 5053 " #%d", ddi_get_name(dip), ddi_get_instance(dip))); 5054 return (status); 5055 } 5056 5057 if (status == ACPI_PSM_PARTIAL && acpipsmlnk.lnkobj != NULL) { 5058 status = apic_acpi_irq_configure(&acpipsmlnk, dip, pci_irqp, 5059 intr_flagp); 5060 if (status != ACPI_PSM_SUCCESS) { 5061 status = acpi_get_current_irq_resource(&acpipsmlnk, 5062 pci_irqp, intr_flagp); 5063 } 5064 } 5065 5066 if (status == ACPI_PSM_SUCCESS) { 5067 acpi_new_irq_cache_ent(busid, devid, ipin, *pci_irqp, 5068 intr_flagp, &acpipsmlnk); 5069 5070 APIC_VERBOSE_IRQ((CE_CONT, "pcplusmp: [ACPI] " 5071 "new irq %d for device %s, instance #%d\n", 5072 *pci_irqp, ddi_get_name(dip), ddi_get_instance(dip))); 5073 } 5074 5075 return (status); 5076 } 5077 5078 /* 5079 * Configures the irq for the interrupt link device identified by 5080 * acpipsmlnkp. 5081 * 5082 * Gets the current and the list of possible irq settings for the 5083 * device. 
If apic_unconditional_srs is not set, and the current 5084 * resource setting is in the list of possible irq settings, 5085 * current irq resource setting is passed to the caller. 5086 * 5087 * Otherwise, picks an irq number from the list of possible irq 5088 * settings, and sets the irq of the device to this value. 5089 * If prefer_crs is set, among a set of irq numbers in the list that have 5090 * the least number of devices sharing the interrupt, we pick current irq 5091 * resource setting if it is a member of this set. 5092 * 5093 * Passes the irq number in the value pointed to by pci_irqp, and 5094 * polarity and sensitivity in the structure pointed to by dipintrflagp 5095 * to the caller. 5096 * 5097 * Note that if setting the irq resource failed, but successfuly obtained 5098 * the current irq resource settings, passes the current irq resources 5099 * and considers it a success. 5100 * 5101 * Returns: 5102 * ACPI_PSM_SUCCESS on success. 5103 * 5104 * ACPI_PSM_FAILURE if an error occured during the configuration or 5105 * if a suitable irq was not found for this device, or if setting the 5106 * irq resource and obtaining the current resource fails. 
5107 * 5108 */ 5109 static int 5110 apic_acpi_irq_configure(acpi_psm_lnk_t *acpipsmlnkp, dev_info_t *dip, 5111 int *pci_irqp, iflag_t *dipintr_flagp) 5112 { 5113 5114 int i, min_share, foundnow, done = 0; 5115 int32_t irq; 5116 int32_t share_irq = -1; 5117 int32_t chosen_irq = -1; 5118 int cur_irq = -1; 5119 acpi_irqlist_t *irqlistp; 5120 acpi_irqlist_t *irqlistent; 5121 5122 if ((acpi_get_possible_irq_resources(acpipsmlnkp, &irqlistp)) 5123 == ACPI_PSM_FAILURE) { 5124 APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: Unable to determine " 5125 "or assign IRQ for device %s, instance #%d: The system was " 5126 "unable to get the list of potential IRQs from ACPI.", 5127 ddi_get_name(dip), ddi_get_instance(dip))); 5128 5129 return (ACPI_PSM_FAILURE); 5130 } 5131 5132 if ((acpi_get_current_irq_resource(acpipsmlnkp, &cur_irq, 5133 dipintr_flagp) == ACPI_PSM_SUCCESS) && (!apic_unconditional_srs) && 5134 (cur_irq > 0)) { 5135 /* 5136 * If an IRQ is set in CRS and that IRQ exists in the set 5137 * returned from _PRS, return that IRQ, otherwise print 5138 * a warning 5139 */ 5140 5141 if (acpi_irqlist_find_irq(irqlistp, cur_irq, NULL) 5142 == ACPI_PSM_SUCCESS) { 5143 5144 acpi_free_irqlist(irqlistp); 5145 ASSERT(pci_irqp != NULL); 5146 *pci_irqp = cur_irq; 5147 return (ACPI_PSM_SUCCESS); 5148 } 5149 5150 APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: Could not find the " 5151 "current irq %d for device %s, instance #%d in ACPI's " 5152 "list of possible irqs for this device. 
Picking one from " 5153 " the latter list.", cur_irq, ddi_get_name(dip), 5154 ddi_get_instance(dip))); 5155 } 5156 5157 irqlistent = irqlistp; 5158 min_share = 255; 5159 5160 while (irqlistent != NULL) { 5161 irqlistent->intr_flags.bustype = BUS_PCI; 5162 5163 for (foundnow = 0, i = 0; i < irqlistent->num_irqs; i++) { 5164 5165 irq = irqlistent->irqs[i]; 5166 5167 if ((irq < 16) && (apic_reserved_irqlist[irq])) 5168 continue; 5169 5170 if (irq == 0) { 5171 /* invalid irq number */ 5172 continue; 5173 } 5174 5175 if ((apic_irq_table[irq] == NULL) || 5176 (apic_irq_table[irq]->airq_dip == dip)) { 5177 chosen_irq = irq; 5178 foundnow = 1; 5179 /* 5180 * If we do not prefer current irq from crs 5181 * or if we do and this irq is the same as 5182 * current irq from crs, this is the one 5183 * to pick. 5184 */ 5185 if (!(apic_prefer_crs) || (irq == cur_irq)) { 5186 done = 1; 5187 break; 5188 } 5189 continue; 5190 } 5191 5192 if (irqlistent->intr_flags.intr_el == INTR_EL_EDGE) 5193 continue; 5194 5195 if (!acpi_intr_compatible(irqlistent->intr_flags, 5196 apic_irq_table[irq]->airq_iflag)) 5197 continue; 5198 5199 if ((apic_irq_table[irq]->airq_share < min_share) || 5200 ((apic_irq_table[irq]->airq_share == min_share) && 5201 (cur_irq == irq) && (apic_prefer_crs))) { 5202 min_share = apic_irq_table[irq]->airq_share; 5203 share_irq = irq; 5204 foundnow = 1; 5205 } 5206 } 5207 5208 /* 5209 * If we found an IRQ in the inner loop this time, save the 5210 * details from the irqlist for later use. 5211 */ 5212 if (foundnow && ((chosen_irq != -1) || (share_irq != -1))) { 5213 /* 5214 * Copy the acpi_prs_private_t and flags from this 5215 * irq list entry, since we found an irq from this 5216 * entry. 
5217 */ 5218 acpipsmlnkp->acpi_prs_prv = irqlistent->acpi_prs_prv; 5219 *dipintr_flagp = irqlistent->intr_flags; 5220 } 5221 5222 if (done) 5223 break; 5224 5225 /* Go to the next irqlist entry */ 5226 irqlistent = irqlistent->next; 5227 } 5228 5229 5230 acpi_free_irqlist(irqlistp); 5231 if (chosen_irq != -1) 5232 irq = chosen_irq; 5233 else if (share_irq != -1) 5234 irq = share_irq; 5235 else { 5236 APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: Could not find a " 5237 "suitable irq from the list of possible irqs for device " 5238 "%s, instance #%d in ACPI's list of possible irqs", 5239 ddi_get_name(dip), ddi_get_instance(dip))); 5240 return (ACPI_PSM_FAILURE); 5241 } 5242 5243 APIC_VERBOSE_IRQ((CE_CONT, "!pcplusmp: Setting irq %d for device %s " 5244 "instance #%d\n", irq, ddi_get_name(dip), ddi_get_instance(dip))); 5245 5246 if ((acpi_set_irq_resource(acpipsmlnkp, irq)) == ACPI_PSM_SUCCESS) { 5247 /* 5248 * setting irq was successful, check to make sure CRS 5249 * reflects that. If CRS does not agree with what we 5250 * set, return the irq that was set. 5251 */ 5252 5253 if (acpi_get_current_irq_resource(acpipsmlnkp, &cur_irq, 5254 dipintr_flagp) == ACPI_PSM_SUCCESS) { 5255 5256 if (cur_irq != irq) 5257 APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: " 5258 "IRQ resource set (irqno %d) for device %s " 5259 "instance #%d, differs from current " 5260 "setting irqno %d", 5261 irq, ddi_get_name(dip), 5262 ddi_get_instance(dip), cur_irq)); 5263 } 5264 5265 /* 5266 * return the irq that was set, and not what CRS reports, 5267 * since CRS has been seen to be bogus on some systems 5268 */ 5269 cur_irq = irq; 5270 } else { 5271 APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: set resource irq %d " 5272 "failed for device %s instance #%d", 5273 irq, ddi_get_name(dip), ddi_get_instance(dip))); 5274 5275 if (cur_irq == -1) 5276 return (ACPI_PSM_FAILURE); 5277 } 5278 5279 ASSERT(pci_irqp != NULL); 5280 *pci_irqp = cur_irq; 5281 return (ACPI_PSM_SUCCESS); 5282 } 5283