/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

/*
 * PSMI 1.1 extensions are supported only in 2.6 and later versions.
 * PSMI 1.2 extensions are supported only in 2.7 and later versions.
 * PSMI 1.3 and 1.4 extensions are supported in Solaris 10.
 * PSMI 1.5 extensions are supported in Solaris Nevada.
 */
#define PSMI_1_5

#include <sys/processor.h>
#include <sys/time.h>
#include <sys/psm.h>
#include <sys/smp_impldefs.h>
#include <sys/cram.h>
#include <sys/acpi/acpi.h>
#include <sys/acpica.h>
#include <sys/psm_common.h>
#include "apic.h"
#include <sys/pit.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ddi_impldefs.h>
#include <sys/pci.h>
#include <sys/promif.h>
#include <sys/x86_archext.h>
#include <sys/cpc_impl.h>
#include <sys/uadmin.h>
#include <sys/panic.h>
#include <sys/debug.h>
#include <sys/archsystm.h>
#include <sys/trap.h>
#include <sys/machsystm.h>
#include <sys/cpuvar.h>
#include <sys/rm_platter.h>
#include <sys/privregs.h>
#include <sys/cyclic.h>
#include <sys/note.h>
#include <sys/pci_intr_lib.h>

/*
 * Local Function Prototypes
 */
static void apic_init_intr();
static void apic_ret();
static int apic_handle_defconf();
static int apic_parse_mpct(caddr_t mpct, int bypass);
static struct apic_mpfps_hdr *apic_find_fps_sig(caddr_t fptr, int size);
static int apic_checksum(caddr_t bptr, int len);
static int get_apic_cmd1();
static int get_apic_pri();
static int apic_find_bus_type(char *bus);
static int apic_find_bus(int busid);
static int apic_find_bus_id(int bustype);
static struct apic_io_intr *apic_find_io_intr(int irqno);
int apic_allocate_irq(int irq);
static int apic_find_free_irq(int start, int end);
static uchar_t apic_allocate_vector(int ipl, int irq, int pri);
static void apic_modify_vector(uchar_t vector, int irq);
static void apic_mark_vector(uchar_t oldvector, uchar_t newvector);
static uchar_t apic_xlate_vector(uchar_t oldvector);
static void apic_xlate_vector_free_timeout_handler(void *arg);
static void apic_free_vector(uchar_t vector);
static void apic_reprogram_timeout_handler(void *arg);
static int apic_check_stuck_interrupt(apic_irq_t *irq_ptr, int old_bind_cpu,
    int new_bind_cpu, volatile int32_t *ioapic, int intin_no, int which_irq);
static int apic_setup_io_intr(apic_irq_t *irqptr, int irq);
static int apic_setup_io_intr_deferred(apic_irq_t *irqptr, int irq);
static void apic_record_rdt_entry(apic_irq_t *irqptr, int irq);
static struct apic_io_intr *apic_find_io_intr_w_busid(int irqno, int busid);
static int apic_find_intin(uchar_t ioapic, uchar_t intin);
static int apic_handle_pci_pci_bridge(dev_info_t *idip, int child_devno,
    int child_ipin, struct apic_io_intr **intrp);
static int apic_setup_irq_table(dev_info_t *dip, int irqno,
    struct apic_io_intr *intrp, struct intrspec *ispec, iflag_t *intr_flagp,
    int type);
static int apic_setup_sci_irq_table(int irqno, uchar_t ipl,
    iflag_t *intr_flagp);
static void apic_nmi_intr(caddr_t arg);
uchar_t apic_bind_intr(dev_info_t *dip, int irq, uchar_t ioapicid,
    uchar_t intin);
static int apic_rebind(apic_irq_t *irq_ptr, int bind_cpu, int acquire_lock,
    int when);
int apic_rebind_all(apic_irq_t *irq_ptr, int bind_cpu, int safe);
static void apic_intr_redistribute();
static void apic_cleanup_busy();
static void apic_set_pwroff_method_from_mpcnfhdr(struct apic_mp_cnf_hdr *hdrp);
int apic_introp_xlate(dev_info_t *dip, struct intrspec *ispec, int type);

/* ACPI support routines */
static int acpi_probe(void);
static int apic_acpi_irq_configure(acpi_psm_lnk_t *acpipsmlnkp, dev_info_t *dip,
    int *pci_irqp, iflag_t *intr_flagp);

static int apic_acpi_translate_pci_irq(dev_info_t *dip, int busid, int devid,
    int ipin, int *pci_irqp, iflag_t *intr_flagp);
static uchar_t acpi_find_ioapic(int irq);
static int acpi_intr_compatible(iflag_t iflag1, iflag_t iflag2);

/*
 * standard MP entries
 */
static int apic_probe();
static int apic_clkinit();
static int apic_getclkirq(int ipl);
static uint_t apic_calibrate(volatile uint32_t *addr,
    uint16_t *pit_ticks_adj);
static hrtime_t apic_gettime();
static hrtime_t apic_gethrtime();
static void apic_init();
static void apic_picinit(void);
static void apic_cpu_start(processorid_t cpun, caddr_t rm_code);
static int apic_post_cpu_start(void);
static void apic_send_ipi(int cpun, int ipl);
static void apic_set_softintr(int softintr);
static void apic_set_idlecpu(processorid_t cpun);
static void apic_unset_idlecpu(processorid_t cpun);
static int apic_softlvl_to_irq(int ipl);
static int apic_intr_enter(int ipl, int *vect);
static void apic_intr_exit(int ipl, int vect);
static void apic_setspl(int ipl);
static int apic_addspl(int ipl, int vector, int min_ipl, int max_ipl);
static int apic_delspl(int ipl, int vector, int min_ipl, int max_ipl);
static void apic_shutdown(int cmd, int fcn);
static void apic_preshutdown(int cmd, int fcn);
static int apic_disable_intr(processorid_t cpun);
static void apic_enable_intr(processorid_t cpun);
static processorid_t apic_get_next_processorid(processorid_t cpun);
static int apic_get_ipivect(int ipl, int type);
static void apic_timer_reprogram(hrtime_t time);
static void apic_timer_enable(void);
static void apic_timer_disable(void);
static void apic_post_cyclic_setup(void *arg);
extern int apic_intr_ops(dev_info_t *, ddi_intr_handle_impl_t *,
    psm_intr_op_t, int *);

static int apic_oneshot = 0;
int apic_oneshot_enable = 1; /* to allow disabling one-shot capability */

/*
 * These variables are frequently accessed in apic_intr_enter(),
 * apic_intr_exit and apic_setspl, so group them together
 */
volatile uint32_t *apicadr = NULL;	/* virtual addr of local APIC */
int apic_setspl_delay = 1;		/* apic_setspl - delay enable */
int apic_clkvect;

/* ACPI SCI interrupt configuration; -1 if SCI not used */
int apic_sci_vect = -1;
iflag_t apic_sci_flags;

/* vector at which error interrupts come in */
int apic_errvect;
int apic_enable_error_intr = 1;
int apic_error_display_delay = 100;

/* vector at which performance counter overflow interrupts come in */
int apic_cpcovf_vect;
int apic_enable_cpcovf_intr = 1;

/* Max wait time (in microsecs) for flags to clear in an RDT entry. */
static int apic_max_usecs_clear_pending = 1000;

/* Amt of usecs to wait before checking if RDT flags have reset. */
#define APIC_USECS_PER_WAIT_INTERVAL 100

/* Maximum number of times to retry reprogramming via the timeout */
#define APIC_REPROGRAM_MAX_TIMEOUTS 10

/* timeout delay for IOAPIC delayed reprogramming */
#define APIC_REPROGRAM_TIMEOUT_DELAY 5 /* microseconds */

/* Parameter to apic_rebind(): Should reprogramming be done now or later? */
#define DEFERRED 1
#define IMMEDIATE 0

/*
 * Largest value representable in an unsigned char, derived from NBBY
 * (the number of bits per byte, from <sys/param.h>).
 */
#define UCHAR_MAX ((1 << NBBY) - 1)

uchar_t apic_reserved_irqlist[MAX_ISA_IRQ + 1];

/*
 * The following vector assignments influence the value of ipltopri and
 * vectortoipl. Note that vectors 0 - 0x1f are not used. We can program
 * idle to 0 and IPL 0 to 0x10 to differentiate idle in case
 * we care to do so in future. Note some IPLs which are rarely used
 * will share the vector ranges and heavily used IPLs (5 and 6) have
 * a wide range.
 *	IPL		Vector range.		as passed to intr_enter
 *	0		none.
 *	1,2,3		0x20-0x2f		0x0-0xf
 *	4		0x30-0x3f		0x10-0x1f
 *	5		0x40-0x5f		0x20-0x3f
 *	6		0x60-0x7f		0x40-0x5f
 *	7,8,9		0x80-0x8f		0x60-0x6f
 *	10		0x90-0x9f		0x70-0x7f
 *	11		0xa0-0xaf		0x80-0x8f
 *	...		...
 *	16		0xf0-0xff		0xd0-0xdf
 */
uchar_t apic_vectortoipl[APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL] = {
	3, 4, 5, 5, 6, 6, 9, 10, 11, 12, 13, 14, 15, 16
};
/*
 * The ipl of an ISR at vector X is apic_vectortoipl[X>>4]
 * (one table entry per group of 16 vectors, matching the ranges above).
 * NOTE that this is vector as passed into intr_enter which is
 * programmed vector - 0x20 (APIC_BASE_VECT)
 */

uchar_t apic_ipltopri[MAXIPL + 1];	/* unix ipl to apic pri */
	/* The taskpri to be programmed into apic to mask given ipl */

#if defined(__amd64)
uchar_t apic_cr8pri[MAXIPL + 1];	/* unix ipl to cr8 pri */
#endif

/*
 * Patchable global variables.
 */
int apic_forceload = 0;

#define INTR_ROUND_ROBIN_WITH_AFFINITY 0
#define INTR_ROUND_ROBIN 1
#define INTR_LOWEST_PRIORITY 2

int apic_intr_policy = INTR_ROUND_ROBIN_WITH_AFFINITY;

static int apic_next_bind_cpu = 1;	/* For round robin assignment */
					/* start with cpu 1 */

int apic_coarse_hrtime = 1;		/* 0 - use accurate slow gethrtime() */
					/* 1 - use gettime() for performance */
int apic_flat_model = 0;		/* 0 - clustered. 1 - flat */
int apic_enable_hwsoftint = 0;		/* 0 - disable, 1 - enable */
int apic_enable_bind_log = 1;		/* 1 - display interrupt binding log */
int apic_panic_on_nmi = 0;
int apic_panic_on_apic_error = 0;

int apic_verbose = 0;

/* Flag definitions for apic_verbose */
#define APIC_VERBOSE_IOAPIC_FLAG 0x00000001
#define APIC_VERBOSE_IRQ_FLAG 0x00000002
#define APIC_VERBOSE_POWEROFF_FLAG 0x00000004
#define APIC_VERBOSE_POWEROFF_PAUSE_FLAG 0x00000008


/*
 * Conditional logging helpers gated on bits of apic_verbose. The argument
 * must be a complete, parenthesized argument list because it is pasted
 * directly after the function name.
 *
 * NOTE(review): each macro expands to a bare `if` statement that already
 * ends in a semicolon, so using one as the body of an unbraced if/else is
 * hazardous (dangling else, empty extra statement). Wrapping them in
 * do { } while (0) would be safer but depends on how callers terminate
 * the invocation - confirm against all call sites before changing.
 */
#define APIC_VERBOSE_IOAPIC(fmt) \
	if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) \
		cmn_err fmt;

#define APIC_VERBOSE_IRQ(fmt) \
	if (apic_verbose & APIC_VERBOSE_IRQ_FLAG) \
		cmn_err fmt;

#define APIC_VERBOSE_POWEROFF(fmt) \
	if (apic_verbose & APIC_VERBOSE_POWEROFF_FLAG) \
		prom_printf fmt;


/* Now the ones for Dynamic Interrupt distribution */
int apic_enable_dynamic_migration = 0;

/*
 * If enabled, the distribution works as follows:
 * On every interrupt entry, the current ipl for the CPU is set in cpu_info
 * and the irq corresponding to the ipl is also set in the aci_current array.
 * interrupt exit and setspl (due to soft interrupts) will cause the current
 * ipl to be changed. This is cache friendly as these frequently used
 * paths write into a per cpu structure.
 *
 * Sampling is done by checking the structures for all CPUs and incrementing
 * the busy field of the irq (if any) executing on each CPU and the busy field
 * of the corresponding CPU.
 * In periodic mode this is done on every clock interrupt.
 * In one-shot mode, this is done thru a cyclic with an interval of
 * apic_redistribute_sample_interval (default 10 milli sec).
 *
 * Every apic_sample_factor_redistribution times we sample, we do computations
 * to decide which interrupt needs to be migrated (see comments
 * before apic_intr_redistribute()).
 */

/*
 * Following 3 variables start as % and can be patched or set using an
 * API to be defined in future. They will be scaled to
 * sample_factor_redistribution which is in turn set to hertz+1 (in periodic
 * mode), or 101 in one-shot mode to stagger it away from one sec processing
 */

int apic_int_busy_mark = 60;
int apic_int_free_mark = 20;
int apic_diff_for_redistribution = 10;

/* sampling interval for interrupt redistribution for dynamic migration */
int apic_redistribute_sample_interval = NANOSEC / 100; /* 10 millisec */

/*
 * number of times we sample before deciding to redistribute interrupts
 * for dynamic migration
 */
int apic_sample_factor_redistribution = 101;

/* timeout for xlate_vector, mark_vector */
int apic_revector_timeout = 16 * 10000; /* 160 millisec */

int apic_redist_cpu_skip = 0;
int apic_num_imbalance = 0;
int apic_num_rebind = 0;

int apic_nproc = 0;
int apic_defconf = 0;
int apic_irq_translate = 0;
int apic_spec_rev = 0;
int apic_imcrp = 0;

int apic_use_acpi = 1;			/* 1 = use ACPI, 0 = don't use ACPI */
int apic_use_acpi_madt_only = 0;	/* 1=ONLY use MADT from ACPI */

/*
 * For interrupt link devices, if apic_unconditional_srs is set, an irq resource
 * will be assigned (via _SRS). If it is not set, use the current
 * irq setting (via _CRS), but only if that irq is in the set of possible
 * irqs (returned by _PRS) for the device.
 */
int apic_unconditional_srs = 1;

/*
 * For interrupt link devices, if apic_prefer_crs is set when we are
 * assigning an IRQ resource to a device, prefer the current IRQ setting
 * over other possible irq settings under same conditions.
 */

int apic_prefer_crs = 1;


/* minimum number of timer ticks to program to */
int apic_min_timer_ticks = 1;
/*
 * Local static data
 *
 * apic_ops is the PSM operations vector handed to the PSM framework through
 * apic_psm_info below. Entries are positional; slots this module does not
 * implement are filled with typed NULL pointers.
 */
static struct psm_ops apic_ops = {
	apic_probe,

	apic_init,
	apic_picinit,
	apic_intr_enter,
	apic_intr_exit,
	apic_setspl,
	apic_addspl,
	apic_delspl,
	apic_disable_intr,
	apic_enable_intr,
	apic_softlvl_to_irq,
	apic_set_softintr,

	apic_set_idlecpu,
	apic_unset_idlecpu,

	apic_clkinit,
	apic_getclkirq,
	(void (*)(void))NULL,			/* psm_hrtimeinit */
	apic_gethrtime,

	apic_get_next_processorid,
	apic_cpu_start,
	apic_post_cpu_start,
	apic_shutdown,
	apic_get_ipivect,
	apic_send_ipi,

	(int (*)(dev_info_t *, int))NULL,	/* psm_translate_irq */
	(int (*)(todinfo_t *))NULL,		/* psm_tod_get */
	(int (*)(todinfo_t *))NULL,		/* psm_tod_set */
	(void (*)(int, char *))NULL,		/* psm_notify_error */
	(void (*)(int))NULL,			/* psm_notify_func */
	apic_timer_reprogram,
	apic_timer_enable,
	apic_timer_disable,
	apic_post_cyclic_setup,
	apic_preshutdown,
	apic_intr_ops			/* Advanced DDI Interrupt framework */
};


static struct psm_info apic_psm_info = {
	PSM_INFO_VER01_5,			/* version */
	PSM_OWN_EXCLUSIVE,			/* ownership */
	(struct psm_ops *)&apic_ops,		/* operation */
	"pcplusmp",				/* machine name */
	"pcplusmp v1.4 compatible %I%",
};

static void *apic_hdlp;

#ifdef DEBUG
#define DENT 0x0001
int apic_debug = 0;
/*
 * set apic_restrict_vector to the # of vectors we want to allow per range
 * useful in testing shared interrupt logic by setting it to 2 or 3
 */
int apic_restrict_vector = 0;

#define APIC_DEBUG_MSGBUFSIZE 2048
int apic_debug_msgbuf[APIC_DEBUG_MSGBUFSIZE];
int apic_debug_msgbufindex = 0;

/*
 * Put "int" info into debug buffer. No MP consistency, but light weight.
 * Good enough for most debugging.
 *
 * The index wraps to 0 once it reaches APIC_DEBUG_MSGBUFSIZE - NCPU.
 * NOTE(review): this expands to two statements, so only the store is
 * guarded when the macro is used as the body of an unbraced `if`.
 */
#define APIC_DEBUG_BUF_PUT(x) \
	apic_debug_msgbuf[apic_debug_msgbufindex++] = x; \
	if (apic_debug_msgbufindex >= (APIC_DEBUG_MSGBUFSIZE - NCPU)) \
		apic_debug_msgbufindex = 0;

#endif /* DEBUG */

apic_cpus_info_t *apic_cpus;

static cpuset_t apic_cpumask;
static uint_t apic_flag;

/* Flag to indicate that we need to shut down all processors */
static uint_t apic_shutdown_processors;

uint_t apic_nsec_per_intr = 0;

/*
 * apic_let_idle_redistribute can have the following values:
 * 0 - If clock decremented it from 1 to 0, clock has to call redistribute.
 * apic_redistribute_lock prevents multiple idle cpus from redistributing
 */
int apic_num_idle_redistributions = 0;
static int apic_let_idle_redistribute = 0;
static uint_t apic_nticks = 0;
static uint_t apic_skipped_redistribute = 0;

/* to gather intr data and redistribute */
static void apic_redistribute_compute(void);

static uint_t last_count_read = 0;
static lock_t apic_gethrtime_lock;
volatile int apic_hrtime_stamp = 0;
volatile hrtime_t apic_nsec_since_boot = 0;
static uint_t apic_hertz_count, apic_nsec_per_tick;
static hrtime_t apic_nsec_max;

static hrtime_t apic_last_hrtime = 0;
int apic_hrtime_error = 0;
int apic_remote_hrterr = 0;
int apic_num_nmis = 0;
int apic_apic_error = 0;
int apic_num_apic_errors = 0;
int apic_num_cksum_errors = 0;

static uchar_t apic_io_id[MAX_IO_APIC];
static uchar_t apic_io_ver[MAX_IO_APIC];
static uchar_t apic_io_vectbase[MAX_IO_APIC];
static uchar_t apic_io_vectend[MAX_IO_APIC];
volatile int32_t *apicioadr[MAX_IO_APIC];

/*
 * First available slot to be used as IRQ index into the apic_irq_table
 * for those interrupts (like MSI/X) that don't have a physical IRQ.
 */
int apic_first_avail_irq = APIC_FIRST_FREE_IRQ;

/*
 * apic_ioapic_lock protects the ioapics (reg select), the status, temp_bound
 * and bound elements of cpus_info and the temp_cpu element of irq_struct
 */
lock_t apic_ioapic_lock;

/*
 * apic_ioapic_reprogram_lock prevents a CPU from exiting
 * apic_intr_exit before IOAPIC reprogramming information
 * is collected.
 */
static lock_t apic_ioapic_reprogram_lock;
static int apic_io_max = 0;	/* no. of i/o apics enabled */

static struct apic_io_intr *apic_io_intrp = 0;
static struct apic_bus *apic_busp;

uchar_t apic_vector_to_irq[APIC_MAX_VECTOR+1];
static uchar_t apic_resv_vector[MAXIPL+1];

static char apic_level_intr[APIC_MAX_VECTOR+1];
static int apic_error = 0;
/* values which apic_error can take. Not catastrophic, but may help debug */
#define APIC_ERR_BOOT_EOI 0x1
#define APIC_ERR_GET_IPIVECT_FAIL 0x2
#define APIC_ERR_INVALID_INDEX 0x4
#define APIC_ERR_MARK_VECTOR_FAIL 0x8
#define APIC_ERR_APIC_ERROR 0x40000000
#define APIC_ERR_NMI 0x80000000

static int apic_cmos_ssb_set = 0;

static uint32_t eisa_level_intr_mask = 0;
	/* At least MSB will be set if EISA bus */

static int apic_pci_bus_total = 0;
static uchar_t apic_single_pci_busid = 0;


/*
 * airq_mutex protects additions to the apic_irq_table - the first
 * pointer and any airq_nexts off of that one. It also protects
 * apic_max_device_irq & apic_min_device_irq. It also guarantees
 * that share_id is unique as new ids are generated only when new
 * irq_t structs are linked in. Once linked in the structs are never
 * deleted. temp_cpu & mps_intr_index field indicate if it is programmed
 * or allocated. Note that there is a slight gap between allocating in
 * apic_introp_xlate and programming in addspl.
 */
kmutex_t airq_mutex;
apic_irq_t *apic_irq_table[APIC_MAX_VECTOR+1];
int apic_max_device_irq = 0;
int apic_min_device_irq = APIC_MAX_VECTOR;

/* use to make sure only one cpu handles the nmi */
static lock_t apic_nmi_lock;
/* use to make sure only one cpu handles the error interrupt */
static lock_t apic_error_lock;

/*
 * Following declarations are for revectoring; used when ISRs at different
 * IPLs share an irq.
 */
static lock_t apic_revector_lock;
static int apic_revector_pending = 0;
static uchar_t *apic_oldvec_to_newvec;
static uchar_t *apic_newvec_to_oldvec;

/* Ensures that the IOAPIC-reprogramming timeout is not reentrant */
static kmutex_t apic_reprogram_timeout_mutex;

static struct ioapic_reprogram_data {
	int valid;		/* This entry is valid */
	int bindcpu;		/* The CPU to which the int will be bound */
	unsigned timeouts;	/* # times the reprogram timeout was called */
} apic_reprogram_info[APIC_MAX_VECTOR+1];
/*
 * APIC_MAX_VECTOR + 1 is the maximum # of IRQs as well. apic_reprogram_info
 * is indexed by IRQ number, NOT by vector number.
 */


/*
 * The following added to identify a software poweroff method if available.
 *
 * Each entry pairs an MP-table OEM ID / product ID string with the poweroff
 * method to select when the platform's MP configuration header matches it.
 */

static struct {
	int poweroff_method;
	char oem_id[APIC_MPS_OEM_ID_LEN + 1];	/* MAX + 1 for NULL */
	char prod_id[APIC_MPS_PROD_ID_LEN + 1];	/* MAX + 1 for NULL */
} apic_mps_ids[] = {
	{ APIC_POWEROFF_VIA_RTC, "INTEL", "ALDER" },		/* 4300 */
	{ APIC_POWEROFF_VIA_RTC, "NCR", "AMC" },		/* 4300 */
	{ APIC_POWEROFF_VIA_ASPEN_BMC, "INTEL", "A450NX" },	/* 4400? */
	{ APIC_POWEROFF_VIA_ASPEN_BMC, "INTEL", "AD450NX" },	/* 4400 */
	{ APIC_POWEROFF_VIA_ASPEN_BMC, "INTEL", "AC450NX" },	/* 4400R */
	{ APIC_POWEROFF_VIA_SITKA_BMC, "INTEL", "S450NX" },	/* S50 */
	{ APIC_POWEROFF_VIA_SITKA_BMC, "INTEL", "SC450NX" }	/* S50? */
};

int apic_poweroff_method = APIC_POWEROFF_NONE;

/*
 * Control/data byte sequence for the "Aspen" BMC poweroff method: a
 * SET_WATCHDOG_TIMER command followed by a RESET_WATCHDOG_TIMER command.
 */
static struct {
	uchar_t cntl;
	uchar_t data;
} aspen_bmc[] = {
	{ CC_SMS_WR_START, 0x18 },	/* NetFn/LUN */
	{ CC_SMS_WR_NEXT, 0x24 },	/* Cmd SET_WATCHDOG_TIMER */
	{ CC_SMS_WR_NEXT, 0x84 },	/* DataByte 1: SMS/OS no log */
	{ CC_SMS_WR_NEXT, 0x2 },	/* DataByte 2: Power Down */
	{ CC_SMS_WR_NEXT, 0x0 },	/* DataByte 3: no pre-timeout */
	{ CC_SMS_WR_NEXT, 0x0 },	/* DataByte 4: timer expir. */
	{ CC_SMS_WR_NEXT, 0xa },	/* DataByte 5: init countdown */
	{ CC_SMS_WR_END, 0x0 },		/* DataByte 6: init countdown */

	{ CC_SMS_WR_START, 0x18 },	/* NetFn/LUN */
	{ CC_SMS_WR_END, 0x22 }		/* Cmd RESET_WATCHDOG_TIMER */
};

/*
 * Port/data write sequence for the "Sitka" BMC poweroff method; carries the
 * same two watchdog commands as aspen_bmc[] above.
 */
static struct {
	int port;
	uchar_t data;
} sitka_bmc[] = {
	{ SMS_COMMAND_REGISTER, SMS_WRITE_START },
	{ SMS_DATA_REGISTER, 0x18 },	/* NetFn/LUN */
	{ SMS_DATA_REGISTER, 0x24 },	/* Cmd SET_WATCHDOG_TIMER */
	{ SMS_DATA_REGISTER, 0x84 },	/* DataByte 1: SMS/OS no log */
	{ SMS_DATA_REGISTER, 0x2 },	/* DataByte 2: Power Down */
	{ SMS_DATA_REGISTER, 0x0 },	/* DataByte 3: no pre-timeout */
	{ SMS_DATA_REGISTER, 0x0 },	/* DataByte 4: timer expir. */
	{ SMS_DATA_REGISTER, 0xa },	/* DataByte 5: init countdown */
	{ SMS_COMMAND_REGISTER, SMS_WRITE_END },
	{ SMS_DATA_REGISTER, 0x0 },	/* DataByte 6: init countdown */

	{ SMS_COMMAND_REGISTER, SMS_WRITE_START },
	{ SMS_DATA_REGISTER, 0x18 },	/* NetFn/LUN */
	{ SMS_COMMAND_REGISTER, SMS_WRITE_END },
	{ SMS_DATA_REGISTER, 0x22 }	/* Cmd RESET_WATCHDOG_TIMER */
};


/* Patchable global variables.
*/ 639 int apic_kmdb_on_nmi = 0; /* 0 - no, 1 - yes enter kmdb */ 640 int apic_debug_mps_id = 0; /* 1 - print MPS ID strings */ 641 642 /* 643 * ACPI definitions 644 */ 645 /* _PIC method arguments */ 646 #define ACPI_PIC_MODE 0 647 #define ACPI_APIC_MODE 1 648 649 /* APIC error flags we care about */ 650 #define APIC_SEND_CS_ERROR 0x01 651 #define APIC_RECV_CS_ERROR 0x02 652 #define APIC_CS_ERRORS (APIC_SEND_CS_ERROR|APIC_RECV_CS_ERROR) 653 654 /* 655 * ACPI variables 656 */ 657 /* 1 = acpi is enabled & working, 0 = acpi is not enabled or not there */ 658 static int apic_enable_acpi = 0; 659 660 /* ACPI Multiple APIC Description Table ptr */ 661 static MULTIPLE_APIC_TABLE *acpi_mapic_dtp = NULL; 662 663 /* ACPI Interrupt Source Override Structure ptr */ 664 static MADT_INTERRUPT_OVERRIDE *acpi_isop = NULL; 665 static int acpi_iso_cnt = 0; 666 667 /* ACPI Non-maskable Interrupt Sources ptr */ 668 static MADT_NMI_SOURCE *acpi_nmi_sp = NULL; 669 static int acpi_nmi_scnt = 0; 670 static MADT_LOCAL_APIC_NMI *acpi_nmi_cp = NULL; 671 static int acpi_nmi_ccnt = 0; 672 673 /* 674 * extern declarations 675 */ 676 extern int intr_clear(void); 677 extern void intr_restore(uint_t); 678 #if defined(__amd64) 679 extern int intpri_use_cr8; 680 #endif /* __amd64 */ 681 682 extern int apic_pci_msi_enable_vector(dev_info_t *, int, int, 683 int, int, int); 684 extern apic_irq_t *apic_find_irq(dev_info_t *, struct intrspec *, int); 685 extern int apic_pci_msi_unconfigure(dev_info_t *, int, int); 686 extern int apic_pci_msi_disable_mode(dev_info_t *, int, int); 687 extern int apic_pci_msi_enable_mode(dev_info_t *, int, int); 688 689 /* 690 * This is the loadable module wrapper 691 */ 692 693 int 694 _init(void) 695 { 696 if (apic_coarse_hrtime) 697 apic_ops.psm_gethrtime = &apic_gettime; 698 return (psm_mod_init(&apic_hdlp, &apic_psm_info)); 699 } 700 701 int 702 _fini(void) 703 { 704 return (psm_mod_fini(&apic_hdlp, &apic_psm_info)); 705 } 706 707 int 708 _info(struct modinfo 
*modinfop) 709 { 710 return (psm_mod_info(&apic_hdlp, &apic_psm_info, modinfop)); 711 } 712 713 /* 714 * Auto-configuration routines 715 */ 716 717 /* 718 * Look at MPSpec 1.4 (Intel Order # 242016-005) for details of what we do here 719 * May work with 1.1 - but not guaranteed. 720 * According to the MP Spec, the MP floating pointer structure 721 * will be searched in the order described below: 722 * 1. In the first kilobyte of Extended BIOS Data Area (EBDA) 723 * 2. Within the last kilobyte of system base memory 724 * 3. In the BIOS ROM address space between 0F0000h and 0FFFFh 725 * Once we find the right signature with proper checksum, we call 726 * either handle_defconf or parse_mpct to get all info necessary for 727 * subsequent operations. 728 */ 729 static int 730 apic_probe() 731 { 732 uint32_t mpct_addr, ebda_start = 0, base_mem_end; 733 caddr_t biosdatap; 734 caddr_t mpct; 735 caddr_t fptr; 736 int i, mpct_size, mapsize, retval = PSM_FAILURE; 737 ushort_t ebda_seg, base_mem_size; 738 struct apic_mpfps_hdr *fpsp; 739 struct apic_mp_cnf_hdr *hdrp; 740 int bypass_cpu_and_ioapics_in_mptables; 741 int acpi_user_options; 742 743 if (apic_forceload < 0) 744 return (retval); 745 746 /* Allow override for MADT-only mode */ 747 acpi_user_options = ddi_prop_get_int(DDI_DEV_T_ANY, ddi_root_node(), 0, 748 "acpi-user-options", 0); 749 apic_use_acpi_madt_only = ((acpi_user_options & ACPI_OUSER_MADT) != 0); 750 751 /* Allow apic_use_acpi to override MADT-only mode */ 752 if (!apic_use_acpi) 753 apic_use_acpi_madt_only = 0; 754 755 retval = acpi_probe(); 756 757 /* 758 * mapin the bios data area 40:0 759 * 40:13h - two-byte location reports the base memory size 760 * 40:0Eh - two-byte location for the exact starting address of 761 * the EBDA segment for EISA 762 */ 763 biosdatap = psm_map_phys(0x400, 0x20, PROT_READ); 764 if (!biosdatap) 765 return (retval); 766 fpsp = (struct apic_mpfps_hdr *)NULL; 767 mapsize = MPFPS_RAM_WIN_LEN; 768 /*LINTED: pointer cast may result in 
improper alignment */ 769 ebda_seg = *((ushort_t *)(biosdatap+0xe)); 770 /* check the 1k of EBDA */ 771 if (ebda_seg) { 772 ebda_start = ((uint32_t)ebda_seg) << 4; 773 fptr = psm_map_phys(ebda_start, MPFPS_RAM_WIN_LEN, PROT_READ); 774 if (fptr) { 775 if (!(fpsp = 776 apic_find_fps_sig(fptr, MPFPS_RAM_WIN_LEN))) 777 psm_unmap_phys(fptr, MPFPS_RAM_WIN_LEN); 778 } 779 } 780 /* If not in EBDA, check the last k of system base memory */ 781 if (!fpsp) { 782 /*LINTED: pointer cast may result in improper alignment */ 783 base_mem_size = *((ushort_t *)(biosdatap + 0x13)); 784 785 if (base_mem_size > 512) 786 base_mem_end = 639 * 1024; 787 else 788 base_mem_end = 511 * 1024; 789 /* if ebda == last k of base mem, skip to check BIOS ROM */ 790 if (base_mem_end != ebda_start) { 791 792 fptr = psm_map_phys(base_mem_end, MPFPS_RAM_WIN_LEN, 793 PROT_READ); 794 795 if (fptr) { 796 if (!(fpsp = apic_find_fps_sig(fptr, 797 MPFPS_RAM_WIN_LEN))) 798 psm_unmap_phys(fptr, MPFPS_RAM_WIN_LEN); 799 } 800 } 801 } 802 psm_unmap_phys(biosdatap, 0x20); 803 804 /* If still cannot find it, check the BIOS ROM space */ 805 if (!fpsp) { 806 mapsize = MPFPS_ROM_WIN_LEN; 807 fptr = psm_map_phys(MPFPS_ROM_WIN_START, 808 MPFPS_ROM_WIN_LEN, PROT_READ); 809 if (fptr) { 810 if (!(fpsp = 811 apic_find_fps_sig(fptr, MPFPS_ROM_WIN_LEN))) { 812 psm_unmap_phys(fptr, MPFPS_ROM_WIN_LEN); 813 return (retval); 814 } 815 } 816 } 817 818 if (apic_checksum((caddr_t)fpsp, fpsp->mpfps_length * 16) != 0) { 819 psm_unmap_phys(fptr, MPFPS_ROM_WIN_LEN); 820 return (retval); 821 } 822 823 apic_spec_rev = fpsp->mpfps_spec_rev; 824 if ((apic_spec_rev != 04) && (apic_spec_rev != 01)) { 825 psm_unmap_phys(fptr, MPFPS_ROM_WIN_LEN); 826 return (retval); 827 } 828 829 /* check IMCR is present or not */ 830 apic_imcrp = fpsp->mpfps_featinfo2 & MPFPS_FEATINFO2_IMCRP; 831 832 /* check default configuration (dual CPUs) */ 833 if ((apic_defconf = fpsp->mpfps_featinfo1) != 0) { 834 psm_unmap_phys(fptr, mapsize); 835 return 
(apic_handle_defconf()); 836 } 837 838 /* MP Configuration Table */ 839 mpct_addr = (uint32_t)(fpsp->mpfps_mpct_paddr); 840 841 psm_unmap_phys(fptr, mapsize); /* unmap floating ptr struct */ 842 843 /* 844 * Map in enough memory for the MP Configuration Table Header. 845 * Use this table to read the total length of the BIOS data and 846 * map in all the info 847 */ 848 /*LINTED: pointer cast may result in improper alignment */ 849 hdrp = (struct apic_mp_cnf_hdr *)psm_map_phys(mpct_addr, 850 sizeof (struct apic_mp_cnf_hdr), PROT_READ); 851 if (!hdrp) 852 return (retval); 853 854 /* check mp configuration table signature PCMP */ 855 if (hdrp->mpcnf_sig != 0x504d4350) { 856 psm_unmap_phys((caddr_t)hdrp, sizeof (struct apic_mp_cnf_hdr)); 857 return (retval); 858 } 859 mpct_size = (int)hdrp->mpcnf_tbl_length; 860 861 apic_set_pwroff_method_from_mpcnfhdr(hdrp); 862 863 psm_unmap_phys((caddr_t)hdrp, sizeof (struct apic_mp_cnf_hdr)); 864 865 if ((retval == PSM_SUCCESS) && !apic_use_acpi_madt_only) { 866 /* This is an ACPI machine No need for further checks */ 867 return (retval); 868 } 869 870 /* 871 * Map in the entries for this machine, ie. Processor 872 * Entry Tables, Bus Entry Tables, etc. 
873 * They are in fixed order following one another 874 */ 875 mpct = psm_map_phys(mpct_addr, mpct_size, PROT_READ); 876 if (!mpct) 877 return (retval); 878 879 if (apic_checksum(mpct, mpct_size) != 0) 880 goto apic_fail1; 881 882 883 /*LINTED: pointer cast may result in improper alignment */ 884 hdrp = (struct apic_mp_cnf_hdr *)mpct; 885 /*LINTED: pointer cast may result in improper alignment */ 886 apicadr = (uint32_t *)psm_map_phys((uint32_t)hdrp->mpcnf_local_apic, 887 APIC_LOCAL_MEMLEN, PROT_READ | PROT_WRITE); 888 if (!apicadr) 889 goto apic_fail1; 890 891 /* Parse all information in the tables */ 892 bypass_cpu_and_ioapics_in_mptables = (retval == PSM_SUCCESS); 893 if (apic_parse_mpct(mpct, bypass_cpu_and_ioapics_in_mptables) == 894 PSM_SUCCESS) 895 return (PSM_SUCCESS); 896 897 for (i = 0; i < apic_io_max; i++) 898 psm_unmap_phys((caddr_t)apicioadr[i], APIC_IO_MEMLEN); 899 if (apic_cpus) 900 kmem_free(apic_cpus, sizeof (*apic_cpus) * apic_nproc); 901 if (apicadr) 902 psm_unmap_phys((caddr_t)apicadr, APIC_LOCAL_MEMLEN); 903 apic_fail1: 904 psm_unmap_phys(mpct, mpct_size); 905 return (retval); 906 } 907 908 static void 909 apic_set_pwroff_method_from_mpcnfhdr(struct apic_mp_cnf_hdr *hdrp) 910 { 911 int i; 912 913 for (i = 0; i < (sizeof (apic_mps_ids) / sizeof (apic_mps_ids[0])); 914 i++) { 915 if ((strncmp(hdrp->mpcnf_oem_str, apic_mps_ids[i].oem_id, 916 strlen(apic_mps_ids[i].oem_id)) == 0) && 917 (strncmp(hdrp->mpcnf_prod_str, apic_mps_ids[i].prod_id, 918 strlen(apic_mps_ids[i].prod_id)) == 0)) { 919 920 apic_poweroff_method = apic_mps_ids[i].poweroff_method; 921 break; 922 } 923 } 924 925 if (apic_debug_mps_id != 0) { 926 cmn_err(CE_CONT, "pcplusmp: MPS OEM ID = '%c%c%c%c%c%c%c%c'" 927 "Product ID = '%c%c%c%c%c%c%c%c%c%c%c%c'\n", 928 hdrp->mpcnf_oem_str[0], 929 hdrp->mpcnf_oem_str[1], 930 hdrp->mpcnf_oem_str[2], 931 hdrp->mpcnf_oem_str[3], 932 hdrp->mpcnf_oem_str[4], 933 hdrp->mpcnf_oem_str[5], 934 hdrp->mpcnf_oem_str[6], 935 hdrp->mpcnf_oem_str[7], 936 
hdrp->mpcnf_prod_str[0], 937 hdrp->mpcnf_prod_str[1], 938 hdrp->mpcnf_prod_str[2], 939 hdrp->mpcnf_prod_str[3], 940 hdrp->mpcnf_prod_str[4], 941 hdrp->mpcnf_prod_str[5], 942 hdrp->mpcnf_prod_str[6], 943 hdrp->mpcnf_prod_str[7], 944 hdrp->mpcnf_prod_str[8], 945 hdrp->mpcnf_prod_str[9], 946 hdrp->mpcnf_prod_str[10], 947 hdrp->mpcnf_prod_str[11]); 948 } 949 } 950 951 static int 952 acpi_probe(void) 953 { 954 int i, id, intmax, ver, index, rv; 955 int acpi_verboseflags = 0; 956 int madt_seen, madt_size; 957 APIC_HEADER *ap; 958 MADT_PROCESSOR_APIC *mpa; 959 MADT_IO_APIC *mia; 960 MADT_IO_SAPIC *misa; 961 MADT_INTERRUPT_OVERRIDE *mio; 962 MADT_NMI_SOURCE *mns; 963 MADT_INTERRUPT_SOURCE *mis; 964 MADT_LOCAL_APIC_NMI *mlan; 965 MADT_ADDRESS_OVERRIDE *mao; 966 ACPI_OBJECT_LIST arglist; 967 ACPI_OBJECT arg; 968 int sci; 969 iflag_t sci_flags; 970 volatile int32_t *ioapic; 971 char local_ids[NCPU]; 972 char proc_ids[NCPU]; 973 uchar_t hid; 974 975 if (!apic_use_acpi) 976 return (PSM_FAILURE); 977 978 if (AcpiGetFirmwareTable(APIC_SIG, 1, ACPI_LOGICAL_ADDRESSING, 979 (ACPI_TABLE_HEADER **) &acpi_mapic_dtp) != AE_OK) 980 return (PSM_FAILURE); 981 982 apicadr = (uint32_t *)psm_map_phys( 983 (uint32_t)acpi_mapic_dtp->LocalApicAddress, 984 APIC_LOCAL_MEMLEN, PROT_READ | PROT_WRITE); 985 if (!apicadr) 986 return (PSM_FAILURE); 987 988 id = apicadr[APIC_LID_REG]; 989 local_ids[0] = (uchar_t)(((uint_t)id) >> 24); 990 apic_nproc = index = 1; 991 CPUSET_ONLY(apic_cpumask, 0); 992 apic_io_max = 0; 993 994 ap = (APIC_HEADER *) (acpi_mapic_dtp + 1); 995 madt_size = acpi_mapic_dtp->Length; 996 madt_seen = sizeof (*acpi_mapic_dtp); 997 998 while (madt_seen < madt_size) { 999 switch (ap->Type) { 1000 case APIC_PROCESSOR: 1001 mpa = (MADT_PROCESSOR_APIC *) ap; 1002 if (mpa->ProcessorEnabled) { 1003 if (mpa->LocalApicId == local_ids[0]) 1004 proc_ids[0] = mpa->ProcessorId; 1005 else if (apic_nproc < NCPU) { 1006 local_ids[index] = mpa->LocalApicId; 1007 proc_ids[index] = mpa->ProcessorId; 
1008 CPUSET_ADD(apic_cpumask, index); 1009 index++; 1010 apic_nproc++; 1011 } else 1012 cmn_err(CE_WARN, "pcplusmp: exceeded " 1013 "maximum no. of CPUs (= %d)", NCPU); 1014 } 1015 break; 1016 1017 case APIC_IO: 1018 mia = (MADT_IO_APIC *) ap; 1019 if (apic_io_max < MAX_IO_APIC) { 1020 apic_io_id[apic_io_max] = mia->IoApicId; 1021 apic_io_vectbase[apic_io_max] = 1022 mia->Interrupt; 1023 ioapic = apicioadr[apic_io_max] = 1024 (int32_t *)psm_map_phys( 1025 (uint32_t)mia->Address, 1026 APIC_IO_MEMLEN, PROT_READ | PROT_WRITE); 1027 if (!ioapic) 1028 goto cleanup; 1029 apic_io_max++; 1030 } 1031 break; 1032 1033 case APIC_XRUPT_OVERRIDE: 1034 mio = (MADT_INTERRUPT_OVERRIDE *) ap; 1035 if (acpi_isop == NULL) 1036 acpi_isop = mio; 1037 acpi_iso_cnt++; 1038 break; 1039 1040 case APIC_NMI: 1041 /* UNIMPLEMENTED */ 1042 mns = (MADT_NMI_SOURCE *) ap; 1043 if (acpi_nmi_sp == NULL) 1044 acpi_nmi_sp = mns; 1045 acpi_nmi_scnt++; 1046 1047 cmn_err(CE_NOTE, "!apic: nmi source: %d %d %d\n", 1048 mns->Interrupt, mns->Polarity, 1049 mns->TriggerMode); 1050 break; 1051 1052 case APIC_LOCAL_NMI: 1053 /* UNIMPLEMENTED */ 1054 mlan = (MADT_LOCAL_APIC_NMI *) ap; 1055 if (acpi_nmi_cp == NULL) 1056 acpi_nmi_cp = mlan; 1057 acpi_nmi_ccnt++; 1058 1059 cmn_err(CE_NOTE, "!apic: local nmi: %d %d %d %d\n", 1060 mlan->ProcessorId, mlan->Polarity, 1061 mlan->TriggerMode, mlan->Lint); 1062 break; 1063 1064 case APIC_ADDRESS_OVERRIDE: 1065 /* UNIMPLEMENTED */ 1066 mao = (MADT_ADDRESS_OVERRIDE *) ap; 1067 cmn_err(CE_NOTE, "!apic: address override: %lx\n", 1068 (long)mao->Address); 1069 break; 1070 1071 case APIC_IO_SAPIC: 1072 /* UNIMPLEMENTED */ 1073 misa = (MADT_IO_SAPIC *) ap; 1074 1075 cmn_err(CE_NOTE, "!apic: io sapic: %d %d %lx\n", 1076 misa->IoSapicId, misa->InterruptBase, 1077 (long)misa->Address); 1078 break; 1079 1080 case APIC_XRUPT_SOURCE: 1081 /* UNIMPLEMENTED */ 1082 mis = (MADT_INTERRUPT_SOURCE *) ap; 1083 1084 cmn_err(CE_NOTE, 1085 "!apic: irq source: %d %d %d %d %d %d %d\n", 1086 
mis->ProcessorId, mis->ProcessorEid, 1087 mis->Interrupt, mis->Polarity, 1088 mis->TriggerMode, mis->InterruptType, 1089 mis->IoSapicVector); 1090 break; 1091 case APIC_RESERVED: 1092 default: 1093 break; /* ignore unknown items as per ACPI spec */ 1094 } 1095 1096 /* advance to next entry */ 1097 madt_seen += ap->Length; 1098 ap = (APIC_HEADER *)(((char *)ap) + ap->Length); 1099 } 1100 1101 if ((apic_cpus = kmem_zalloc(sizeof (*apic_cpus) * apic_nproc, 1102 KM_NOSLEEP)) == NULL) 1103 goto cleanup; 1104 1105 /* 1106 * ACPI doesn't provide the local apic ver, get it directly from the 1107 * local apic 1108 */ 1109 ver = apicadr[APIC_VERS_REG]; 1110 for (i = 0; i < apic_nproc; i++) { 1111 apic_cpus[i].aci_local_id = local_ids[i]; 1112 apic_cpus[i].aci_local_ver = (uchar_t)(ver & 0xFF); 1113 } 1114 for (i = 0; i < apic_io_max; i++) { 1115 ioapic = apicioadr[i]; 1116 1117 /* 1118 * need to check Sitka on the following acpi problem 1119 * On the Sitka, the ioapic's apic_id field isn't reporting 1120 * the actual io apic id. We have reported this problem 1121 * to Intel. Until they fix the problem, we will get the 1122 * actual id directly from the ioapic. 
1123 */ 1124 ioapic[APIC_IO_REG] = APIC_ID_CMD; 1125 id = ioapic[APIC_IO_DATA]; 1126 hid = (uchar_t)(((uint_t)id) >> 24); 1127 1128 if (hid != apic_io_id[i]) { 1129 if (apic_io_id[i] == 0) 1130 apic_io_id[i] = hid; 1131 else { /* set ioapic id to whatever reported by ACPI */ 1132 id = ((int32_t)apic_io_id[i]) << 24; 1133 ioapic[APIC_IO_REG] = APIC_ID_CMD; 1134 ioapic[APIC_IO_DATA] = id; 1135 } 1136 } 1137 ioapic[APIC_IO_REG] = APIC_VERS_CMD; 1138 ver = ioapic[APIC_IO_DATA]; 1139 apic_io_ver[i] = (uchar_t)(ver & 0xff); 1140 intmax = (ver >> 16) & 0xff; 1141 apic_io_vectend[i] = apic_io_vectbase[i] + intmax; 1142 if (apic_first_avail_irq <= apic_io_vectend[i]) 1143 apic_first_avail_irq = apic_io_vectend[i] + 1; 1144 } 1145 1146 1147 /* 1148 * Process SCI configuration here 1149 * An error may be returned here if 1150 * acpi-user-options specifies legacy mode 1151 * (no SCI, no ACPI mode) 1152 */ 1153 if (acpica_get_sci(&sci, &sci_flags) != AE_OK) 1154 sci = -1; 1155 1156 /* 1157 * Now call acpi_init() to generate namespaces 1158 * If this fails, we don't attempt to use ACPI 1159 * even if we were able to get a MADT above 1160 */ 1161 if (acpica_init() != AE_OK) 1162 goto cleanup; 1163 1164 /* 1165 * Squirrel away the SCI and flags for later on 1166 * in apic_picinit() when we're ready 1167 */ 1168 apic_sci_vect = sci; 1169 apic_sci_flags = sci_flags; 1170 1171 if (apic_verbose & APIC_VERBOSE_IRQ_FLAG) 1172 acpi_verboseflags |= PSM_VERBOSE_IRQ_FLAG; 1173 1174 if (apic_verbose & APIC_VERBOSE_POWEROFF_FLAG) 1175 acpi_verboseflags |= PSM_VERBOSE_POWEROFF_FLAG; 1176 1177 if (apic_verbose & APIC_VERBOSE_POWEROFF_PAUSE_FLAG) 1178 acpi_verboseflags |= PSM_VERBOSE_POWEROFF_PAUSE_FLAG; 1179 1180 if (acpi_psm_init(apic_psm_info.p_mach_idstring, acpi_verboseflags) == 1181 ACPI_PSM_FAILURE) 1182 goto cleanup; 1183 1184 /* Enable ACPI APIC interrupt routing */ 1185 arglist.Count = 1; 1186 arglist.Pointer = &arg; 1187 arg.Type = ACPI_TYPE_INTEGER; 1188 arg.Integer.Value = 
ACPI_APIC_MODE; /* 1 */ 1189 rv = AcpiEvaluateObject(NULL, "\\_PIC", &arglist, NULL); 1190 if (rv == AE_OK) { 1191 build_reserved_irqlist((uchar_t *)apic_reserved_irqlist); 1192 apic_enable_acpi = 1; 1193 if (apic_use_acpi_madt_only) { 1194 cmn_err(CE_CONT, 1195 "?Using ACPI for CPU/IOAPIC information ONLY\n"); 1196 } 1197 return (PSM_SUCCESS); 1198 } 1199 /* if setting APIC mode failed above, we fall through to cleanup */ 1200 1201 cleanup: 1202 if (apicadr != NULL) { 1203 psm_unmap_phys((caddr_t)apicadr, APIC_LOCAL_MEMLEN); 1204 apicadr = NULL; 1205 } 1206 apic_nproc = 0; 1207 for (i = 0; i < apic_io_max; i++) { 1208 psm_unmap_phys((caddr_t)apicioadr[i], APIC_IO_MEMLEN); 1209 apicioadr[i] = NULL; 1210 } 1211 apic_io_max = 0; 1212 acpi_isop = NULL; 1213 acpi_iso_cnt = 0; 1214 acpi_nmi_sp = NULL; 1215 acpi_nmi_scnt = 0; 1216 acpi_nmi_cp = NULL; 1217 acpi_nmi_ccnt = 0; 1218 return (PSM_FAILURE); 1219 } 1220 1221 /* 1222 * Handle default configuration. Fill in reqd global variables & tables 1223 * Fill all details as MP table does not give any more info 1224 */ 1225 static int 1226 apic_handle_defconf() 1227 { 1228 uint_t lid; 1229 1230 /*LINTED: pointer cast may result in improper alignment */ 1231 apicioadr[0] = (int32_t *)psm_map_phys(APIC_IO_ADDR, 1232 APIC_IO_MEMLEN, PROT_READ | PROT_WRITE); 1233 /*LINTED: pointer cast may result in improper alignment */ 1234 apicadr = (uint32_t *)psm_map_phys(APIC_LOCAL_ADDR, 1235 APIC_LOCAL_MEMLEN, PROT_READ | PROT_WRITE); 1236 apic_cpus = (apic_cpus_info_t *) 1237 kmem_zalloc(sizeof (*apic_cpus) * 2, KM_NOSLEEP); 1238 if ((!apicadr) || (!apicioadr[0]) || (!apic_cpus)) 1239 goto apic_handle_defconf_fail; 1240 CPUSET_ONLY(apic_cpumask, 0); 1241 CPUSET_ADD(apic_cpumask, 1); 1242 apic_nproc = 2; 1243 lid = apicadr[APIC_LID_REG]; 1244 apic_cpus[0].aci_local_id = (uchar_t)(lid >> APIC_ID_BIT_OFFSET); 1245 /* 1246 * According to the PC+MP spec 1.1, the local ids 1247 * for the default configuration has to be 0 or 1 1248 */ 1249 if 
(apic_cpus[0].aci_local_id == 1) 1250 apic_cpus[1].aci_local_id = 0; 1251 else if (apic_cpus[0].aci_local_id == 0) 1252 apic_cpus[1].aci_local_id = 1; 1253 else 1254 goto apic_handle_defconf_fail; 1255 1256 apic_io_id[0] = 2; 1257 apic_io_max = 1; 1258 if (apic_defconf >= 5) { 1259 apic_cpus[0].aci_local_ver = APIC_INTEGRATED_VERS; 1260 apic_cpus[1].aci_local_ver = APIC_INTEGRATED_VERS; 1261 apic_io_ver[0] = APIC_INTEGRATED_VERS; 1262 } else { 1263 apic_cpus[0].aci_local_ver = 0; /* 82489 DX */ 1264 apic_cpus[1].aci_local_ver = 0; 1265 apic_io_ver[0] = 0; 1266 } 1267 if (apic_defconf == 2 || apic_defconf == 3 || apic_defconf == 6) 1268 eisa_level_intr_mask = (inb(EISA_LEVEL_CNTL + 1) << 8) | 1269 inb(EISA_LEVEL_CNTL) | ((uint_t)INT32_MAX + 1); 1270 return (PSM_SUCCESS); 1271 1272 apic_handle_defconf_fail: 1273 if (apic_cpus) 1274 kmem_free(apic_cpus, sizeof (*apic_cpus) * 2); 1275 if (apicadr) 1276 psm_unmap_phys((caddr_t)apicadr, APIC_LOCAL_MEMLEN); 1277 if (apicioadr[0]) 1278 psm_unmap_phys((caddr_t)apicioadr[0], APIC_IO_MEMLEN); 1279 return (PSM_FAILURE); 1280 } 1281 1282 /* Parse the entries in MP configuration table and collect info that we need */ 1283 static int 1284 apic_parse_mpct(caddr_t mpct, int bypass_cpus_and_ioapics) 1285 { 1286 struct apic_procent *procp; 1287 struct apic_bus *busp; 1288 struct apic_io_entry *ioapicp; 1289 struct apic_io_intr *intrp; 1290 volatile int32_t *ioapic; 1291 uint_t lid; 1292 int id; 1293 uchar_t hid; 1294 1295 /*LINTED: pointer cast may result in improper alignment */ 1296 procp = (struct apic_procent *)(mpct + sizeof (struct apic_mp_cnf_hdr)); 1297 1298 /* No need to count cpu entries if we won't use them */ 1299 if (!bypass_cpus_and_ioapics) { 1300 1301 /* Find max # of CPUS and allocate structure accordingly */ 1302 apic_nproc = 0; 1303 CPUSET_ZERO(apic_cpumask); 1304 while (procp->proc_entry == APIC_CPU_ENTRY) { 1305 if (procp->proc_cpuflags & CPUFLAGS_EN) { 1306 if (apic_nproc < NCPU) 1307 CPUSET_ADD(apic_cpumask, 
apic_nproc); 1308 apic_nproc++; 1309 } 1310 procp++; 1311 } 1312 if (apic_nproc > NCPU) 1313 cmn_err(CE_WARN, "pcplusmp: exceeded " 1314 "maximum no. of CPUs (= %d)", NCPU); 1315 if (!apic_nproc || !(apic_cpus = (apic_cpus_info_t *) 1316 kmem_zalloc(sizeof (*apic_cpus)*apic_nproc, KM_NOSLEEP))) 1317 return (PSM_FAILURE); 1318 } 1319 1320 /*LINTED: pointer cast may result in improper alignment */ 1321 procp = (struct apic_procent *)(mpct + sizeof (struct apic_mp_cnf_hdr)); 1322 1323 /* 1324 * start with index 1 as 0 needs to be filled in with Boot CPU, but 1325 * if we're bypassing this information, it has already been filled 1326 * in by acpi_probe(), so don't overwrite it. 1327 */ 1328 if (!bypass_cpus_and_ioapics) 1329 apic_nproc = 1; 1330 1331 while (procp->proc_entry == APIC_CPU_ENTRY) { 1332 /* check whether the cpu exists or not */ 1333 if (!bypass_cpus_and_ioapics && 1334 procp->proc_cpuflags & CPUFLAGS_EN) { 1335 if (procp->proc_cpuflags & CPUFLAGS_BP) { /* Boot CPU */ 1336 lid = apicadr[APIC_LID_REG]; 1337 apic_cpus[0].aci_local_id = procp->proc_apicid; 1338 if (apic_cpus[0].aci_local_id != 1339 (uchar_t)(lid >> APIC_ID_BIT_OFFSET)) { 1340 return (PSM_FAILURE); 1341 } 1342 apic_cpus[0].aci_local_ver = 1343 procp->proc_version; 1344 } else { 1345 1346 apic_cpus[apic_nproc].aci_local_id = 1347 procp->proc_apicid; 1348 apic_cpus[apic_nproc].aci_local_ver = 1349 procp->proc_version; 1350 apic_nproc++; 1351 1352 } 1353 } 1354 procp++; 1355 } 1356 1357 /* 1358 * Save start of bus entries for later use. 1359 * Get EISA level cntrl if EISA bus is present. 
1360 * Also get the CPI bus id for single CPI bus case 1361 */ 1362 apic_busp = busp = (struct apic_bus *)procp; 1363 while (busp->bus_entry == APIC_BUS_ENTRY) { 1364 lid = apic_find_bus_type((char *)&busp->bus_str1); 1365 if (lid == BUS_EISA) { 1366 eisa_level_intr_mask = (inb(EISA_LEVEL_CNTL + 1) << 8) | 1367 inb(EISA_LEVEL_CNTL) | ((uint_t)INT32_MAX + 1); 1368 } else if (lid == BUS_PCI) { 1369 /* 1370 * apic_single_pci_busid will be used only if 1371 * apic_pic_bus_total is equal to 1 1372 */ 1373 apic_pci_bus_total++; 1374 apic_single_pci_busid = busp->bus_id; 1375 } 1376 busp++; 1377 } 1378 1379 ioapicp = (struct apic_io_entry *)busp; 1380 1381 if (!bypass_cpus_and_ioapics) 1382 apic_io_max = 0; 1383 do { 1384 if (!bypass_cpus_and_ioapics && apic_io_max < MAX_IO_APIC) { 1385 if (ioapicp->io_flags & IOAPIC_FLAGS_EN) { 1386 apic_io_id[apic_io_max] = ioapicp->io_apicid; 1387 apic_io_ver[apic_io_max] = ioapicp->io_version; 1388 /*LINTED: pointer cast may result in improper alignment */ 1389 apicioadr[apic_io_max] = 1390 (int32_t *)psm_map_phys( 1391 (uint32_t)ioapicp->io_apic_addr, 1392 APIC_IO_MEMLEN, PROT_READ | PROT_WRITE); 1393 1394 if (!apicioadr[apic_io_max]) 1395 return (PSM_FAILURE); 1396 1397 ioapic = apicioadr[apic_io_max]; 1398 ioapic[APIC_IO_REG] = APIC_ID_CMD; 1399 id = ioapic[APIC_IO_DATA]; 1400 hid = (uchar_t)(((uint_t)id) >> 24); 1401 1402 if (hid != apic_io_id[apic_io_max]) { 1403 if (apic_io_id[apic_io_max] == 0) 1404 apic_io_id[apic_io_max] = hid; 1405 else { 1406 /* 1407 * set ioapic id to whatever 1408 * reported by MPS 1409 * 1410 * may not need to set index 1411 * again ??? 
1412 * take it out and try 1413 */ 1414 1415 id = ((int32_t) 1416 apic_io_id[apic_io_max]) << 1417 24; 1418 1419 ioapic[APIC_IO_REG] = 1420 APIC_ID_CMD; 1421 1422 ioapic[APIC_IO_DATA] = id; 1423 1424 } 1425 } 1426 apic_io_max++; 1427 } 1428 } 1429 ioapicp++; 1430 } while (ioapicp->io_entry == APIC_IO_ENTRY); 1431 1432 apic_io_intrp = (struct apic_io_intr *)ioapicp; 1433 1434 intrp = apic_io_intrp; 1435 while (intrp->intr_entry == APIC_IO_INTR_ENTRY) { 1436 if ((intrp->intr_irq > APIC_MAX_ISA_IRQ) || 1437 (apic_find_bus(intrp->intr_busid) == BUS_PCI)) { 1438 apic_irq_translate = 1; 1439 break; 1440 } 1441 intrp++; 1442 } 1443 1444 return (PSM_SUCCESS); 1445 } 1446 1447 boolean_t 1448 apic_cpu_in_range(int cpu) 1449 { 1450 return ((cpu & ~IRQ_USER_BOUND) < apic_nproc); 1451 } 1452 1453 static struct apic_mpfps_hdr * 1454 apic_find_fps_sig(caddr_t cptr, int len) 1455 { 1456 int i; 1457 1458 /* Look for the pattern "_MP_" */ 1459 for (i = 0; i < len; i += 16) { 1460 if ((*(cptr+i) == '_') && 1461 (*(cptr+i+1) == 'M') && 1462 (*(cptr+i+2) == 'P') && 1463 (*(cptr+i+3) == '_')) 1464 /*LINTED: pointer cast may result in improper alignment */ 1465 return ((struct apic_mpfps_hdr *)(cptr + i)); 1466 } 1467 return (NULL); 1468 } 1469 1470 static int 1471 apic_checksum(caddr_t bptr, int len) 1472 { 1473 int i; 1474 uchar_t cksum; 1475 1476 cksum = 0; 1477 for (i = 0; i < len; i++) 1478 cksum += *bptr++; 1479 return ((int)cksum); 1480 } 1481 1482 1483 /* 1484 * Initialise vector->ipl and ipl->pri arrays. level_intr and irqtable 1485 * are also set to NULL. vector->irq is set to a value which cannot map 1486 * to a real irq to show that it is free. 
1487 */ 1488 void 1489 apic_init() 1490 { 1491 int i; 1492 int *iptr; 1493 1494 int j = 1; 1495 apic_ipltopri[0] = APIC_VECTOR_PER_IPL; /* leave 0 for idle */ 1496 for (i = 0; i < (APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL); i++) { 1497 if ((i < ((APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL) - 1)) && 1498 (apic_vectortoipl[i + 1] == apic_vectortoipl[i])) 1499 /* get to highest vector at the same ipl */ 1500 continue; 1501 for (; j <= apic_vectortoipl[i]; j++) { 1502 apic_ipltopri[j] = (i << APIC_IPL_SHIFT) + 1503 APIC_BASE_VECT; 1504 } 1505 } 1506 for (; j < MAXIPL + 1; j++) 1507 /* fill up any empty ipltopri slots */ 1508 apic_ipltopri[j] = (i << APIC_IPL_SHIFT) + APIC_BASE_VECT; 1509 1510 /* cpu 0 is always up */ 1511 apic_cpus[0].aci_status = APIC_CPU_ONLINE | APIC_CPU_INTR_ENABLE; 1512 1513 iptr = (int *)&apic_irq_table[0]; 1514 for (i = 0; i <= APIC_MAX_VECTOR; i++) { 1515 apic_level_intr[i] = 0; 1516 *iptr++ = NULL; 1517 apic_vector_to_irq[i] = APIC_RESV_IRQ; 1518 apic_reprogram_info[i].valid = 0; 1519 apic_reprogram_info[i].bindcpu = 0; 1520 apic_reprogram_info[i].timeouts = 0; 1521 } 1522 1523 /* 1524 * Allocate a dummy irq table entry for the reserved entry. 1525 * This takes care of the race between removing an irq and 1526 * clock detecting a CPU in that irq during interrupt load 1527 * sampling. 1528 */ 1529 apic_irq_table[APIC_RESV_IRQ] = 1530 kmem_zalloc(sizeof (apic_irq_t), KM_NOSLEEP); 1531 1532 mutex_init(&airq_mutex, NULL, MUTEX_DEFAULT, NULL); 1533 mutex_init(&apic_reprogram_timeout_mutex, NULL, MUTEX_DEFAULT, NULL); 1534 #if defined(__amd64) 1535 /* 1536 * Make cpu-specific interrupt info point to cr8pri vector 1537 */ 1538 for (i = 0; i <= MAXIPL; i++) 1539 apic_cr8pri[i] = apic_ipltopri[i] >> APIC_IPL_SHIFT; 1540 CPU->cpu_pri_data = apic_cr8pri; 1541 intpri_use_cr8 = 1; 1542 #endif /* __amd64 */ 1543 } 1544 1545 /* 1546 * handler for APIC Error interrupt. 
Just print a warning and continue 1547 */ 1548 static int 1549 apic_error_intr() 1550 { 1551 uint_t error0, error1, error; 1552 uint_t i; 1553 1554 /* 1555 * We need to write before read as per 7.4.17 of system prog manual. 1556 * We do both and or the results to be safe 1557 */ 1558 error0 = apicadr[APIC_ERROR_STATUS]; 1559 apicadr[APIC_ERROR_STATUS] = 0; 1560 error1 = apicadr[APIC_ERROR_STATUS]; 1561 error = error0 | error1; 1562 1563 /* 1564 * Clear the APIC error status (do this on all cpus that enter here) 1565 * (two writes are required due to the semantics of accessing the 1566 * error status register.) 1567 */ 1568 apicadr[APIC_ERROR_STATUS] = 0; 1569 apicadr[APIC_ERROR_STATUS] = 0; 1570 1571 /* 1572 * Prevent more than 1 CPU from handling error interrupt causing 1573 * double printing (interleave of characters from multiple 1574 * CPU's when using prom_printf) 1575 */ 1576 if (lock_try(&apic_error_lock) == 0) 1577 return (error ? DDI_INTR_CLAIMED : DDI_INTR_UNCLAIMED); 1578 if (error) { 1579 #if DEBUG 1580 if (apic_debug) 1581 debug_enter("pcplusmp: APIC Error interrupt received"); 1582 #endif /* DEBUG */ 1583 if (apic_panic_on_apic_error) 1584 cmn_err(CE_PANIC, 1585 "APIC Error interrupt on CPU %d. Status = %x\n", 1586 psm_get_cpu_id(), error); 1587 else { 1588 if ((error & ~APIC_CS_ERRORS) == 0) { 1589 /* cksum error only */ 1590 apic_error |= APIC_ERR_APIC_ERROR; 1591 apic_apic_error |= error; 1592 apic_num_apic_errors++; 1593 apic_num_cksum_errors++; 1594 } else { 1595 /* 1596 * prom_printf is the best shot we have of 1597 * something which is problem free from 1598 * high level/NMI type of interrupts 1599 */ 1600 prom_printf("APIC Error interrupt on CPU %d. 
" 1601 "Status 0 = %x, Status 1 = %x\n", 1602 psm_get_cpu_id(), error0, error1); 1603 apic_error |= APIC_ERR_APIC_ERROR; 1604 apic_apic_error |= error; 1605 apic_num_apic_errors++; 1606 for (i = 0; i < apic_error_display_delay; i++) { 1607 tenmicrosec(); 1608 } 1609 /* 1610 * provide more delay next time limited to 1611 * roughly 1 clock tick time 1612 */ 1613 if (apic_error_display_delay < 500) 1614 apic_error_display_delay *= 2; 1615 } 1616 } 1617 lock_clear(&apic_error_lock); 1618 return (DDI_INTR_CLAIMED); 1619 } else { 1620 lock_clear(&apic_error_lock); 1621 return (DDI_INTR_UNCLAIMED); 1622 } 1623 /* NOTREACHED */ 1624 } 1625 1626 /* 1627 * Turn off the mask bit in the performance counter Local Vector Table entry. 1628 */ 1629 static void 1630 apic_cpcovf_mask_clear(void) 1631 { 1632 apicadr[APIC_PCINT_VECT] &= ~APIC_LVT_MASK; 1633 } 1634 1635 static void 1636 apic_init_intr() 1637 { 1638 processorid_t cpun = psm_get_cpu_id(); 1639 1640 #if defined(__amd64) 1641 setcr8((ulong_t)(APIC_MASK_ALL >> APIC_IPL_SHIFT)); 1642 #else 1643 apicadr[APIC_TASK_REG] = APIC_MASK_ALL; 1644 #endif 1645 1646 if (apic_flat_model) 1647 apicadr[APIC_FORMAT_REG] = APIC_FLAT_MODEL; 1648 else 1649 apicadr[APIC_FORMAT_REG] = APIC_CLUSTER_MODEL; 1650 apicadr[APIC_DEST_REG] = AV_HIGH_ORDER >> cpun; 1651 1652 /* need to enable APIC before unmasking NMI */ 1653 apicadr[APIC_SPUR_INT_REG] = AV_UNIT_ENABLE | APIC_SPUR_INTR; 1654 1655 apicadr[APIC_LOCAL_TIMER] = AV_MASK; 1656 apicadr[APIC_INT_VECT0] = AV_MASK; /* local intr reg 0 */ 1657 apicadr[APIC_INT_VECT1] = AV_NMI; /* enable NMI */ 1658 1659 if (apic_cpus[cpun].aci_local_ver < APIC_INTEGRATED_VERS) 1660 return; 1661 1662 /* Enable performance counter overflow interrupt */ 1663 1664 if ((x86_feature & X86_MSR) != X86_MSR) 1665 apic_enable_cpcovf_intr = 0; 1666 if (apic_enable_cpcovf_intr) { 1667 if (apic_cpcovf_vect == 0) { 1668 int ipl = APIC_PCINT_IPL; 1669 int irq = apic_get_ipivect(ipl, -1); 1670 1671 ASSERT(irq != -1); 1672 
apic_cpcovf_vect = apic_irq_table[irq]->airq_vector; 1673 ASSERT(apic_cpcovf_vect); 1674 (void) add_avintr(NULL, ipl, 1675 (avfunc)kcpc_hw_overflow_intr, 1676 "apic pcint", irq, NULL, NULL, NULL, NULL); 1677 kcpc_hw_overflow_intr_installed = 1; 1678 kcpc_hw_enable_cpc_intr = apic_cpcovf_mask_clear; 1679 } 1680 apicadr[APIC_PCINT_VECT] = apic_cpcovf_vect; 1681 } 1682 1683 /* Enable error interrupt */ 1684 1685 if (apic_enable_error_intr) { 1686 if (apic_errvect == 0) { 1687 int ipl = 0xf; /* get highest priority intr */ 1688 int irq = apic_get_ipivect(ipl, -1); 1689 1690 ASSERT(irq != -1); 1691 apic_errvect = apic_irq_table[irq]->airq_vector; 1692 ASSERT(apic_errvect); 1693 /* 1694 * Not PSMI compliant, but we are going to merge 1695 * with ON anyway 1696 */ 1697 (void) add_avintr((void *)NULL, ipl, 1698 (avfunc)apic_error_intr, "apic error intr", 1699 irq, NULL, NULL, NULL, NULL); 1700 } 1701 apicadr[APIC_ERR_VECT] = apic_errvect; 1702 apicadr[APIC_ERROR_STATUS] = 0; 1703 apicadr[APIC_ERROR_STATUS] = 0; 1704 } 1705 } 1706 1707 static void 1708 apic_disable_local_apic() 1709 { 1710 apicadr[APIC_TASK_REG] = APIC_MASK_ALL; 1711 apicadr[APIC_LOCAL_TIMER] = AV_MASK; 1712 apicadr[APIC_INT_VECT0] = AV_MASK; /* local intr reg 0 */ 1713 apicadr[APIC_INT_VECT1] = AV_MASK; /* disable NMI */ 1714 apicadr[APIC_ERR_VECT] = AV_MASK; /* and error interrupt */ 1715 apicadr[APIC_PCINT_VECT] = AV_MASK; /* and perf counter intr */ 1716 apicadr[APIC_SPUR_INT_REG] = APIC_SPUR_INTR; 1717 } 1718 1719 static void 1720 apic_picinit(void) 1721 { 1722 int i, j; 1723 uint_t isr; 1724 volatile int32_t *ioapic; 1725 apic_irq_t *irqptr; 1726 struct intrspec ispec; 1727 1728 /* 1729 * On UniSys Model 6520, the BIOS leaves vector 0x20 isr 1730 * bit on without clearing it with EOI. Since softint 1731 * uses vector 0x20 to interrupt itself, so softint will 1732 * not work on this machine. In order to fix this problem 1733 * a check is made to verify all the isr bits are clear. 
1734 * If not, EOIs are issued to clear the bits. 1735 */ 1736 for (i = 7; i >= 1; i--) { 1737 if ((isr = apicadr[APIC_ISR_REG + (i * 4)]) != 0) 1738 for (j = 0; ((j < 32) && (isr != 0)); j++) 1739 if (isr & (1 << j)) { 1740 apicadr[APIC_EOI_REG] = 0; 1741 isr &= ~(1 << j); 1742 apic_error |= APIC_ERR_BOOT_EOI; 1743 } 1744 } 1745 1746 /* set a flag so we know we have run apic_picinit() */ 1747 apic_flag = 1; 1748 LOCK_INIT_CLEAR(&apic_gethrtime_lock); 1749 LOCK_INIT_CLEAR(&apic_ioapic_lock); 1750 LOCK_INIT_CLEAR(&apic_revector_lock); 1751 LOCK_INIT_CLEAR(&apic_ioapic_reprogram_lock); 1752 LOCK_INIT_CLEAR(&apic_error_lock); 1753 1754 picsetup(); /* initialise the 8259 */ 1755 1756 /* add nmi handler - least priority nmi handler */ 1757 LOCK_INIT_CLEAR(&apic_nmi_lock); 1758 1759 if (!psm_add_nmintr(0, (avfunc) apic_nmi_intr, 1760 "pcplusmp NMI handler", (caddr_t)NULL)) 1761 cmn_err(CE_WARN, "pcplusmp: Unable to add nmi handler"); 1762 1763 apic_init_intr(); 1764 1765 /* enable apic mode if imcr present */ 1766 if (apic_imcrp) { 1767 outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT); 1768 outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_APIC); 1769 } 1770 1771 /* mask interrupt vectors */ 1772 for (j = 0; j < apic_io_max; j++) { 1773 int intin_max; 1774 ioapic = apicioadr[j]; 1775 ioapic[APIC_IO_REG] = APIC_VERS_CMD; 1776 /* Bits 23-16 define the maximum redirection entries */ 1777 intin_max = (ioapic[APIC_IO_DATA] >> 16) & 0xff; 1778 for (i = 0; i < intin_max; i++) { 1779 ioapic[APIC_IO_REG] = APIC_RDT_CMD + 2 * i; 1780 ioapic[APIC_IO_DATA] = AV_MASK; 1781 } 1782 } 1783 1784 /* 1785 * Hack alert: deal with ACPI SCI interrupt chicken/egg here 1786 */ 1787 if (apic_sci_vect > 0) { 1788 /* 1789 * acpica has already done add_avintr(); we just 1790 * to finish the job by mimicing translate_irq() 1791 * 1792 * Fake up an intrspec and setup the tables 1793 */ 1794 ispec.intrspec_vec = apic_sci_vect; 1795 ispec.intrspec_pri = SCI_IPL; 1796 1797 if (apic_setup_irq_table(NULL, apic_sci_vect, 
NULL, 1798 &ispec, &apic_sci_flags, DDI_INTR_TYPE_FIXED) < 0) { 1799 cmn_err(CE_WARN, "!apic: SCI setup failed"); 1800 return; 1801 } 1802 irqptr = apic_irq_table[apic_sci_vect]; 1803 1804 /* Program I/O APIC */ 1805 (void) apic_setup_io_intr(irqptr, apic_sci_vect); 1806 1807 irqptr->airq_share++; 1808 } 1809 } 1810 1811 1812 static void 1813 apic_cpu_start(processorid_t cpun, caddr_t rm_code) 1814 { 1815 int loop_count; 1816 uint32_t vector; 1817 uint_t cpu_id, iflag; 1818 1819 cpu_id = apic_cpus[cpun].aci_local_id; 1820 1821 apic_cmos_ssb_set = 1; 1822 1823 /* 1824 * Interrupts on BSP cpu will be disabled during these startup 1825 * steps in order to avoid unwanted side effects from 1826 * executing interrupt handlers on a problematic BIOS. 1827 */ 1828 1829 iflag = intr_clear(); 1830 outb(CMOS_ADDR, SSB); 1831 outb(CMOS_DATA, BIOS_SHUTDOWN); 1832 1833 while (get_apic_cmd1() & AV_PENDING) 1834 apic_ret(); 1835 1836 /* for integrated - make sure there is one INIT IPI in buffer */ 1837 /* for external - it will wake up the cpu */ 1838 apicadr[APIC_INT_CMD2] = cpu_id << APIC_ICR_ID_BIT_OFFSET; 1839 apicadr[APIC_INT_CMD1] = AV_ASSERT | AV_RESET; 1840 1841 /* If only 1 CPU is installed, PENDING bit will not go low */ 1842 for (loop_count = 0x1000; loop_count; loop_count--) 1843 if (get_apic_cmd1() & AV_PENDING) 1844 apic_ret(); 1845 else 1846 break; 1847 1848 apicadr[APIC_INT_CMD2] = cpu_id << APIC_ICR_ID_BIT_OFFSET; 1849 apicadr[APIC_INT_CMD1] = AV_DEASSERT | AV_RESET; 1850 1851 drv_usecwait(20000); /* 20 milli sec */ 1852 1853 if (apic_cpus[cpun].aci_local_ver >= APIC_INTEGRATED_VERS) { 1854 /* integrated apic */ 1855 1856 rm_code = (caddr_t)(uintptr_t)rm_platter_pa; 1857 vector = (rm_platter_pa >> MMU_PAGESHIFT) & 1858 (APIC_VECTOR_MASK | APIC_IPL_MASK); 1859 1860 /* to offset the INIT IPI queue up in the buffer */ 1861 apicadr[APIC_INT_CMD2] = cpu_id << APIC_ICR_ID_BIT_OFFSET; 1862 apicadr[APIC_INT_CMD1] = vector | AV_STARTUP; 1863 1864 drv_usecwait(200); /* 20 
micro sec */ 1865 1866 apicadr[APIC_INT_CMD2] = cpu_id << APIC_ICR_ID_BIT_OFFSET; 1867 apicadr[APIC_INT_CMD1] = vector | AV_STARTUP; 1868 1869 drv_usecwait(200); /* 20 micro sec */ 1870 } 1871 intr_restore(iflag); 1872 } 1873 1874 1875 #ifdef DEBUG 1876 int apic_break_on_cpu = 9; 1877 int apic_stretch_interrupts = 0; 1878 int apic_stretch_ISR = 1 << 3; /* IPL of 3 matches nothing now */ 1879 1880 void 1881 apic_break() 1882 { 1883 } 1884 #endif /* DEBUG */ 1885 1886 /* 1887 * platform_intr_enter 1888 * 1889 * Called at the beginning of the interrupt service routine to 1890 * mask all level equal to and below the interrupt priority 1891 * of the interrupting vector. An EOI should be given to 1892 * the interrupt controller to enable other HW interrupts. 1893 * 1894 * Return -1 for spurious interrupts 1895 * 1896 */ 1897 /*ARGSUSED*/ 1898 static int 1899 apic_intr_enter(int ipl, int *vectorp) 1900 { 1901 uchar_t vector; 1902 int nipl; 1903 int irq, iflag; 1904 apic_cpus_info_t *cpu_infop; 1905 1906 /* 1907 * The real vector programmed in APIC is *vectorp + 0x20 1908 * But, cmnint code subtracts 0x20 before pushing it. 1909 * Hence APIC_BASE_VECT is 0x20. 
1910 */ 1911 1912 vector = (uchar_t)*vectorp; 1913 1914 /* if interrupted by the clock, increment apic_nsec_since_boot */ 1915 if (vector == apic_clkvect) { 1916 if (!apic_oneshot) { 1917 /* NOTE: this is not MT aware */ 1918 apic_hrtime_stamp++; 1919 apic_nsec_since_boot += apic_nsec_per_intr; 1920 apic_hrtime_stamp++; 1921 last_count_read = apic_hertz_count; 1922 apic_redistribute_compute(); 1923 } 1924 1925 /* We will avoid all the book keeping overhead for clock */ 1926 nipl = apic_vectortoipl[vector >> APIC_IPL_SHIFT]; 1927 #if defined(__amd64) 1928 setcr8((ulong_t)apic_cr8pri[nipl]); 1929 #else 1930 apicadr[APIC_TASK_REG] = apic_ipltopri[nipl]; 1931 #endif 1932 *vectorp = apic_vector_to_irq[vector + APIC_BASE_VECT]; 1933 apicadr[APIC_EOI_REG] = 0; 1934 return (nipl); 1935 } 1936 1937 cpu_infop = &apic_cpus[psm_get_cpu_id()]; 1938 1939 if (vector == (APIC_SPUR_INTR - APIC_BASE_VECT)) { 1940 cpu_infop->aci_spur_cnt++; 1941 return (APIC_INT_SPURIOUS); 1942 } 1943 1944 /* Check if the vector we got is really what we need */ 1945 if (apic_revector_pending) { 1946 /* 1947 * Disable interrupts for the duration of 1948 * the vector translation to prevent a self-race for 1949 * the apic_revector_lock. This cannot be done 1950 * in apic_xlate_vector because it is recursive and 1951 * we want the vector translation to be atomic with 1952 * respect to other (higher-priority) interrupts. 
1953 */ 1954 iflag = intr_clear(); 1955 vector = apic_xlate_vector(vector + APIC_BASE_VECT) - 1956 APIC_BASE_VECT; 1957 intr_restore(iflag); 1958 } 1959 1960 nipl = apic_vectortoipl[vector >> APIC_IPL_SHIFT]; 1961 *vectorp = irq = apic_vector_to_irq[vector + APIC_BASE_VECT]; 1962 1963 #if defined(__amd64) 1964 setcr8((ulong_t)apic_cr8pri[nipl]); 1965 #else 1966 apicadr[APIC_TASK_REG] = apic_ipltopri[nipl]; 1967 #endif 1968 1969 cpu_infop->aci_current[nipl] = (uchar_t)irq; 1970 cpu_infop->aci_curipl = (uchar_t)nipl; 1971 cpu_infop->aci_ISR_in_progress |= 1 << nipl; 1972 1973 /* 1974 * apic_level_intr could have been assimilated into the irq struct. 1975 * but, having it as a character array is more efficient in terms of 1976 * cache usage. So, we leave it as is. 1977 */ 1978 if (!apic_level_intr[irq]) 1979 apicadr[APIC_EOI_REG] = 0; 1980 1981 #ifdef DEBUG 1982 APIC_DEBUG_BUF_PUT(vector); 1983 APIC_DEBUG_BUF_PUT(irq); 1984 APIC_DEBUG_BUF_PUT(nipl); 1985 APIC_DEBUG_BUF_PUT(psm_get_cpu_id()); 1986 if ((apic_stretch_interrupts) && (apic_stretch_ISR & (1 << nipl))) 1987 drv_usecwait(apic_stretch_interrupts); 1988 1989 if (apic_break_on_cpu == psm_get_cpu_id()) 1990 apic_break(); 1991 #endif /* DEBUG */ 1992 return (nipl); 1993 } 1994 1995 static void 1996 apic_intr_exit(int prev_ipl, int irq) 1997 { 1998 apic_cpus_info_t *cpu_infop; 1999 2000 #if defined(__amd64) 2001 setcr8((ulong_t)apic_cr8pri[prev_ipl]); 2002 #else 2003 apicadr[APIC_TASK_REG] = apic_ipltopri[prev_ipl]; 2004 #endif 2005 2006 cpu_infop = &apic_cpus[psm_get_cpu_id()]; 2007 if (apic_level_intr[irq]) 2008 apicadr[APIC_EOI_REG] = 0; 2009 2010 cpu_infop->aci_curipl = (uchar_t)prev_ipl; 2011 /* ISR above current pri could not be in progress */ 2012 cpu_infop->aci_ISR_in_progress &= (2 << prev_ipl) - 1; 2013 } 2014 2015 /* 2016 * Mask all interrupts below or equal to the given IPL 2017 */ 2018 static void 2019 apic_setspl(int ipl) 2020 { 2021 2022 #if defined(__amd64) 2023 setcr8((ulong_t)apic_cr8pri[ipl]); 
2024 #else 2025 apicadr[APIC_TASK_REG] = apic_ipltopri[ipl]; 2026 #endif 2027 2028 /* interrupts at ipl above this cannot be in progress */ 2029 apic_cpus[psm_get_cpu_id()].aci_ISR_in_progress &= (2 << ipl) - 1; 2030 /* 2031 * this is a patch fix for the ALR QSMP P5 machine, so that interrupts 2032 * have enough time to come in before the priority is raised again 2033 * during the idle() loop. 2034 */ 2035 if (apic_setspl_delay) 2036 (void) get_apic_pri(); 2037 } 2038 2039 /* 2040 * trigger a software interrupt at the given IPL 2041 */ 2042 static void 2043 apic_set_softintr(int ipl) 2044 { 2045 int vector; 2046 uint_t flag; 2047 2048 vector = apic_resv_vector[ipl]; 2049 2050 flag = intr_clear(); 2051 2052 while (get_apic_cmd1() & AV_PENDING) 2053 apic_ret(); 2054 2055 /* generate interrupt at vector on itself only */ 2056 apicadr[APIC_INT_CMD1] = AV_SH_SELF | vector; 2057 2058 intr_restore(flag); 2059 } 2060 2061 /* 2062 * generates an interprocessor interrupt to another CPU 2063 */ 2064 static void 2065 apic_send_ipi(int cpun, int ipl) 2066 { 2067 int vector; 2068 uint_t flag; 2069 2070 vector = apic_resv_vector[ipl]; 2071 2072 flag = intr_clear(); 2073 2074 while (get_apic_cmd1() & AV_PENDING) 2075 apic_ret(); 2076 2077 apicadr[APIC_INT_CMD2] = 2078 apic_cpus[cpun].aci_local_id << APIC_ICR_ID_BIT_OFFSET; 2079 apicadr[APIC_INT_CMD1] = vector; 2080 2081 intr_restore(flag); 2082 } 2083 2084 2085 /*ARGSUSED*/ 2086 static void 2087 apic_set_idlecpu(processorid_t cpun) 2088 { 2089 } 2090 2091 /*ARGSUSED*/ 2092 static void 2093 apic_unset_idlecpu(processorid_t cpun) 2094 { 2095 } 2096 2097 2098 static void 2099 apic_ret() 2100 { 2101 } 2102 2103 static int 2104 get_apic_cmd1() 2105 { 2106 return (apicadr[APIC_INT_CMD1]); 2107 } 2108 2109 static int 2110 get_apic_pri() 2111 { 2112 #if defined(__amd64) 2113 return ((int)getcr8()); 2114 #else 2115 return (apicadr[APIC_TASK_REG]); 2116 #endif 2117 } 2118 2119 /* 2120 * If apic_coarse_time == 1, then apic_gettime() is used 
instead of 2121 * apic_gethrtime(). This is used for performance instead of accuracy. 2122 */ 2123 2124 static hrtime_t 2125 apic_gettime() 2126 { 2127 int old_hrtime_stamp; 2128 hrtime_t temp; 2129 2130 /* 2131 * In one-shot mode, we do not keep time, so if anyone 2132 * calls psm_gettime() directly, we vector over to 2133 * gethrtime(). 2134 * one-shot mode MUST NOT be enabled if this psm is the source of 2135 * hrtime. 2136 */ 2137 2138 if (apic_oneshot) 2139 return (gethrtime()); 2140 2141 2142 gettime_again: 2143 while ((old_hrtime_stamp = apic_hrtime_stamp) & 1) 2144 apic_ret(); 2145 2146 temp = apic_nsec_since_boot; 2147 2148 if (apic_hrtime_stamp != old_hrtime_stamp) { /* got an interrupt */ 2149 goto gettime_again; 2150 } 2151 return (temp); 2152 } 2153 2154 /* 2155 * Here we return the number of nanoseconds since booting. Note every 2156 * clock interrupt increments apic_nsec_since_boot by the appropriate 2157 * amount. 2158 */ 2159 static hrtime_t 2160 apic_gethrtime() 2161 { 2162 int curr_timeval, countval, elapsed_ticks, oflags; 2163 int old_hrtime_stamp, status; 2164 hrtime_t temp; 2165 uchar_t cpun; 2166 2167 2168 /* 2169 * In one-shot mode, we do not keep time, so if anyone 2170 * calls psm_gethrtime() directly, we vector over to 2171 * gethrtime(). 2172 * one-shot mode MUST NOT be enabled if this psm is the source of 2173 * hrtime. 2174 */ 2175 2176 if (apic_oneshot) 2177 return (gethrtime()); 2178 2179 oflags = intr_clear(); /* prevent migration */ 2180 2181 cpun = (uchar_t)((uint_t)apicadr[APIC_LID_REG] >> APIC_ID_BIT_OFFSET); 2182 2183 lock_set(&apic_gethrtime_lock); 2184 2185 gethrtime_again: 2186 while ((old_hrtime_stamp = apic_hrtime_stamp) & 1) 2187 apic_ret(); 2188 2189 /* 2190 * Check to see which CPU we are on. Note the time is kept on 2191 * the local APIC of CPU 0. If on CPU 0, simply read the current 2192 * counter. If on another CPU, issue a remote read command to CPU 0. 
2193 */ 2194 if (cpun == apic_cpus[0].aci_local_id) { 2195 countval = apicadr[APIC_CURR_COUNT]; 2196 } else { 2197 while (get_apic_cmd1() & AV_PENDING) 2198 apic_ret(); 2199 2200 apicadr[APIC_INT_CMD2] = 2201 apic_cpus[0].aci_local_id << APIC_ICR_ID_BIT_OFFSET; 2202 apicadr[APIC_INT_CMD1] = APIC_CURR_ADD|AV_REMOTE; 2203 2204 while ((status = get_apic_cmd1()) & AV_READ_PENDING) 2205 apic_ret(); 2206 2207 if (status & AV_REMOTE_STATUS) /* 1 = valid */ 2208 countval = apicadr[APIC_REMOTE_READ]; 2209 else { /* 0 = invalid */ 2210 apic_remote_hrterr++; 2211 /* 2212 * return last hrtime right now, will need more 2213 * testing if change to retry 2214 */ 2215 temp = apic_last_hrtime; 2216 2217 lock_clear(&apic_gethrtime_lock); 2218 2219 intr_restore(oflags); 2220 2221 return (temp); 2222 } 2223 } 2224 if (countval > last_count_read) 2225 countval = 0; 2226 else 2227 last_count_read = countval; 2228 2229 elapsed_ticks = apic_hertz_count - countval; 2230 2231 curr_timeval = elapsed_ticks * apic_nsec_per_tick; 2232 temp = apic_nsec_since_boot + curr_timeval; 2233 2234 if (apic_hrtime_stamp != old_hrtime_stamp) { /* got an interrupt */ 2235 /* we might have clobbered last_count_read. 
Restore it */ 2236 last_count_read = apic_hertz_count; 2237 goto gethrtime_again; 2238 } 2239 2240 if (temp < apic_last_hrtime) { 2241 /* return last hrtime if error occurs */ 2242 apic_hrtime_error++; 2243 temp = apic_last_hrtime; 2244 } 2245 else 2246 apic_last_hrtime = temp; 2247 2248 lock_clear(&apic_gethrtime_lock); 2249 intr_restore(oflags); 2250 2251 return (temp); 2252 } 2253 2254 /* apic NMI handler */ 2255 /*ARGSUSED*/ 2256 static void 2257 apic_nmi_intr(caddr_t arg) 2258 { 2259 if (apic_shutdown_processors) { 2260 apic_disable_local_apic(); 2261 return; 2262 } 2263 2264 if (lock_try(&apic_nmi_lock)) { 2265 if (apic_kmdb_on_nmi) { 2266 if (psm_debugger() == 0) { 2267 cmn_err(CE_PANIC, 2268 "NMI detected, kmdb is not available."); 2269 } else { 2270 debug_enter("\nNMI detected, entering kmdb.\n"); 2271 } 2272 } else { 2273 if (apic_panic_on_nmi) { 2274 /* Keep panic from entering kmdb. */ 2275 nopanicdebug = 1; 2276 cmn_err(CE_PANIC, "pcplusmp: NMI received"); 2277 } else { 2278 /* 2279 * prom_printf is the best shot we have 2280 * of something which is problem free from 2281 * high level/NMI type of interrupts 2282 */ 2283 prom_printf("pcplusmp: NMI received\n"); 2284 apic_error |= APIC_ERR_NMI; 2285 apic_num_nmis++; 2286 } 2287 } 2288 lock_clear(&apic_nmi_lock); 2289 } 2290 } 2291 2292 /* 2293 * Add mask bits to disable interrupt vector from happening 2294 * at or above IPL. In addition, it should remove mask bits 2295 * to enable interrupt vectors below the given IPL. 2296 * 2297 * Both add and delspl are complicated by the fact that different interrupts 2298 * may share IRQs. This can happen in two ways. 2299 * 1. The same H/W line is shared by more than 1 device 2300 * 1a. with interrupts at different IPLs 2301 * 1b. with interrupts at same IPL 2302 * 2. We ran out of vectors at a given IPL and started sharing vectors. 
2303 * 1b and 2 should be handled gracefully, except for the fact some ISRs 2304 * will get called often when no interrupt is pending for the device. 2305 * For 1a, we just hope that the machine blows up with the person who 2306 * set it up that way!. In the meantime, we handle it at the higher IPL. 2307 */ 2308 /*ARGSUSED*/ 2309 static int 2310 apic_addspl(int irqno, int ipl, int min_ipl, int max_ipl) 2311 { 2312 uchar_t vector; 2313 int iflag; 2314 apic_irq_t *irqptr, *irqheadptr; 2315 int irqindex; 2316 2317 ASSERT(max_ipl <= UCHAR_MAX); 2318 irqindex = IRQINDEX(irqno); 2319 2320 if ((irqindex == -1) || (!apic_irq_table[irqindex])) 2321 return (PSM_FAILURE); 2322 2323 irqptr = irqheadptr = apic_irq_table[irqindex]; 2324 2325 DDI_INTR_IMPLDBG((CE_CONT, "apic_addspl: dip=0x%p type=%d irqno=0x%x " 2326 "vector=0x%x\n", (void *)irqptr->airq_dip, 2327 irqptr->airq_mps_intr_index, irqno, irqptr->airq_vector)); 2328 2329 while (irqptr) { 2330 if (VIRTIRQ(irqindex, irqptr->airq_share_id) == irqno) 2331 break; 2332 irqptr = irqptr->airq_next; 2333 } 2334 irqptr->airq_share++; 2335 2336 /* return if it is not hardware interrupt */ 2337 if (irqptr->airq_mps_intr_index == RESERVE_INDEX) 2338 return (PSM_SUCCESS); 2339 2340 /* Or if there are more interupts at a higher IPL */ 2341 if (ipl != max_ipl) 2342 return (PSM_SUCCESS); 2343 2344 /* 2345 * if apic_picinit() has not been called yet, just return. 2346 * At the end of apic_picinit(), we will call setup_io_intr(). 2347 */ 2348 2349 if (!apic_flag) 2350 return (PSM_SUCCESS); 2351 2352 iflag = intr_clear(); 2353 2354 /* 2355 * Upgrade vector if max_ipl is not earlier ipl. If we cannot allocate, 2356 * return failure. Not very elegant, but then we hope the 2357 * machine will blow up with ... 
2358 */ 2359 if (irqptr->airq_ipl != max_ipl) { 2360 vector = apic_allocate_vector(max_ipl, irqindex, 1); 2361 if (vector == 0) { 2362 intr_restore(iflag); 2363 irqptr->airq_share--; 2364 return (PSM_FAILURE); 2365 } 2366 irqptr = irqheadptr; 2367 apic_mark_vector(irqptr->airq_vector, vector); 2368 while (irqptr) { 2369 irqptr->airq_vector = vector; 2370 irqptr->airq_ipl = (uchar_t)max_ipl; 2371 /* 2372 * reprogram irq being added and every one else 2373 * who is not in the UNINIT state 2374 */ 2375 if ((VIRTIRQ(irqindex, irqptr->airq_share_id) == 2376 irqno) || (irqptr->airq_temp_cpu != IRQ_UNINIT)) { 2377 apic_record_rdt_entry(irqptr, irqindex); 2378 (void) apic_setup_io_intr(irqptr, irqindex); 2379 } 2380 irqptr = irqptr->airq_next; 2381 } 2382 intr_restore(iflag); 2383 return (PSM_SUCCESS); 2384 } 2385 2386 ASSERT(irqptr); 2387 (void) apic_setup_io_intr(irqptr, irqindex); 2388 intr_restore(iflag); 2389 return (PSM_SUCCESS); 2390 } 2391 2392 /* 2393 * Recompute mask bits for the given interrupt vector. 2394 * If there is no interrupt servicing routine for this 2395 * vector, this function should disable interrupt vector 2396 * from happening at all IPLs. If there are still 2397 * handlers using the given vector, this function should 2398 * disable the given vector from happening below the lowest 2399 * IPL of the remaining hadlers. 
2400 */ 2401 /*ARGSUSED*/ 2402 static int 2403 apic_delspl(int irqno, int ipl, int min_ipl, int max_ipl) 2404 { 2405 uchar_t vector, bind_cpu; 2406 int iflag, intin, irqindex; 2407 volatile int32_t *ioapic; 2408 apic_irq_t *irqptr, *irqheadptr; 2409 2410 irqindex = IRQINDEX(irqno); 2411 irqptr = irqheadptr = apic_irq_table[irqindex]; 2412 2413 DDI_INTR_IMPLDBG((CE_CONT, "apic_delspl: dip=0x%p type=%d irqno=0x%x " 2414 "vector=0x%x\n", (void *)irqptr->airq_dip, 2415 irqptr->airq_mps_intr_index, irqno, irqptr->airq_vector)); 2416 2417 while (irqptr) { 2418 if (VIRTIRQ(irqindex, irqptr->airq_share_id) == irqno) 2419 break; 2420 irqptr = irqptr->airq_next; 2421 } 2422 ASSERT(irqptr); 2423 2424 irqptr->airq_share--; 2425 2426 if (ipl < max_ipl) 2427 return (PSM_SUCCESS); 2428 2429 /* return if it is not hardware interrupt */ 2430 if (irqptr->airq_mps_intr_index == RESERVE_INDEX) 2431 return (PSM_SUCCESS); 2432 2433 if (!apic_flag) { 2434 /* 2435 * Clear irq_struct. If two devices shared an intpt 2436 * line & 1 unloaded before picinit, we are hosed. But, then 2437 * we hope the machine will ... 2438 */ 2439 irqptr->airq_mps_intr_index = FREE_INDEX; 2440 irqptr->airq_temp_cpu = IRQ_UNINIT; 2441 apic_free_vector(irqptr->airq_vector); 2442 return (PSM_SUCCESS); 2443 } 2444 /* 2445 * Downgrade vector to new max_ipl if needed.If we cannot allocate, 2446 * use old IPL. Not very elegant, but then we hope ... 
2447 */ 2448 if ((irqptr->airq_ipl != max_ipl) && (max_ipl != PSM_INVALID_IPL)) { 2449 apic_irq_t *irqp; 2450 if (vector = apic_allocate_vector(max_ipl, irqno, 1)) { 2451 apic_mark_vector(irqheadptr->airq_vector, vector); 2452 irqp = irqheadptr; 2453 while (irqp) { 2454 irqp->airq_vector = vector; 2455 irqp->airq_ipl = (uchar_t)max_ipl; 2456 if (irqp->airq_temp_cpu != IRQ_UNINIT) { 2457 apic_record_rdt_entry(irqp, irqindex); 2458 (void) apic_setup_io_intr(irqp, 2459 irqindex); 2460 } 2461 irqp = irqp->airq_next; 2462 } 2463 } 2464 } 2465 2466 if (irqptr->airq_share) 2467 return (PSM_SUCCESS); 2468 2469 ioapic = apicioadr[irqptr->airq_ioapicindex]; 2470 intin = irqptr->airq_intin_no; 2471 iflag = intr_clear(); 2472 lock_set(&apic_ioapic_lock); 2473 ioapic[APIC_IO_REG] = APIC_RDT_CMD + 2 * intin; 2474 ioapic[APIC_IO_DATA] = AV_MASK; 2475 2476 /* Disable the MSI/X vector */ 2477 if (APIC_IS_MSI_OR_MSIX_INDEX(irqptr->airq_mps_intr_index)) { 2478 int type = (irqptr->airq_mps_intr_index == MSI_INDEX) ? 
2479 DDI_INTR_TYPE_MSI : DDI_INTR_TYPE_MSIX; 2480 2481 /* 2482 * Make sure we only disable on the last 2483 * of the multi-MSI support 2484 */ 2485 if (i_ddi_intr_get_current_nintrs(irqptr->airq_dip) == 1) { 2486 (void) apic_pci_msi_unconfigure(irqptr->airq_dip, 2487 type, irqptr->airq_ioapicindex); 2488 (void) apic_pci_msi_disable_mode(irqptr->airq_dip, 2489 type, irqptr->airq_ioapicindex); 2490 } 2491 } 2492 2493 if (max_ipl == PSM_INVALID_IPL) { 2494 ASSERT(irqheadptr == irqptr); 2495 bind_cpu = irqptr->airq_temp_cpu; 2496 if (((uchar_t)bind_cpu != IRQ_UNBOUND) && 2497 ((uchar_t)bind_cpu != IRQ_UNINIT)) { 2498 ASSERT((bind_cpu & ~IRQ_USER_BOUND) < apic_nproc); 2499 if (bind_cpu & IRQ_USER_BOUND) { 2500 /* If hardbound, temp_cpu == cpu */ 2501 bind_cpu &= ~IRQ_USER_BOUND; 2502 apic_cpus[bind_cpu].aci_bound--; 2503 } else 2504 apic_cpus[bind_cpu].aci_temp_bound--; 2505 } 2506 lock_clear(&apic_ioapic_lock); 2507 intr_restore(iflag); 2508 irqptr->airq_temp_cpu = IRQ_UNINIT; 2509 irqptr->airq_mps_intr_index = FREE_INDEX; 2510 apic_free_vector(irqptr->airq_vector); 2511 return (PSM_SUCCESS); 2512 } 2513 lock_clear(&apic_ioapic_lock); 2514 intr_restore(iflag); 2515 2516 mutex_enter(&airq_mutex); 2517 if ((irqptr == apic_irq_table[irqindex])) { 2518 apic_irq_t *oldirqptr; 2519 /* Move valid irq entry to the head */ 2520 irqheadptr = oldirqptr = irqptr; 2521 irqptr = irqptr->airq_next; 2522 ASSERT(irqptr); 2523 while (irqptr) { 2524 if (irqptr->airq_mps_intr_index != FREE_INDEX) 2525 break; 2526 oldirqptr = irqptr; 2527 irqptr = irqptr->airq_next; 2528 } 2529 /* remove all invalid ones from the beginning */ 2530 apic_irq_table[irqindex] = irqptr; 2531 /* 2532 * and link them back after the head. 
The invalid ones 2533 * begin with irqheadptr and end at oldirqptr 2534 */ 2535 oldirqptr->airq_next = irqptr->airq_next; 2536 irqptr->airq_next = irqheadptr; 2537 } 2538 mutex_exit(&airq_mutex); 2539 2540 irqptr->airq_temp_cpu = IRQ_UNINIT; 2541 irqptr->airq_mps_intr_index = FREE_INDEX; 2542 return (PSM_SUCCESS); 2543 } 2544 2545 /* 2546 * Return HW interrupt number corresponding to the given IPL 2547 */ 2548 /*ARGSUSED*/ 2549 static int 2550 apic_softlvl_to_irq(int ipl) 2551 { 2552 /* 2553 * Do not use apic to trigger soft interrupt. 2554 * It will cause the system to hang when 2 hardware interrupts 2555 * at the same priority with the softint are already accepted 2556 * by the apic. Cause the AV_PENDING bit will not be cleared 2557 * until one of the hardware interrupt is eoi'ed. If we need 2558 * to send an ipi at this time, we will end up looping forever 2559 * to wait for the AV_PENDING bit to clear. 2560 */ 2561 return (PSM_SV_SOFTWARE); 2562 } 2563 2564 static int 2565 apic_post_cpu_start() 2566 { 2567 int i, cpun; 2568 apic_irq_t *irq_ptr; 2569 2570 apic_init_intr(); 2571 2572 /* 2573 * since some systems don't enable the internal cache on the non-boot 2574 * cpus, so we have to enable them here 2575 */ 2576 setcr0(getcr0() & ~(0x60000000)); 2577 2578 while (get_apic_cmd1() & AV_PENDING) 2579 apic_ret(); 2580 2581 cpun = psm_get_cpu_id(); 2582 apic_cpus[cpun].aci_status = APIC_CPU_ONLINE | APIC_CPU_INTR_ENABLE; 2583 2584 for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) { 2585 irq_ptr = apic_irq_table[i]; 2586 if ((irq_ptr == NULL) || 2587 ((irq_ptr->airq_cpu & ~IRQ_USER_BOUND) != cpun)) 2588 continue; 2589 2590 while (irq_ptr) { 2591 if (irq_ptr->airq_temp_cpu != IRQ_UNINIT) 2592 (void) apic_rebind(irq_ptr, cpun, 1, IMMEDIATE); 2593 irq_ptr = irq_ptr->airq_next; 2594 } 2595 } 2596 2597 return (PSM_SUCCESS); 2598 } 2599 2600 processorid_t 2601 apic_get_next_processorid(processorid_t cpu_id) 2602 { 2603 2604 int i; 2605 2606 if (cpu_id == -1) 
2607 return ((processorid_t)0); 2608 2609 for (i = cpu_id + 1; i < NCPU; i++) { 2610 if (CPU_IN_SET(apic_cpumask, i)) 2611 return (i); 2612 } 2613 2614 return ((processorid_t)-1); 2615 } 2616 2617 2618 /* 2619 * type == -1 indicates it is an internal request. Do not change 2620 * resv_vector for these requests 2621 */ 2622 static int 2623 apic_get_ipivect(int ipl, int type) 2624 { 2625 uchar_t vector; 2626 int irq; 2627 2628 if (irq = apic_allocate_irq(APIC_VECTOR(ipl))) { 2629 if (vector = apic_allocate_vector(ipl, irq, 1)) { 2630 apic_irq_table[irq]->airq_mps_intr_index = 2631 RESERVE_INDEX; 2632 apic_irq_table[irq]->airq_vector = vector; 2633 if (type != -1) { 2634 apic_resv_vector[ipl] = vector; 2635 } 2636 return (irq); 2637 } 2638 } 2639 apic_error |= APIC_ERR_GET_IPIVECT_FAIL; 2640 return (-1); /* shouldn't happen */ 2641 } 2642 2643 static int 2644 apic_getclkirq(int ipl) 2645 { 2646 int irq; 2647 2648 if ((irq = apic_get_ipivect(ipl, -1)) == -1) 2649 return (-1); 2650 /* 2651 * Note the vector in apic_clkvect for per clock handling. 2652 */ 2653 apic_clkvect = apic_irq_table[irq]->airq_vector - APIC_BASE_VECT; 2654 APIC_VERBOSE_IOAPIC((CE_NOTE, "get_clkirq: vector = %x\n", 2655 apic_clkvect)); 2656 return (irq); 2657 } 2658 2659 /* 2660 * Return the number of APIC clock ticks elapsed for 8245 to decrement 2661 * (APIC_TIME_COUNT + pit_ticks_adj) ticks. 
2662 */ 2663 static uint_t 2664 apic_calibrate(volatile uint32_t *addr, uint16_t *pit_ticks_adj) 2665 { 2666 uint8_t pit_tick_lo; 2667 uint16_t pit_tick, target_pit_tick; 2668 uint32_t start_apic_tick, end_apic_tick; 2669 int iflag; 2670 2671 addr += APIC_CURR_COUNT; 2672 2673 iflag = intr_clear(); 2674 2675 do { 2676 pit_tick_lo = inb(PITCTR0_PORT); 2677 pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo; 2678 } while (pit_tick < APIC_TIME_MIN || 2679 pit_tick_lo <= APIC_LB_MIN || pit_tick_lo >= APIC_LB_MAX); 2680 2681 /* 2682 * Wait for the 8254 to decrement by 5 ticks to ensure 2683 * we didn't start in the middle of a tick. 2684 * Compare with 0x10 for the wrap around case. 2685 */ 2686 target_pit_tick = pit_tick - 5; 2687 do { 2688 pit_tick_lo = inb(PITCTR0_PORT); 2689 pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo; 2690 } while (pit_tick > target_pit_tick || pit_tick_lo < 0x10); 2691 2692 start_apic_tick = *addr; 2693 2694 /* 2695 * Wait for the 8254 to decrement by 2696 * (APIC_TIME_COUNT + pit_ticks_adj) ticks 2697 */ 2698 target_pit_tick = pit_tick - APIC_TIME_COUNT; 2699 do { 2700 pit_tick_lo = inb(PITCTR0_PORT); 2701 pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo; 2702 } while (pit_tick > target_pit_tick || pit_tick_lo < 0x10); 2703 2704 end_apic_tick = *addr; 2705 2706 *pit_ticks_adj = target_pit_tick - pit_tick; 2707 2708 intr_restore(iflag); 2709 2710 return (start_apic_tick - end_apic_tick); 2711 } 2712 2713 /* 2714 * Initialise the APIC timer on the local APIC of CPU 0 to the desired 2715 * frequency. Note at this stage in the boot sequence, the boot processor 2716 * is the only active processor. 2717 * hertz value of 0 indicates a one-shot mode request. In this case 2718 * the function returns the resolution (in nanoseconds) for the hardware 2719 * timer interrupt. If one-shot mode capability is not available, 2720 * the return value will be 0. apic_enable_oneshot is a global switch 2721 * for disabling the functionality. 
2722 * A non-zero positive value for hertz indicates a periodic mode request. 2723 * In this case the hardware will be programmed to generate clock interrupts 2724 * at hertz frequency and returns the resolution of interrupts in 2725 * nanosecond. 2726 */ 2727 2728 static int 2729 apic_clkinit(int hertz) 2730 { 2731 2732 uint_t apic_ticks = 0; 2733 uint_t pit_time; 2734 int ret; 2735 uint16_t pit_ticks_adj; 2736 static int firsttime = 1; 2737 2738 if (firsttime) { 2739 /* first time calibrate */ 2740 2741 apicadr[APIC_DIVIDE_REG] = 0x0; 2742 apicadr[APIC_INIT_COUNT] = APIC_MAXVAL; 2743 2744 /* set periodic interrupt based on CLKIN */ 2745 apicadr[APIC_LOCAL_TIMER] = 2746 (apic_clkvect + APIC_BASE_VECT) | AV_TIME; 2747 tenmicrosec(); 2748 2749 apic_ticks = apic_calibrate(apicadr, &pit_ticks_adj); 2750 2751 apicadr[APIC_LOCAL_TIMER] = 2752 (apic_clkvect + APIC_BASE_VECT) | AV_MASK; 2753 /* 2754 * pit time is the amount of real time (in nanoseconds ) it took 2755 * the 8254 to decrement (APIC_TIME_COUNT + pit_ticks_adj) ticks 2756 */ 2757 pit_time = ((longlong_t)(APIC_TIME_COUNT + 2758 pit_ticks_adj) * NANOSEC) / PIT_HZ; 2759 2760 /* 2761 * Determine the number of nanoseconds per APIC clock tick 2762 * and then determine how many APIC ticks to interrupt at the 2763 * desired frequency 2764 */ 2765 apic_nsec_per_tick = pit_time / apic_ticks; 2766 if (apic_nsec_per_tick == 0) 2767 apic_nsec_per_tick = 1; 2768 2769 /* the interval timer initial count is 32 bit max */ 2770 apic_nsec_max = (hrtime_t)apic_nsec_per_tick * APIC_MAXVAL; 2771 firsttime = 0; 2772 } 2773 2774 if (hertz != 0) { 2775 /* periodic */ 2776 apic_nsec_per_intr = NANOSEC / hertz; 2777 apic_hertz_count = (longlong_t)apic_nsec_per_intr / 2778 apic_nsec_per_tick; 2779 apic_sample_factor_redistribution = hertz + 1; 2780 } 2781 2782 apic_int_busy_mark = (apic_int_busy_mark * 2783 apic_sample_factor_redistribution) / 100; 2784 apic_int_free_mark = (apic_int_free_mark * 2785 apic_sample_factor_redistribution) / 
100; 2786 apic_diff_for_redistribution = (apic_diff_for_redistribution * 2787 apic_sample_factor_redistribution) / 100; 2788 2789 if (hertz == 0) { 2790 /* requested one_shot */ 2791 if (!apic_oneshot_enable) 2792 return (0); 2793 apic_oneshot = 1; 2794 ret = (int)apic_nsec_per_tick; 2795 } else { 2796 /* program the local APIC to interrupt at the given frequency */ 2797 apicadr[APIC_INIT_COUNT] = apic_hertz_count; 2798 apicadr[APIC_LOCAL_TIMER] = 2799 (apic_clkvect + APIC_BASE_VECT) | AV_TIME; 2800 apic_oneshot = 0; 2801 ret = NANOSEC / hertz; 2802 } 2803 2804 return (ret); 2805 2806 } 2807 2808 /* 2809 * apic_preshutdown: 2810 * Called early in shutdown whilst we can still access filesystems to do 2811 * things like loading modules which will be required to complete shutdown 2812 * after filesystems are all unmounted. 2813 */ 2814 static void 2815 apic_preshutdown(int cmd, int fcn) 2816 { 2817 APIC_VERBOSE_POWEROFF(("apic_preshutdown(%d,%d); m=%d a=%d\n", 2818 cmd, fcn, apic_poweroff_method, apic_enable_acpi)); 2819 2820 if ((cmd != A_SHUTDOWN) || (fcn != AD_POWEROFF)) { 2821 return; 2822 } 2823 } 2824 2825 static void 2826 apic_shutdown(int cmd, int fcn) 2827 { 2828 int iflag, restarts, attempts; 2829 int i, j; 2830 volatile int32_t *ioapic; 2831 uchar_t byte; 2832 2833 /* Send NMI to all CPUs except self to do per processor shutdown */ 2834 iflag = intr_clear(); 2835 while (get_apic_cmd1() & AV_PENDING) 2836 apic_ret(); 2837 apic_shutdown_processors = 1; 2838 apicadr[APIC_INT_CMD1] = AV_NMI | AV_LEVEL | AV_SH_ALL_EXCSELF; 2839 2840 /* restore cmos shutdown byte before reboot */ 2841 if (apic_cmos_ssb_set) { 2842 outb(CMOS_ADDR, SSB); 2843 outb(CMOS_DATA, 0); 2844 } 2845 /* Disable the I/O APIC redirection entries */ 2846 for (j = 0; j < apic_io_max; j++) { 2847 int intin_max; 2848 ioapic = apicioadr[j]; 2849 ioapic[APIC_IO_REG] = APIC_VERS_CMD; 2850 /* Bits 23-16 define the maximum redirection entries */ 2851 intin_max = (ioapic[APIC_IO_DATA] >> 16) & 0xff; 
2852 for (i = 0; i < intin_max; i++) { 2853 ioapic[APIC_IO_REG] = APIC_RDT_CMD + 2 * i; 2854 ioapic[APIC_IO_DATA] = AV_MASK; 2855 } 2856 } 2857 2858 /* disable apic mode if imcr present */ 2859 if (apic_imcrp) { 2860 outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT); 2861 outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_PIC); 2862 } 2863 2864 apic_disable_local_apic(); 2865 2866 intr_restore(iflag); 2867 2868 if ((cmd != A_SHUTDOWN) || (fcn != AD_POWEROFF)) { 2869 return; 2870 } 2871 2872 switch (apic_poweroff_method) { 2873 case APIC_POWEROFF_VIA_RTC: 2874 2875 /* select the extended NVRAM bank in the RTC */ 2876 outb(CMOS_ADDR, RTC_REGA); 2877 byte = inb(CMOS_DATA); 2878 outb(CMOS_DATA, (byte | EXT_BANK)); 2879 2880 outb(CMOS_ADDR, PFR_REG); 2881 2882 /* for Predator must toggle the PAB bit */ 2883 byte = inb(CMOS_DATA); 2884 2885 /* 2886 * clear power active bar, wakeup alarm and 2887 * kickstart 2888 */ 2889 byte &= ~(PAB_CBIT | WF_FLAG | KS_FLAG); 2890 outb(CMOS_DATA, byte); 2891 2892 /* delay before next write */ 2893 drv_usecwait(1000); 2894 2895 /* for S40 the following would suffice */ 2896 byte = inb(CMOS_DATA); 2897 2898 /* power active bar control bit */ 2899 byte |= PAB_CBIT; 2900 outb(CMOS_DATA, byte); 2901 2902 break; 2903 2904 case APIC_POWEROFF_VIA_ASPEN_BMC: 2905 restarts = 0; 2906 restart_aspen_bmc: 2907 if (++restarts == 3) 2908 break; 2909 attempts = 0; 2910 do { 2911 byte = inb(MISMIC_FLAG_REGISTER); 2912 byte &= MISMIC_BUSY_MASK; 2913 if (byte != 0) { 2914 drv_usecwait(1000); 2915 if (attempts >= 3) 2916 goto restart_aspen_bmc; 2917 ++attempts; 2918 } 2919 } while (byte != 0); 2920 outb(MISMIC_CNTL_REGISTER, CC_SMS_GET_STATUS); 2921 byte = inb(MISMIC_FLAG_REGISTER); 2922 byte |= 0x1; 2923 outb(MISMIC_FLAG_REGISTER, byte); 2924 i = 0; 2925 for (; i < (sizeof (aspen_bmc)/sizeof (aspen_bmc[0])); 2926 i++) { 2927 attempts = 0; 2928 do { 2929 byte = inb(MISMIC_FLAG_REGISTER); 2930 byte &= MISMIC_BUSY_MASK; 2931 if (byte != 0) { 2932 drv_usecwait(1000); 2933 if 
(attempts >= 3) 2934 goto restart_aspen_bmc; 2935 ++attempts; 2936 } 2937 } while (byte != 0); 2938 outb(MISMIC_CNTL_REGISTER, aspen_bmc[i].cntl); 2939 outb(MISMIC_DATA_REGISTER, aspen_bmc[i].data); 2940 byte = inb(MISMIC_FLAG_REGISTER); 2941 byte |= 0x1; 2942 outb(MISMIC_FLAG_REGISTER, byte); 2943 } 2944 break; 2945 2946 case APIC_POWEROFF_VIA_SITKA_BMC: 2947 restarts = 0; 2948 restart_sitka_bmc: 2949 if (++restarts == 3) 2950 break; 2951 attempts = 0; 2952 do { 2953 byte = inb(SMS_STATUS_REGISTER); 2954 byte &= SMS_STATE_MASK; 2955 if ((byte == SMS_READ_STATE) || 2956 (byte == SMS_WRITE_STATE)) { 2957 drv_usecwait(1000); 2958 if (attempts >= 3) 2959 goto restart_sitka_bmc; 2960 ++attempts; 2961 } 2962 } while ((byte == SMS_READ_STATE) || 2963 (byte == SMS_WRITE_STATE)); 2964 outb(SMS_COMMAND_REGISTER, SMS_GET_STATUS); 2965 i = 0; 2966 for (; i < (sizeof (sitka_bmc)/sizeof (sitka_bmc[0])); 2967 i++) { 2968 attempts = 0; 2969 do { 2970 byte = inb(SMS_STATUS_REGISTER); 2971 byte &= SMS_IBF_MASK; 2972 if (byte != 0) { 2973 drv_usecwait(1000); 2974 if (attempts >= 3) 2975 goto restart_sitka_bmc; 2976 ++attempts; 2977 } 2978 } while (byte != 0); 2979 outb(sitka_bmc[i].port, sitka_bmc[i].data); 2980 } 2981 break; 2982 2983 case APIC_POWEROFF_NONE: 2984 2985 /* If no APIC direct method, we will try using ACPI */ 2986 if (apic_enable_acpi) { 2987 if (acpi_poweroff() == 1) 2988 return; 2989 } else 2990 return; 2991 2992 break; 2993 } 2994 /* 2995 * Wait a limited time here for power to go off. 2996 * If the power does not go off, then there was a 2997 * problem and we should continue to the halt which 2998 * prints a message for the user to press a key to 2999 * reboot. 3000 */ 3001 drv_usecwait(7000000); /* wait seven seconds */ 3002 3003 } 3004 3005 /* 3006 * Try and disable all interrupts. We just assign interrupts to other 3007 * processors based on policy. If any were bound by user request, we 3008 * let them continue and return failure. 
We do not bother to check 3009 * for cache affinity while rebinding. 3010 */ 3011 3012 static int 3013 apic_disable_intr(processorid_t cpun) 3014 { 3015 int bind_cpu = 0, i, hardbound = 0, iflag; 3016 apic_irq_t *irq_ptr; 3017 3018 iflag = intr_clear(); 3019 lock_set(&apic_ioapic_lock); 3020 apic_cpus[cpun].aci_status &= ~APIC_CPU_INTR_ENABLE; 3021 lock_clear(&apic_ioapic_lock); 3022 intr_restore(iflag); 3023 apic_cpus[cpun].aci_curipl = 0; 3024 i = apic_min_device_irq; 3025 for (; i <= apic_max_device_irq; i++) { 3026 /* 3027 * If there are bound interrupts on this cpu, then 3028 * rebind them to other processors. 3029 */ 3030 if ((irq_ptr = apic_irq_table[i]) != NULL) { 3031 ASSERT((irq_ptr->airq_temp_cpu == IRQ_UNBOUND) || 3032 (irq_ptr->airq_temp_cpu == IRQ_UNINIT) || 3033 ((irq_ptr->airq_temp_cpu & ~IRQ_USER_BOUND) < 3034 apic_nproc)); 3035 3036 if (irq_ptr->airq_temp_cpu == (cpun | IRQ_USER_BOUND)) { 3037 hardbound = 1; 3038 continue; 3039 } 3040 3041 if (irq_ptr->airq_temp_cpu == cpun) { 3042 do { 3043 apic_next_bind_cpu += 2; 3044 bind_cpu = apic_next_bind_cpu / 2; 3045 if (bind_cpu >= apic_nproc) { 3046 apic_next_bind_cpu = 1; 3047 bind_cpu = 0; 3048 3049 } 3050 } while (apic_rebind_all(irq_ptr, bind_cpu, 1)); 3051 } 3052 } 3053 } 3054 if (hardbound) { 3055 cmn_err(CE_WARN, "Could not disable interrupts on %d" 3056 "due to user bound interrupts", cpun); 3057 return (PSM_FAILURE); 3058 } 3059 else 3060 return (PSM_SUCCESS); 3061 } 3062 3063 static void 3064 apic_enable_intr(processorid_t cpun) 3065 { 3066 int i, iflag; 3067 apic_irq_t *irq_ptr; 3068 3069 iflag = intr_clear(); 3070 lock_set(&apic_ioapic_lock); 3071 apic_cpus[cpun].aci_status |= APIC_CPU_INTR_ENABLE; 3072 lock_clear(&apic_ioapic_lock); 3073 intr_restore(iflag); 3074 3075 i = apic_min_device_irq; 3076 for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) { 3077 if ((irq_ptr = apic_irq_table[i]) != NULL) { 3078 if ((irq_ptr->airq_cpu & ~IRQ_USER_BOUND) == cpun) { 3079 (void) 
apic_rebind_all(irq_ptr, 3080 irq_ptr->airq_cpu, 1); 3081 } 3082 } 3083 } 3084 } 3085 3086 /* 3087 * apic_introp_xlate() replaces apic_translate_irq() and is 3088 * called only from apic_intr_ops(). With the new ADII framework, 3089 * the priority can no longer be retrived through i_ddi_get_intrspec(). 3090 * It has to be passed in from the caller. 3091 */ 3092 int 3093 apic_introp_xlate(dev_info_t *dip, struct intrspec *ispec, int type) 3094 { 3095 char dev_type[16]; 3096 int dev_len, pci_irq, newirq, bustype, devid, busid, i; 3097 int irqno = ispec->intrspec_vec; 3098 ddi_acc_handle_t cfg_handle; 3099 uchar_t ipin; 3100 struct apic_io_intr *intrp; 3101 iflag_t intr_flag; 3102 APIC_HEADER *hp; 3103 MADT_INTERRUPT_OVERRIDE *isop; 3104 apic_irq_t *airqp; 3105 3106 DDI_INTR_IMPLDBG((CE_CONT, "apic_introp_xlate: dip=0x%p name=%s " 3107 "type=%d irqno=0x%x\n", (void *)dip, ddi_get_name(dip), type, 3108 irqno)); 3109 3110 if (DDI_INTR_IS_MSI_OR_MSIX(type)) { 3111 if ((airqp = apic_find_irq(dip, ispec, type)) != NULL) 3112 return (apic_vector_to_irq[airqp->airq_vector]); 3113 return (apic_setup_irq_table(dip, irqno, NULL, ispec, 3114 NULL, type)); 3115 } 3116 3117 bustype = 0; 3118 3119 /* check if we have already translated this irq */ 3120 mutex_enter(&airq_mutex); 3121 newirq = apic_min_device_irq; 3122 for (; newirq <= apic_max_device_irq; newirq++) { 3123 airqp = apic_irq_table[newirq]; 3124 while (airqp) { 3125 if ((airqp->airq_dip == dip) && 3126 (airqp->airq_origirq == irqno) && 3127 (airqp->airq_mps_intr_index != FREE_INDEX)) { 3128 3129 mutex_exit(&airq_mutex); 3130 return (VIRTIRQ(newirq, airqp->airq_share_id)); 3131 } 3132 airqp = airqp->airq_next; 3133 } 3134 } 3135 mutex_exit(&airq_mutex); 3136 3137 if (apic_defconf) 3138 goto defconf; 3139 3140 if ((dip == NULL) || (!apic_irq_translate && !apic_enable_acpi)) 3141 goto nonpci; 3142 3143 dev_len = sizeof (dev_type); 3144 if (ddi_getlongprop_buf(DDI_DEV_T_ANY, ddi_get_parent(dip), 3145 DDI_PROP_DONTPASS, 
"device_type", (caddr_t)dev_type, 3146 &dev_len) != DDI_PROP_SUCCESS) { 3147 goto nonpci; 3148 } 3149 3150 if ((strcmp(dev_type, "pci") == 0) || 3151 (strcmp(dev_type, "pciex") == 0)) { 3152 /* pci device */ 3153 if (acpica_get_bdf(dip, &busid, &devid, NULL) != 0) 3154 goto nonpci; 3155 if (busid == 0 && apic_pci_bus_total == 1) 3156 busid = (int)apic_single_pci_busid; 3157 3158 if (pci_config_setup(dip, &cfg_handle) != DDI_SUCCESS) 3159 goto nonpci; 3160 ipin = pci_config_get8(cfg_handle, PCI_CONF_IPIN) - PCI_INTA; 3161 pci_config_teardown(&cfg_handle); 3162 if (apic_enable_acpi && !apic_use_acpi_madt_only) { 3163 if (apic_acpi_translate_pci_irq(dip, busid, devid, 3164 ipin, &pci_irq, &intr_flag) != ACPI_PSM_SUCCESS) 3165 goto nonpci; 3166 3167 intr_flag.bustype = BUS_PCI; 3168 if ((newirq = apic_setup_irq_table(dip, pci_irq, NULL, 3169 ispec, &intr_flag, type)) == -1) 3170 goto nonpci; 3171 return (newirq); 3172 } else { 3173 pci_irq = ((devid & 0x1f) << 2) | (ipin & 0x3); 3174 if ((intrp = apic_find_io_intr_w_busid(pci_irq, busid)) 3175 == NULL) { 3176 if ((pci_irq = apic_handle_pci_pci_bridge(dip, 3177 devid, ipin, &intrp)) == -1) 3178 goto nonpci; 3179 } 3180 if ((newirq = apic_setup_irq_table(dip, pci_irq, intrp, 3181 ispec, NULL, type)) == -1) 3182 goto nonpci; 3183 return (newirq); 3184 } 3185 } else if (strcmp(dev_type, "isa") == 0) 3186 bustype = BUS_ISA; 3187 else if (strcmp(dev_type, "eisa") == 0) 3188 bustype = BUS_EISA; 3189 3190 nonpci: 3191 if (apic_enable_acpi && !apic_use_acpi_madt_only) { 3192 /* search iso entries first */ 3193 if (acpi_iso_cnt != 0) { 3194 hp = (APIC_HEADER *)acpi_isop; 3195 i = 0; 3196 while (i < acpi_iso_cnt) { 3197 if (hp->Type == APIC_XRUPT_OVERRIDE) { 3198 isop = (MADT_INTERRUPT_OVERRIDE *)hp; 3199 if (isop->Bus == 0 && 3200 isop->Source == irqno) { 3201 newirq = isop->Interrupt; 3202 intr_flag.intr_po = 3203 isop->Polarity; 3204 intr_flag.intr_el = 3205 isop->TriggerMode; 3206 intr_flag.bustype = BUS_ISA; 3207 3208 return 
(apic_setup_irq_table( 3209 dip, newirq, NULL, ispec, 3210 &intr_flag, type)); 3211 3212 } 3213 i++; 3214 } 3215 hp = (APIC_HEADER *)(((char *)hp) + 3216 hp->Length); 3217 } 3218 } 3219 intr_flag.intr_po = INTR_PO_ACTIVE_HIGH; 3220 intr_flag.intr_el = INTR_EL_EDGE; 3221 intr_flag.bustype = BUS_ISA; 3222 return (apic_setup_irq_table(dip, irqno, NULL, ispec, 3223 &intr_flag, type)); 3224 } else { 3225 if (bustype == 0) 3226 bustype = eisa_level_intr_mask ? BUS_EISA : BUS_ISA; 3227 for (i = 0; i < 2; i++) { 3228 if (((busid = apic_find_bus_id(bustype)) != -1) && 3229 ((intrp = apic_find_io_intr_w_busid(irqno, busid)) 3230 != NULL)) { 3231 if ((newirq = apic_setup_irq_table(dip, irqno, 3232 intrp, ispec, NULL, type)) != -1) { 3233 return (newirq); 3234 } 3235 goto defconf; 3236 } 3237 bustype = (bustype == BUS_EISA) ? BUS_ISA : BUS_EISA; 3238 } 3239 } 3240 3241 /* MPS default configuration */ 3242 defconf: 3243 newirq = apic_setup_irq_table(dip, irqno, NULL, ispec, NULL, type); 3244 if (newirq == -1) 3245 return (newirq); 3246 ASSERT(IRQINDEX(newirq) == irqno); 3247 ASSERT(apic_irq_table[irqno]); 3248 return (newirq); 3249 } 3250 3251 3252 3253 3254 3255 3256 /* 3257 * On machines with PCI-PCI bridges, a device behind a PCI-PCI bridge 3258 * needs special handling. We may need to chase up the device tree, 3259 * using the PCI-PCI Bridge specification's "rotating IPIN assumptions", 3260 * to find the IPIN at the root bus that relates to the IPIN on the 3261 * subsidiary bus (for ACPI or MP). We may, however, have an entry 3262 * in the MP table or the ACPI namespace for this device itself. 3263 * We handle both cases in the search below. 
3264 */ 3265 /* this is the non-acpi version */ 3266 static int 3267 apic_handle_pci_pci_bridge(dev_info_t *idip, int child_devno, int child_ipin, 3268 struct apic_io_intr **intrp) 3269 { 3270 dev_info_t *dipp, *dip; 3271 int pci_irq; 3272 ddi_acc_handle_t cfg_handle; 3273 int bridge_devno, bridge_bus; 3274 int ipin; 3275 3276 dip = idip; 3277 3278 /*CONSTCOND*/ 3279 while (1) { 3280 if ((dipp = ddi_get_parent(dip)) == (dev_info_t *)NULL) 3281 return (-1); 3282 if ((pci_config_setup(dipp, &cfg_handle) == DDI_SUCCESS) && 3283 (pci_config_get8(cfg_handle, PCI_CONF_BASCLASS) == 3284 PCI_CLASS_BRIDGE) && (pci_config_get8(cfg_handle, 3285 PCI_CONF_SUBCLASS) == PCI_BRIDGE_PCI)) { 3286 pci_config_teardown(&cfg_handle); 3287 if (acpica_get_bdf(dipp, &bridge_bus, &bridge_devno, 3288 NULL) != 0) 3289 return (-1); 3290 /* 3291 * This is the rotating scheme that Compaq is using 3292 * and documented in the pci to pci spec. Also, if 3293 * the pci to pci bridge is behind another pci to 3294 * pci bridge, then it need to keep transversing 3295 * up until an interrupt entry is found or reach 3296 * the top of the tree 3297 */ 3298 ipin = (child_devno + child_ipin) % PCI_INTD; 3299 if (bridge_bus == 0 && apic_pci_bus_total == 1) 3300 bridge_bus = (int)apic_single_pci_busid; 3301 pci_irq = ((bridge_devno & 0x1f) << 2) | 3302 (ipin & 0x3); 3303 if ((*intrp = apic_find_io_intr_w_busid(pci_irq, 3304 bridge_bus)) != NULL) { 3305 return (pci_irq); 3306 } 3307 dip = dipp; 3308 child_devno = bridge_devno; 3309 child_ipin = ipin; 3310 } else 3311 return (-1); 3312 } 3313 /*LINTED: function will not fall off the bottom */ 3314 } 3315 3316 3317 3318 3319 static uchar_t 3320 acpi_find_ioapic(int irq) 3321 { 3322 int i; 3323 3324 for (i = 0; i < apic_io_max; i++) { 3325 if (irq >= apic_io_vectbase[i] && irq <= apic_io_vectend[i]) 3326 return (i); 3327 } 3328 return (0xFF); /* shouldn't happen */ 3329 } 3330 3331 /* 3332 * See if two irqs are compatible for sharing a vector. 
3333 * Currently we only support sharing of PCI devices. 3334 */ 3335 static int 3336 acpi_intr_compatible(iflag_t iflag1, iflag_t iflag2) 3337 { 3338 uint_t level1, po1; 3339 uint_t level2, po2; 3340 3341 /* Assume active high by default */ 3342 po1 = 0; 3343 po2 = 0; 3344 3345 if (iflag1.bustype != iflag2.bustype || iflag1.bustype != BUS_PCI) 3346 return (0); 3347 3348 if (iflag1.intr_el == INTR_EL_CONFORM) 3349 level1 = AV_LEVEL; 3350 else 3351 level1 = (iflag1.intr_el == INTR_EL_LEVEL) ? AV_LEVEL : 0; 3352 3353 if (level1 && ((iflag1.intr_po == INTR_PO_ACTIVE_LOW) || 3354 (iflag1.intr_po == INTR_PO_CONFORM))) 3355 po1 = AV_ACTIVE_LOW; 3356 3357 if (iflag2.intr_el == INTR_EL_CONFORM) 3358 level2 = AV_LEVEL; 3359 else 3360 level2 = (iflag2.intr_el == INTR_EL_LEVEL) ? AV_LEVEL : 0; 3361 3362 if (level2 && ((iflag2.intr_po == INTR_PO_ACTIVE_LOW) || 3363 (iflag2.intr_po == INTR_PO_CONFORM))) 3364 po2 = AV_ACTIVE_LOW; 3365 3366 if ((level1 == level2) && (po1 == po2)) 3367 return (1); 3368 3369 return (0); 3370 } 3371 3372 /* 3373 * Attempt to share vector with someone else 3374 */ 3375 static int 3376 apic_share_vector(int irqno, iflag_t *intr_flagp, short intr_index, int ipl, 3377 uchar_t ioapicindex, uchar_t ipin, apic_irq_t **irqptrp) 3378 { 3379 #ifdef DEBUG 3380 apic_irq_t *tmpirqp = NULL; 3381 #endif /* DEBUG */ 3382 apic_irq_t *irqptr, dummyirq; 3383 int newirq, chosen_irq = -1, share = 127; 3384 int lowest, highest, i; 3385 uchar_t share_id; 3386 3387 DDI_INTR_IMPLDBG((CE_CONT, "apic_share_vector: irqno=0x%x " 3388 "intr_index=0x%x ipl=0x%x\n", irqno, intr_index, ipl)); 3389 3390 highest = apic_ipltopri[ipl] + APIC_VECTOR_MASK; 3391 lowest = apic_ipltopri[ipl-1] + APIC_VECTOR_PER_IPL; 3392 3393 if (highest < lowest) /* Both ipl and ipl-1 map to same pri */ 3394 lowest -= APIC_VECTOR_PER_IPL; 3395 dummyirq.airq_mps_intr_index = intr_index; 3396 dummyirq.airq_ioapicindex = ioapicindex; 3397 dummyirq.airq_intin_no = ipin; 3398 if (intr_flagp) 3399 
dummyirq.airq_iflag = *intr_flagp; 3400 apic_record_rdt_entry(&dummyirq, irqno); 3401 for (i = lowest; i <= highest; i++) { 3402 newirq = apic_vector_to_irq[i]; 3403 if (newirq == APIC_RESV_IRQ) 3404 continue; 3405 irqptr = apic_irq_table[newirq]; 3406 3407 if ((dummyirq.airq_rdt_entry & 0xFF00) != 3408 (irqptr->airq_rdt_entry & 0xFF00)) 3409 /* not compatible */ 3410 continue; 3411 3412 if (irqptr->airq_share < share) { 3413 share = irqptr->airq_share; 3414 chosen_irq = newirq; 3415 } 3416 } 3417 if (chosen_irq != -1) { 3418 /* 3419 * Assign a share id which is free or which is larger 3420 * than the largest one. 3421 */ 3422 share_id = 1; 3423 mutex_enter(&airq_mutex); 3424 irqptr = apic_irq_table[chosen_irq]; 3425 while (irqptr) { 3426 if (irqptr->airq_mps_intr_index == FREE_INDEX) { 3427 share_id = irqptr->airq_share_id; 3428 break; 3429 } 3430 if (share_id <= irqptr->airq_share_id) 3431 share_id = irqptr->airq_share_id + 1; 3432 #ifdef DEBUG 3433 tmpirqp = irqptr; 3434 #endif /* DEBUG */ 3435 irqptr = irqptr->airq_next; 3436 } 3437 if (!irqptr) { 3438 irqptr = kmem_zalloc(sizeof (apic_irq_t), KM_SLEEP); 3439 irqptr->airq_temp_cpu = IRQ_UNINIT; 3440 irqptr->airq_next = 3441 apic_irq_table[chosen_irq]->airq_next; 3442 apic_irq_table[chosen_irq]->airq_next = irqptr; 3443 #ifdef DEBUG 3444 tmpirqp = apic_irq_table[chosen_irq]; 3445 #endif /* DEBUG */ 3446 } 3447 irqptr->airq_mps_intr_index = intr_index; 3448 irqptr->airq_ioapicindex = ioapicindex; 3449 irqptr->airq_intin_no = ipin; 3450 if (intr_flagp) 3451 irqptr->airq_iflag = *intr_flagp; 3452 irqptr->airq_vector = apic_irq_table[chosen_irq]->airq_vector; 3453 irqptr->airq_share_id = share_id; 3454 apic_record_rdt_entry(irqptr, irqno); 3455 *irqptrp = irqptr; 3456 #ifdef DEBUG 3457 /* shuffle the pointers to test apic_delspl path */ 3458 if (tmpirqp) { 3459 tmpirqp->airq_next = irqptr->airq_next; 3460 irqptr->airq_next = apic_irq_table[chosen_irq]; 3461 apic_irq_table[chosen_irq] = irqptr; 3462 } 3463 #endif /* 
DEBUG */ 3464 mutex_exit(&airq_mutex); 3465 return (VIRTIRQ(chosen_irq, share_id)); 3466 } 3467 return (-1); 3468 } 3469 3470 /* 3471 * 3472 */ 3473 static int 3474 apic_setup_irq_table(dev_info_t *dip, int irqno, struct apic_io_intr *intrp, 3475 struct intrspec *ispec, iflag_t *intr_flagp, int type) 3476 { 3477 int origirq = ispec->intrspec_vec; 3478 uchar_t ipl = ispec->intrspec_pri; 3479 int newirq, intr_index; 3480 uchar_t ipin, ioapic, ioapicindex, vector; 3481 apic_irq_t *irqptr; 3482 major_t major; 3483 dev_info_t *sdip; 3484 3485 DDI_INTR_IMPLDBG((CE_CONT, "apic_setup_irq_table: dip=0x%p type=%d " 3486 "irqno=0x%x origirq=0x%x\n", (void *)dip, type, irqno, origirq)); 3487 3488 ASSERT(ispec != NULL); 3489 3490 major = (dip != NULL) ? ddi_name_to_major(ddi_get_name(dip)) : 0; 3491 3492 if (DDI_INTR_IS_MSI_OR_MSIX(type)) { 3493 /* MSI/X doesn't need to setup ioapic stuffs */ 3494 ioapicindex = 0xff; 3495 ioapic = 0xff; 3496 ipin = (uchar_t)0xff; 3497 intr_index = (type == DDI_INTR_TYPE_MSI) ? MSI_INDEX : 3498 MSIX_INDEX; 3499 mutex_enter(&airq_mutex); 3500 if ((irqno = apic_allocate_irq(apic_first_avail_irq)) == -1) { 3501 mutex_exit(&airq_mutex); 3502 /* need an irq for MSI/X to index into autovect[] */ 3503 cmn_err(CE_WARN, "No interrupt irq: %s instance %d", 3504 ddi_get_name(dip), ddi_get_instance(dip)); 3505 return (-1); 3506 } 3507 mutex_exit(&airq_mutex); 3508 3509 } else if (intrp != NULL) { 3510 intr_index = (int)(intrp - apic_io_intrp); 3511 ioapic = intrp->intr_destid; 3512 ipin = intrp->intr_destintin; 3513 /* Find ioapicindex. If destid was ALL, we will exit with 0. 
*/ 3514 for (ioapicindex = apic_io_max - 1; ioapicindex; ioapicindex--) 3515 if (apic_io_id[ioapicindex] == ioapic) 3516 break; 3517 ASSERT((ioapic == apic_io_id[ioapicindex]) || 3518 (ioapic == INTR_ALL_APIC)); 3519 3520 /* check whether this intin# has been used by another irqno */ 3521 if ((newirq = apic_find_intin(ioapicindex, ipin)) != -1) { 3522 return (newirq); 3523 } 3524 3525 } else if (intr_flagp != NULL) { 3526 /* ACPI case */ 3527 intr_index = ACPI_INDEX; 3528 ioapicindex = acpi_find_ioapic(irqno); 3529 ASSERT(ioapicindex != 0xFF); 3530 ioapic = apic_io_id[ioapicindex]; 3531 ipin = irqno - apic_io_vectbase[ioapicindex]; 3532 if (apic_irq_table[irqno] && 3533 apic_irq_table[irqno]->airq_mps_intr_index == ACPI_INDEX) { 3534 ASSERT(apic_irq_table[irqno]->airq_intin_no == ipin && 3535 apic_irq_table[irqno]->airq_ioapicindex == 3536 ioapicindex); 3537 return (irqno); 3538 } 3539 3540 } else { 3541 /* default configuration */ 3542 ioapicindex = 0; 3543 ioapic = apic_io_id[ioapicindex]; 3544 ipin = (uchar_t)irqno; 3545 intr_index = DEFAULT_INDEX; 3546 } 3547 3548 if (ispec == NULL) { 3549 APIC_VERBOSE_IOAPIC((CE_WARN, "No intrspec for irqno = %x\n", 3550 irqno)); 3551 } else if ((vector = apic_allocate_vector(ipl, irqno, 0)) == 0) { 3552 if ((newirq = apic_share_vector(irqno, intr_flagp, intr_index, 3553 ipl, ioapicindex, ipin, &irqptr)) != -1) { 3554 irqptr->airq_ipl = ipl; 3555 irqptr->airq_origirq = (uchar_t)origirq; 3556 irqptr->airq_dip = dip; 3557 irqptr->airq_major = major; 3558 sdip = apic_irq_table[IRQINDEX(newirq)]->airq_dip; 3559 /* This is OK to do really */ 3560 if (sdip == NULL) { 3561 cmn_err(CE_WARN, "Sharing vectors: %s" 3562 " instance %d and SCI", 3563 ddi_get_name(dip), ddi_get_instance(dip)); 3564 } else { 3565 cmn_err(CE_WARN, "Sharing vectors: %s" 3566 " instance %d and %s instance %d", 3567 ddi_get_name(sdip), ddi_get_instance(sdip), 3568 ddi_get_name(dip), ddi_get_instance(dip)); 3569 } 3570 return (newirq); 3571 } 3572 /* try high 
priority allocation now that share has failed */ 3573 if ((vector = apic_allocate_vector(ipl, irqno, 1)) == 0) { 3574 cmn_err(CE_WARN, "No interrupt vector: %s instance %d", 3575 ddi_get_name(dip), ddi_get_instance(dip)); 3576 return (-1); 3577 } 3578 } 3579 3580 mutex_enter(&airq_mutex); 3581 if (apic_irq_table[irqno] == NULL) { 3582 irqptr = kmem_zalloc(sizeof (apic_irq_t), KM_SLEEP); 3583 irqptr->airq_temp_cpu = IRQ_UNINIT; 3584 apic_irq_table[irqno] = irqptr; 3585 } else { 3586 irqptr = apic_irq_table[irqno]; 3587 if (irqptr->airq_mps_intr_index != FREE_INDEX) { 3588 /* 3589 * The slot is used by another irqno, so allocate 3590 * a free irqno for this interrupt 3591 */ 3592 newirq = apic_allocate_irq(apic_first_avail_irq); 3593 if (newirq == -1) { 3594 mutex_exit(&airq_mutex); 3595 return (-1); 3596 } 3597 irqno = newirq; 3598 irqptr = apic_irq_table[irqno]; 3599 if (irqptr == NULL) { 3600 irqptr = kmem_zalloc(sizeof (apic_irq_t), 3601 KM_SLEEP); 3602 irqptr->airq_temp_cpu = IRQ_UNINIT; 3603 apic_irq_table[irqno] = irqptr; 3604 } 3605 apic_modify_vector(vector, newirq); 3606 } 3607 } 3608 apic_max_device_irq = max(irqno, apic_max_device_irq); 3609 apic_min_device_irq = min(irqno, apic_min_device_irq); 3610 mutex_exit(&airq_mutex); 3611 irqptr->airq_ioapicindex = ioapicindex; 3612 irqptr->airq_intin_no = ipin; 3613 irqptr->airq_ipl = ipl; 3614 irqptr->airq_vector = vector; 3615 irqptr->airq_origirq = (uchar_t)origirq; 3616 irqptr->airq_share_id = 0; 3617 irqptr->airq_mps_intr_index = (short)intr_index; 3618 irqptr->airq_dip = dip; 3619 irqptr->airq_major = major; 3620 irqptr->airq_cpu = apic_bind_intr(dip, irqno, ioapic, ipin); 3621 if (intr_flagp) 3622 irqptr->airq_iflag = *intr_flagp; 3623 3624 if (!DDI_INTR_IS_MSI_OR_MSIX(type)) { 3625 /* setup I/O APIC entry for non-MSI/X interrupts */ 3626 apic_record_rdt_entry(irqptr, irqno); 3627 } 3628 return (irqno); 3629 } 3630 3631 /* 3632 * return the cpu to which this intr should be bound. 
3633 * Check properties or any other mechanism to see if user wants it 3634 * bound to a specific CPU. If so, return the cpu id with high bit set. 3635 * If not, use the policy to choose a cpu and return the id. 3636 */ 3637 uchar_t 3638 apic_bind_intr(dev_info_t *dip, int irq, uchar_t ioapicid, uchar_t intin) 3639 { 3640 int instance, instno, prop_len, bind_cpu, count; 3641 uint_t i, rc; 3642 uchar_t cpu; 3643 major_t major; 3644 char *name, *drv_name, *prop_val, *cptr; 3645 char prop_name[32]; 3646 3647 3648 if (apic_intr_policy == INTR_LOWEST_PRIORITY) 3649 return (IRQ_UNBOUND); 3650 3651 drv_name = NULL; 3652 rc = DDI_PROP_NOT_FOUND; 3653 major = (major_t)-1; 3654 if (dip != NULL) { 3655 name = ddi_get_name(dip); 3656 major = ddi_name_to_major(name); 3657 drv_name = ddi_major_to_name(major); 3658 instance = ddi_get_instance(dip); 3659 if (apic_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) { 3660 i = apic_min_device_irq; 3661 for (; i <= apic_max_device_irq; i++) { 3662 3663 if ((i == irq) || (apic_irq_table[i] == NULL) || 3664 (apic_irq_table[i]->airq_mps_intr_index 3665 == FREE_INDEX)) 3666 continue; 3667 3668 if ((apic_irq_table[i]->airq_major == major) && 3669 (!(apic_irq_table[i]->airq_cpu & 3670 IRQ_USER_BOUND))) { 3671 3672 cpu = apic_irq_table[i]->airq_cpu; 3673 3674 cmn_err(CE_CONT, 3675 "!pcplusmp: %s (%s) instance #%d " 3676 "vector 0x%x ioapic 0x%x " 3677 "intin 0x%x is bound to cpu %d\n", 3678 name, drv_name, instance, irq, 3679 ioapicid, intin, cpu); 3680 return (cpu); 3681 } 3682 } 3683 } 3684 /* 3685 * search for "drvname"_intpt_bind_cpus property first, the 3686 * syntax of the property should be "a[,b,c,...]" where 3687 * instance 0 binds to cpu a, instance 1 binds to cpu b, 3688 * instance 3 binds to cpu c... 3689 * ddi_getlongprop() will search /option first, then / 3690 * if "drvname"_intpt_bind_cpus doesn't exist, then find 3691 * intpt_bind_cpus property. 
The syntax is the same, and 3692 * it applies to all the devices if its "drvname" specific 3693 * property doesn't exist 3694 */ 3695 (void) strcpy(prop_name, drv_name); 3696 (void) strcat(prop_name, "_intpt_bind_cpus"); 3697 rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, 0, prop_name, 3698 (caddr_t)&prop_val, &prop_len); 3699 if (rc != DDI_PROP_SUCCESS) { 3700 rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, 0, 3701 "intpt_bind_cpus", (caddr_t)&prop_val, &prop_len); 3702 } 3703 } 3704 if (rc == DDI_PROP_SUCCESS) { 3705 for (i = count = 0; i < (prop_len - 1); i++) 3706 if (prop_val[i] == ',') 3707 count++; 3708 if (prop_val[i-1] != ',') 3709 count++; 3710 /* 3711 * if somehow the binding instances defined in the 3712 * property are not enough for this instno., then 3713 * reuse the pattern for the next instance until 3714 * it reaches the requested instno 3715 */ 3716 instno = instance % count; 3717 i = 0; 3718 cptr = prop_val; 3719 while (i < instno) 3720 if (*cptr++ == ',') 3721 i++; 3722 bind_cpu = stoi(&cptr); 3723 kmem_free(prop_val, prop_len); 3724 /* if specific cpu is bogus, then default to cpu 0 */ 3725 if (bind_cpu >= apic_nproc) { 3726 cmn_err(CE_WARN, "pcplusmp: %s=%s: CPU %d not present", 3727 prop_name, prop_val, bind_cpu); 3728 bind_cpu = 0; 3729 } else { 3730 /* indicate that we are bound at user request */ 3731 bind_cpu |= IRQ_USER_BOUND; 3732 } 3733 /* 3734 * no need to check apic_cpus[].aci_status, if specific cpu is 3735 * not up, then post_cpu_start will handle it. 
3736 */ 3737 } else { 3738 bind_cpu = apic_next_bind_cpu++; 3739 if (bind_cpu >= apic_nproc) { 3740 apic_next_bind_cpu = 1; 3741 bind_cpu = 0; 3742 } 3743 } 3744 if (drv_name != NULL) 3745 cmn_err(CE_CONT, "!pcplusmp: %s (%s) instance %d " 3746 "vector 0x%x ioapic 0x%x intin 0x%x is bound to cpu %d\n", 3747 name, drv_name, instance, 3748 irq, ioapicid, intin, bind_cpu & ~IRQ_USER_BOUND); 3749 else 3750 cmn_err(CE_CONT, "!pcplusmp: " 3751 "vector 0x%x ioapic 0x%x intin 0x%x is bound to cpu %d\n", 3752 irq, ioapicid, intin, bind_cpu & ~IRQ_USER_BOUND); 3753 3754 return ((uchar_t)bind_cpu); 3755 } 3756 3757 static struct apic_io_intr * 3758 apic_find_io_intr_w_busid(int irqno, int busid) 3759 { 3760 struct apic_io_intr *intrp; 3761 3762 /* 3763 * It can have more than 1 entry with same source bus IRQ, 3764 * but unique with the source bus id 3765 */ 3766 intrp = apic_io_intrp; 3767 if (intrp != NULL) { 3768 while (intrp->intr_entry == APIC_IO_INTR_ENTRY) { 3769 if (intrp->intr_irq == irqno && 3770 intrp->intr_busid == busid && 3771 intrp->intr_type == IO_INTR_INT) 3772 return (intrp); 3773 intrp++; 3774 } 3775 } 3776 APIC_VERBOSE_IOAPIC((CE_NOTE, "Did not find io intr for irqno:" 3777 "busid %x:%x\n", irqno, busid)); 3778 return ((struct apic_io_intr *)NULL); 3779 } 3780 3781 3782 struct mps_bus_info { 3783 char *bus_name; 3784 int bus_id; 3785 } bus_info_array[] = { 3786 "ISA ", BUS_ISA, 3787 "PCI ", BUS_PCI, 3788 "EISA ", BUS_EISA, 3789 "XPRESS", BUS_XPRESS, 3790 "PCMCIA", BUS_PCMCIA, 3791 "VL ", BUS_VL, 3792 "CBUS ", BUS_CBUS, 3793 "CBUSII", BUS_CBUSII, 3794 "FUTURE", BUS_FUTURE, 3795 "INTERN", BUS_INTERN, 3796 "MBI ", BUS_MBI, 3797 "MBII ", BUS_MBII, 3798 "MPI ", BUS_MPI, 3799 "MPSA ", BUS_MPSA, 3800 "NUBUS ", BUS_NUBUS, 3801 "TC ", BUS_TC, 3802 "VME ", BUS_VME 3803 }; 3804 3805 static int 3806 apic_find_bus_type(char *bus) 3807 { 3808 int i = 0; 3809 3810 for (; i < sizeof (bus_info_array)/sizeof (struct mps_bus_info); i++) 3811 if (strncmp(bus, 
bus_info_array[i].bus_name, 3812 strlen(bus_info_array[i].bus_name)) == 0) 3813 return (bus_info_array[i].bus_id); 3814 APIC_VERBOSE_IOAPIC((CE_WARN, "Did not find bus type for bus %s", bus)); 3815 return (0); 3816 } 3817 3818 static int 3819 apic_find_bus(int busid) 3820 { 3821 struct apic_bus *busp; 3822 3823 busp = apic_busp; 3824 while (busp->bus_entry == APIC_BUS_ENTRY) { 3825 if (busp->bus_id == busid) 3826 return (apic_find_bus_type((char *)&busp->bus_str1)); 3827 busp++; 3828 } 3829 APIC_VERBOSE_IOAPIC((CE_WARN, "Did not find bus for bus id %x", busid)); 3830 return (0); 3831 } 3832 3833 static int 3834 apic_find_bus_id(int bustype) 3835 { 3836 struct apic_bus *busp; 3837 3838 busp = apic_busp; 3839 while (busp->bus_entry == APIC_BUS_ENTRY) { 3840 if (apic_find_bus_type((char *)&busp->bus_str1) == bustype) 3841 return (busp->bus_id); 3842 busp++; 3843 } 3844 APIC_VERBOSE_IOAPIC((CE_WARN, "Did not find bus id for bustype %x", 3845 bustype)); 3846 return (-1); 3847 } 3848 3849 /* 3850 * Check if a particular irq need to be reserved for any io_intr 3851 */ 3852 static struct apic_io_intr * 3853 apic_find_io_intr(int irqno) 3854 { 3855 struct apic_io_intr *intrp; 3856 3857 intrp = apic_io_intrp; 3858 if (intrp != NULL) { 3859 while (intrp->intr_entry == APIC_IO_INTR_ENTRY) { 3860 if (intrp->intr_irq == irqno && 3861 intrp->intr_type == IO_INTR_INT) 3862 return (intrp); 3863 intrp++; 3864 } 3865 } 3866 return ((struct apic_io_intr *)NULL); 3867 } 3868 3869 /* 3870 * Check if the given ioapicindex intin combination has already been assigned 3871 * an irq. If so return irqno. 
Else -1 3872 */ 3873 static int 3874 apic_find_intin(uchar_t ioapic, uchar_t intin) 3875 { 3876 apic_irq_t *irqptr; 3877 int i; 3878 3879 /* find ioapic and intin in the apic_irq_table[] and return the index */ 3880 for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) { 3881 irqptr = apic_irq_table[i]; 3882 while (irqptr) { 3883 if ((irqptr->airq_mps_intr_index >= 0) && 3884 (irqptr->airq_intin_no == intin) && 3885 (irqptr->airq_ioapicindex == ioapic)) { 3886 APIC_VERBOSE_IOAPIC((CE_NOTE, "!Found irq " 3887 "entry for ioapic:intin %x:%x " 3888 "shared interrupts ?", ioapic, intin)); 3889 return (i); 3890 } 3891 irqptr = irqptr->airq_next; 3892 } 3893 } 3894 return (-1); 3895 } 3896 3897 int 3898 apic_allocate_irq(int irq) 3899 { 3900 int freeirq, i; 3901 3902 if ((freeirq = apic_find_free_irq(irq, (APIC_RESV_IRQ - 1))) == -1) 3903 if ((freeirq = apic_find_free_irq(APIC_FIRST_FREE_IRQ, 3904 (irq - 1))) == -1) { 3905 /* 3906 * if BIOS really defines every single irq in the mps 3907 * table, then don't worry about conflicting with 3908 * them, just use any free slot in apic_irq_table 3909 */ 3910 for (i = APIC_FIRST_FREE_IRQ; i < APIC_RESV_IRQ; i++) { 3911 if ((apic_irq_table[i] == NULL) || 3912 apic_irq_table[i]->airq_mps_intr_index == 3913 FREE_INDEX) { 3914 freeirq = i; 3915 break; 3916 } 3917 } 3918 if (freeirq == -1) { 3919 /* This shouldn't happen, but just in case */ 3920 cmn_err(CE_WARN, "pcplusmp: NO available IRQ"); 3921 return (-1); 3922 } 3923 } 3924 if (apic_irq_table[freeirq] == NULL) { 3925 apic_irq_table[freeirq] = 3926 kmem_zalloc(sizeof (apic_irq_t), KM_NOSLEEP); 3927 if (apic_irq_table[freeirq] == NULL) { 3928 cmn_err(CE_WARN, "pcplusmp: NO memory to allocate IRQ"); 3929 return (-1); 3930 } 3931 apic_irq_table[freeirq]->airq_mps_intr_index = FREE_INDEX; 3932 } 3933 return (freeirq); 3934 } 3935 3936 static int 3937 apic_find_free_irq(int start, int end) 3938 { 3939 int i; 3940 3941 for (i = start; i <= end; i++) 3942 /* Check if any I/O 
entry needs this IRQ */ 3943 if (apic_find_io_intr(i) == NULL) { 3944 /* Then see if it is free */ 3945 if ((apic_irq_table[i] == NULL) || 3946 (apic_irq_table[i]->airq_mps_intr_index == 3947 FREE_INDEX)) { 3948 return (i); 3949 } 3950 } 3951 return (-1); 3952 } 3953 3954 /* 3955 * Allocate a free vector for irq at ipl. Takes care of merging of multiple 3956 * IPLs into a single APIC level as well as stretching some IPLs onto multiple 3957 * levels. APIC_HI_PRI_VECTS interrupts are reserved for high priority 3958 * requests and allocated only when pri is set. 3959 */ 3960 static uchar_t 3961 apic_allocate_vector(int ipl, int irq, int pri) 3962 { 3963 int lowest, highest, i; 3964 3965 highest = apic_ipltopri[ipl] + APIC_VECTOR_MASK; 3966 lowest = apic_ipltopri[ipl - 1] + APIC_VECTOR_PER_IPL; 3967 3968 if (highest < lowest) /* Both ipl and ipl - 1 map to same pri */ 3969 lowest -= APIC_VECTOR_PER_IPL; 3970 3971 #ifdef DEBUG 3972 if (apic_restrict_vector) /* for testing shared interrupt logic */ 3973 highest = lowest + apic_restrict_vector + APIC_HI_PRI_VECTS; 3974 #endif /* DEBUG */ 3975 if (pri == 0) 3976 highest -= APIC_HI_PRI_VECTS; 3977 3978 for (i = lowest; i < highest; i++) { 3979 if ((i == T_FASTTRAP) || (i == APIC_SPUR_INTR) || 3980 (i == T_SYSCALLINT) || (i == T_DTRACE_PROBE) || 3981 (i == T_DTRACE_RET)) 3982 continue; 3983 if (apic_vector_to_irq[i] == APIC_RESV_IRQ) { 3984 apic_vector_to_irq[i] = (uchar_t)irq; 3985 return (i); 3986 } 3987 } 3988 3989 return (0); 3990 } 3991 3992 static void 3993 apic_modify_vector(uchar_t vector, int irq) 3994 { 3995 apic_vector_to_irq[vector] = (uchar_t)irq; 3996 } 3997 3998 /* 3999 * Mark vector as being in the process of being deleted. Interrupts 4000 * may still come in on some CPU. The moment an interrupt comes with 4001 * the new vector, we know we can free the old one. Called only from 4002 * addspl and delspl with interrupts disabled. 
Because an interrupt 4003 * can be shared, but no interrupt from either device may come in, 4004 * we also use a timeout mechanism, which we arbitrarily set to 4005 * apic_revector_timeout microseconds. 4006 */ 4007 static void 4008 apic_mark_vector(uchar_t oldvector, uchar_t newvector) 4009 { 4010 int iflag = intr_clear(); 4011 lock_set(&apic_revector_lock); 4012 if (!apic_oldvec_to_newvec) { 4013 apic_oldvec_to_newvec = 4014 kmem_zalloc(sizeof (newvector) * APIC_MAX_VECTOR * 2, 4015 KM_NOSLEEP); 4016 4017 if (!apic_oldvec_to_newvec) { 4018 /* 4019 * This failure is not catastrophic. 4020 * But, the oldvec will never be freed. 4021 */ 4022 apic_error |= APIC_ERR_MARK_VECTOR_FAIL; 4023 lock_clear(&apic_revector_lock); 4024 intr_restore(iflag); 4025 return; 4026 } 4027 apic_newvec_to_oldvec = &apic_oldvec_to_newvec[APIC_MAX_VECTOR]; 4028 } 4029 4030 /* See if we already did this for drivers which do double addintrs */ 4031 if (apic_oldvec_to_newvec[oldvector] != newvector) { 4032 apic_oldvec_to_newvec[oldvector] = newvector; 4033 apic_newvec_to_oldvec[newvector] = oldvector; 4034 apic_revector_pending++; 4035 } 4036 lock_clear(&apic_revector_lock); 4037 intr_restore(iflag); 4038 (void) timeout(apic_xlate_vector_free_timeout_handler, 4039 (void *)(uintptr_t)oldvector, drv_usectohz(apic_revector_timeout)); 4040 } 4041 4042 /* 4043 * xlate_vector is called from intr_enter if revector_pending is set. 4044 * It will xlate it if needed and mark the old vector as free. 4045 */ 4046 static uchar_t 4047 apic_xlate_vector(uchar_t vector) 4048 { 4049 uchar_t newvector, oldvector = 0; 4050 4051 lock_set(&apic_revector_lock); 4052 /* Do we really need to do this ? */ 4053 if (!apic_revector_pending) { 4054 lock_clear(&apic_revector_lock); 4055 return (vector); 4056 } 4057 if ((newvector = apic_oldvec_to_newvec[vector]) != 0) 4058 oldvector = vector; 4059 else { 4060 /* 4061 * The incoming vector is new . 
See if a stale entry is 4062 * remaining 4063 */ 4064 if ((oldvector = apic_newvec_to_oldvec[vector]) != 0) 4065 newvector = vector; 4066 } 4067 4068 if (oldvector) { 4069 apic_revector_pending--; 4070 apic_oldvec_to_newvec[oldvector] = 0; 4071 apic_newvec_to_oldvec[newvector] = 0; 4072 apic_free_vector(oldvector); 4073 lock_clear(&apic_revector_lock); 4074 /* There could have been more than one reprogramming! */ 4075 return (apic_xlate_vector(newvector)); 4076 } 4077 lock_clear(&apic_revector_lock); 4078 return (vector); 4079 } 4080 4081 void 4082 apic_xlate_vector_free_timeout_handler(void *arg) 4083 { 4084 int iflag; 4085 uchar_t oldvector, newvector; 4086 4087 oldvector = (uchar_t)(uintptr_t)arg; 4088 iflag = intr_clear(); 4089 lock_set(&apic_revector_lock); 4090 if ((newvector = apic_oldvec_to_newvec[oldvector]) != 0) { 4091 apic_free_vector(oldvector); 4092 apic_oldvec_to_newvec[oldvector] = 0; 4093 apic_newvec_to_oldvec[newvector] = 0; 4094 apic_revector_pending--; 4095 } 4096 4097 lock_clear(&apic_revector_lock); 4098 intr_restore(iflag); 4099 } 4100 4101 4102 /* Mark vector as not being used by any irq */ 4103 static void 4104 apic_free_vector(uchar_t vector) 4105 { 4106 apic_vector_to_irq[vector] = APIC_RESV_IRQ; 4107 } 4108 4109 /* 4110 * compute the polarity, trigger mode and vector for programming into 4111 * the I/O apic and record in airq_rdt_entry. 
4112 */ 4113 static void 4114 apic_record_rdt_entry(apic_irq_t *irqptr, int irq) 4115 { 4116 int ioapicindex, bus_type, vector; 4117 short intr_index; 4118 uint_t level, po, io_po; 4119 struct apic_io_intr *iointrp; 4120 4121 intr_index = irqptr->airq_mps_intr_index; 4122 DDI_INTR_IMPLDBG((CE_CONT, "apic_record_rdt_entry: intr_index=%d " 4123 "irq = 0x%x dip = 0x%p vector = 0x%x\n", intr_index, irq, 4124 (void *)irqptr->airq_dip, irqptr->airq_vector)); 4125 4126 if (intr_index == RESERVE_INDEX) { 4127 apic_error |= APIC_ERR_INVALID_INDEX; 4128 return; 4129 } else if (APIC_IS_MSI_OR_MSIX_INDEX(intr_index)) { 4130 return; 4131 } 4132 4133 vector = irqptr->airq_vector; 4134 ioapicindex = irqptr->airq_ioapicindex; 4135 /* Assume edge triggered by default */ 4136 level = 0; 4137 /* Assume active high by default */ 4138 po = 0; 4139 4140 if (intr_index == DEFAULT_INDEX || intr_index == FREE_INDEX) { 4141 ASSERT(irq < 16); 4142 if (eisa_level_intr_mask & (1 << irq)) 4143 level = AV_LEVEL; 4144 if (intr_index == FREE_INDEX && apic_defconf == 0) 4145 apic_error |= APIC_ERR_INVALID_INDEX; 4146 } else if (intr_index == ACPI_INDEX) { 4147 bus_type = irqptr->airq_iflag.bustype; 4148 if (irqptr->airq_iflag.intr_el == INTR_EL_CONFORM) { 4149 if (bus_type == BUS_PCI) 4150 level = AV_LEVEL; 4151 } else 4152 level = (irqptr->airq_iflag.intr_el == INTR_EL_LEVEL) ? 4153 AV_LEVEL : 0; 4154 if (level && 4155 ((irqptr->airq_iflag.intr_po == INTR_PO_ACTIVE_LOW) || 4156 (irqptr->airq_iflag.intr_po == INTR_PO_CONFORM && 4157 bus_type == BUS_PCI))) 4158 po = AV_ACTIVE_LOW; 4159 } else { 4160 iointrp = apic_io_intrp + intr_index; 4161 bus_type = apic_find_bus(iointrp->intr_busid); 4162 if (iointrp->intr_el == INTR_EL_CONFORM) { 4163 if ((irq < 16) && (eisa_level_intr_mask & (1 << irq))) 4164 level = AV_LEVEL; 4165 else if (bus_type == BUS_PCI) 4166 level = AV_LEVEL; 4167 } else 4168 level = (iointrp->intr_el == INTR_EL_LEVEL) ? 
4169 AV_LEVEL : 0; 4170 if (level && ((iointrp->intr_po == INTR_PO_ACTIVE_LOW) || 4171 (iointrp->intr_po == INTR_PO_CONFORM && 4172 bus_type == BUS_PCI))) 4173 po = AV_ACTIVE_LOW; 4174 } 4175 if (level) 4176 apic_level_intr[irq] = 1; 4177 /* 4178 * The 82489DX External APIC cannot do active low polarity interrupts. 4179 */ 4180 if (po && (apic_io_ver[ioapicindex] != IOAPIC_VER_82489DX)) 4181 io_po = po; 4182 else 4183 io_po = 0; 4184 4185 if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) 4186 printf("setio: ioapic=%x intin=%x level=%x po=%x vector=%x\n", 4187 ioapicindex, irqptr->airq_intin_no, level, io_po, vector); 4188 4189 irqptr->airq_rdt_entry = level|io_po|vector; 4190 } 4191 4192 /* 4193 * Call rebind to do the actual programming. 4194 */ 4195 static int 4196 apic_setup_io_intr(apic_irq_t *irqptr, int irq) 4197 { 4198 int rv; 4199 4200 if (rv = apic_rebind(irqptr, apic_irq_table[irq]->airq_cpu, 1, 4201 IMMEDIATE)) 4202 /* CPU is not up or interrupt is disabled. Fall back to 0 */ 4203 rv = apic_rebind(irqptr, 0, 1, IMMEDIATE); 4204 4205 return (rv); 4206 } 4207 4208 /* 4209 * Deferred reprogramming: Call apic_rebind to do the real work. 4210 */ 4211 static int 4212 apic_setup_io_intr_deferred(apic_irq_t *irqptr, int irq) 4213 { 4214 int rv; 4215 4216 if (rv = apic_rebind(irqptr, apic_irq_table[irq]->airq_cpu, 1, 4217 DEFERRED)) 4218 /* CPU is not up or interrupt is disabled. Fall back to 0 */ 4219 rv = apic_rebind(irqptr, 0, 1, DEFERRED); 4220 4221 return (rv); 4222 } 4223 4224 /* 4225 * Bind interrupt corresponding to irq_ptr to bind_cpu. acquire_lock 4226 * if false (0) means lock is already held (e.g: in rebind_all). 
*/
/*
 * irq_ptr	irq entry to (re)program
 * bind_cpu	target cpu, possibly with IRQ_USER_BOUND set, or IRQ_UNBOUND
 * acquire_lock	non-zero if apic_ioapic_lock must be taken here
 * when		IMMEDIATE or DEFERRED
 *
 * Returns 1 if the target cpu does not have APIC_CPU_INTR_ENABLE set.
 * Returns 0 in all other cases, including when nothing was programmed
 * because the deferred request was stale or apic_check_stuck_interrupt()
 * returned non-zero.
 *
 * Interrupts are disabled with intr_clear() for the whole operation and
 * restored on every return path.
 */
static int
apic_rebind(apic_irq_t *irq_ptr, int bind_cpu, int acquire_lock, int when)
{
	int			intin_no;
	volatile int32_t	*ioapic;
	uchar_t			airq_temp_cpu;
	apic_cpus_info_t	*cpu_infop;
	int			iflag;
	int		which_irq = apic_vector_to_irq[irq_ptr->airq_vector];

	intin_no = irq_ptr->airq_intin_no;
	ioapic = apicioadr[irq_ptr->airq_ioapicindex];
	/* cpu this irq is currently bound to, if any */
	airq_temp_cpu = irq_ptr->airq_temp_cpu;
	if (airq_temp_cpu != IRQ_UNINIT && airq_temp_cpu != IRQ_UNBOUND) {
		if (airq_temp_cpu & IRQ_USER_BOUND)
			/* Mask off high bit so it can be used as array index */
			airq_temp_cpu &= ~IRQ_USER_BOUND;

		ASSERT(airq_temp_cpu < apic_nproc);
	}

	iflag = intr_clear();

	if (acquire_lock)
		lock_set(&apic_ioapic_lock);

	/*
	 * Can't bind to a CPU that's not online:
	 */
	cpu_infop = &apic_cpus[bind_cpu & ~IRQ_USER_BOUND];
	if (!(cpu_infop->aci_status & APIC_CPU_INTR_ENABLE)) {

		if (acquire_lock)
			lock_clear(&apic_ioapic_lock);

		intr_restore(iflag);
		return (1);
	}

	/*
	 * If this is a deferred reprogramming attempt, ensure we have
	 * not been passed stale data:
	 */
	if ((when == DEFERRED) &&
	    (apic_reprogram_info[which_irq].valid == 0)) {
		/* stale info, so just return */
		if (acquire_lock)
			lock_clear(&apic_ioapic_lock);

		intr_restore(iflag);
		return (0);
	}

	/*
	 * If this interrupt has been delivered to a CPU and that CPU
	 * has not handled it yet, we cannot reprogram the IOAPIC now:
	 */
	if (!APIC_IS_MSI_OR_MSIX_INDEX(irq_ptr->airq_mps_intr_index) &&
	    apic_check_stuck_interrupt(irq_ptr, airq_temp_cpu, bind_cpu,
	    ioapic, intin_no, which_irq) != 0) {

		if (acquire_lock)
			lock_clear(&apic_ioapic_lock);

		intr_restore(iflag);
		return (0);
	}

	/*
	 * NOTE: We do not unmask the RDT here, as an interrupt MAY still
	 * come in before we have a chance to reprogram it below.  The
	 * reprogramming below will simultaneously change and unmask the
	 * RDT entry.
	 */

	if ((uchar_t)bind_cpu == IRQ_UNBOUND) {
		/* Write the RDT entry -- no specific CPU binding */
		WRITE_IOAPIC_RDT_ENTRY_HIGH_DWORD(ioapic, intin_no, AV_TOALL);

		/* the irq no longer counts against its previous cpu */
		if (airq_temp_cpu != IRQ_UNINIT && airq_temp_cpu != IRQ_UNBOUND)
			apic_cpus[airq_temp_cpu].aci_temp_bound--;

		/* Write the vector, trigger, and polarity portion of the RDT */
		WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no,
		    AV_LDEST | AV_LOPRI | irq_ptr->airq_rdt_entry);
		if (acquire_lock)
			lock_clear(&apic_ioapic_lock);
		irq_ptr->airq_temp_cpu = IRQ_UNBOUND;
		intr_restore(iflag);
		return (0);
	}

	/* account the binding on the target cpu: user bound or temporary */
	if (bind_cpu & IRQ_USER_BOUND) {
		cpu_infop->aci_bound++;
	} else {
		cpu_infop->aci_temp_bound++;
	}
	ASSERT((bind_cpu & ~IRQ_USER_BOUND) < apic_nproc);
	if (!APIC_IS_MSI_OR_MSIX_INDEX(irq_ptr->airq_mps_intr_index)) {
		/* Write the RDT entry -- bind to a specific CPU: */
		WRITE_IOAPIC_RDT_ENTRY_HIGH_DWORD(ioapic, intin_no,
		    cpu_infop->aci_local_id << APIC_ID_BIT_OFFSET);
	}
	/* drop the temporary binding count of the previous cpu */
	if ((airq_temp_cpu != IRQ_UNBOUND) && (airq_temp_cpu != IRQ_UNINIT)) {
		apic_cpus[airq_temp_cpu].aci_temp_bound--;
	}
	if (!APIC_IS_MSI_OR_MSIX_INDEX(irq_ptr->airq_mps_intr_index)) {
		/* Write the vector, trigger, and polarity portion of the RDT */
		WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no,
		    AV_PDEST | AV_FIXED | irq_ptr->airq_rdt_entry);
	} else {
		/*
		 * MSI/X: program the device instead of an I/O APIC.
		 * NOTE(review): airq_ioapicindex, airq_intin_no and
		 * airq_origirq appear to carry MSI-specific values here
		 * (the comparisons below pick out the "first" and "last"
		 * entry of a group) -- confirm against the code that
		 * allocates MSI vectors.
		 */
		int type = (irq_ptr->airq_mps_intr_index == MSI_INDEX) ?
		    DDI_INTR_TYPE_MSI : DDI_INTR_TYPE_MSIX;
		(void) apic_pci_msi_disable_mode(irq_ptr->airq_dip, type,
		    irq_ptr->airq_ioapicindex);
		if (irq_ptr->airq_ioapicindex == irq_ptr->airq_origirq) {
			/* first one */
			DDI_INTR_IMPLDBG((CE_CONT, "apic_rebind: call "
			    "apic_pci_msi_enable_vector\n"));
			if (apic_pci_msi_enable_vector(irq_ptr->airq_dip, type,
			    which_irq, irq_ptr->airq_vector,
			    irq_ptr->airq_intin_no,
			    cpu_infop->aci_local_id) != PSM_SUCCESS) {
				cmn_err(CE_WARN, "pcplusmp: "
				    "apic_pci_msi_enable_vector "
				    "returned PSM_FAILURE");
			}
		}
		if ((irq_ptr->airq_ioapicindex + irq_ptr->airq_intin_no - 1) ==
		    irq_ptr->airq_origirq) { /* last one */
			DDI_INTR_IMPLDBG((CE_CONT, "apic_rebind: call "
			    "pci_msi_enable_mode\n"));
			if (apic_pci_msi_enable_mode(irq_ptr->airq_dip,
			    type, which_irq) != PSM_SUCCESS) {
				DDI_INTR_IMPLDBG((CE_CONT, "pcplusmp: "
				    "pci_msi_enable failed\n"));
				(void) apic_pci_msi_unconfigure(
				    irq_ptr->airq_dip, type, which_irq);
			}
		}
	}
	if (acquire_lock)
		lock_clear(&apic_ioapic_lock);
	/* remember the new binding, IRQ_USER_BOUND bit included */
	irq_ptr->airq_temp_cpu = (uchar_t)bind_cpu;
	apic_redist_cpu_skip &= ~(1 << (bind_cpu & ~IRQ_USER_BOUND));
	intr_restore(iflag);
	return (0);
}

/*
 * Checks to see if the IOAPIC interrupt entry specified has its Remote IRR
 * bit set. Sets up a timeout to perform the reprogramming at a later time
 * if it cannot wait for the Remote IRR bit to clear (or if waiting did not
 * result in the bit's clearing).
 *
 * This function will mask the RDT entry if the Remote IRR bit is set.
 *
 * Returns non-zero if the caller should defer IOAPIC reprogramming.
4386 */ 4387 static int 4388 apic_check_stuck_interrupt(apic_irq_t *irq_ptr, int old_bind_cpu, 4389 int new_bind_cpu, volatile int32_t *ioapic, int intin_no, int which_irq) 4390 { 4391 int32_t rdt_entry; 4392 int waited; 4393 4394 /* Mask the RDT entry, but only if it's a level-triggered interrupt */ 4395 rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no); 4396 if ((rdt_entry & (AV_LEVEL|AV_MASK)) == AV_LEVEL) { 4397 4398 /* Mask it */ 4399 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no, 4400 AV_MASK | rdt_entry); 4401 } 4402 4403 /* 4404 * Wait for the delivery pending bit to clear. 4405 */ 4406 if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no) & 4407 (AV_LEVEL|AV_PENDING)) == (AV_LEVEL|AV_PENDING)) { 4408 4409 /* 4410 * If we're still waiting on the delivery of this interrupt, 4411 * continue to wait here until it is delivered (this should be 4412 * a very small amount of time, but include a timeout just in 4413 * case). 4414 */ 4415 for (waited = 0; waited < apic_max_usecs_clear_pending; 4416 waited += APIC_USECS_PER_WAIT_INTERVAL) { 4417 if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no) 4418 & AV_PENDING) == 0) { 4419 break; 4420 } 4421 drv_usecwait(APIC_USECS_PER_WAIT_INTERVAL); 4422 } 4423 4424 if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no) & 4425 AV_PENDING) != 0) { 4426 cmn_err(CE_WARN, "!IOAPIC %d intin %d: Could not " 4427 "deliver interrupt to local APIC within " 4428 "%d usecs.", irq_ptr->airq_ioapicindex, 4429 irq_ptr->airq_intin_no, 4430 apic_max_usecs_clear_pending); 4431 } 4432 } 4433 4434 /* 4435 * If the remote IRR bit is set, then the interrupt has been sent 4436 * to a CPU for processing. We have no choice but to wait for 4437 * that CPU to process the interrupt, at which point the remote IRR 4438 * bit will be cleared. 
4439 */ 4440 if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no) & 4441 (AV_LEVEL|AV_REMOTE_IRR)) == (AV_LEVEL|AV_REMOTE_IRR)) { 4442 4443 /* 4444 * If the CPU that this RDT is bound to is NOT the current 4445 * CPU, wait until that CPU handles the interrupt and ACKs 4446 * it. If this interrupt is not bound to any CPU (that is, 4447 * if it's bound to the logical destination of "anyone"), it 4448 * may have been delivered to the current CPU so handle that 4449 * case by deferring the reprogramming (below). 4450 */ 4451 kpreempt_disable(); 4452 if ((old_bind_cpu != IRQ_UNBOUND) && 4453 (old_bind_cpu != IRQ_UNINIT) && 4454 (old_bind_cpu != psm_get_cpu_id())) { 4455 for (waited = 0; waited < apic_max_usecs_clear_pending; 4456 waited += APIC_USECS_PER_WAIT_INTERVAL) { 4457 if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, 4458 intin_no) & AV_REMOTE_IRR) == 0) { 4459 4460 /* Clear the reprogramming state: */ 4461 lock_set(&apic_ioapic_reprogram_lock); 4462 4463 apic_reprogram_info[which_irq].valid 4464 = 0; 4465 apic_reprogram_info[which_irq].bindcpu 4466 = 0; 4467 apic_reprogram_info[which_irq].timeouts 4468 = 0; 4469 4470 lock_clear(&apic_ioapic_reprogram_lock); 4471 4472 /* Remote IRR has cleared! */ 4473 kpreempt_enable(); 4474 return (0); 4475 } 4476 drv_usecwait(APIC_USECS_PER_WAIT_INTERVAL); 4477 } 4478 } 4479 kpreempt_enable(); 4480 4481 /* 4482 * If we waited and the Remote IRR bit is still not cleared, 4483 * AND if we've invoked the timeout APIC_REPROGRAM_MAX_TIMEOUTS 4484 * times for this interrupt, try the last-ditch workarounds: 4485 */ 4486 if (apic_reprogram_info[which_irq].timeouts >= 4487 APIC_REPROGRAM_MAX_TIMEOUTS) { 4488 4489 if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, intin_no) 4490 & AV_REMOTE_IRR) != 0) { 4491 /* 4492 * Trying to clear the bit through normal 4493 * channels has failed. So as a last-ditch 4494 * effort, try to set the trigger mode to 4495 * edge, then to level. This has been 4496 * observed to work on many systems. 
4497 */ 4498 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, 4499 intin_no, 4500 READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, 4501 intin_no) & ~AV_LEVEL); 4502 4503 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, 4504 intin_no, 4505 READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, 4506 intin_no) | AV_LEVEL); 4507 4508 /* 4509 * If the bit's STILL set, declare total and 4510 * utter failure 4511 */ 4512 if ((READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic, 4513 intin_no) & AV_REMOTE_IRR) != 0) { 4514 cmn_err(CE_WARN, "!IOAPIC %d intin %d: " 4515 "Remote IRR failed to reset " 4516 "within %d usecs. Interrupts to " 4517 "this pin may cease to function.", 4518 irq_ptr->airq_ioapicindex, 4519 irq_ptr->airq_intin_no, 4520 apic_max_usecs_clear_pending); 4521 } 4522 } 4523 /* Clear the reprogramming state: */ 4524 lock_set(&apic_ioapic_reprogram_lock); 4525 4526 apic_reprogram_info[which_irq].valid = 0; 4527 apic_reprogram_info[which_irq].bindcpu = 0; 4528 apic_reprogram_info[which_irq].timeouts = 0; 4529 4530 lock_clear(&apic_ioapic_reprogram_lock); 4531 } else { 4532 #ifdef DEBUG 4533 cmn_err(CE_WARN, "Deferring reprogramming of irq %d", 4534 which_irq); 4535 #endif /* DEBUG */ 4536 /* 4537 * If waiting for the Remote IRR bit (above) didn't 4538 * allow it to clear, defer the reprogramming: 4539 */ 4540 lock_set(&apic_ioapic_reprogram_lock); 4541 4542 apic_reprogram_info[which_irq].valid = 1; 4543 apic_reprogram_info[which_irq].bindcpu = new_bind_cpu; 4544 apic_reprogram_info[which_irq].timeouts++; 4545 4546 lock_clear(&apic_ioapic_reprogram_lock); 4547 4548 /* Fire up a timeout to handle this later */ 4549 (void) timeout(apic_reprogram_timeout_handler, 4550 (void *) 0, 4551 drv_usectohz(APIC_REPROGRAM_TIMEOUT_DELAY)); 4552 4553 /* Inform caller to defer IOAPIC programming: */ 4554 return (1); 4555 } 4556 } 4557 return (0); 4558 } 4559 4560 /* 4561 * Timeout handler that performs the APIC reprogramming 4562 */ 4563 /*ARGSUSED*/ 4564 static void 4565 apic_reprogram_timeout_handler(void *arg) 4566 { 4567 
/*LINTED: set but not used in function*/ 4568 int i, result; 4569 4570 /* Serialize access to this function */ 4571 mutex_enter(&apic_reprogram_timeout_mutex); 4572 4573 /* 4574 * For each entry in the reprogramming state that's valid, 4575 * try the reprogramming again: 4576 */ 4577 for (i = 0; i < APIC_MAX_VECTOR; i++) { 4578 if (apic_reprogram_info[i].valid == 0) 4579 continue; 4580 /* 4581 * Though we can't really do anything about errors 4582 * at this point, keep track of them for reporting. 4583 * Note that it is very possible for apic_setup_io_intr 4584 * to re-register this very timeout if the Remote IRR bit 4585 * has not yet cleared. 4586 */ 4587 result = apic_setup_io_intr_deferred(apic_irq_table[i], i); 4588 4589 #ifdef DEBUG 4590 if (result) 4591 cmn_err(CE_WARN, "apic_reprogram_timeout: " 4592 "apic_setup_io_intr returned nonzero for " 4593 "irq=%d!", i); 4594 #endif /* DEBUG */ 4595 } 4596 4597 mutex_exit(&apic_reprogram_timeout_mutex); 4598 } 4599 4600 4601 /* 4602 * Called to migrate all interrupts at an irq to another cpu. safe 4603 * if true means we are not being called from an interrupt 4604 * context and hence it is safe to do a lock_set. 
If false 4605 * do only a lock_try and return failure ( non 0 ) if we cannot get it 4606 */ 4607 int 4608 apic_rebind_all(apic_irq_t *irq_ptr, int bind_cpu, int safe) 4609 { 4610 apic_irq_t *irqptr = irq_ptr; 4611 int retval = 0; 4612 int iflag; 4613 4614 iflag = intr_clear(); 4615 if (!safe) { 4616 if (lock_try(&apic_ioapic_lock) == 0) { 4617 intr_restore(iflag); 4618 return (1); 4619 } 4620 } else 4621 lock_set(&apic_ioapic_lock); 4622 4623 while (irqptr) { 4624 if (irqptr->airq_temp_cpu != IRQ_UNINIT) 4625 retval |= apic_rebind(irqptr, bind_cpu, 0, IMMEDIATE); 4626 irqptr = irqptr->airq_next; 4627 } 4628 lock_clear(&apic_ioapic_lock); 4629 intr_restore(iflag); 4630 return (retval); 4631 } 4632 4633 /* 4634 * apic_intr_redistribute does all the messy computations for identifying 4635 * which interrupt to move to which CPU. Currently we do just one interrupt 4636 * at a time. This reduces the time we spent doing all this within clock 4637 * interrupt. When it is done in idle, we could do more than 1. 4638 * First we find the most busy and the most free CPU (time in ISR only) 4639 * skipping those CPUs that has been identified as being ineligible (cpu_skip) 4640 * Then we look for IRQs which are closest to the difference between the 4641 * most busy CPU and the average ISR load. We try to find one whose load 4642 * is less than difference.If none exists, then we chose one larger than the 4643 * difference, provided it does not make the most idle CPU worse than the 4644 * most busy one. In the end, we clear all the busy fields for CPUs. For 4645 * IRQs, they are cleared as they are scanned. 
4646 */ 4647 static void 4648 apic_intr_redistribute() 4649 { 4650 int busiest_cpu, most_free_cpu; 4651 int cpu_free, cpu_busy, max_busy, min_busy; 4652 int min_free, diff; 4653 int average_busy, cpus_online; 4654 int i, busy; 4655 apic_cpus_info_t *cpu_infop; 4656 apic_irq_t *min_busy_irq = NULL; 4657 apic_irq_t *max_busy_irq = NULL; 4658 4659 busiest_cpu = most_free_cpu = -1; 4660 cpu_free = cpu_busy = max_busy = average_busy = 0; 4661 min_free = apic_sample_factor_redistribution; 4662 cpus_online = 0; 4663 /* 4664 * Below we will check for CPU_INTR_ENABLE, bound, temp_bound, temp_cpu 4665 * without ioapic_lock. That is OK as we are just doing statistical 4666 * sampling anyway and any inaccuracy now will get corrected next time 4667 * The call to rebind which actually changes things will make sure 4668 * we are consistent. 4669 */ 4670 for (i = 0; i < apic_nproc; i++) { 4671 if (!(apic_redist_cpu_skip & (1 << i)) && 4672 (apic_cpus[i].aci_status & APIC_CPU_INTR_ENABLE)) { 4673 4674 cpu_infop = &apic_cpus[i]; 4675 /* 4676 * If no unbound interrupts or only 1 total on this 4677 * CPU, skip 4678 */ 4679 if (!cpu_infop->aci_temp_bound || 4680 (cpu_infop->aci_bound + cpu_infop->aci_temp_bound) 4681 == 1) { 4682 apic_redist_cpu_skip |= 1 << i; 4683 continue; 4684 } 4685 4686 busy = cpu_infop->aci_busy; 4687 average_busy += busy; 4688 cpus_online++; 4689 if (max_busy < busy) { 4690 max_busy = busy; 4691 busiest_cpu = i; 4692 } 4693 if (min_free > busy) { 4694 min_free = busy; 4695 most_free_cpu = i; 4696 } 4697 if (busy > apic_int_busy_mark) { 4698 cpu_busy |= 1 << i; 4699 } else { 4700 if (busy < apic_int_free_mark) 4701 cpu_free |= 1 << i; 4702 } 4703 } 4704 } 4705 if ((cpu_busy && cpu_free) || 4706 (max_busy >= (min_free + apic_diff_for_redistribution))) { 4707 4708 apic_num_imbalance++; 4709 #ifdef DEBUG 4710 if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) { 4711 prom_printf( 4712 "redistribute busy=%x free=%x max=%x min=%x", 4713 cpu_busy, cpu_free, max_busy, 
min_free); 4714 } 4715 #endif /* DEBUG */ 4716 4717 4718 average_busy /= cpus_online; 4719 4720 diff = max_busy - average_busy; 4721 min_busy = max_busy; /* start with the max possible value */ 4722 max_busy = 0; 4723 min_busy_irq = max_busy_irq = NULL; 4724 i = apic_min_device_irq; 4725 for (; i < apic_max_device_irq; i++) { 4726 apic_irq_t *irq_ptr; 4727 /* Change to linked list per CPU ? */ 4728 if ((irq_ptr = apic_irq_table[i]) == NULL) 4729 continue; 4730 /* Check for irq_busy & decide which one to move */ 4731 /* Also zero them for next round */ 4732 if ((irq_ptr->airq_temp_cpu == busiest_cpu) && 4733 irq_ptr->airq_busy) { 4734 if (irq_ptr->airq_busy < diff) { 4735 /* 4736 * Check for least busy CPU, 4737 * best fit or what ? 4738 */ 4739 if (max_busy < irq_ptr->airq_busy) { 4740 /* 4741 * Most busy within the 4742 * required differential 4743 */ 4744 max_busy = irq_ptr->airq_busy; 4745 max_busy_irq = irq_ptr; 4746 } 4747 } else { 4748 if (min_busy > irq_ptr->airq_busy) { 4749 /* 4750 * least busy, but more than 4751 * the reqd diff 4752 */ 4753 if (min_busy < 4754 (diff + average_busy - 4755 min_free)) { 4756 /* 4757 * Making sure new cpu 4758 * will not end up 4759 * worse 4760 */ 4761 min_busy = 4762 irq_ptr->airq_busy; 4763 4764 min_busy_irq = irq_ptr; 4765 } 4766 } 4767 } 4768 } 4769 irq_ptr->airq_busy = 0; 4770 } 4771 4772 if (max_busy_irq != NULL) { 4773 #ifdef DEBUG 4774 if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) { 4775 prom_printf("rebinding %x to %x", 4776 max_busy_irq->airq_vector, most_free_cpu); 4777 } 4778 #endif /* DEBUG */ 4779 if (apic_rebind_all(max_busy_irq, most_free_cpu, 0) 4780 == 0) 4781 /* Make change permenant */ 4782 max_busy_irq->airq_cpu = (uchar_t)most_free_cpu; 4783 } else if (min_busy_irq != NULL) { 4784 #ifdef DEBUG 4785 if (apic_verbose & APIC_VERBOSE_IOAPIC_FLAG) { 4786 prom_printf("rebinding %x to %x", 4787 min_busy_irq->airq_vector, most_free_cpu); 4788 } 4789 #endif /* DEBUG */ 4790 4791 if (apic_rebind_all(min_busy_irq, 
most_free_cpu, 0) == 4792 0) 4793 /* Make change permenant */ 4794 min_busy_irq->airq_cpu = (uchar_t)most_free_cpu; 4795 } else { 4796 if (cpu_busy != (1 << busiest_cpu)) { 4797 apic_redist_cpu_skip |= 1 << busiest_cpu; 4798 /* 4799 * We leave cpu_skip set so that next time we 4800 * can choose another cpu 4801 */ 4802 } 4803 } 4804 apic_num_rebind++; 4805 } else { 4806 /* 4807 * found nothing. Could be that we skipped over valid CPUs 4808 * or we have balanced everything. If we had a variable 4809 * ticks_for_redistribution, it could be increased here. 4810 * apic_int_busy, int_free etc would also need to be 4811 * changed. 4812 */ 4813 if (apic_redist_cpu_skip) 4814 apic_redist_cpu_skip = 0; 4815 } 4816 for (i = 0; i < apic_nproc; i++) { 4817 apic_cpus[i].aci_busy = 0; 4818 } 4819 } 4820 4821 static void 4822 apic_cleanup_busy() 4823 { 4824 int i; 4825 apic_irq_t *irq_ptr; 4826 4827 for (i = 0; i < apic_nproc; i++) { 4828 apic_cpus[i].aci_busy = 0; 4829 } 4830 4831 for (i = apic_min_device_irq; i < apic_max_device_irq; i++) { 4832 if ((irq_ptr = apic_irq_table[i]) != NULL) 4833 irq_ptr->airq_busy = 0; 4834 } 4835 apic_skipped_redistribute = 0; 4836 } 4837 4838 4839 /* 4840 * This function will reprogram the timer. 4841 * 4842 * When in oneshot mode the argument is the absolute time in future to 4843 * generate the interrupt at. 4844 * 4845 * When in periodic mode, the argument is the interval at which the 4846 * interrupts should be generated. There is no need to support the periodic 4847 * mode timer change at this time. 4848 */ 4849 static void 4850 apic_timer_reprogram(hrtime_t time) 4851 { 4852 hrtime_t now; 4853 uint_t ticks; 4854 4855 /* 4856 * We should be called from high PIL context (CBE_HIGH_PIL), 4857 * so kpreempt is disabled. 
4858 */ 4859 4860 if (!apic_oneshot) { 4861 /* time is the interval for periodic mode */ 4862 ticks = (uint_t)((time) / apic_nsec_per_tick); 4863 } else { 4864 /* one shot mode */ 4865 4866 now = gethrtime(); 4867 4868 if (time <= now) { 4869 /* 4870 * requested to generate an interrupt in the past 4871 * generate an interrupt as soon as possible 4872 */ 4873 ticks = apic_min_timer_ticks; 4874 } else if ((time - now) > apic_nsec_max) { 4875 /* 4876 * requested to generate an interrupt at a time 4877 * further than what we are capable of. Set to max 4878 * the hardware can handle 4879 */ 4880 4881 ticks = APIC_MAXVAL; 4882 #ifdef DEBUG 4883 cmn_err(CE_CONT, "apic_timer_reprogram, request at" 4884 " %lld too far in future, current time" 4885 " %lld \n", time, now); 4886 #endif /* DEBUG */ 4887 } else 4888 ticks = (uint_t)((time - now) / apic_nsec_per_tick); 4889 } 4890 4891 if (ticks < apic_min_timer_ticks) 4892 ticks = apic_min_timer_ticks; 4893 4894 apicadr[APIC_INIT_COUNT] = ticks; 4895 4896 } 4897 4898 /* 4899 * This function will enable timer interrupts. 4900 */ 4901 static void 4902 apic_timer_enable(void) 4903 { 4904 /* 4905 * We should be Called from high PIL context (CBE_HIGH_PIL), 4906 * so kpreempt is disabled. 4907 */ 4908 4909 if (!apic_oneshot) 4910 apicadr[APIC_LOCAL_TIMER] = 4911 (apic_clkvect + APIC_BASE_VECT) | AV_TIME; 4912 else { 4913 /* one shot */ 4914 apicadr[APIC_LOCAL_TIMER] = (apic_clkvect + APIC_BASE_VECT); 4915 } 4916 } 4917 4918 /* 4919 * This function will disable timer interrupts. 4920 */ 4921 static void 4922 apic_timer_disable(void) 4923 { 4924 /* 4925 * We should be Called from high PIL context (CBE_HIGH_PIL), 4926 * so kpreempt is disabled. 
4927 */ 4928 4929 apicadr[APIC_LOCAL_TIMER] = (apic_clkvect + APIC_BASE_VECT) | AV_MASK; 4930 } 4931 4932 4933 cyclic_id_t apic_cyclic_id; 4934 4935 /* 4936 * If this module needs to be a consumer of cyclic subsystem, they 4937 * can be added here, since at this time kernel cyclic subsystem is initialized 4938 * argument is not currently used, and is reserved for future. 4939 */ 4940 static void 4941 apic_post_cyclic_setup(void *arg) 4942 { 4943 _NOTE(ARGUNUSED(arg)) 4944 cyc_handler_t hdlr; 4945 cyc_time_t when; 4946 4947 /* cpu_lock is held */ 4948 4949 /* set up cyclics for intr redistribution */ 4950 4951 /* 4952 * In peridoc mode intr redistribution processing is done in 4953 * apic_intr_enter during clk intr processing 4954 */ 4955 if (!apic_oneshot) 4956 return; 4957 4958 hdlr.cyh_level = CY_LOW_LEVEL; 4959 hdlr.cyh_func = (cyc_func_t)apic_redistribute_compute; 4960 hdlr.cyh_arg = NULL; 4961 4962 when.cyt_when = 0; 4963 when.cyt_interval = apic_redistribute_sample_interval; 4964 apic_cyclic_id = cyclic_add(&hdlr, &when); 4965 4966 4967 } 4968 4969 static void 4970 apic_redistribute_compute(void) 4971 { 4972 int i, j, max_busy; 4973 4974 if (apic_enable_dynamic_migration) { 4975 if (++apic_nticks == apic_sample_factor_redistribution) { 4976 /* 4977 * Time to call apic_intr_redistribute(). 4978 * reset apic_nticks. 
This will cause max_busy 4979 * to be calculated below and if it is more than 4980 * apic_int_busy, we will do the whole thing 4981 */ 4982 apic_nticks = 0; 4983 } 4984 max_busy = 0; 4985 for (i = 0; i < apic_nproc; i++) { 4986 4987 /* 4988 * Check if curipl is non zero & if ISR is in 4989 * progress 4990 */ 4991 if (((j = apic_cpus[i].aci_curipl) != 0) && 4992 (apic_cpus[i].aci_ISR_in_progress & (1 << j))) { 4993 4994 int irq; 4995 apic_cpus[i].aci_busy++; 4996 irq = apic_cpus[i].aci_current[j]; 4997 apic_irq_table[irq]->airq_busy++; 4998 } 4999 5000 if (!apic_nticks && 5001 (apic_cpus[i].aci_busy > max_busy)) 5002 max_busy = apic_cpus[i].aci_busy; 5003 } 5004 if (!apic_nticks) { 5005 if (max_busy > apic_int_busy_mark) { 5006 /* 5007 * We could make the following check be 5008 * skipped > 1 in which case, we get a 5009 * redistribution at half the busy mark (due to 5010 * double interval). Need to be able to collect 5011 * more empirical data to decide if that is a 5012 * good strategy. Punt for now. 
5013 */ 5014 if (apic_skipped_redistribute) 5015 apic_cleanup_busy(); 5016 else 5017 apic_intr_redistribute(); 5018 } else 5019 apic_skipped_redistribute++; 5020 } 5021 } 5022 } 5023 5024 5025 static int 5026 apic_acpi_translate_pci_irq(dev_info_t *dip, int busid, int devid, 5027 int ipin, int *pci_irqp, iflag_t *intr_flagp) 5028 { 5029 5030 int status; 5031 acpi_psm_lnk_t acpipsmlnk; 5032 5033 if ((status = acpi_get_irq_cache_ent(busid, devid, ipin, pci_irqp, 5034 intr_flagp)) == ACPI_PSM_SUCCESS) { 5035 APIC_VERBOSE_IRQ((CE_CONT, "!pcplusmp: Found irqno %d " 5036 "from cache for device %s, instance #%d\n", *pci_irqp, 5037 ddi_get_name(dip), ddi_get_instance(dip))); 5038 return (status); 5039 } 5040 5041 bzero(&acpipsmlnk, sizeof (acpi_psm_lnk_t)); 5042 5043 if ((status = acpi_translate_pci_irq(dip, ipin, pci_irqp, intr_flagp, 5044 &acpipsmlnk)) == ACPI_PSM_FAILURE) { 5045 APIC_VERBOSE_IRQ((CE_WARN, "pcplusmp: " 5046 " acpi_translate_pci_irq failed for device %s, instance" 5047 " #%d", ddi_get_name(dip), ddi_get_instance(dip))); 5048 return (status); 5049 } 5050 5051 if (status == ACPI_PSM_PARTIAL && acpipsmlnk.lnkobj != NULL) { 5052 status = apic_acpi_irq_configure(&acpipsmlnk, dip, pci_irqp, 5053 intr_flagp); 5054 if (status != ACPI_PSM_SUCCESS) { 5055 status = acpi_get_current_irq_resource(&acpipsmlnk, 5056 pci_irqp, intr_flagp); 5057 } 5058 } 5059 5060 if (status == ACPI_PSM_SUCCESS) { 5061 acpi_new_irq_cache_ent(busid, devid, ipin, *pci_irqp, 5062 intr_flagp, &acpipsmlnk); 5063 5064 APIC_VERBOSE_IRQ((CE_CONT, "pcplusmp: [ACPI] " 5065 "new irq %d for device %s, instance #%d\n", 5066 *pci_irqp, ddi_get_name(dip), ddi_get_instance(dip))); 5067 } 5068 5069 return (status); 5070 } 5071 5072 /* 5073 * Configures the irq for the interrupt link device identified by 5074 * acpipsmlnkp. 5075 * 5076 * Gets the current and the list of possible irq settings for the 5077 * device. 
If apic_unconditional_srs is not set, and the current 5078 * resource setting is in the list of possible irq settings, 5079 * current irq resource setting is passed to the caller. 5080 * 5081 * Otherwise, picks an irq number from the list of possible irq 5082 * settings, and sets the irq of the device to this value. 5083 * If prefer_crs is set, among a set of irq numbers in the list that have 5084 * the least number of devices sharing the interrupt, we pick current irq 5085 * resource setting if it is a member of this set. 5086 * 5087 * Passes the irq number in the value pointed to by pci_irqp, and 5088 * polarity and sensitivity in the structure pointed to by dipintrflagp 5089 * to the caller. 5090 * 5091 * Note that if setting the irq resource failed, but successfuly obtained 5092 * the current irq resource settings, passes the current irq resources 5093 * and considers it a success. 5094 * 5095 * Returns: 5096 * ACPI_PSM_SUCCESS on success. 5097 * 5098 * ACPI_PSM_FAILURE if an error occured during the configuration or 5099 * if a suitable irq was not found for this device, or if setting the 5100 * irq resource and obtaining the current resource fails. 
5101 * 5102 */ 5103 static int 5104 apic_acpi_irq_configure(acpi_psm_lnk_t *acpipsmlnkp, dev_info_t *dip, 5105 int *pci_irqp, iflag_t *dipintr_flagp) 5106 { 5107 5108 int i, min_share, foundnow, done = 0; 5109 int32_t irq; 5110 int32_t share_irq = -1; 5111 int32_t chosen_irq = -1; 5112 int cur_irq = -1; 5113 acpi_irqlist_t *irqlistp; 5114 acpi_irqlist_t *irqlistent; 5115 5116 if ((acpi_get_possible_irq_resources(acpipsmlnkp, &irqlistp)) 5117 == ACPI_PSM_FAILURE) { 5118 APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: Unable to determine " 5119 "or assign IRQ for device %s, instance #%d: The system was " 5120 "unable to get the list of potential IRQs from ACPI.", 5121 ddi_get_name(dip), ddi_get_instance(dip))); 5122 5123 return (ACPI_PSM_FAILURE); 5124 } 5125 5126 if ((acpi_get_current_irq_resource(acpipsmlnkp, &cur_irq, 5127 dipintr_flagp) == ACPI_PSM_SUCCESS) && (!apic_unconditional_srs) && 5128 (cur_irq > 0)) { 5129 /* 5130 * If an IRQ is set in CRS and that IRQ exists in the set 5131 * returned from _PRS, return that IRQ, otherwise print 5132 * a warning 5133 */ 5134 5135 if (acpi_irqlist_find_irq(irqlistp, cur_irq, NULL) 5136 == ACPI_PSM_SUCCESS) { 5137 5138 acpi_free_irqlist(irqlistp); 5139 ASSERT(pci_irqp != NULL); 5140 *pci_irqp = cur_irq; 5141 return (ACPI_PSM_SUCCESS); 5142 } 5143 5144 APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: Could not find the " 5145 "current irq %d for device %s, instance #%d in ACPI's " 5146 "list of possible irqs for this device. 
Picking one from " 5147 " the latter list.", cur_irq, ddi_get_name(dip), 5148 ddi_get_instance(dip))); 5149 } 5150 5151 irqlistent = irqlistp; 5152 min_share = 255; 5153 5154 while (irqlistent != NULL) { 5155 irqlistent->intr_flags.bustype = BUS_PCI; 5156 5157 for (foundnow = 0, i = 0; i < irqlistent->num_irqs; i++) { 5158 5159 irq = irqlistent->irqs[i]; 5160 5161 if ((irq < 16) && (apic_reserved_irqlist[irq])) 5162 continue; 5163 5164 if (irq == 0) { 5165 /* invalid irq number */ 5166 continue; 5167 } 5168 5169 if ((apic_irq_table[irq] == NULL) || 5170 (apic_irq_table[irq]->airq_dip == dip)) { 5171 chosen_irq = irq; 5172 foundnow = 1; 5173 /* 5174 * If we do not prefer current irq from crs 5175 * or if we do and this irq is the same as 5176 * current irq from crs, this is the one 5177 * to pick. 5178 */ 5179 if (!(apic_prefer_crs) || (irq == cur_irq)) { 5180 done = 1; 5181 break; 5182 } 5183 continue; 5184 } 5185 5186 if (irqlistent->intr_flags.intr_el == INTR_EL_EDGE) 5187 continue; 5188 5189 if (!acpi_intr_compatible(irqlistent->intr_flags, 5190 apic_irq_table[irq]->airq_iflag)) 5191 continue; 5192 5193 if ((apic_irq_table[irq]->airq_share < min_share) || 5194 ((apic_irq_table[irq]->airq_share == min_share) && 5195 (cur_irq == irq) && (apic_prefer_crs))) { 5196 min_share = apic_irq_table[irq]->airq_share; 5197 share_irq = irq; 5198 foundnow = 1; 5199 } 5200 } 5201 5202 /* 5203 * If we found an IRQ in the inner loop this time, save the 5204 * details from the irqlist for later use. 5205 */ 5206 if (foundnow && ((chosen_irq != -1) || (share_irq != -1))) { 5207 /* 5208 * Copy the acpi_prs_private_t and flags from this 5209 * irq list entry, since we found an irq from this 5210 * entry. 
5211 */ 5212 acpipsmlnkp->acpi_prs_prv = irqlistent->acpi_prs_prv; 5213 *dipintr_flagp = irqlistent->intr_flags; 5214 } 5215 5216 if (done) 5217 break; 5218 5219 /* Go to the next irqlist entry */ 5220 irqlistent = irqlistent->next; 5221 } 5222 5223 5224 acpi_free_irqlist(irqlistp); 5225 if (chosen_irq != -1) 5226 irq = chosen_irq; 5227 else if (share_irq != -1) 5228 irq = share_irq; 5229 else { 5230 APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: Could not find a " 5231 "suitable irq from the list of possible irqs for device " 5232 "%s, instance #%d in ACPI's list of possible irqs", 5233 ddi_get_name(dip), ddi_get_instance(dip))); 5234 return (ACPI_PSM_FAILURE); 5235 } 5236 5237 APIC_VERBOSE_IRQ((CE_CONT, "!pcplusmp: Setting irq %d for device %s " 5238 "instance #%d\n", irq, ddi_get_name(dip), ddi_get_instance(dip))); 5239 5240 if ((acpi_set_irq_resource(acpipsmlnkp, irq)) == ACPI_PSM_SUCCESS) { 5241 /* 5242 * setting irq was successful, check to make sure CRS 5243 * reflects that. If CRS does not agree with what we 5244 * set, return the irq that was set. 5245 */ 5246 5247 if (acpi_get_current_irq_resource(acpipsmlnkp, &cur_irq, 5248 dipintr_flagp) == ACPI_PSM_SUCCESS) { 5249 5250 if (cur_irq != irq) 5251 APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: " 5252 "IRQ resource set (irqno %d) for device %s " 5253 "instance #%d, differs from current " 5254 "setting irqno %d", 5255 irq, ddi_get_name(dip), 5256 ddi_get_instance(dip), cur_irq)); 5257 } 5258 5259 /* 5260 * return the irq that was set, and not what CRS reports, 5261 * since CRS has been seen to be bogus on some systems 5262 */ 5263 cur_irq = irq; 5264 } else { 5265 APIC_VERBOSE_IRQ((CE_WARN, "!pcplusmp: set resource irq %d " 5266 "failed for device %s instance #%d", 5267 irq, ddi_get_name(dip), ddi_get_instance(dip))); 5268 5269 if (cur_irq == -1) 5270 return (ACPI_PSM_FAILURE); 5271 } 5272 5273 ASSERT(pci_irqp != NULL); 5274 *pci_irqp = cur_irq; 5275 return (ACPI_PSM_SUCCESS); 5276 } 5277