/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 */
/*
 * Copyright 2019, Joyent, Inc.
 * Copyright (c) 2016, 2017 by Delphix. All rights reserved.
 */

/*
 * PSMI 1.1 extensions are supported only in 2.6 and later versions.
 * PSMI 1.2 extensions are supported only in 2.7 and later versions.
 * PSMI 1.3 and 1.4 extensions are supported in Solaris 10.
 * PSMI 1.5 extensions are supported in Solaris Nevada.
 * PSMI 1.6 extensions are supported in Solaris Nevada.
 * PSMI 1.7 extensions are supported in Solaris Nevada.
37 */ 38 #define PSMI_1_7 39 40 #include <sys/processor.h> 41 #include <sys/time.h> 42 #include <sys/psm.h> 43 #include <sys/smp_impldefs.h> 44 #include <sys/cram.h> 45 #include <sys/acpi/acpi.h> 46 #include <sys/acpica.h> 47 #include <sys/psm_common.h> 48 #include <sys/apic.h> 49 #include <sys/pit.h> 50 #include <sys/ddi.h> 51 #include <sys/sunddi.h> 52 #include <sys/ddi_impldefs.h> 53 #include <sys/pci.h> 54 #include <sys/promif.h> 55 #include <sys/x86_archext.h> 56 #include <sys/cpc_impl.h> 57 #include <sys/uadmin.h> 58 #include <sys/panic.h> 59 #include <sys/debug.h> 60 #include <sys/archsystm.h> 61 #include <sys/trap.h> 62 #include <sys/machsystm.h> 63 #include <sys/sysmacros.h> 64 #include <sys/cpuvar.h> 65 #include <sys/rm_platter.h> 66 #include <sys/privregs.h> 67 #include <sys/note.h> 68 #include <sys/pci_intr_lib.h> 69 #include <sys/spl.h> 70 #include <sys/clock.h> 71 #include <sys/dditypes.h> 72 #include <sys/sunddi.h> 73 #include <sys/x_call.h> 74 #include <sys/reboot.h> 75 #include <sys/hpet.h> 76 #include <sys/apic_common.h> 77 #include <sys/apic_timer.h> 78 79 static void apic_record_ioapic_rdt(void *intrmap_private, 80 ioapic_rdt_t *irdt); 81 static void apic_record_msi(void *intrmap_private, msi_regs_t *mregs); 82 83 /* 84 * Common routines between pcplusmp & apix (taken from apic.c). 85 */ 86 87 int apic_clkinit(int); 88 hrtime_t apic_gethrtime(void); 89 void apic_send_ipi(int, int); 90 void apic_set_idlecpu(processorid_t); 91 void apic_unset_idlecpu(processorid_t); 92 void apic_shutdown(int, int); 93 void apic_preshutdown(int, int); 94 processorid_t apic_get_next_processorid(processorid_t); 95 96 hrtime_t apic_gettime(); 97 98 enum apic_ioapic_method_type apix_mul_ioapic_method = APIC_MUL_IOAPIC_PCPLUSMP; 99 100 /* Now the ones for Dynamic Interrupt distribution */ 101 int apic_enable_dynamic_migration = 0; 102 103 /* maximum loop count when sending Start IPIs. 
*/ 104 int apic_sipi_max_loop_count = 0x1000; 105 106 /* 107 * These variables are frequently accessed in apic_intr_enter(), 108 * apic_intr_exit and apic_setspl, so group them together 109 */ 110 volatile uint32_t *apicadr = NULL; /* virtual addr of local APIC */ 111 int apic_setspl_delay = 1; /* apic_setspl - delay enable */ 112 int apic_clkvect; 113 114 /* vector at which error interrupts come in */ 115 int apic_errvect; 116 int apic_enable_error_intr = 1; 117 int apic_error_display_delay = 100; 118 119 /* vector at which performance counter overflow interrupts come in */ 120 int apic_cpcovf_vect; 121 int apic_enable_cpcovf_intr = 1; 122 123 /* vector at which CMCI interrupts come in */ 124 int apic_cmci_vect; 125 extern void cmi_cmci_trap(void); 126 127 lock_t apic_mode_switch_lock; 128 129 int apic_pir_vect; 130 131 /* 132 * Patchable global variables. 133 */ 134 int apic_forceload = 0; 135 136 int apic_coarse_hrtime = 1; /* 0 - use accurate slow gethrtime() */ 137 138 int apic_flat_model = 0; /* 0 - clustered. 
1 - flat */ 139 int apic_panic_on_nmi = 0; 140 int apic_panic_on_apic_error = 0; 141 142 int apic_verbose = 0; /* 0x1ff */ 143 144 #ifdef DEBUG 145 int apic_debug = 0; 146 int apic_restrict_vector = 0; 147 148 int apic_debug_msgbuf[APIC_DEBUG_MSGBUFSIZE]; 149 int apic_debug_msgbufindex = 0; 150 151 #endif /* DEBUG */ 152 153 uint_t apic_nticks = 0; 154 uint_t apic_skipped_redistribute = 0; 155 156 uint_t last_count_read = 0; 157 lock_t apic_gethrtime_lock; 158 volatile int apic_hrtime_stamp = 0; 159 volatile hrtime_t apic_nsec_since_boot = 0; 160 161 static hrtime_t apic_last_hrtime = 0; 162 int apic_hrtime_error = 0; 163 int apic_remote_hrterr = 0; 164 int apic_num_nmis = 0; 165 int apic_apic_error = 0; 166 int apic_num_apic_errors = 0; 167 int apic_num_cksum_errors = 0; 168 169 int apic_error = 0; 170 171 static int apic_cmos_ssb_set = 0; 172 173 /* use to make sure only one cpu handles the nmi */ 174 lock_t apic_nmi_lock; 175 /* use to make sure only one cpu handles the error interrupt */ 176 lock_t apic_error_lock; 177 178 static struct { 179 uchar_t cntl; 180 uchar_t data; 181 } aspen_bmc[] = { 182 { CC_SMS_WR_START, 0x18 }, /* NetFn/LUN */ 183 { CC_SMS_WR_NEXT, 0x24 }, /* Cmd SET_WATCHDOG_TIMER */ 184 { CC_SMS_WR_NEXT, 0x84 }, /* DataByte 1: SMS/OS no log */ 185 { CC_SMS_WR_NEXT, 0x2 }, /* DataByte 2: Power Down */ 186 { CC_SMS_WR_NEXT, 0x0 }, /* DataByte 3: no pre-timeout */ 187 { CC_SMS_WR_NEXT, 0x0 }, /* DataByte 4: timer expir. 
*/ 188 { CC_SMS_WR_NEXT, 0xa }, /* DataByte 5: init countdown */ 189 { CC_SMS_WR_END, 0x0 }, /* DataByte 6: init countdown */ 190 191 { CC_SMS_WR_START, 0x18 }, /* NetFn/LUN */ 192 { CC_SMS_WR_END, 0x22 } /* Cmd RESET_WATCHDOG_TIMER */ 193 }; 194 195 static struct { 196 int port; 197 uchar_t data; 198 } sitka_bmc[] = { 199 { SMS_COMMAND_REGISTER, SMS_WRITE_START }, 200 { SMS_DATA_REGISTER, 0x18 }, /* NetFn/LUN */ 201 { SMS_DATA_REGISTER, 0x24 }, /* Cmd SET_WATCHDOG_TIMER */ 202 { SMS_DATA_REGISTER, 0x84 }, /* DataByte 1: SMS/OS no log */ 203 { SMS_DATA_REGISTER, 0x2 }, /* DataByte 2: Power Down */ 204 { SMS_DATA_REGISTER, 0x0 }, /* DataByte 3: no pre-timeout */ 205 { SMS_DATA_REGISTER, 0x0 }, /* DataByte 4: timer expir. */ 206 { SMS_DATA_REGISTER, 0xa }, /* DataByte 5: init countdown */ 207 { SMS_COMMAND_REGISTER, SMS_WRITE_END }, 208 { SMS_DATA_REGISTER, 0x0 }, /* DataByte 6: init countdown */ 209 210 { SMS_COMMAND_REGISTER, SMS_WRITE_START }, 211 { SMS_DATA_REGISTER, 0x18 }, /* NetFn/LUN */ 212 { SMS_COMMAND_REGISTER, SMS_WRITE_END }, 213 { SMS_DATA_REGISTER, 0x22 } /* Cmd RESET_WATCHDOG_TIMER */ 214 }; 215 216 /* Patchable global variables. 
*/ 217 int apic_kmdb_on_nmi = 0; /* 0 - no, 1 - yes enter kmdb */ 218 uint32_t apic_divide_reg_init = 0; /* 0 - divide by 2 */ 219 220 /* default apic ops without interrupt remapping */ 221 static apic_intrmap_ops_t apic_nointrmap_ops = { 222 (int (*)(int))return_instr, 223 (void (*)(int))return_instr, 224 (void (*)(void **, dev_info_t *, uint16_t, int, uchar_t))return_instr, 225 (void (*)(void *, void *, uint16_t, int))return_instr, 226 (void (*)(void **))return_instr, 227 apic_record_ioapic_rdt, 228 apic_record_msi, 229 }; 230 231 apic_intrmap_ops_t *apic_vt_ops = &apic_nointrmap_ops; 232 apic_cpus_info_t *apic_cpus = NULL; 233 cpuset_t apic_cpumask; 234 uint_t apic_picinit_called; 235 236 /* Flag to indicate that we need to shut down all processors */ 237 static uint_t apic_shutdown_processors; 238 239 /* 240 * Probe the ioapic method for apix module. Called in apic_probe_common() 241 */ 242 int 243 apic_ioapic_method_probe() 244 { 245 if (apix_enable == 0) 246 return (PSM_SUCCESS); 247 248 /* 249 * Set IOAPIC EOI handling method. The priority from low to high is: 250 * 1. IOxAPIC: with EOI register 251 * 2. IOMMU interrupt mapping 252 * 3. Mask-Before-EOI method for systems without boot 253 * interrupt routing, such as systems with only one IOAPIC; 254 * NVIDIA CK8-04/MCP55 systems; systems with bridge solution 255 * which disables the boot interrupt routing already. 256 * 4. 
Directed EOI 257 */ 258 if (apic_io_ver[0] >= 0x20) 259 apix_mul_ioapic_method = APIC_MUL_IOAPIC_IOXAPIC; 260 if ((apic_io_max == 1) || (apic_nvidia_io_max == apic_io_max)) 261 apix_mul_ioapic_method = APIC_MUL_IOAPIC_MASK; 262 if (apic_directed_EOI_supported()) 263 apix_mul_ioapic_method = APIC_MUL_IOAPIC_DEOI; 264 265 /* fall back to pcplusmp */ 266 if (apix_mul_ioapic_method == APIC_MUL_IOAPIC_PCPLUSMP) { 267 /* make sure apix is after pcplusmp in /etc/mach */ 268 apix_enable = 0; /* go ahead with pcplusmp install next */ 269 return (PSM_FAILURE); 270 } 271 272 return (PSM_SUCCESS); 273 } 274 275 /* 276 * handler for APIC Error interrupt. Just print a warning and continue 277 */ 278 int 279 apic_error_intr() 280 { 281 uint_t error0, error1, error; 282 uint_t i; 283 284 /* 285 * We need to write before read as per 7.4.17 of system prog manual. 286 * We do both and or the results to be safe 287 */ 288 error0 = apic_reg_ops->apic_read(APIC_ERROR_STATUS); 289 apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0); 290 error1 = apic_reg_ops->apic_read(APIC_ERROR_STATUS); 291 error = error0 | error1; 292 293 /* 294 * Clear the APIC error status (do this on all cpus that enter here) 295 * (two writes are required due to the semantics of accessing the 296 * error status register.) 297 */ 298 apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0); 299 apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0); 300 301 /* 302 * Prevent more than 1 CPU from handling error interrupt causing 303 * double printing (interleave of characters from multiple 304 * CPU's when using prom_printf) 305 */ 306 if (lock_try(&apic_error_lock) == 0) 307 return (error ? DDI_INTR_CLAIMED : DDI_INTR_UNCLAIMED); 308 if (error) { 309 #if DEBUG 310 if (apic_debug) 311 debug_enter("pcplusmp: APIC Error interrupt received"); 312 #endif /* DEBUG */ 313 if (apic_panic_on_apic_error) 314 cmn_err(CE_PANIC, 315 "APIC Error interrupt on CPU %d. 
Status = %x", 316 psm_get_cpu_id(), error); 317 else { 318 if ((error & ~APIC_CS_ERRORS) == 0) { 319 /* cksum error only */ 320 apic_error |= APIC_ERR_APIC_ERROR; 321 apic_apic_error |= error; 322 apic_num_apic_errors++; 323 apic_num_cksum_errors++; 324 } else { 325 /* 326 * prom_printf is the best shot we have of 327 * something which is problem free from 328 * high level/NMI type of interrupts 329 */ 330 prom_printf("APIC Error interrupt on CPU %d. " 331 "Status 0 = %x, Status 1 = %x\n", 332 psm_get_cpu_id(), error0, error1); 333 apic_error |= APIC_ERR_APIC_ERROR; 334 apic_apic_error |= error; 335 apic_num_apic_errors++; 336 for (i = 0; i < apic_error_display_delay; i++) { 337 tenmicrosec(); 338 } 339 /* 340 * provide more delay next time limited to 341 * roughly 1 clock tick time 342 */ 343 if (apic_error_display_delay < 500) 344 apic_error_display_delay *= 2; 345 } 346 } 347 lock_clear(&apic_error_lock); 348 return (DDI_INTR_CLAIMED); 349 } else { 350 lock_clear(&apic_error_lock); 351 return (DDI_INTR_UNCLAIMED); 352 } 353 } 354 355 /* 356 * Turn off the mask bit in the performance counter Local Vector Table entry. 
357 */ 358 void 359 apic_cpcovf_mask_clear(void) 360 { 361 apic_reg_ops->apic_write(APIC_PCINT_VECT, 362 (apic_reg_ops->apic_read(APIC_PCINT_VECT) & ~APIC_LVT_MASK)); 363 } 364 365 /*ARGSUSED*/ 366 static int 367 apic_cmci_enable(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3) 368 { 369 apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect); 370 return (0); 371 } 372 373 /*ARGSUSED*/ 374 static int 375 apic_cmci_disable(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3) 376 { 377 apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect | AV_MASK); 378 return (0); 379 } 380 381 void 382 apic_cmci_setup(processorid_t cpuid, boolean_t enable) 383 { 384 cpuset_t cpu_set; 385 386 CPUSET_ONLY(cpu_set, cpuid); 387 388 if (enable) { 389 xc_call(0, 0, 0, CPUSET2BV(cpu_set), 390 (xc_func_t)apic_cmci_enable); 391 } else { 392 xc_call(0, 0, 0, CPUSET2BV(cpu_set), 393 (xc_func_t)apic_cmci_disable); 394 } 395 } 396 397 static void 398 apic_disable_local_apic(void) 399 { 400 apic_reg_ops->apic_write_task_reg(APIC_MASK_ALL); 401 apic_reg_ops->apic_write(APIC_LOCAL_TIMER, AV_MASK); 402 403 /* local intr reg 0 */ 404 apic_reg_ops->apic_write(APIC_INT_VECT0, AV_MASK); 405 406 /* disable NMI */ 407 apic_reg_ops->apic_write(APIC_INT_VECT1, AV_MASK); 408 409 /* and error interrupt */ 410 apic_reg_ops->apic_write(APIC_ERR_VECT, AV_MASK); 411 412 /* and perf counter intr */ 413 apic_reg_ops->apic_write(APIC_PCINT_VECT, AV_MASK); 414 415 apic_reg_ops->apic_write(APIC_SPUR_INT_REG, APIC_SPUR_INTR); 416 } 417 418 static void 419 apic_cpu_send_SIPI(processorid_t cpun, boolean_t start) 420 { 421 int loop_count; 422 uint32_t vector; 423 uint_t apicid; 424 ulong_t iflag; 425 426 apicid = apic_cpus[cpun].aci_local_id; 427 428 /* 429 * Interrupts on current CPU will be disabled during the 430 * steps in order to avoid unwanted side effects from 431 * executing interrupt handlers on a problematic BIOS. 
432 */ 433 iflag = intr_clear(); 434 435 if (start) { 436 outb(CMOS_ADDR, SSB); 437 outb(CMOS_DATA, BIOS_SHUTDOWN); 438 } 439 440 /* 441 * According to X2APIC specification in section '2.3.5.1' of 442 * Interrupt Command Register Semantics, the semantics of 443 * programming the Interrupt Command Register to dispatch an interrupt 444 * is simplified. A single MSR write to the 64-bit ICR is required 445 * for dispatching an interrupt. Specifically, with the 64-bit MSR 446 * interface to ICR, system software is not required to check the 447 * status of the delivery status bit prior to writing to the ICR 448 * to send an IPI. With the removal of the Delivery Status bit, 449 * system software no longer has a reason to read the ICR. It remains 450 * readable only to aid in debugging. 451 */ 452 #ifdef DEBUG 453 APIC_AV_PENDING_SET(); 454 #else 455 if (apic_mode == LOCAL_APIC) { 456 APIC_AV_PENDING_SET(); 457 } 458 #endif /* DEBUG */ 459 460 /* for integrated - make sure there is one INIT IPI in buffer */ 461 /* for external - it will wake up the cpu */ 462 apic_reg_ops->apic_write_int_cmd(apicid, AV_ASSERT | AV_RESET); 463 464 /* If only 1 CPU is installed, PENDING bit will not go low */ 465 for (loop_count = apic_sipi_max_loop_count; loop_count; loop_count--) { 466 if (apic_mode == LOCAL_APIC && 467 apic_reg_ops->apic_read(APIC_INT_CMD1) & AV_PENDING) 468 apic_ret(); 469 else 470 break; 471 } 472 473 apic_reg_ops->apic_write_int_cmd(apicid, AV_DEASSERT | AV_RESET); 474 drv_usecwait(20000); /* 20 milli sec */ 475 476 if (apic_cpus[cpun].aci_local_ver >= APIC_INTEGRATED_VERS) { 477 /* integrated apic */ 478 479 vector = (rm_platter_pa >> MMU_PAGESHIFT) & 480 (APIC_VECTOR_MASK | APIC_IPL_MASK); 481 482 /* to offset the INIT IPI queue up in the buffer */ 483 apic_reg_ops->apic_write_int_cmd(apicid, vector | AV_STARTUP); 484 drv_usecwait(200); /* 20 micro sec */ 485 486 /* 487 * send the second SIPI (Startup IPI) as recommended by Intel 488 * software development manual. 
489 */ 490 apic_reg_ops->apic_write_int_cmd(apicid, vector | AV_STARTUP); 491 drv_usecwait(200); /* 20 micro sec */ 492 } 493 494 intr_restore(iflag); 495 } 496 497 /*ARGSUSED1*/ 498 int 499 apic_cpu_start(processorid_t cpun, caddr_t arg) 500 { 501 ASSERT(MUTEX_HELD(&cpu_lock)); 502 503 if (!apic_cpu_in_range(cpun)) { 504 return (EINVAL); 505 } 506 507 /* 508 * Switch to apic_common_send_ipi for safety during starting other CPUs. 509 */ 510 if (apic_mode == LOCAL_X2APIC) { 511 apic_switch_ipi_callback(B_TRUE); 512 } 513 514 apic_cmos_ssb_set = 1; 515 apic_cpu_send_SIPI(cpun, B_TRUE); 516 517 return (0); 518 } 519 520 /* 521 * Put CPU into halted state with interrupts disabled. 522 */ 523 /*ARGSUSED1*/ 524 int 525 apic_cpu_stop(processorid_t cpun, caddr_t arg) 526 { 527 int rc; 528 cpu_t *cp; 529 extern cpuset_t cpu_ready_set; 530 extern void cpu_idle_intercept_cpu(cpu_t *cp); 531 532 ASSERT(MUTEX_HELD(&cpu_lock)); 533 534 if (!apic_cpu_in_range(cpun)) { 535 return (EINVAL); 536 } 537 if (apic_cpus[cpun].aci_local_ver < APIC_INTEGRATED_VERS) { 538 return (ENOTSUP); 539 } 540 541 cp = cpu_get(cpun); 542 ASSERT(cp != NULL); 543 ASSERT((cp->cpu_flags & CPU_OFFLINE) != 0); 544 ASSERT((cp->cpu_flags & CPU_QUIESCED) != 0); 545 ASSERT((cp->cpu_flags & CPU_ENABLE) == 0); 546 547 /* Clear CPU_READY flag to disable cross calls. */ 548 cp->cpu_flags &= ~CPU_READY; 549 CPUSET_ATOMIC_DEL(cpu_ready_set, cpun); 550 rc = xc_flush_cpu(cp); 551 if (rc != 0) { 552 CPUSET_ATOMIC_ADD(cpu_ready_set, cpun); 553 cp->cpu_flags |= CPU_READY; 554 return (rc); 555 } 556 557 /* Intercept target CPU at a safe point before powering it off. 
*/ 558 cpu_idle_intercept_cpu(cp); 559 560 apic_cpu_send_SIPI(cpun, B_FALSE); 561 cp->cpu_flags &= ~CPU_RUNNING; 562 563 return (0); 564 } 565 566 int 567 apic_cpu_ops(psm_cpu_request_t *reqp) 568 { 569 if (reqp == NULL) { 570 return (EINVAL); 571 } 572 573 switch (reqp->pcr_cmd) { 574 case PSM_CPU_ADD: 575 return (apic_cpu_add(reqp)); 576 577 case PSM_CPU_REMOVE: 578 return (apic_cpu_remove(reqp)); 579 580 case PSM_CPU_STOP: 581 return (apic_cpu_stop(reqp->req.cpu_stop.cpuid, 582 reqp->req.cpu_stop.ctx)); 583 584 default: 585 return (ENOTSUP); 586 } 587 } 588 589 #ifdef DEBUG 590 int apic_break_on_cpu = 9; 591 int apic_stretch_interrupts = 0; 592 int apic_stretch_ISR = 1 << 3; /* IPL of 3 matches nothing now */ 593 #endif /* DEBUG */ 594 595 /* 596 * generates an interprocessor interrupt to another CPU. Any changes made to 597 * this routine must be accompanied by similar changes to 598 * apic_common_send_ipi(). 599 */ 600 void 601 apic_send_ipi(int cpun, int ipl) 602 { 603 int vector; 604 ulong_t flag; 605 606 vector = apic_resv_vector[ipl]; 607 608 ASSERT((vector >= APIC_BASE_VECT) && (vector <= APIC_SPUR_INTR)); 609 610 flag = intr_clear(); 611 612 APIC_AV_PENDING_SET(); 613 614 apic_reg_ops->apic_write_int_cmd(apic_cpus[cpun].aci_local_id, 615 vector); 616 617 intr_restore(flag); 618 } 619 620 void 621 apic_send_pir_ipi(processorid_t cpun) 622 { 623 const int vector = apic_pir_vect; 624 ulong_t flag; 625 626 ASSERT((vector >= APIC_BASE_VECT) && (vector <= APIC_SPUR_INTR)); 627 628 flag = intr_clear(); 629 630 /* Self-IPI for inducing PIR makes no sense. 
*/ 631 if ((cpun != psm_get_cpu_id())) { 632 APIC_AV_PENDING_SET(); 633 apic_reg_ops->apic_write_int_cmd(apic_cpus[cpun].aci_local_id, 634 vector); 635 } 636 637 intr_restore(flag); 638 } 639 640 int 641 apic_get_pir_ipivect(void) 642 { 643 return (apic_pir_vect); 644 } 645 646 /*ARGSUSED*/ 647 void 648 apic_set_idlecpu(processorid_t cpun) 649 { 650 } 651 652 /*ARGSUSED*/ 653 void 654 apic_unset_idlecpu(processorid_t cpun) 655 { 656 } 657 658 659 void 660 apic_ret() 661 { 662 } 663 664 /* 665 * If apic_coarse_time == 1, then apic_gettime() is used instead of 666 * apic_gethrtime(). This is used for performance instead of accuracy. 667 */ 668 669 hrtime_t 670 apic_gettime() 671 { 672 int old_hrtime_stamp; 673 hrtime_t temp; 674 675 /* 676 * In one-shot mode, we do not keep time, so if anyone 677 * calls psm_gettime() directly, we vector over to 678 * gethrtime(). 679 * one-shot mode MUST NOT be enabled if this psm is the source of 680 * hrtime. 681 */ 682 683 if (apic_oneshot) 684 return (gethrtime()); 685 686 687 gettime_again: 688 while ((old_hrtime_stamp = apic_hrtime_stamp) & 1) 689 apic_ret(); 690 691 temp = apic_nsec_since_boot; 692 693 if (apic_hrtime_stamp != old_hrtime_stamp) { /* got an interrupt */ 694 goto gettime_again; 695 } 696 return (temp); 697 } 698 699 /* 700 * Here we return the number of nanoseconds since booting. Note every 701 * clock interrupt increments apic_nsec_since_boot by the appropriate 702 * amount. 703 */ 704 hrtime_t 705 apic_gethrtime(void) 706 { 707 int curr_timeval, countval, elapsed_ticks; 708 int old_hrtime_stamp, status; 709 hrtime_t temp; 710 uint32_t cpun; 711 ulong_t oflags; 712 713 /* 714 * In one-shot mode, we do not keep time, so if anyone 715 * calls psm_gethrtime() directly, we vector over to 716 * gethrtime(). 717 * one-shot mode MUST NOT be enabled if this psm is the source of 718 * hrtime. 
719 */ 720 721 if (apic_oneshot) 722 return (gethrtime()); 723 724 oflags = intr_clear(); /* prevent migration */ 725 726 cpun = apic_reg_ops->apic_read(APIC_LID_REG); 727 if (apic_mode == LOCAL_APIC) 728 cpun >>= APIC_ID_BIT_OFFSET; 729 730 lock_set(&apic_gethrtime_lock); 731 732 gethrtime_again: 733 while ((old_hrtime_stamp = apic_hrtime_stamp) & 1) 734 apic_ret(); 735 736 /* 737 * Check to see which CPU we are on. Note the time is kept on 738 * the local APIC of CPU 0. If on CPU 0, simply read the current 739 * counter. If on another CPU, issue a remote read command to CPU 0. 740 */ 741 if (cpun == apic_cpus[0].aci_local_id) { 742 countval = apic_reg_ops->apic_read(APIC_CURR_COUNT); 743 } else { 744 #ifdef DEBUG 745 APIC_AV_PENDING_SET(); 746 #else 747 if (apic_mode == LOCAL_APIC) 748 APIC_AV_PENDING_SET(); 749 #endif /* DEBUG */ 750 751 apic_reg_ops->apic_write_int_cmd( 752 apic_cpus[0].aci_local_id, APIC_CURR_ADD | AV_REMOTE); 753 754 while ((status = apic_reg_ops->apic_read(APIC_INT_CMD1)) 755 & AV_READ_PENDING) { 756 apic_ret(); 757 } 758 759 if (status & AV_REMOTE_STATUS) /* 1 = valid */ 760 countval = apic_reg_ops->apic_read(APIC_REMOTE_READ); 761 else { /* 0 = invalid */ 762 apic_remote_hrterr++; 763 /* 764 * return last hrtime right now, will need more 765 * testing if change to retry 766 */ 767 temp = apic_last_hrtime; 768 769 lock_clear(&apic_gethrtime_lock); 770 771 intr_restore(oflags); 772 773 return (temp); 774 } 775 } 776 if (countval > last_count_read) 777 countval = 0; 778 else 779 last_count_read = countval; 780 781 elapsed_ticks = apic_hertz_count - countval; 782 783 curr_timeval = APIC_TICKS_TO_NSECS(elapsed_ticks); 784 temp = apic_nsec_since_boot + curr_timeval; 785 786 if (apic_hrtime_stamp != old_hrtime_stamp) { /* got an interrupt */ 787 /* we might have clobbered last_count_read. 
Restore it */ 788 last_count_read = apic_hertz_count; 789 goto gethrtime_again; 790 } 791 792 if (temp < apic_last_hrtime) { 793 /* return last hrtime if error occurs */ 794 apic_hrtime_error++; 795 temp = apic_last_hrtime; 796 } 797 else 798 apic_last_hrtime = temp; 799 800 lock_clear(&apic_gethrtime_lock); 801 intr_restore(oflags); 802 803 return (temp); 804 } 805 806 /* apic NMI handler */ 807 /*ARGSUSED*/ 808 void 809 apic_nmi_intr(caddr_t arg, struct regs *rp) 810 { 811 nmi_action_t action = nmi_action; 812 813 if (apic_shutdown_processors) { 814 apic_disable_local_apic(); 815 return; 816 } 817 818 apic_error |= APIC_ERR_NMI; 819 820 if (!lock_try(&apic_nmi_lock)) 821 return; 822 apic_num_nmis++; 823 824 /* 825 * "nmi_action" always over-rides the older way of doing this, unless we 826 * can't actually drop into kmdb when requested. 827 */ 828 if (action == NMI_ACTION_KMDB && !psm_debugger()) 829 action = NMI_ACTION_UNSET; 830 831 if (action == NMI_ACTION_UNSET) { 832 if (apic_kmdb_on_nmi && psm_debugger()) 833 action = NMI_ACTION_KMDB; 834 else if (apic_panic_on_nmi) 835 action = NMI_ACTION_PANIC; 836 else 837 action = NMI_ACTION_IGNORE; 838 } 839 840 switch (action) { 841 case NMI_ACTION_IGNORE: 842 /* 843 * prom_printf is the best shot we have of something which is 844 * problem free from high level/NMI type of interrupts 845 */ 846 prom_printf("NMI received\n"); 847 break; 848 849 case NMI_ACTION_PANIC: 850 /* Keep panic from entering kmdb. 
*/ 851 nopanicdebug = 1; 852 panic("NMI received\n"); 853 break; 854 855 case NMI_ACTION_KMDB: 856 default: 857 debug_enter("NMI received: entering kmdb\n"); 858 break; 859 } 860 861 lock_clear(&apic_nmi_lock); 862 } 863 864 processorid_t 865 apic_get_next_processorid(processorid_t cpu_id) 866 { 867 868 int i; 869 870 if (cpu_id == -1) 871 return ((processorid_t)0); 872 873 for (i = cpu_id + 1; i < NCPU; i++) { 874 if (apic_cpu_in_range(i)) 875 return (i); 876 } 877 878 return ((processorid_t)-1); 879 } 880 881 int 882 apic_cpu_add(psm_cpu_request_t *reqp) 883 { 884 int i, rv = 0; 885 ulong_t iflag; 886 boolean_t first = B_TRUE; 887 uchar_t localver = 0; 888 uint32_t localid, procid; 889 processorid_t cpuid = (processorid_t)-1; 890 mach_cpu_add_arg_t *ap; 891 892 ASSERT(reqp != NULL); 893 reqp->req.cpu_add.cpuid = (processorid_t)-1; 894 895 /* Check whether CPU hotplug is supported. */ 896 if (!plat_dr_support_cpu() || apic_max_nproc == -1) { 897 return (ENOTSUP); 898 } 899 900 ap = (mach_cpu_add_arg_t *)reqp->req.cpu_add.argp; 901 switch (ap->type) { 902 case MACH_CPU_ARG_LOCAL_APIC: 903 localid = ap->arg.apic.apic_id; 904 procid = ap->arg.apic.proc_id; 905 if (localid >= 255 || procid > 255) { 906 cmn_err(CE_WARN, 907 "!apic: apicid(%u) or procid(%u) is invalid.", 908 localid, procid); 909 return (EINVAL); 910 } 911 break; 912 913 case MACH_CPU_ARG_LOCAL_X2APIC: 914 localid = ap->arg.apic.apic_id; 915 procid = ap->arg.apic.proc_id; 916 if (localid >= UINT32_MAX) { 917 cmn_err(CE_WARN, 918 "!apic: x2apicid(%u) is invalid.", localid); 919 return (EINVAL); 920 } else if (localid >= 255 && apic_mode == LOCAL_APIC) { 921 cmn_err(CE_WARN, "!apic: system is in APIC mode, " 922 "can't support x2APIC processor."); 923 return (ENOTSUP); 924 } 925 break; 926 927 default: 928 cmn_err(CE_WARN, 929 "!apic: unknown argument type %d to apic_cpu_add().", 930 ap->type); 931 return (EINVAL); 932 } 933 934 /* Use apic_ioapic_lock to sync with apic_get_next_bind_cpu. 
*/ 935 iflag = intr_clear(); 936 lock_set(&apic_ioapic_lock); 937 938 /* Check whether local APIC id already exists. */ 939 for (i = 0; i < apic_nproc; i++) { 940 if (!CPU_IN_SET(apic_cpumask, i)) 941 continue; 942 if (apic_cpus[i].aci_local_id == localid) { 943 lock_clear(&apic_ioapic_lock); 944 intr_restore(iflag); 945 cmn_err(CE_WARN, 946 "!apic: local apic id %u already exists.", 947 localid); 948 return (EEXIST); 949 } else if (apic_cpus[i].aci_processor_id == procid) { 950 lock_clear(&apic_ioapic_lock); 951 intr_restore(iflag); 952 cmn_err(CE_WARN, 953 "!apic: processor id %u already exists.", 954 (int)procid); 955 return (EEXIST); 956 } 957 958 /* 959 * There's no local APIC version number available in MADT table, 960 * so assume that all CPUs are homogeneous and use local APIC 961 * version number of the first existing CPU. 962 */ 963 if (first) { 964 first = B_FALSE; 965 localver = apic_cpus[i].aci_local_ver; 966 } 967 } 968 ASSERT(first == B_FALSE); 969 970 /* 971 * Try to assign the same cpuid if APIC id exists in the dirty cache. 972 */ 973 for (i = 0; i < apic_max_nproc; i++) { 974 if (CPU_IN_SET(apic_cpumask, i)) { 975 ASSERT((apic_cpus[i].aci_status & APIC_CPU_FREE) == 0); 976 continue; 977 } 978 ASSERT(apic_cpus[i].aci_status & APIC_CPU_FREE); 979 if ((apic_cpus[i].aci_status & APIC_CPU_DIRTY) && 980 apic_cpus[i].aci_local_id == localid && 981 apic_cpus[i].aci_processor_id == procid) { 982 cpuid = i; 983 break; 984 } 985 } 986 987 /* Avoid the dirty cache and allocate fresh slot if possible. */ 988 if (cpuid == (processorid_t)-1) { 989 for (i = 0; i < apic_max_nproc; i++) { 990 if ((apic_cpus[i].aci_status & APIC_CPU_FREE) && 991 (apic_cpus[i].aci_status & APIC_CPU_DIRTY) == 0) { 992 cpuid = i; 993 break; 994 } 995 } 996 } 997 998 /* Try to find any free slot as last resort. 
*/ 999 if (cpuid == (processorid_t)-1) { 1000 for (i = 0; i < apic_max_nproc; i++) { 1001 if (apic_cpus[i].aci_status & APIC_CPU_FREE) { 1002 cpuid = i; 1003 break; 1004 } 1005 } 1006 } 1007 1008 if (cpuid == (processorid_t)-1) { 1009 lock_clear(&apic_ioapic_lock); 1010 intr_restore(iflag); 1011 cmn_err(CE_NOTE, 1012 "!apic: failed to allocate cpu id for processor %u.", 1013 procid); 1014 rv = EAGAIN; 1015 } else if (ACPI_FAILURE(acpica_map_cpu(cpuid, procid))) { 1016 lock_clear(&apic_ioapic_lock); 1017 intr_restore(iflag); 1018 cmn_err(CE_NOTE, 1019 "!apic: failed to build mapping for processor %u.", 1020 procid); 1021 rv = EBUSY; 1022 } else { 1023 ASSERT(cpuid >= 0 && cpuid < NCPU); 1024 ASSERT(cpuid < apic_max_nproc && cpuid < max_ncpus); 1025 bzero(&apic_cpus[cpuid], sizeof (apic_cpus[0])); 1026 apic_cpus[cpuid].aci_processor_id = procid; 1027 apic_cpus[cpuid].aci_local_id = localid; 1028 apic_cpus[cpuid].aci_local_ver = localver; 1029 CPUSET_ATOMIC_ADD(apic_cpumask, cpuid); 1030 if (cpuid >= apic_nproc) { 1031 apic_nproc = cpuid + 1; 1032 } 1033 lock_clear(&apic_ioapic_lock); 1034 intr_restore(iflag); 1035 reqp->req.cpu_add.cpuid = cpuid; 1036 } 1037 1038 return (rv); 1039 } 1040 1041 int 1042 apic_cpu_remove(psm_cpu_request_t *reqp) 1043 { 1044 int i; 1045 ulong_t iflag; 1046 processorid_t cpuid; 1047 1048 /* Check whether CPU hotplug is supported. */ 1049 if (!plat_dr_support_cpu() || apic_max_nproc == -1) { 1050 return (ENOTSUP); 1051 } 1052 1053 cpuid = reqp->req.cpu_remove.cpuid; 1054 1055 /* Use apic_ioapic_lock to sync with apic_get_next_bind_cpu. 
*/ 1056 iflag = intr_clear(); 1057 lock_set(&apic_ioapic_lock); 1058 1059 if (!apic_cpu_in_range(cpuid)) { 1060 lock_clear(&apic_ioapic_lock); 1061 intr_restore(iflag); 1062 cmn_err(CE_WARN, 1063 "!apic: cpuid %d doesn't exist in apic_cpus array.", 1064 cpuid); 1065 return (ENODEV); 1066 } 1067 ASSERT((apic_cpus[cpuid].aci_status & APIC_CPU_FREE) == 0); 1068 1069 if (ACPI_FAILURE(acpica_unmap_cpu(cpuid))) { 1070 lock_clear(&apic_ioapic_lock); 1071 intr_restore(iflag); 1072 return (ENOENT); 1073 } 1074 1075 if (cpuid == apic_nproc - 1) { 1076 /* 1077 * We are removing the highest numbered cpuid so we need to 1078 * find the next highest cpuid as the new value for apic_nproc. 1079 */ 1080 for (i = apic_nproc; i > 0; i--) { 1081 if (CPU_IN_SET(apic_cpumask, i - 1)) { 1082 apic_nproc = i; 1083 break; 1084 } 1085 } 1086 /* at least one CPU left */ 1087 ASSERT(i > 0); 1088 } 1089 CPUSET_ATOMIC_DEL(apic_cpumask, cpuid); 1090 /* mark slot as free and keep it in the dirty cache */ 1091 apic_cpus[cpuid].aci_status = APIC_CPU_FREE | APIC_CPU_DIRTY; 1092 1093 lock_clear(&apic_ioapic_lock); 1094 intr_restore(iflag); 1095 1096 return (0); 1097 } 1098 1099 /* 1100 * Return the number of ticks the APIC decrements in SF nanoseconds. 1101 * The fixed-frequency PIT (aka 8254) is used for the measurement. 
1102 */ 1103 static uint64_t 1104 apic_calibrate_impl() 1105 { 1106 uint8_t pit_tick_lo; 1107 uint16_t pit_tick, target_pit_tick, pit_ticks_adj; 1108 uint32_t pit_ticks; 1109 uint32_t start_apic_tick, end_apic_tick, apic_ticks; 1110 ulong_t iflag; 1111 1112 apic_reg_ops->apic_write(APIC_DIVIDE_REG, apic_divide_reg_init); 1113 apic_reg_ops->apic_write(APIC_INIT_COUNT, APIC_MAXVAL); 1114 1115 iflag = intr_clear(); 1116 1117 do { 1118 pit_tick_lo = inb(PITCTR0_PORT); 1119 pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo; 1120 } while (pit_tick < APIC_TIME_MIN || 1121 pit_tick_lo <= APIC_LB_MIN || pit_tick_lo >= APIC_LB_MAX); 1122 1123 /* 1124 * Wait for the PIT to decrement by 5 ticks to ensure 1125 * we didn't start in the middle of a tick. 1126 * Compare with 0x10 for the wrap around case. 1127 */ 1128 target_pit_tick = pit_tick - 5; 1129 do { 1130 pit_tick_lo = inb(PITCTR0_PORT); 1131 pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo; 1132 } while (pit_tick > target_pit_tick || pit_tick_lo < 0x10); 1133 1134 start_apic_tick = apic_reg_ops->apic_read(APIC_CURR_COUNT); 1135 1136 /* 1137 * Wait for the PIT to decrement by APIC_TIME_COUNT ticks 1138 */ 1139 target_pit_tick = pit_tick - APIC_TIME_COUNT; 1140 do { 1141 pit_tick_lo = inb(PITCTR0_PORT); 1142 pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo; 1143 } while (pit_tick > target_pit_tick || pit_tick_lo < 0x10); 1144 1145 end_apic_tick = apic_reg_ops->apic_read(APIC_CURR_COUNT); 1146 1147 intr_restore(iflag); 1148 1149 apic_ticks = start_apic_tick - end_apic_tick; 1150 1151 /* The PIT might have decremented by more ticks than planned */ 1152 pit_ticks_adj = target_pit_tick - pit_tick; 1153 /* total number of PIT ticks corresponding to apic_ticks */ 1154 pit_ticks = APIC_TIME_COUNT + pit_ticks_adj; 1155 1156 /* 1157 * Determine the number of nanoseconds per APIC clock tick 1158 * and then determine how many APIC ticks to interrupt at the 1159 * desired frequency 1160 * apic_ticks / (pitticks / PIT_HZ) = 
apic_ticks_per_s 1161 * (apic_ticks * PIT_HZ) / pitticks = apic_ticks_per_s 1162 * apic_ticks_per_ns = (apic_ticks * PIT_HZ) / (pitticks * 10^9) 1163 * apic_ticks_per_SFns = 1164 * (SF * apic_ticks * PIT_HZ) / (pitticks * 10^9) 1165 */ 1166 return ((SF * apic_ticks * PIT_HZ) / ((uint64_t)pit_ticks * NANOSEC)); 1167 } 1168 1169 /* 1170 * It was found empirically that 5 measurements seem sufficient to give a good 1171 * accuracy. Most spurious measurements are higher than the target value thus 1172 * we eliminate up to 2/5 spurious measurements. 1173 */ 1174 #define APIC_CALIBRATE_MEASUREMENTS 5 1175 1176 #define APIC_CALIBRATE_PERCENT_OFF_WARNING 10 1177 1178 /* 1179 * Return the number of ticks the APIC decrements in SF nanoseconds. 1180 * Several measurements are taken to filter out outliers. 1181 */ 1182 uint64_t 1183 apic_calibrate() 1184 { 1185 uint64_t measurements[APIC_CALIBRATE_MEASUREMENTS]; 1186 int median_idx; 1187 uint64_t median; 1188 1189 /* 1190 * When running under a virtual machine, the emulated PIT and APIC 1191 * counters do not always return the right values and can roll over. 1192 * Those spurious measurements are relatively rare but could 1193 * significantly affect the calibration. 1194 * Therefore we take several measurements and then keep the median. 1195 * The median is preferred to the average here as we only want to 1196 * discard outliers. 1197 */ 1198 for (int i = 0; i < APIC_CALIBRATE_MEASUREMENTS; i++) 1199 measurements[i] = apic_calibrate_impl(); 1200 1201 /* 1202 * sort results and retrieve median. 
1203 */ 1204 for (int i = 0; i < APIC_CALIBRATE_MEASUREMENTS; i++) { 1205 for (int j = i + 1; j < APIC_CALIBRATE_MEASUREMENTS; j++) { 1206 if (measurements[j] < measurements[i]) { 1207 uint64_t tmp = measurements[i]; 1208 measurements[i] = measurements[j]; 1209 measurements[j] = tmp; 1210 } 1211 } 1212 } 1213 median_idx = APIC_CALIBRATE_MEASUREMENTS / 2; 1214 median = measurements[median_idx]; 1215 1216 #if (APIC_CALIBRATE_MEASUREMENTS >= 3) 1217 /* 1218 * Check that measurements are consistent. Post a warning 1219 * if the three middle values are not close to each other. 1220 */ 1221 uint64_t delta_warn = median * 1222 APIC_CALIBRATE_PERCENT_OFF_WARNING / 100; 1223 if ((median - measurements[median_idx - 1]) > delta_warn || 1224 (measurements[median_idx + 1] - median) > delta_warn) { 1225 cmn_err(CE_WARN, "apic_calibrate measurements lack " 1226 "precision: %llu, %llu, %llu.", 1227 (u_longlong_t)measurements[median_idx - 1], 1228 (u_longlong_t)median, 1229 (u_longlong_t)measurements[median_idx + 1]); 1230 } 1231 #endif 1232 1233 return (median); 1234 } 1235 1236 /* 1237 * Initialise the APIC timer on the local APIC of CPU 0 to the desired 1238 * frequency. Note at this stage in the boot sequence, the boot processor 1239 * is the only active processor. 1240 * hertz value of 0 indicates a one-shot mode request. In this case 1241 * the function returns the resolution (in nanoseconds) for the hardware 1242 * timer interrupt. If one-shot mode capability is not available, 1243 * the return value will be 0. apic_enable_oneshot is a global switch 1244 * for disabling the functionality. 1245 * A non-zero positive value for hertz indicates a periodic mode request. 1246 * In this case the hardware will be programmed to generate clock interrupts 1247 * at hertz frequency and returns the resolution of interrupts in 1248 * nanosecond. 
1249 */ 1250 1251 int 1252 apic_clkinit(int hertz) 1253 { 1254 int ret; 1255 1256 apic_int_busy_mark = (apic_int_busy_mark * 1257 apic_sample_factor_redistribution) / 100; 1258 apic_int_free_mark = (apic_int_free_mark * 1259 apic_sample_factor_redistribution) / 100; 1260 apic_diff_for_redistribution = (apic_diff_for_redistribution * 1261 apic_sample_factor_redistribution) / 100; 1262 1263 ret = apic_timer_init(hertz); 1264 return (ret); 1265 1266 } 1267 1268 /* 1269 * apic_preshutdown: 1270 * Called early in shutdown whilst we can still access filesystems to do 1271 * things like loading modules which will be required to complete shutdown 1272 * after filesystems are all unmounted. 1273 */ 1274 void 1275 apic_preshutdown(int cmd, int fcn) 1276 { 1277 APIC_VERBOSE_POWEROFF(("apic_preshutdown(%d,%d); m=%d a=%d\n", 1278 cmd, fcn, apic_poweroff_method, apic_enable_acpi)); 1279 } 1280 1281 void 1282 apic_shutdown(int cmd, int fcn) 1283 { 1284 int restarts, attempts; 1285 int i; 1286 uchar_t byte; 1287 ulong_t iflag; 1288 1289 hpet_acpi_fini(); 1290 1291 /* Send NMI to all CPUs except self to do per processor shutdown */ 1292 iflag = intr_clear(); 1293 #ifdef DEBUG 1294 APIC_AV_PENDING_SET(); 1295 #else 1296 if (apic_mode == LOCAL_APIC) 1297 APIC_AV_PENDING_SET(); 1298 #endif /* DEBUG */ 1299 apic_shutdown_processors = 1; 1300 apic_reg_ops->apic_write(APIC_INT_CMD1, 1301 AV_NMI | AV_LEVEL | AV_SH_ALL_EXCSELF); 1302 1303 /* restore cmos shutdown byte before reboot */ 1304 if (apic_cmos_ssb_set) { 1305 outb(CMOS_ADDR, SSB); 1306 outb(CMOS_DATA, 0); 1307 } 1308 1309 ioapic_disable_redirection(); 1310 1311 /* disable apic mode if imcr present */ 1312 if (apic_imcrp) { 1313 outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT); 1314 outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_PIC); 1315 } 1316 1317 apic_disable_local_apic(); 1318 1319 intr_restore(iflag); 1320 1321 /* remainder of function is for shutdown cases only */ 1322 if (cmd != A_SHUTDOWN) 1323 return; 1324 1325 /* 1326 * Switch 
system back into Legacy-Mode if using ACPI and 1327 * not powering-off. Some BIOSes need to remain in ACPI-mode 1328 * for power-off to succeed (Dell Dimension 4600) 1329 * Do not disable ACPI while doing fastreboot 1330 */ 1331 if (apic_enable_acpi && fcn != AD_POWEROFF && fcn != AD_FASTREBOOT) 1332 (void) AcpiDisable(); 1333 1334 if (fcn == AD_FASTREBOOT) { 1335 apic_reg_ops->apic_write(APIC_INT_CMD1, 1336 AV_ASSERT | AV_RESET | AV_SH_ALL_EXCSELF); 1337 } 1338 1339 /* remainder of function is for shutdown+poweroff case only */ 1340 if (fcn != AD_POWEROFF) 1341 return; 1342 1343 switch (apic_poweroff_method) { 1344 case APIC_POWEROFF_VIA_RTC: 1345 1346 /* select the extended NVRAM bank in the RTC */ 1347 outb(CMOS_ADDR, RTC_REGA); 1348 byte = inb(CMOS_DATA); 1349 outb(CMOS_DATA, (byte | EXT_BANK)); 1350 1351 outb(CMOS_ADDR, PFR_REG); 1352 1353 /* for Predator must toggle the PAB bit */ 1354 byte = inb(CMOS_DATA); 1355 1356 /* 1357 * clear power active bar, wakeup alarm and 1358 * kickstart 1359 */ 1360 byte &= ~(PAB_CBIT | WF_FLAG | KS_FLAG); 1361 outb(CMOS_DATA, byte); 1362 1363 /* delay before next write */ 1364 drv_usecwait(1000); 1365 1366 /* for S40 the following would suffice */ 1367 byte = inb(CMOS_DATA); 1368 1369 /* power active bar control bit */ 1370 byte |= PAB_CBIT; 1371 outb(CMOS_DATA, byte); 1372 1373 break; 1374 1375 case APIC_POWEROFF_VIA_ASPEN_BMC: 1376 restarts = 0; 1377 restart_aspen_bmc: 1378 if (++restarts == 3) 1379 break; 1380 attempts = 0; 1381 do { 1382 byte = inb(MISMIC_FLAG_REGISTER); 1383 byte &= MISMIC_BUSY_MASK; 1384 if (byte != 0) { 1385 drv_usecwait(1000); 1386 if (attempts >= 3) 1387 goto restart_aspen_bmc; 1388 ++attempts; 1389 } 1390 } while (byte != 0); 1391 outb(MISMIC_CNTL_REGISTER, CC_SMS_GET_STATUS); 1392 byte = inb(MISMIC_FLAG_REGISTER); 1393 byte |= 0x1; 1394 outb(MISMIC_FLAG_REGISTER, byte); 1395 i = 0; 1396 for (; i < (sizeof (aspen_bmc)/sizeof (aspen_bmc[0])); 1397 i++) { 1398 attempts = 0; 1399 do { 1400 byte = 
inb(MISMIC_FLAG_REGISTER); 1401 byte &= MISMIC_BUSY_MASK; 1402 if (byte != 0) { 1403 drv_usecwait(1000); 1404 if (attempts >= 3) 1405 goto restart_aspen_bmc; 1406 ++attempts; 1407 } 1408 } while (byte != 0); 1409 outb(MISMIC_CNTL_REGISTER, aspen_bmc[i].cntl); 1410 outb(MISMIC_DATA_REGISTER, aspen_bmc[i].data); 1411 byte = inb(MISMIC_FLAG_REGISTER); 1412 byte |= 0x1; 1413 outb(MISMIC_FLAG_REGISTER, byte); 1414 } 1415 break; 1416 1417 case APIC_POWEROFF_VIA_SITKA_BMC: 1418 restarts = 0; 1419 restart_sitka_bmc: 1420 if (++restarts == 3) 1421 break; 1422 attempts = 0; 1423 do { 1424 byte = inb(SMS_STATUS_REGISTER); 1425 byte &= SMS_STATE_MASK; 1426 if ((byte == SMS_READ_STATE) || 1427 (byte == SMS_WRITE_STATE)) { 1428 drv_usecwait(1000); 1429 if (attempts >= 3) 1430 goto restart_sitka_bmc; 1431 ++attempts; 1432 } 1433 } while ((byte == SMS_READ_STATE) || 1434 (byte == SMS_WRITE_STATE)); 1435 outb(SMS_COMMAND_REGISTER, SMS_GET_STATUS); 1436 i = 0; 1437 for (; i < (sizeof (sitka_bmc)/sizeof (sitka_bmc[0])); 1438 i++) { 1439 attempts = 0; 1440 do { 1441 byte = inb(SMS_STATUS_REGISTER); 1442 byte &= SMS_IBF_MASK; 1443 if (byte != 0) { 1444 drv_usecwait(1000); 1445 if (attempts >= 3) 1446 goto restart_sitka_bmc; 1447 ++attempts; 1448 } 1449 } while (byte != 0); 1450 outb(sitka_bmc[i].port, sitka_bmc[i].data); 1451 } 1452 break; 1453 1454 case APIC_POWEROFF_NONE: 1455 1456 /* If no APIC direct method, we will try using ACPI */ 1457 if (apic_enable_acpi) { 1458 if (acpi_poweroff() == 1) 1459 return; 1460 } else 1461 return; 1462 1463 break; 1464 } 1465 /* 1466 * Wait a limited time here for power to go off. 1467 * If the power does not go off, then there was a 1468 * problem and we should continue to the halt which 1469 * prints a message for the user to press a key to 1470 * reboot. 
1471 */ 1472 drv_usecwait(7000000); /* wait seven seconds */ 1473 1474 } 1475 1476 cyclic_id_t apic_cyclic_id; 1477 1478 /* 1479 * The following functions are in the platform specific file so that they 1480 * can be different functions depending on whether we are running on 1481 * bare metal or a hypervisor. 1482 */ 1483 1484 /* 1485 * map an apic for memory-mapped access 1486 */ 1487 uint32_t * 1488 mapin_apic(uint32_t addr, size_t len, int flags) 1489 { 1490 return ((void *)psm_map_phys(addr, len, flags)); 1491 } 1492 1493 uint32_t * 1494 mapin_ioapic(uint32_t addr, size_t len, int flags) 1495 { 1496 return (mapin_apic(addr, len, flags)); 1497 } 1498 1499 /* 1500 * unmap an apic 1501 */ 1502 void 1503 mapout_apic(caddr_t addr, size_t len) 1504 { 1505 psm_unmap_phys(addr, len); 1506 } 1507 1508 void 1509 mapout_ioapic(caddr_t addr, size_t len) 1510 { 1511 mapout_apic(addr, len); 1512 } 1513 1514 uint32_t 1515 ioapic_read(int ioapic_ix, uint32_t reg) 1516 { 1517 volatile uint32_t *ioapic; 1518 1519 ioapic = apicioadr[ioapic_ix]; 1520 ioapic[APIC_IO_REG] = reg; 1521 return (ioapic[APIC_IO_DATA]); 1522 } 1523 1524 void 1525 ioapic_write(int ioapic_ix, uint32_t reg, uint32_t value) 1526 { 1527 volatile uint32_t *ioapic; 1528 1529 ioapic = apicioadr[ioapic_ix]; 1530 ioapic[APIC_IO_REG] = reg; 1531 ioapic[APIC_IO_DATA] = value; 1532 } 1533 1534 void 1535 ioapic_write_eoi(int ioapic_ix, uint32_t value) 1536 { 1537 volatile uint32_t *ioapic; 1538 1539 ioapic = apicioadr[ioapic_ix]; 1540 ioapic[APIC_IO_EOI] = value; 1541 } 1542 1543 /* 1544 * Round-robin algorithm to find the next CPU with interrupts enabled. 1545 * It can't share the same static variable apic_next_bind_cpu with 1546 * apic_get_next_bind_cpu(), since that will cause all interrupts to be 1547 * bound to CPU1 at boot time. During boot, only CPU0 is online with 1548 * interrupts enabled when apic_get_next_bind_cpu() and apic_find_cpu() 1549 * are called. 
However, the pcplusmp driver assumes that there will be 1550 * boot_ncpus CPUs configured eventually so it tries to distribute all 1551 * interrupts among CPU0 - CPU[boot_ncpus - 1]. Thus to prevent all 1552 * interrupts being targetted at CPU1, we need to use a dedicated static 1553 * variable for find_next_cpu() instead of sharing apic_next_bind_cpu. 1554 */ 1555 1556 processorid_t 1557 apic_find_cpu(int flag) 1558 { 1559 int i; 1560 static processorid_t acid = 0; 1561 1562 /* Find the first CPU with the passed-in flag set */ 1563 for (i = 0; i < apic_nproc; i++) { 1564 if (++acid >= apic_nproc) { 1565 acid = 0; 1566 } 1567 if (apic_cpu_in_range(acid) && 1568 (apic_cpus[acid].aci_status & flag)) { 1569 break; 1570 } 1571 } 1572 1573 ASSERT((apic_cpus[acid].aci_status & flag) != 0); 1574 return (acid); 1575 } 1576 1577 void 1578 apic_intrmap_init(int apic_mode) 1579 { 1580 int suppress_brdcst_eoi = 0; 1581 1582 /* 1583 * Intel Software Developer's Manual 3A, 10.12.7: 1584 * 1585 * Routing of device interrupts to local APIC units operating in 1586 * x2APIC mode requires use of the interrupt-remapping architecture 1587 * specified in the Intel Virtualization Technology for Directed 1588 * I/O, Revision 1.3. Because of this, BIOS must enumerate support 1589 * for and software must enable this interrupt remapping with 1590 * Extended Interrupt Mode Enabled before it enabling x2APIC mode in 1591 * the local APIC units. 1592 * 1593 * 1594 * In other words, to use the APIC in x2APIC mode, we need interrupt 1595 * remapping. Since we don't start up the IOMMU by default, we 1596 * won't be able to do any interrupt remapping and therefore have to 1597 * use the APIC in traditional 'local APIC' mode with memory mapped 1598 * I/O. 
1599 */ 1600 1601 if (psm_vt_ops != NULL) { 1602 if (((apic_intrmap_ops_t *)psm_vt_ops)-> 1603 apic_intrmap_init(apic_mode) == DDI_SUCCESS) { 1604 1605 apic_vt_ops = psm_vt_ops; 1606 1607 /* 1608 * We leverage the interrupt remapping engine to 1609 * suppress broadcast EOI; thus we must send the 1610 * directed EOI with the directed-EOI handler. 1611 */ 1612 if (apic_directed_EOI_supported() == 0) { 1613 suppress_brdcst_eoi = 1; 1614 } 1615 1616 apic_vt_ops->apic_intrmap_enable(suppress_brdcst_eoi); 1617 1618 if (apic_detect_x2apic()) { 1619 apic_enable_x2apic(); 1620 } 1621 1622 if (apic_directed_EOI_supported() == 0) { 1623 apic_set_directed_EOI_handler(); 1624 } 1625 } 1626 } 1627 } 1628 1629 /*ARGSUSED*/ 1630 static void 1631 apic_record_ioapic_rdt(void *intrmap_private, ioapic_rdt_t *irdt) 1632 { 1633 irdt->ir_hi <<= APIC_ID_BIT_OFFSET; 1634 } 1635 1636 /*ARGSUSED*/ 1637 static void 1638 apic_record_msi(void *intrmap_private, msi_regs_t *mregs) 1639 { 1640 mregs->mr_addr = MSI_ADDR_HDR | 1641 (MSI_ADDR_RH_FIXED << MSI_ADDR_RH_SHIFT) | 1642 (MSI_ADDR_DM_PHYSICAL << MSI_ADDR_DM_SHIFT) | 1643 (mregs->mr_addr << MSI_ADDR_DEST_SHIFT); 1644 mregs->mr_data = (MSI_DATA_TM_EDGE << MSI_DATA_TM_SHIFT) | 1645 mregs->mr_data; 1646 } 1647 1648 /* 1649 * Functions from apic_introp.c 1650 * 1651 * Those functions are used by apic_intr_ops(). 1652 */ 1653 1654 /* 1655 * MSI support flag: 1656 * reflects whether MSI is supported at APIC level 1657 * it can also be patched through /etc/system 1658 * 1659 * 0 = default value - don't know and need to call apic_check_msi_support() 1660 * to find out then set it accordingly 1661 * 1 = supported 1662 * -1 = not supported 1663 */ 1664 int apic_support_msi = 0; 1665 1666 /* Multiple vector support for MSI-X */ 1667 int apic_msix_enable = 1; 1668 1669 /* Multiple vector support for MSI */ 1670 int apic_multi_msi_enable = 1; 1671 1672 /* 1673 * Check whether the system supports MSI. 
1674 * 1675 * MSI is required for PCI-E and for PCI versions later than 2.2, so if we find 1676 * a PCI-E bus or we find a PCI bus whose version we know is >= 2.2, then we 1677 * return PSM_SUCCESS to indicate this system supports MSI. 1678 * 1679 * (Currently the only way we check whether a given PCI bus supports >= 2.2 is 1680 * by detecting if we are running inside the KVM hypervisor, which guarantees 1681 * this version number.) 1682 */ 1683 int 1684 apic_check_msi_support() 1685 { 1686 dev_info_t *cdip; 1687 char dev_type[16]; 1688 int dev_len; 1689 int hwenv = get_hwenv(); 1690 1691 DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support:\n")); 1692 1693 /* 1694 * check whether the first level children of root_node have 1695 * PCI-E or PCI capability. 1696 */ 1697 for (cdip = ddi_get_child(ddi_root_node()); cdip != NULL; 1698 cdip = ddi_get_next_sibling(cdip)) { 1699 1700 DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: cdip: 0x%p," 1701 " driver: %s, binding: %s, nodename: %s\n", (void *)cdip, 1702 ddi_driver_name(cdip), ddi_binding_name(cdip), 1703 ddi_node_name(cdip))); 1704 dev_len = sizeof (dev_type); 1705 if (ddi_getlongprop_buf(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS, 1706 "device_type", (caddr_t)dev_type, &dev_len) 1707 != DDI_PROP_SUCCESS) 1708 continue; 1709 if (strcmp(dev_type, "pciex") == 0) 1710 return (PSM_SUCCESS); 1711 if (strcmp(dev_type, "pci") == 0 && 1712 (hwenv == HW_KVM || hwenv == HW_BHYVE)) 1713 return (PSM_SUCCESS); 1714 } 1715 1716 /* MSI is not supported on this system */ 1717 DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: no 'pciex' " 1718 "device_type found\n")); 1719 return (PSM_FAILURE); 1720 } 1721 1722 /* 1723 * apic_pci_msi_unconfigure: 1724 * 1725 * This and next two interfaces are copied from pci_intr_lib.c 1726 * Do ensure that these two files stay in sync. 1727 * These needed to be copied over here to avoid a deadlock situation on 1728 * certain mp systems that use MSI interrupts. 
1729 * 1730 * IMPORTANT regards next three interfaces: 1731 * i) are called only for MSI/X interrupts. 1732 * ii) called with interrupts disabled, and must not block 1733 */ 1734 void 1735 apic_pci_msi_unconfigure(dev_info_t *rdip, int type, int inum) 1736 { 1737 ushort_t msi_ctrl; 1738 int cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip); 1739 ddi_acc_handle_t handle = i_ddi_get_pci_config_handle(rdip); 1740 1741 ASSERT((handle != NULL) && (cap_ptr != 0)); 1742 1743 if (type == DDI_INTR_TYPE_MSI) { 1744 msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL); 1745 msi_ctrl &= (~PCI_MSI_MME_MASK); 1746 pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl); 1747 pci_config_put32(handle, cap_ptr + PCI_MSI_ADDR_OFFSET, 0); 1748 1749 if (msi_ctrl & PCI_MSI_64BIT_MASK) { 1750 pci_config_put16(handle, 1751 cap_ptr + PCI_MSI_64BIT_DATA, 0); 1752 pci_config_put32(handle, 1753 cap_ptr + PCI_MSI_ADDR_OFFSET + 4, 0); 1754 } else { 1755 pci_config_put16(handle, 1756 cap_ptr + PCI_MSI_32BIT_DATA, 0); 1757 } 1758 1759 } else if (type == DDI_INTR_TYPE_MSIX) { 1760 uintptr_t off; 1761 uint32_t mask; 1762 ddi_intr_msix_t *msix_p = i_ddi_get_msix(rdip); 1763 1764 ASSERT(msix_p != NULL); 1765 1766 /* Offset into "inum"th entry in the MSI-X table & mask it */ 1767 off = (uintptr_t)msix_p->msix_tbl_addr + (inum * 1768 PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET; 1769 1770 mask = ddi_get32(msix_p->msix_tbl_hdl, (uint32_t *)off); 1771 1772 ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, (mask | 1)); 1773 1774 /* Offset into the "inum"th entry in the MSI-X table */ 1775 off = (uintptr_t)msix_p->msix_tbl_addr + 1776 (inum * PCI_MSIX_VECTOR_SIZE); 1777 1778 /* Reset the "data" and "addr" bits */ 1779 ddi_put32(msix_p->msix_tbl_hdl, 1780 (uint32_t *)(off + PCI_MSIX_DATA_OFFSET), 0); 1781 ddi_put64(msix_p->msix_tbl_hdl, (uint64_t *)off, 0); 1782 } 1783 } 1784 1785 /* 1786 * apic_pci_msi_disable_mode: 1787 */ 1788 void 1789 apic_pci_msi_disable_mode(dev_info_t *rdip, int type) 1790 
{ 1791 ushort_t msi_ctrl; 1792 int cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip); 1793 ddi_acc_handle_t handle = i_ddi_get_pci_config_handle(rdip); 1794 1795 ASSERT((handle != NULL) && (cap_ptr != 0)); 1796 1797 if (type == DDI_INTR_TYPE_MSI) { 1798 msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL); 1799 if (!(msi_ctrl & PCI_MSI_ENABLE_BIT)) 1800 return; 1801 1802 msi_ctrl &= ~PCI_MSI_ENABLE_BIT; /* MSI disable */ 1803 pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl); 1804 1805 } else if (type == DDI_INTR_TYPE_MSIX) { 1806 msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL); 1807 if (msi_ctrl & PCI_MSIX_ENABLE_BIT) { 1808 msi_ctrl &= ~PCI_MSIX_ENABLE_BIT; 1809 pci_config_put16(handle, cap_ptr + PCI_MSIX_CTRL, 1810 msi_ctrl); 1811 } 1812 } 1813 } 1814 1815 uint32_t 1816 apic_get_localapicid(uint32_t cpuid) 1817 { 1818 ASSERT(cpuid < apic_nproc && apic_cpus != NULL); 1819 1820 return (apic_cpus[cpuid].aci_local_id); 1821 } 1822 1823 uchar_t 1824 apic_get_ioapicid(uchar_t ioapicindex) 1825 { 1826 ASSERT(ioapicindex < MAX_IO_APIC); 1827 1828 return (apic_io_id[ioapicindex]); 1829 } 1830