1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 /* 26 * Copyright 2019, Joyent, Inc. 27 * Copyright (c) 2016, 2017 by Delphix. All rights reserved. 28 * Copyright 2019 Joshua M. Clulow <josh@sysmgr.org> 29 */ 30 31 /* 32 * PSMI 1.1 extensions are supported only in 2.6 and later versions. 33 * PSMI 1.2 extensions are supported only in 2.7 and later versions. 34 * PSMI 1.3 and 1.4 extensions are supported in Solaris 10. 35 * PSMI 1.5 extensions are supported in Solaris Nevada. 36 * PSMI 1.6 extensions are supported in Solaris Nevada. 37 * PSMI 1.7 extensions are supported in Solaris Nevada. 38 */ 39 #define PSMI_1_7 40 41 #include <sys/processor.h> 42 #include <sys/time.h> 43 #include <sys/psm.h> 44 #include <sys/smp_impldefs.h> 45 #include <sys/cram.h> 46 #include <sys/acpi/acpi.h> 47 #include <sys/acpica.h> 48 #include <sys/psm_common.h> 49 #include <sys/apic.h> 50 #include <sys/pit.h> 51 #include <sys/ddi.h> 52 #include <sys/sunddi.h> 53 #include <sys/ddi_impldefs.h> 54 #include <sys/pci.h> 55 #include <sys/promif.h> 56 #include <sys/x86_archext.h> 57 #include <sys/cpc_impl.h> 58 #include <sys/uadmin.h> 59 #include <sys/panic.h> 60 #include <sys/debug.h> 61 #include <sys/archsystm.h> 62 #include <sys/trap.h> 63 #include <sys/machsystm.h> 64 #include <sys/sysmacros.h> 65 #include <sys/cpuvar.h> 66 #include <sys/rm_platter.h> 67 #include <sys/privregs.h> 68 #include <sys/note.h> 69 #include <sys/pci_intr_lib.h> 70 #include <sys/spl.h> 71 #include <sys/clock.h> 72 #include <sys/dditypes.h> 73 #include <sys/sunddi.h> 74 #include <sys/x_call.h> 75 #include <sys/reboot.h> 76 #include <sys/hpet.h> 77 #include <sys/apic_common.h> 78 #include <sys/apic_timer.h> 79 80 static void apic_record_ioapic_rdt(void *intrmap_private, 81 ioapic_rdt_t *irdt); 82 static void apic_record_msi(void *intrmap_private, msi_regs_t *mregs); 83 84 /* 85 * Common routines between pcplusmp & apix (taken from apic.c). 86 */ 87 88 int apic_clkinit(int); 89 hrtime_t apic_gethrtime(void); 90 void apic_send_ipi(int, int); 91 void apic_set_idlecpu(processorid_t); 92 void apic_unset_idlecpu(processorid_t); 93 void apic_shutdown(int, int); 94 void apic_preshutdown(int, int); 95 processorid_t apic_get_next_processorid(processorid_t); 96 97 hrtime_t apic_gettime(); 98 99 enum apic_ioapic_method_type apix_mul_ioapic_method = APIC_MUL_IOAPIC_PCPLUSMP; 100 101 /* Now the ones for Dynamic Interrupt distribution */ 102 int apic_enable_dynamic_migration = 0; 103 104 /* maximum loop count when sending Start IPIs. */ 105 int apic_sipi_max_loop_count = 0x1000; 106 107 /* 108 * These variables are frequently accessed in apic_intr_enter(), 109 * apic_intr_exit and apic_setspl, so group them together 110 */ 111 volatile uint32_t *apicadr = NULL; /* virtual addr of local APIC */ 112 int apic_setspl_delay = 1; /* apic_setspl - delay enable */ 113 int apic_clkvect; 114 115 /* vector at which error interrupts come in */ 116 int apic_errvect; 117 int apic_enable_error_intr = 1; 118 int apic_error_display_delay = 100; 119 120 /* vector at which performance counter overflow interrupts come in */ 121 int apic_cpcovf_vect; 122 int apic_enable_cpcovf_intr = 1; 123 124 /* vector at which CMCI interrupts come in */ 125 int apic_cmci_vect; 126 extern void cmi_cmci_trap(void); 127 128 lock_t apic_mode_switch_lock; 129 130 int apic_pir_vect; 131 132 /* 133 * Patchable global variables. 134 */ 135 int apic_forceload = 0; 136 137 int apic_coarse_hrtime = 1; /* 0 - use accurate slow gethrtime() */ 138 139 int apic_flat_model = 0; /* 0 - clustered. 1 - flat */ 140 int apic_panic_on_nmi = 0; 141 int apic_panic_on_apic_error = 0; 142 143 int apic_verbose = 0; /* 0x1ff */ 144 145 #ifdef DEBUG 146 int apic_debug = 0; 147 int apic_restrict_vector = 0; 148 149 int apic_debug_msgbuf[APIC_DEBUG_MSGBUFSIZE]; 150 int apic_debug_msgbufindex = 0; 151 152 #endif /* DEBUG */ 153 154 uint_t apic_nticks = 0; 155 uint_t apic_skipped_redistribute = 0; 156 157 uint_t last_count_read = 0; 158 lock_t apic_gethrtime_lock; 159 volatile int apic_hrtime_stamp = 0; 160 volatile hrtime_t apic_nsec_since_boot = 0; 161 162 static hrtime_t apic_last_hrtime = 0; 163 int apic_hrtime_error = 0; 164 int apic_remote_hrterr = 0; 165 int apic_num_nmis = 0; 166 int apic_apic_error = 0; 167 int apic_num_apic_errors = 0; 168 int apic_num_cksum_errors = 0; 169 170 int apic_error = 0; 171 172 static int apic_cmos_ssb_set = 0; 173 174 /* use to make sure only one cpu handles the nmi */ 175 lock_t apic_nmi_lock; 176 /* use to make sure only one cpu handles the error interrupt */ 177 lock_t apic_error_lock; 178 179 static struct { 180 uchar_t cntl; 181 uchar_t data; 182 } aspen_bmc[] = { 183 { CC_SMS_WR_START, 0x18 }, /* NetFn/LUN */ 184 { CC_SMS_WR_NEXT, 0x24 }, /* Cmd SET_WATCHDOG_TIMER */ 185 { CC_SMS_WR_NEXT, 0x84 }, /* DataByte 1: SMS/OS no log */ 186 { CC_SMS_WR_NEXT, 0x2 }, /* DataByte 2: Power Down */ 187 { CC_SMS_WR_NEXT, 0x0 }, /* DataByte 3: no pre-timeout */ 188 { CC_SMS_WR_NEXT, 0x0 }, /* DataByte 4: timer expir. */ 189 { CC_SMS_WR_NEXT, 0xa }, /* DataByte 5: init countdown */ 190 { CC_SMS_WR_END, 0x0 }, /* DataByte 6: init countdown */ 191 192 { CC_SMS_WR_START, 0x18 }, /* NetFn/LUN */ 193 { CC_SMS_WR_END, 0x22 } /* Cmd RESET_WATCHDOG_TIMER */ 194 }; 195 196 static struct { 197 int port; 198 uchar_t data; 199 } sitka_bmc[] = { 200 { SMS_COMMAND_REGISTER, SMS_WRITE_START }, 201 { SMS_DATA_REGISTER, 0x18 }, /* NetFn/LUN */ 202 { SMS_DATA_REGISTER, 0x24 }, /* Cmd SET_WATCHDOG_TIMER */ 203 { SMS_DATA_REGISTER, 0x84 }, /* DataByte 1: SMS/OS no log */ 204 { SMS_DATA_REGISTER, 0x2 }, /* DataByte 2: Power Down */ 205 { SMS_DATA_REGISTER, 0x0 }, /* DataByte 3: no pre-timeout */ 206 { SMS_DATA_REGISTER, 0x0 }, /* DataByte 4: timer expir. */ 207 { SMS_DATA_REGISTER, 0xa }, /* DataByte 5: init countdown */ 208 { SMS_COMMAND_REGISTER, SMS_WRITE_END }, 209 { SMS_DATA_REGISTER, 0x0 }, /* DataByte 6: init countdown */ 210 211 { SMS_COMMAND_REGISTER, SMS_WRITE_START }, 212 { SMS_DATA_REGISTER, 0x18 }, /* NetFn/LUN */ 213 { SMS_COMMAND_REGISTER, SMS_WRITE_END }, 214 { SMS_DATA_REGISTER, 0x22 } /* Cmd RESET_WATCHDOG_TIMER */ 215 }; 216 217 /* Patchable global variables. */ 218 int apic_kmdb_on_nmi = 0; /* 0 - no, 1 - yes enter kmdb */ 219 uint32_t apic_divide_reg_init = 0; /* 0 - divide by 2 */ 220 221 /* default apic ops without interrupt remapping */ 222 static apic_intrmap_ops_t apic_nointrmap_ops = { 223 (int (*)(int))return_instr, 224 (void (*)(int))return_instr, 225 (void (*)(void **, dev_info_t *, uint16_t, int, uchar_t))return_instr, 226 (void (*)(void *, void *, uint16_t, int))return_instr, 227 (void (*)(void **))return_instr, 228 apic_record_ioapic_rdt, 229 apic_record_msi, 230 }; 231 232 apic_intrmap_ops_t *apic_vt_ops = &apic_nointrmap_ops; 233 apic_cpus_info_t *apic_cpus = NULL; 234 cpuset_t apic_cpumask; 235 uint_t apic_picinit_called; 236 237 /* Flag to indicate that we need to shut down all processors */ 238 static uint_t apic_shutdown_processors; 239 240 /* 241 * Probe the ioapic method for apix module. Called in apic_probe_common() 242 */ 243 int 244 apic_ioapic_method_probe() 245 { 246 if (apix_enable == 0) 247 return (PSM_SUCCESS); 248 249 /* 250 * Set IOAPIC EOI handling method. The priority from low to high is: 251 * 1. IOxAPIC: with EOI register 252 * 2. IOMMU interrupt mapping 253 * 3. Mask-Before-EOI method for systems without boot 254 * interrupt routing, such as systems with only one IOAPIC; 255 * NVIDIA CK8-04/MCP55 systems; systems with bridge solution 256 * which disables the boot interrupt routing already. 257 * 4. Directed EOI 258 */ 259 if (apic_io_ver[0] >= 0x20) 260 apix_mul_ioapic_method = APIC_MUL_IOAPIC_IOXAPIC; 261 if ((apic_io_max == 1) || (apic_nvidia_io_max == apic_io_max)) 262 apix_mul_ioapic_method = APIC_MUL_IOAPIC_MASK; 263 if (apic_directed_EOI_supported()) 264 apix_mul_ioapic_method = APIC_MUL_IOAPIC_DEOI; 265 266 /* fall back to pcplusmp */ 267 if (apix_mul_ioapic_method == APIC_MUL_IOAPIC_PCPLUSMP) { 268 /* make sure apix is after pcplusmp in /etc/mach */ 269 apix_enable = 0; /* go ahead with pcplusmp install next */ 270 return (PSM_FAILURE); 271 } 272 273 return (PSM_SUCCESS); 274 } 275 276 /* 277 * handler for APIC Error interrupt. Just print a warning and continue 278 */ 279 int 280 apic_error_intr() 281 { 282 uint_t error0, error1, error; 283 uint_t i; 284 285 /* 286 * We need to write before read as per 7.4.17 of system prog manual. 287 * We do both and or the results to be safe 288 */ 289 error0 = apic_reg_ops->apic_read(APIC_ERROR_STATUS); 290 apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0); 291 error1 = apic_reg_ops->apic_read(APIC_ERROR_STATUS); 292 error = error0 | error1; 293 294 /* 295 * Clear the APIC error status (do this on all cpus that enter here) 296 * (two writes are required due to the semantics of accessing the 297 * error status register.) 298 */ 299 apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0); 300 apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0); 301 302 /* 303 * Prevent more than 1 CPU from handling error interrupt causing 304 * double printing (interleave of characters from multiple 305 * CPU's when using prom_printf) 306 */ 307 if (lock_try(&apic_error_lock) == 0) 308 return (error ? DDI_INTR_CLAIMED : DDI_INTR_UNCLAIMED); 309 if (error) { 310 #if DEBUG 311 if (apic_debug) 312 debug_enter("pcplusmp: APIC Error interrupt received"); 313 #endif /* DEBUG */ 314 if (apic_panic_on_apic_error) 315 cmn_err(CE_PANIC, 316 "APIC Error interrupt on CPU %d. Status = %x", 317 psm_get_cpu_id(), error); 318 else { 319 if ((error & ~APIC_CS_ERRORS) == 0) { 320 /* cksum error only */ 321 apic_error |= APIC_ERR_APIC_ERROR; 322 apic_apic_error |= error; 323 apic_num_apic_errors++; 324 apic_num_cksum_errors++; 325 } else { 326 /* 327 * prom_printf is the best shot we have of 328 * something which is problem free from 329 * high level/NMI type of interrupts 330 */ 331 prom_printf("APIC Error interrupt on CPU %d. " 332 "Status 0 = %x, Status 1 = %x\n", 333 psm_get_cpu_id(), error0, error1); 334 apic_error |= APIC_ERR_APIC_ERROR; 335 apic_apic_error |= error; 336 apic_num_apic_errors++; 337 for (i = 0; i < apic_error_display_delay; i++) { 338 tenmicrosec(); 339 } 340 /* 341 * provide more delay next time limited to 342 * roughly 1 clock tick time 343 */ 344 if (apic_error_display_delay < 500) 345 apic_error_display_delay *= 2; 346 } 347 } 348 lock_clear(&apic_error_lock); 349 return (DDI_INTR_CLAIMED); 350 } else { 351 lock_clear(&apic_error_lock); 352 return (DDI_INTR_UNCLAIMED); 353 } 354 } 355 356 /* 357 * Turn off the mask bit in the performance counter Local Vector Table entry. 358 */ 359 void 360 apic_cpcovf_mask_clear(void) 361 { 362 apic_reg_ops->apic_write(APIC_PCINT_VECT, 363 (apic_reg_ops->apic_read(APIC_PCINT_VECT) & ~APIC_LVT_MASK)); 364 } 365 366 static int 367 apic_cmci_enable(xc_arg_t arg1 __unused, xc_arg_t arg2 __unused, 368 xc_arg_t arg3 __unused) 369 { 370 apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect); 371 return (0); 372 } 373 374 static int 375 apic_cmci_disable(xc_arg_t arg1 __unused, xc_arg_t arg2 __unused, 376 xc_arg_t arg3 __unused) 377 { 378 apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect | AV_MASK); 379 return (0); 380 } 381 382 void 383 apic_cmci_setup(processorid_t cpuid, boolean_t enable) 384 { 385 cpuset_t cpu_set; 386 387 CPUSET_ONLY(cpu_set, cpuid); 388 389 if (enable) { 390 xc_call(0, 0, 0, CPUSET2BV(cpu_set), 391 (xc_func_t)apic_cmci_enable); 392 } else { 393 xc_call(0, 0, 0, CPUSET2BV(cpu_set), 394 (xc_func_t)apic_cmci_disable); 395 } 396 } 397 398 static void 399 apic_disable_local_apic(void) 400 { 401 apic_reg_ops->apic_write_task_reg(APIC_MASK_ALL); 402 apic_reg_ops->apic_write(APIC_LOCAL_TIMER, AV_MASK); 403 404 /* local intr reg 0 */ 405 apic_reg_ops->apic_write(APIC_INT_VECT0, AV_MASK); 406 407 /* disable NMI */ 408 apic_reg_ops->apic_write(APIC_INT_VECT1, AV_MASK); 409 410 /* and error interrupt */ 411 apic_reg_ops->apic_write(APIC_ERR_VECT, AV_MASK); 412 413 /* and perf counter intr */ 414 apic_reg_ops->apic_write(APIC_PCINT_VECT, AV_MASK); 415 416 apic_reg_ops->apic_write(APIC_SPUR_INT_REG, APIC_SPUR_INTR); 417 } 418 419 static void 420 apic_cpu_send_SIPI(processorid_t cpun, boolean_t start) 421 { 422 int loop_count; 423 uint32_t vector; 424 uint_t apicid; 425 ulong_t iflag; 426 427 apicid = apic_cpus[cpun].aci_local_id; 428 429 /* 430 * Interrupts on current CPU will be disabled during the 431 * steps in order to avoid unwanted side effects from 432 * executing interrupt handlers on a problematic BIOS. 433 */ 434 iflag = intr_clear(); 435 436 if (start) { 437 outb(CMOS_ADDR, SSB); 438 outb(CMOS_DATA, BIOS_SHUTDOWN); 439 } 440 441 /* 442 * According to X2APIC specification in section '2.3.5.1' of 443 * Interrupt Command Register Semantics, the semantics of 444 * programming the Interrupt Command Register to dispatch an interrupt 445 * is simplified. A single MSR write to the 64-bit ICR is required 446 * for dispatching an interrupt. Specifically, with the 64-bit MSR 447 * interface to ICR, system software is not required to check the 448 * status of the delivery status bit prior to writing to the ICR 449 * to send an IPI. With the removal of the Delivery Status bit, 450 * system software no longer has a reason to read the ICR. It remains 451 * readable only to aid in debugging. 452 */ 453 #ifdef DEBUG 454 APIC_AV_PENDING_SET(); 455 #else 456 if (apic_mode == LOCAL_APIC) { 457 APIC_AV_PENDING_SET(); 458 } 459 #endif /* DEBUG */ 460 461 /* for integrated - make sure there is one INIT IPI in buffer */ 462 /* for external - it will wake up the cpu */ 463 apic_reg_ops->apic_write_int_cmd(apicid, AV_ASSERT | AV_RESET); 464 465 /* If only 1 CPU is installed, PENDING bit will not go low */ 466 for (loop_count = apic_sipi_max_loop_count; loop_count; loop_count--) { 467 if (apic_mode == LOCAL_APIC && 468 apic_reg_ops->apic_read(APIC_INT_CMD1) & AV_PENDING) 469 apic_ret(); 470 else 471 break; 472 } 473 474 apic_reg_ops->apic_write_int_cmd(apicid, AV_DEASSERT | AV_RESET); 475 drv_usecwait(20000); /* 20 milli sec */ 476 477 if (apic_cpus[cpun].aci_local_ver >= APIC_INTEGRATED_VERS) { 478 /* integrated apic */ 479 480 vector = (rm_platter_pa >> MMU_PAGESHIFT) & 481 (APIC_VECTOR_MASK | APIC_IPL_MASK); 482 483 /* to offset the INIT IPI queue up in the buffer */ 484 apic_reg_ops->apic_write_int_cmd(apicid, vector | AV_STARTUP); 485 drv_usecwait(200); /* 20 micro sec */ 486 487 /* 488 * send the second SIPI (Startup IPI) as recommended by Intel 489 * software development manual. 490 */ 491 apic_reg_ops->apic_write_int_cmd(apicid, vector | AV_STARTUP); 492 drv_usecwait(200); /* 20 micro sec */ 493 } 494 495 intr_restore(iflag); 496 } 497 498 /*ARGSUSED1*/ 499 int 500 apic_cpu_start(processorid_t cpun, caddr_t arg __unused) 501 { 502 ASSERT(MUTEX_HELD(&cpu_lock)); 503 504 if (!apic_cpu_in_range(cpun)) { 505 return (EINVAL); 506 } 507 508 /* 509 * Switch to apic_common_send_ipi for safety during starting other CPUs. 510 */ 511 if (apic_mode == LOCAL_X2APIC) { 512 apic_switch_ipi_callback(B_TRUE); 513 } 514 515 apic_cmos_ssb_set = 1; 516 apic_cpu_send_SIPI(cpun, B_TRUE); 517 518 return (0); 519 } 520 521 /* 522 * Put CPU into halted state with interrupts disabled. 523 */ 524 /*ARGSUSED1*/ 525 int 526 apic_cpu_stop(processorid_t cpun, caddr_t arg __unused) 527 { 528 int rc; 529 cpu_t *cp; 530 extern cpuset_t cpu_ready_set; 531 extern void cpu_idle_intercept_cpu(cpu_t *cp); 532 533 ASSERT(MUTEX_HELD(&cpu_lock)); 534 535 if (!apic_cpu_in_range(cpun)) { 536 return (EINVAL); 537 } 538 if (apic_cpus[cpun].aci_local_ver < APIC_INTEGRATED_VERS) { 539 return (ENOTSUP); 540 } 541 542 cp = cpu_get(cpun); 543 ASSERT(cp != NULL); 544 ASSERT((cp->cpu_flags & CPU_OFFLINE) != 0); 545 ASSERT((cp->cpu_flags & CPU_QUIESCED) != 0); 546 ASSERT((cp->cpu_flags & CPU_ENABLE) == 0); 547 548 /* Clear CPU_READY flag to disable cross calls. */ 549 cp->cpu_flags &= ~CPU_READY; 550 CPUSET_ATOMIC_DEL(cpu_ready_set, cpun); 551 rc = xc_flush_cpu(cp); 552 if (rc != 0) { 553 CPUSET_ATOMIC_ADD(cpu_ready_set, cpun); 554 cp->cpu_flags |= CPU_READY; 555 return (rc); 556 } 557 558 /* Intercept target CPU at a safe point before powering it off. */ 559 cpu_idle_intercept_cpu(cp); 560 561 apic_cpu_send_SIPI(cpun, B_FALSE); 562 cp->cpu_flags &= ~CPU_RUNNING; 563 564 return (0); 565 } 566 567 int 568 apic_cpu_ops(psm_cpu_request_t *reqp) 569 { 570 if (reqp == NULL) { 571 return (EINVAL); 572 } 573 574 switch (reqp->pcr_cmd) { 575 case PSM_CPU_ADD: 576 return (apic_cpu_add(reqp)); 577 578 case PSM_CPU_REMOVE: 579 return (apic_cpu_remove(reqp)); 580 581 case PSM_CPU_STOP: 582 return (apic_cpu_stop(reqp->req.cpu_stop.cpuid, 583 reqp->req.cpu_stop.ctx)); 584 585 default: 586 return (ENOTSUP); 587 } 588 } 589 590 #ifdef DEBUG 591 int apic_break_on_cpu = 9; 592 int apic_stretch_interrupts = 0; 593 int apic_stretch_ISR = 1 << 3; /* IPL of 3 matches nothing now */ 594 #endif /* DEBUG */ 595 596 /* 597 * generates an interprocessor interrupt to another CPU. Any changes made to 598 * this routine must be accompanied by similar changes to 599 * apic_common_send_ipi(). 600 */ 601 void 602 apic_send_ipi(int cpun, int ipl) 603 { 604 int vector; 605 ulong_t flag; 606 607 vector = apic_resv_vector[ipl]; 608 609 ASSERT((vector >= APIC_BASE_VECT) && (vector <= APIC_SPUR_INTR)); 610 611 flag = intr_clear(); 612 613 APIC_AV_PENDING_SET(); 614 615 apic_reg_ops->apic_write_int_cmd(apic_cpus[cpun].aci_local_id, 616 vector); 617 618 intr_restore(flag); 619 } 620 621 void 622 apic_send_pir_ipi(processorid_t cpun) 623 { 624 const int vector = apic_pir_vect; 625 ulong_t flag; 626 627 ASSERT((vector >= APIC_BASE_VECT) && (vector <= APIC_SPUR_INTR)); 628 629 flag = intr_clear(); 630 631 /* Self-IPI for inducing PIR makes no sense. */ 632 if ((cpun != psm_get_cpu_id())) { 633 APIC_AV_PENDING_SET(); 634 apic_reg_ops->apic_write_int_cmd(apic_cpus[cpun].aci_local_id, 635 vector); 636 } 637 638 intr_restore(flag); 639 } 640 641 int 642 apic_get_pir_ipivect(void) 643 { 644 return (apic_pir_vect); 645 } 646 647 void 648 apic_set_idlecpu(processorid_t cpun __unused) 649 { 650 } 651 652 void 653 apic_unset_idlecpu(processorid_t cpun __unused) 654 { 655 } 656 657 658 void 659 apic_ret() 660 { 661 } 662 663 /* 664 * If apic_coarse_time == 1, then apic_gettime() is used instead of 665 * apic_gethrtime(). This is used for performance instead of accuracy. 666 */ 667 668 hrtime_t 669 apic_gettime() 670 { 671 int old_hrtime_stamp; 672 hrtime_t temp; 673 674 /* 675 * In one-shot mode, we do not keep time, so if anyone 676 * calls psm_gettime() directly, we vector over to 677 * gethrtime(). 678 * one-shot mode MUST NOT be enabled if this psm is the source of 679 * hrtime. 680 */ 681 682 if (apic_oneshot) 683 return (gethrtime()); 684 685 686 gettime_again: 687 while ((old_hrtime_stamp = apic_hrtime_stamp) & 1) 688 apic_ret(); 689 690 temp = apic_nsec_since_boot; 691 692 if (apic_hrtime_stamp != old_hrtime_stamp) { /* got an interrupt */ 693 goto gettime_again; 694 } 695 return (temp); 696 } 697 698 /* 699 * Here we return the number of nanoseconds since booting. Note every 700 * clock interrupt increments apic_nsec_since_boot by the appropriate 701 * amount. 702 */ 703 hrtime_t 704 apic_gethrtime(void) 705 { 706 int curr_timeval, countval, elapsed_ticks; 707 int old_hrtime_stamp, status; 708 hrtime_t temp; 709 uint32_t cpun; 710 ulong_t oflags; 711 712 /* 713 * In one-shot mode, we do not keep time, so if anyone 714 * calls psm_gethrtime() directly, we vector over to 715 * gethrtime(). 716 * one-shot mode MUST NOT be enabled if this psm is the source of 717 * hrtime. 718 */ 719 720 if (apic_oneshot) 721 return (gethrtime()); 722 723 oflags = intr_clear(); /* prevent migration */ 724 725 cpun = apic_reg_ops->apic_read(APIC_LID_REG); 726 if (apic_mode == LOCAL_APIC) 727 cpun >>= APIC_ID_BIT_OFFSET; 728 729 lock_set(&apic_gethrtime_lock); 730 731 gethrtime_again: 732 while ((old_hrtime_stamp = apic_hrtime_stamp) & 1) 733 apic_ret(); 734 735 /* 736 * Check to see which CPU we are on. Note the time is kept on 737 * the local APIC of CPU 0. If on CPU 0, simply read the current 738 * counter. If on another CPU, issue a remote read command to CPU 0. 739 */ 740 if (cpun == apic_cpus[0].aci_local_id) { 741 countval = apic_reg_ops->apic_read(APIC_CURR_COUNT); 742 } else { 743 #ifdef DEBUG 744 APIC_AV_PENDING_SET(); 745 #else 746 if (apic_mode == LOCAL_APIC) 747 APIC_AV_PENDING_SET(); 748 #endif /* DEBUG */ 749 750 apic_reg_ops->apic_write_int_cmd( 751 apic_cpus[0].aci_local_id, APIC_CURR_ADD | AV_REMOTE); 752 753 while ((status = apic_reg_ops->apic_read(APIC_INT_CMD1)) 754 & AV_READ_PENDING) { 755 apic_ret(); 756 } 757 758 if (status & AV_REMOTE_STATUS) /* 1 = valid */ 759 countval = apic_reg_ops->apic_read(APIC_REMOTE_READ); 760 else { /* 0 = invalid */ 761 apic_remote_hrterr++; 762 /* 763 * return last hrtime right now, will need more 764 * testing if change to retry 765 */ 766 temp = apic_last_hrtime; 767 768 lock_clear(&apic_gethrtime_lock); 769 770 intr_restore(oflags); 771 772 return (temp); 773 } 774 } 775 if (countval > last_count_read) 776 countval = 0; 777 else 778 last_count_read = countval; 779 780 elapsed_ticks = apic_hertz_count - countval; 781 782 curr_timeval = APIC_TICKS_TO_NSECS(elapsed_ticks); 783 temp = apic_nsec_since_boot + curr_timeval; 784 785 if (apic_hrtime_stamp != old_hrtime_stamp) { /* got an interrupt */ 786 /* we might have clobbered last_count_read. Restore it */ 787 last_count_read = apic_hertz_count; 788 goto gethrtime_again; 789 } 790 791 if (temp < apic_last_hrtime) { 792 /* return last hrtime if error occurs */ 793 apic_hrtime_error++; 794 temp = apic_last_hrtime; 795 } 796 else 797 apic_last_hrtime = temp; 798 799 lock_clear(&apic_gethrtime_lock); 800 intr_restore(oflags); 801 802 return (temp); 803 } 804 805 /* apic NMI handler */ 806 uint_t 807 apic_nmi_intr(caddr_t arg __unused, caddr_t arg1 __unused) 808 { 809 nmi_action_t action = nmi_action; 810 811 if (apic_shutdown_processors) { 812 apic_disable_local_apic(); 813 return (DDI_INTR_CLAIMED); 814 } 815 816 apic_error |= APIC_ERR_NMI; 817 818 if (!lock_try(&apic_nmi_lock)) 819 return (DDI_INTR_CLAIMED); 820 apic_num_nmis++; 821 822 /* 823 * "nmi_action" always over-rides the older way of doing this, unless we 824 * can't actually drop into kmdb when requested. 825 */ 826 if (action == NMI_ACTION_KMDB && !psm_debugger()) 827 action = NMI_ACTION_UNSET; 828 829 if (action == NMI_ACTION_UNSET) { 830 if (apic_kmdb_on_nmi && psm_debugger()) 831 action = NMI_ACTION_KMDB; 832 else if (apic_panic_on_nmi) 833 action = NMI_ACTION_PANIC; 834 else 835 action = NMI_ACTION_IGNORE; 836 } 837 838 switch (action) { 839 case NMI_ACTION_IGNORE: 840 /* 841 * prom_printf is the best shot we have of something which is 842 * problem free from high level/NMI type of interrupts 843 */ 844 prom_printf("NMI received\n"); 845 break; 846 847 case NMI_ACTION_PANIC: 848 /* Keep panic from entering kmdb. */ 849 nopanicdebug = 1; 850 panic("NMI received\n"); 851 break; 852 853 case NMI_ACTION_KMDB: 854 default: 855 debug_enter("NMI received: entering kmdb\n"); 856 break; 857 } 858 859 lock_clear(&apic_nmi_lock); 860 return (DDI_INTR_CLAIMED); 861 } 862 863 processorid_t 864 apic_get_next_processorid(processorid_t cpu_id) 865 { 866 867 int i; 868 869 if (cpu_id == -1) 870 return ((processorid_t)0); 871 872 for (i = cpu_id + 1; i < NCPU; i++) { 873 if (apic_cpu_in_range(i)) 874 return (i); 875 } 876 877 return ((processorid_t)-1); 878 } 879 880 int 881 apic_cpu_add(psm_cpu_request_t *reqp) 882 { 883 int i, rv = 0; 884 ulong_t iflag; 885 boolean_t first = B_TRUE; 886 uchar_t localver = 0; 887 uint32_t localid, procid; 888 processorid_t cpuid = (processorid_t)-1; 889 mach_cpu_add_arg_t *ap; 890 891 ASSERT(reqp != NULL); 892 reqp->req.cpu_add.cpuid = (processorid_t)-1; 893 894 /* Check whether CPU hotplug is supported. */ 895 if (!plat_dr_support_cpu() || apic_max_nproc == -1) { 896 return (ENOTSUP); 897 } 898 899 ap = (mach_cpu_add_arg_t *)reqp->req.cpu_add.argp; 900 switch (ap->type) { 901 case MACH_CPU_ARG_LOCAL_APIC: 902 localid = ap->arg.apic.apic_id; 903 procid = ap->arg.apic.proc_id; 904 if (localid >= 255 || procid > 255) { 905 cmn_err(CE_WARN, 906 "!apic: apicid(%u) or procid(%u) is invalid.", 907 localid, procid); 908 return (EINVAL); 909 } 910 break; 911 912 case MACH_CPU_ARG_LOCAL_X2APIC: 913 localid = ap->arg.apic.apic_id; 914 procid = ap->arg.apic.proc_id; 915 if (localid >= UINT32_MAX) { 916 cmn_err(CE_WARN, 917 "!apic: x2apicid(%u) is invalid.", localid); 918 return (EINVAL); 919 } else if (localid >= 255 && apic_mode == LOCAL_APIC) { 920 cmn_err(CE_WARN, "!apic: system is in APIC mode, " 921 "can't support x2APIC processor."); 922 return (ENOTSUP); 923 } 924 break; 925 926 default: 927 cmn_err(CE_WARN, 928 "!apic: unknown argument type %d to apic_cpu_add().", 929 ap->type); 930 return (EINVAL); 931 } 932 933 /* Use apic_ioapic_lock to sync with apic_get_next_bind_cpu. */ 934 iflag = intr_clear(); 935 lock_set(&apic_ioapic_lock); 936 937 /* Check whether local APIC id already exists. */ 938 for (i = 0; i < apic_nproc; i++) { 939 if (!CPU_IN_SET(apic_cpumask, i)) 940 continue; 941 if (apic_cpus[i].aci_local_id == localid) { 942 lock_clear(&apic_ioapic_lock); 943 intr_restore(iflag); 944 cmn_err(CE_WARN, 945 "!apic: local apic id %u already exists.", 946 localid); 947 return (EEXIST); 948 } else if (apic_cpus[i].aci_processor_id == procid) { 949 lock_clear(&apic_ioapic_lock); 950 intr_restore(iflag); 951 cmn_err(CE_WARN, 952 "!apic: processor id %u already exists.", 953 (int)procid); 954 return (EEXIST); 955 } 956 957 /* 958 * There's no local APIC version number available in MADT table, 959 * so assume that all CPUs are homogeneous and use local APIC 960 * version number of the first existing CPU. 961 */ 962 if (first) { 963 first = B_FALSE; 964 localver = apic_cpus[i].aci_local_ver; 965 } 966 } 967 ASSERT(first == B_FALSE); 968 969 /* 970 * Try to assign the same cpuid if APIC id exists in the dirty cache. 971 */ 972 for (i = 0; i < apic_max_nproc; i++) { 973 if (CPU_IN_SET(apic_cpumask, i)) { 974 ASSERT((apic_cpus[i].aci_status & APIC_CPU_FREE) == 0); 975 continue; 976 } 977 ASSERT(apic_cpus[i].aci_status & APIC_CPU_FREE); 978 if ((apic_cpus[i].aci_status & APIC_CPU_DIRTY) && 979 apic_cpus[i].aci_local_id == localid && 980 apic_cpus[i].aci_processor_id == procid) { 981 cpuid = i; 982 break; 983 } 984 } 985 986 /* Avoid the dirty cache and allocate fresh slot if possible. */ 987 if (cpuid == (processorid_t)-1) { 988 for (i = 0; i < apic_max_nproc; i++) { 989 if ((apic_cpus[i].aci_status & APIC_CPU_FREE) && 990 (apic_cpus[i].aci_status & APIC_CPU_DIRTY) == 0) { 991 cpuid = i; 992 break; 993 } 994 } 995 } 996 997 /* Try to find any free slot as last resort. */ 998 if (cpuid == (processorid_t)-1) { 999 for (i = 0; i < apic_max_nproc; i++) { 1000 if (apic_cpus[i].aci_status & APIC_CPU_FREE) { 1001 cpuid = i; 1002 break; 1003 } 1004 } 1005 } 1006 1007 if (cpuid == (processorid_t)-1) { 1008 lock_clear(&apic_ioapic_lock); 1009 intr_restore(iflag); 1010 cmn_err(CE_NOTE, 1011 "!apic: failed to allocate cpu id for processor %u.", 1012 procid); 1013 rv = EAGAIN; 1014 } else if (ACPI_FAILURE(acpica_map_cpu(cpuid, procid))) { 1015 lock_clear(&apic_ioapic_lock); 1016 intr_restore(iflag); 1017 cmn_err(CE_NOTE, 1018 "!apic: failed to build mapping for processor %u.", 1019 procid); 1020 rv = EBUSY; 1021 } else { 1022 ASSERT(cpuid >= 0 && cpuid < NCPU); 1023 ASSERT(cpuid < apic_max_nproc && cpuid < max_ncpus); 1024 bzero(&apic_cpus[cpuid], sizeof (apic_cpus[0])); 1025 apic_cpus[cpuid].aci_processor_id = procid; 1026 apic_cpus[cpuid].aci_local_id = localid; 1027 apic_cpus[cpuid].aci_local_ver = localver; 1028 CPUSET_ATOMIC_ADD(apic_cpumask, cpuid); 1029 if (cpuid >= apic_nproc) { 1030 apic_nproc = cpuid + 1; 1031 } 1032 lock_clear(&apic_ioapic_lock); 1033 intr_restore(iflag); 1034 reqp->req.cpu_add.cpuid = cpuid; 1035 } 1036 1037 return (rv); 1038 } 1039 1040 int 1041 apic_cpu_remove(psm_cpu_request_t *reqp) 1042 { 1043 int i; 1044 ulong_t iflag; 1045 processorid_t cpuid; 1046 1047 /* Check whether CPU hotplug is supported. */ 1048 if (!plat_dr_support_cpu() || apic_max_nproc == -1) { 1049 return (ENOTSUP); 1050 } 1051 1052 cpuid = reqp->req.cpu_remove.cpuid; 1053 1054 /* Use apic_ioapic_lock to sync with apic_get_next_bind_cpu. */ 1055 iflag = intr_clear(); 1056 lock_set(&apic_ioapic_lock); 1057 1058 if (!apic_cpu_in_range(cpuid)) { 1059 lock_clear(&apic_ioapic_lock); 1060 intr_restore(iflag); 1061 cmn_err(CE_WARN, 1062 "!apic: cpuid %d doesn't exist in apic_cpus array.", 1063 cpuid); 1064 return (ENODEV); 1065 } 1066 ASSERT((apic_cpus[cpuid].aci_status & APIC_CPU_FREE) == 0); 1067 1068 if (ACPI_FAILURE(acpica_unmap_cpu(cpuid))) { 1069 lock_clear(&apic_ioapic_lock); 1070 intr_restore(iflag); 1071 return (ENOENT); 1072 } 1073 1074 if (cpuid == apic_nproc - 1) { 1075 /* 1076 * We are removing the highest numbered cpuid so we need to 1077 * find the next highest cpuid as the new value for apic_nproc. 1078 */ 1079 for (i = apic_nproc; i > 0; i--) { 1080 if (CPU_IN_SET(apic_cpumask, i - 1)) { 1081 apic_nproc = i; 1082 break; 1083 } 1084 } 1085 /* at least one CPU left */ 1086 ASSERT(i > 0); 1087 } 1088 CPUSET_ATOMIC_DEL(apic_cpumask, cpuid); 1089 /* mark slot as free and keep it in the dirty cache */ 1090 apic_cpus[cpuid].aci_status = APIC_CPU_FREE | APIC_CPU_DIRTY; 1091 1092 lock_clear(&apic_ioapic_lock); 1093 intr_restore(iflag); 1094 1095 return (0); 1096 } 1097 1098 /* 1099 * Return the number of ticks the APIC decrements in SF nanoseconds. 1100 * The fixed-frequency PIT (aka 8254) is used for the measurement. 1101 */ 1102 static uint64_t 1103 apic_calibrate_impl() 1104 { 1105 uint8_t pit_tick_lo; 1106 uint16_t pit_tick, target_pit_tick, pit_ticks_adj; 1107 uint32_t pit_ticks; 1108 uint32_t start_apic_tick, end_apic_tick, apic_ticks; 1109 ulong_t iflag; 1110 1111 apic_reg_ops->apic_write(APIC_DIVIDE_REG, apic_divide_reg_init); 1112 apic_reg_ops->apic_write(APIC_INIT_COUNT, APIC_MAXVAL); 1113 1114 iflag = intr_clear(); 1115 1116 /* 1117 * Put the PIT in mode 0, "Interrupt On Terminal Count": 1118 */ 1119 outb(PITCTL_PORT, PIT_C0 | PIT_LOADMODE | PIT_ENDSIGMODE); 1120 1121 /* 1122 * The PIT counts down and then the counter value wraps around. Load 1123 * the maximum counter value: 1124 */ 1125 outb(PITCTR0_PORT, 0xFF); 1126 outb(PITCTR0_PORT, 0xFF); 1127 1128 do { 1129 pit_tick_lo = inb(PITCTR0_PORT); 1130 pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo; 1131 } while (pit_tick < APIC_TIME_MIN || 1132 pit_tick_lo <= APIC_LB_MIN || pit_tick_lo >= APIC_LB_MAX); 1133 1134 /* 1135 * Wait for the PIT to decrement by 5 ticks to ensure 1136 * we didn't start in the middle of a tick. 1137 * Compare with 0x10 for the wrap around case. 1138 */ 1139 target_pit_tick = pit_tick - 5; 1140 do { 1141 pit_tick_lo = inb(PITCTR0_PORT); 1142 pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo; 1143 } while (pit_tick > target_pit_tick || pit_tick_lo < 0x10); 1144 1145 start_apic_tick = apic_reg_ops->apic_read(APIC_CURR_COUNT); 1146 1147 /* 1148 * Wait for the PIT to decrement by APIC_TIME_COUNT ticks 1149 */ 1150 target_pit_tick = pit_tick - APIC_TIME_COUNT; 1151 do { 1152 pit_tick_lo = inb(PITCTR0_PORT); 1153 pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo; 1154 } while (pit_tick > target_pit_tick || pit_tick_lo < 0x10); 1155 1156 end_apic_tick = apic_reg_ops->apic_read(APIC_CURR_COUNT); 1157 1158 intr_restore(iflag); 1159 1160 apic_ticks = start_apic_tick - end_apic_tick; 1161 1162 /* The PIT might have decremented by more ticks than planned */ 1163 pit_ticks_adj = target_pit_tick - pit_tick; 1164 /* total number of PIT ticks corresponding to apic_ticks */ 1165 pit_ticks = APIC_TIME_COUNT + pit_ticks_adj; 1166 1167 /* 1168 * Determine the number of nanoseconds per APIC clock tick 1169 * and then determine how many APIC ticks to interrupt at the 1170 * desired frequency 1171 * apic_ticks / (pitticks / PIT_HZ) = apic_ticks_per_s 1172 * (apic_ticks * PIT_HZ) / pitticks = apic_ticks_per_s 1173 * apic_ticks_per_ns = (apic_ticks * PIT_HZ) / (pitticks * 10^9) 1174 * apic_ticks_per_SFns = 1175 * (SF * apic_ticks * PIT_HZ) / (pitticks * 10^9) 1176 */ 1177 return ((SF * apic_ticks * PIT_HZ) / ((uint64_t)pit_ticks * NANOSEC)); 1178 } 1179 1180 /* 1181 * It was found empirically that 5 measurements seem sufficient to give a good 1182 * accuracy. Most spurious measurements are higher than the target value thus 1183 * we eliminate up to 2/5 spurious measurements. 1184 */ 1185 #define APIC_CALIBRATE_MEASUREMENTS 5 1186 1187 #define APIC_CALIBRATE_PERCENT_OFF_WARNING 10 1188 1189 /* 1190 * Return the number of ticks the APIC decrements in SF nanoseconds. 1191 * Several measurements are taken to filter out outliers. 1192 */ 1193 uint64_t 1194 apic_calibrate() 1195 { 1196 uint64_t measurements[APIC_CALIBRATE_MEASUREMENTS]; 1197 int median_idx; 1198 uint64_t median; 1199 1200 /* 1201 * When running under a virtual machine, the emulated PIT and APIC 1202 * counters do not always return the right values and can roll over. 1203 * Those spurious measurements are relatively rare but could 1204 * significantly affect the calibration. 1205 * Therefore we take several measurements and then keep the median. 1206 * The median is preferred to the average here as we only want to 1207 * discard outliers. 1208 */ 1209 for (int i = 0; i < APIC_CALIBRATE_MEASUREMENTS; i++) 1210 measurements[i] = apic_calibrate_impl(); 1211 1212 /* 1213 * sort results and retrieve median. 1214 */ 1215 for (int i = 0; i < APIC_CALIBRATE_MEASUREMENTS; i++) { 1216 for (int j = i + 1; j < APIC_CALIBRATE_MEASUREMENTS; j++) { 1217 if (measurements[j] < measurements[i]) { 1218 uint64_t tmp = measurements[i]; 1219 measurements[i] = measurements[j]; 1220 measurements[j] = tmp; 1221 } 1222 } 1223 } 1224 median_idx = APIC_CALIBRATE_MEASUREMENTS / 2; 1225 median = measurements[median_idx]; 1226 1227 #if (APIC_CALIBRATE_MEASUREMENTS >= 3) 1228 /* 1229 * Check that measurements are consistent. Post a warning 1230 * if the three middle values are not close to each other. 1231 */ 1232 uint64_t delta_warn = median * 1233 APIC_CALIBRATE_PERCENT_OFF_WARNING / 100; 1234 if ((median - measurements[median_idx - 1]) > delta_warn || 1235 (measurements[median_idx + 1] - median) > delta_warn) { 1236 cmn_err(CE_WARN, "apic_calibrate measurements lack " 1237 "precision: %llu, %llu, %llu.", 1238 (u_longlong_t)measurements[median_idx - 1], 1239 (u_longlong_t)median, 1240 (u_longlong_t)measurements[median_idx + 1]); 1241 } 1242 #endif 1243 1244 return (median); 1245 } 1246 1247 /* 1248 * Initialise the APIC timer on the local APIC of CPU 0 to the desired 1249 * frequency. Note at this stage in the boot sequence, the boot processor 1250 * is the only active processor. 1251 * hertz value of 0 indicates a one-shot mode request. In this case 1252 * the function returns the resolution (in nanoseconds) for the hardware 1253 * timer interrupt. If one-shot mode capability is not available, 1254 * the return value will be 0. apic_enable_oneshot is a global switch 1255 * for disabling the functionality. 1256 * A non-zero positive value for hertz indicates a periodic mode request. 1257 * In this case the hardware will be programmed to generate clock interrupts 1258 * at hertz frequency and returns the resolution of interrupts in 1259 * nanosecond. 1260 */ 1261 1262 int 1263 apic_clkinit(int hertz) 1264 { 1265 int ret; 1266 1267 apic_int_busy_mark = (apic_int_busy_mark * 1268 apic_sample_factor_redistribution) / 100; 1269 apic_int_free_mark = (apic_int_free_mark * 1270 apic_sample_factor_redistribution) / 100; 1271 apic_diff_for_redistribution = (apic_diff_for_redistribution * 1272 apic_sample_factor_redistribution) / 100; 1273 1274 ret = apic_timer_init(hertz); 1275 return (ret); 1276 1277 } 1278 1279 /* 1280 * apic_preshutdown: 1281 * Called early in shutdown whilst we can still access filesystems to do 1282 * things like loading modules which will be required to complete shutdown 1283 * after filesystems are all unmounted. 1284 */ 1285 void 1286 apic_preshutdown(int cmd __unused, int fcn __unused) 1287 { 1288 APIC_VERBOSE_POWEROFF(("apic_preshutdown(%d,%d); m=%d a=%d\n", 1289 cmd, fcn, apic_poweroff_method, apic_enable_acpi)); 1290 } 1291 1292 void 1293 apic_shutdown(int cmd, int fcn) 1294 { 1295 int restarts, attempts; 1296 int i; 1297 uchar_t byte; 1298 ulong_t iflag; 1299 1300 hpet_acpi_fini(); 1301 1302 /* Send NMI to all CPUs except self to do per processor shutdown */ 1303 iflag = intr_clear(); 1304 #ifdef DEBUG 1305 APIC_AV_PENDING_SET(); 1306 #else 1307 if (apic_mode == LOCAL_APIC) 1308 APIC_AV_PENDING_SET(); 1309 #endif /* DEBUG */ 1310 apic_shutdown_processors = 1; 1311 apic_reg_ops->apic_write(APIC_INT_CMD1, 1312 AV_NMI | AV_LEVEL | AV_SH_ALL_EXCSELF); 1313 1314 /* restore cmos shutdown byte before reboot */ 1315 if (apic_cmos_ssb_set) { 1316 outb(CMOS_ADDR, SSB); 1317 outb(CMOS_DATA, 0); 1318 } 1319 1320 ioapic_disable_redirection(); 1321 1322 /* disable apic mode if imcr present */ 1323 if (apic_imcrp) { 1324 outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT); 1325 outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_PIC); 1326 } 1327 1328 apic_disable_local_apic(); 1329 1330 intr_restore(iflag); 1331 1332 /* remainder of function is for shutdown cases only */ 1333 if (cmd != A_SHUTDOWN) 1334 return; 1335 1336 /* 1337 * Switch system back into Legacy-Mode if using ACPI and 1338 * not powering-off. Some BIOSes need to remain in ACPI-mode 1339 * for power-off to succeed (Dell Dimension 4600) 1340 * Do not disable ACPI while doing fastreboot 1341 */ 1342 if (apic_enable_acpi && fcn != AD_POWEROFF && fcn != AD_FASTREBOOT) 1343 (void) AcpiDisable(); 1344 1345 if (fcn == AD_FASTREBOOT) { 1346 apic_reg_ops->apic_write(APIC_INT_CMD1, 1347 AV_ASSERT | AV_RESET | AV_SH_ALL_EXCSELF); 1348 } 1349 1350 /* remainder of function is for shutdown+poweroff case only */ 1351 if (fcn != AD_POWEROFF) 1352 return; 1353 1354 switch (apic_poweroff_method) { 1355 case APIC_POWEROFF_VIA_RTC: 1356 1357 /* select the extended NVRAM bank in the RTC */ 1358 outb(CMOS_ADDR, RTC_REGA); 1359 byte = inb(CMOS_DATA); 1360 outb(CMOS_DATA, (byte | EXT_BANK)); 1361 1362 outb(CMOS_ADDR, PFR_REG); 1363 1364 /* for Predator must toggle the PAB bit */ 1365 byte = inb(CMOS_DATA); 1366 1367 /* 1368 * clear power active bar, wakeup alarm and 1369 * kickstart 1370 */ 1371 byte &= ~(PAB_CBIT | WF_FLAG | KS_FLAG); 1372 outb(CMOS_DATA, byte); 1373 1374 /* delay before next write */ 1375 drv_usecwait(1000); 1376 1377 /* for S40 the following would suffice */ 1378 byte = inb(CMOS_DATA); 1379 1380 /* power active bar control bit */ 1381 byte |= PAB_CBIT; 1382 outb(CMOS_DATA, byte); 1383 1384 break; 1385 1386 case APIC_POWEROFF_VIA_ASPEN_BMC: 1387 restarts = 0; 1388 restart_aspen_bmc: 1389 if (++restarts == 3) 1390 break; 1391 attempts = 0; 1392 do { 1393 byte = inb(MISMIC_FLAG_REGISTER); 1394 byte &= MISMIC_BUSY_MASK; 1395 if (byte != 0) { 1396 drv_usecwait(1000); 1397 if (attempts >= 3) 1398 goto restart_aspen_bmc; 1399 ++attempts; 1400 } 1401 } while (byte != 0); 1402 outb(MISMIC_CNTL_REGISTER, CC_SMS_GET_STATUS); 1403 byte = inb(MISMIC_FLAG_REGISTER); 1404 byte |= 0x1; 1405 outb(MISMIC_FLAG_REGISTER, byte); 1406 i = 0; 1407 for (; i < (sizeof (aspen_bmc)/sizeof (aspen_bmc[0])); 1408 i++) { 1409 attempts = 0; 1410 do { 1411 byte = inb(MISMIC_FLAG_REGISTER); 1412 byte &= MISMIC_BUSY_MASK; 1413 if (byte != 0) { 1414 drv_usecwait(1000); 1415 if (attempts >= 3) 1416 goto restart_aspen_bmc; 1417 ++attempts; 1418 } 1419 } while (byte != 0); 1420 outb(MISMIC_CNTL_REGISTER, aspen_bmc[i].cntl); 1421 outb(MISMIC_DATA_REGISTER, aspen_bmc[i].data); 1422 byte = inb(MISMIC_FLAG_REGISTER); 1423 byte |= 0x1; 1424 outb(MISMIC_FLAG_REGISTER, byte); 1425 } 1426 break; 1427 1428 case APIC_POWEROFF_VIA_SITKA_BMC: 1429 restarts = 0; 1430 restart_sitka_bmc: 1431 if (++restarts == 3) 1432 break; 1433 attempts = 0; 1434 do { 1435 byte = inb(SMS_STATUS_REGISTER); 1436 byte &= SMS_STATE_MASK; 1437 if ((byte == SMS_READ_STATE) || 1438 (byte == SMS_WRITE_STATE)) { 1439 drv_usecwait(1000); 1440 if (attempts >= 3) 1441 goto restart_sitka_bmc; 1442 ++attempts; 1443 } 1444 } while ((byte == SMS_READ_STATE) || 1445 (byte == SMS_WRITE_STATE)); 1446 outb(SMS_COMMAND_REGISTER, SMS_GET_STATUS); 1447 i = 0; 1448 for (; i < (sizeof (sitka_bmc)/sizeof (sitka_bmc[0])); 1449 i++) { 1450 attempts = 0; 1451 do { 1452 byte = inb(SMS_STATUS_REGISTER); 1453 byte &= SMS_IBF_MASK; 1454 if (byte != 0) { 1455 drv_usecwait(1000); 1456 if (attempts >= 3) 1457 goto restart_sitka_bmc; 1458 ++attempts; 1459 } 1460 } while (byte != 0); 1461 outb(sitka_bmc[i].port, sitka_bmc[i].data); 1462 } 1463 break; 1464 1465 case APIC_POWEROFF_NONE: 1466 1467 /* If no APIC direct method, we will try using ACPI */ 1468 if (apic_enable_acpi) { 1469 if (acpi_poweroff() == 1) 1470 return; 1471 } else 1472 return; 1473 1474 break; 1475 } 1476 /* 1477 * Wait a limited time here for power to go off. 1478 * If the power does not go off, then there was a 1479 * problem and we should continue to the halt which 1480 * prints a message for the user to press a key to 1481 * reboot. 1482 */ 1483 drv_usecwait(7000000); /* wait seven seconds */ 1484 1485 } 1486 1487 cyclic_id_t apic_cyclic_id; 1488 1489 /* 1490 * The following functions are in the platform specific file so that they 1491 * can be different functions depending on whether we are running on 1492 * bare metal or a hypervisor. 1493 */ 1494 1495 /* 1496 * map an apic for memory-mapped access 1497 */ 1498 uint32_t * 1499 mapin_apic(uint32_t addr, size_t len, int flags) 1500 { 1501 return ((void *)psm_map_phys(addr, len, flags)); 1502 } 1503 1504 uint32_t * 1505 mapin_ioapic(uint32_t addr, size_t len, int flags) 1506 { 1507 return (mapin_apic(addr, len, flags)); 1508 } 1509 1510 /* 1511 * unmap an apic 1512 */ 1513 void 1514 mapout_apic(caddr_t addr, size_t len) 1515 { 1516 psm_unmap_phys(addr, len); 1517 } 1518 1519 void 1520 mapout_ioapic(caddr_t addr, size_t len) 1521 { 1522 mapout_apic(addr, len); 1523 } 1524 1525 uint32_t 1526 ioapic_read(int ioapic_ix, uint32_t reg) 1527 { 1528 volatile uint32_t *ioapic; 1529 1530 ioapic = apicioadr[ioapic_ix]; 1531 ioapic[APIC_IO_REG] = reg; 1532 return (ioapic[APIC_IO_DATA]); 1533 } 1534 1535 void 1536 ioapic_write(int ioapic_ix, uint32_t reg, uint32_t value) 1537 { 1538 volatile uint32_t *ioapic; 1539 1540 ioapic = apicioadr[ioapic_ix]; 1541 ioapic[APIC_IO_REG] = reg; 1542 ioapic[APIC_IO_DATA] = value; 1543 } 1544 1545 void 1546 ioapic_write_eoi(int ioapic_ix, uint32_t value) 1547 { 1548 volatile uint32_t *ioapic; 1549 1550 ioapic = apicioadr[ioapic_ix]; 1551 ioapic[APIC_IO_EOI] = value; 1552 } 1553 1554 /* 1555 * Round-robin algorithm to find the next CPU with interrupts enabled. 1556 * It can't share the same static variable apic_next_bind_cpu with 1557 * apic_get_next_bind_cpu(), since that will cause all interrupts to be 1558 * bound to CPU1 at boot time. During boot, only CPU0 is online with 1559 * interrupts enabled when apic_get_next_bind_cpu() and apic_find_cpu() 1560 * are called. However, the pcplusmp driver assumes that there will be 1561 * boot_ncpus CPUs configured eventually so it tries to distribute all 1562 * interrupts among CPU0 - CPU[boot_ncpus - 1]. Thus to prevent all 1563 * interrupts being targetted at CPU1, we need to use a dedicated static 1564 * variable for find_next_cpu() instead of sharing apic_next_bind_cpu. 1565 */ 1566 1567 processorid_t 1568 apic_find_cpu(int flag) 1569 { 1570 int i; 1571 static processorid_t acid = 0; 1572 1573 /* Find the first CPU with the passed-in flag set */ 1574 for (i = 0; i < apic_nproc; i++) { 1575 if (++acid >= apic_nproc) { 1576 acid = 0; 1577 } 1578 if (apic_cpu_in_range(acid) && 1579 (apic_cpus[acid].aci_status & flag)) { 1580 break; 1581 } 1582 } 1583 1584 ASSERT((apic_cpus[acid].aci_status & flag) != 0); 1585 return (acid); 1586 } 1587 1588 void 1589 apic_intrmap_init(int apic_mode) 1590 { 1591 int suppress_brdcst_eoi = 0; 1592 1593 /* 1594 * Intel Software Developer's Manual 3A, 10.12.7: 1595 * 1596 * Routing of device interrupts to local APIC units operating in 1597 * x2APIC mode requires use of the interrupt-remapping architecture 1598 * specified in the Intel Virtualization Technology for Directed 1599 * I/O, Revision 1.3. Because of this, BIOS must enumerate support 1600 * for and software must enable this interrupt remapping with 1601 * Extended Interrupt Mode Enabled before it enabling x2APIC mode in 1602 * the local APIC units. 1603 * 1604 * 1605 * In other words, to use the APIC in x2APIC mode, we need interrupt 1606 * remapping. Since we don't start up the IOMMU by default, we 1607 * won't be able to do any interrupt remapping and therefore have to 1608 * use the APIC in traditional 'local APIC' mode with memory mapped 1609 * I/O. 1610 */ 1611 1612 if (psm_vt_ops != NULL) { 1613 if (((apic_intrmap_ops_t *)psm_vt_ops)-> 1614 apic_intrmap_init(apic_mode) == DDI_SUCCESS) { 1615 1616 apic_vt_ops = psm_vt_ops; 1617 1618 /* 1619 * We leverage the interrupt remapping engine to 1620 * suppress broadcast EOI; thus we must send the 1621 * directed EOI with the directed-EOI handler. 1622 */ 1623 if (apic_directed_EOI_supported() == 0) { 1624 suppress_brdcst_eoi = 1; 1625 } 1626 1627 apic_vt_ops->apic_intrmap_enable(suppress_brdcst_eoi); 1628 1629 if (apic_detect_x2apic()) { 1630 apic_enable_x2apic(); 1631 } 1632 1633 if (apic_directed_EOI_supported() == 0) { 1634 apic_set_directed_EOI_handler(); 1635 } 1636 } 1637 } 1638 } 1639 1640 static void 1641 apic_record_ioapic_rdt(void *intrmap_private __unused, ioapic_rdt_t *irdt) 1642 { 1643 irdt->ir_hi <<= APIC_ID_BIT_OFFSET; 1644 } 1645 1646 static void 1647 apic_record_msi(void *intrmap_private __unused, msi_regs_t *mregs) 1648 { 1649 mregs->mr_addr = MSI_ADDR_HDR | 1650 (MSI_ADDR_RH_FIXED << MSI_ADDR_RH_SHIFT) | 1651 (MSI_ADDR_DM_PHYSICAL << MSI_ADDR_DM_SHIFT) | 1652 (mregs->mr_addr << MSI_ADDR_DEST_SHIFT); 1653 mregs->mr_data = (MSI_DATA_TM_EDGE << MSI_DATA_TM_SHIFT) | 1654 mregs->mr_data; 1655 } 1656 1657 /* 1658 * Functions from apic_introp.c 1659 * 1660 * Those functions are used by apic_intr_ops(). 1661 */ 1662 1663 /* 1664 * MSI support flag: 1665 * reflects whether MSI is supported at APIC level 1666 * it can also be patched through /etc/system 1667 * 1668 * 0 = default value - don't know and need to call apic_check_msi_support() 1669 * to find out then set it accordingly 1670 * 1 = supported 1671 * -1 = not supported 1672 */ 1673 int apic_support_msi = 0; 1674 1675 /* Multiple vector support for MSI-X */ 1676 int apic_msix_enable = 1; 1677 1678 /* Multiple vector support for MSI */ 1679 int apic_multi_msi_enable = 1; 1680 1681 /* 1682 * Check whether the system supports MSI. 1683 * 1684 * MSI is required for PCI-E and for PCI versions later than 2.2, so if we find 1685 * a PCI-E bus or we find a PCI bus whose version we know is >= 2.2, then we 1686 * return PSM_SUCCESS to indicate this system supports MSI. 1687 * 1688 * (Currently the only way we check whether a given PCI bus supports >= 2.2 is 1689 * by detecting if we are running inside the KVM hypervisor, which guarantees 1690 * this version number.) 1691 */ 1692 int 1693 apic_check_msi_support() 1694 { 1695 dev_info_t *cdip; 1696 char dev_type[16]; 1697 int dev_len; 1698 int hwenv = get_hwenv(); 1699 1700 DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support:\n")); 1701 1702 /* 1703 * check whether the first level children of root_node have 1704 * PCI-E or PCI capability. 1705 */ 1706 for (cdip = ddi_get_child(ddi_root_node()); cdip != NULL; 1707 cdip = ddi_get_next_sibling(cdip)) { 1708 1709 DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: cdip: 0x%p," 1710 " driver: %s, binding: %s, nodename: %s\n", (void *)cdip, 1711 ddi_driver_name(cdip), ddi_binding_name(cdip), 1712 ddi_node_name(cdip))); 1713 dev_len = sizeof (dev_type); 1714 if (ddi_getlongprop_buf(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS, 1715 "device_type", (caddr_t)dev_type, &dev_len) 1716 != DDI_PROP_SUCCESS) 1717 continue; 1718 if (strcmp(dev_type, "pciex") == 0) 1719 return (PSM_SUCCESS); 1720 if (strcmp(dev_type, "pci") == 0 && 1721 (hwenv == HW_KVM || hwenv == HW_BHYVE)) 1722 return (PSM_SUCCESS); 1723 } 1724 1725 /* MSI is not supported on this system */ 1726 DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: no 'pciex' " 1727 "device_type found\n")); 1728 return (PSM_FAILURE); 1729 } 1730 1731 /* 1732 * apic_pci_msi_unconfigure: 1733 * 1734 * This and next two interfaces are copied from pci_intr_lib.c 1735 * Do ensure that these two files stay in sync. 1736 * These needed to be copied over here to avoid a deadlock situation on 1737 * certain mp systems that use MSI interrupts. 1738 * 1739 * IMPORTANT regards next three interfaces: 1740 * i) are called only for MSI/X interrupts. 1741 * ii) called with interrupts disabled, and must not block 1742 */ 1743 void 1744 apic_pci_msi_unconfigure(dev_info_t *rdip, int type, int inum) 1745 { 1746 ushort_t msi_ctrl; 1747 int cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip); 1748 ddi_acc_handle_t handle = i_ddi_get_pci_config_handle(rdip); 1749 1750 ASSERT((handle != NULL) && (cap_ptr != 0)); 1751 1752 if (type == DDI_INTR_TYPE_MSI) { 1753 msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL); 1754 msi_ctrl &= (~PCI_MSI_MME_MASK); 1755 pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl); 1756 pci_config_put32(handle, cap_ptr + PCI_MSI_ADDR_OFFSET, 0); 1757 1758 if (msi_ctrl & PCI_MSI_64BIT_MASK) { 1759 pci_config_put16(handle, 1760 cap_ptr + PCI_MSI_64BIT_DATA, 0); 1761 pci_config_put32(handle, 1762 cap_ptr + PCI_MSI_ADDR_OFFSET + 4, 0); 1763 } else { 1764 pci_config_put16(handle, 1765 cap_ptr + PCI_MSI_32BIT_DATA, 0); 1766 } 1767 1768 } else if (type == DDI_INTR_TYPE_MSIX) { 1769 uintptr_t off; 1770 uint32_t mask; 1771 ddi_intr_msix_t *msix_p = i_ddi_get_msix(rdip); 1772 1773 ASSERT(msix_p != NULL); 1774 1775 /* Offset into "inum"th entry in the MSI-X table & mask it */ 1776 off = (uintptr_t)msix_p->msix_tbl_addr + (inum * 1777 PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET; 1778 1779 mask = ddi_get32(msix_p->msix_tbl_hdl, (uint32_t *)off); 1780 1781 ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, (mask | 1)); 1782 1783 /* Offset into the "inum"th entry in the MSI-X table */ 1784 off = (uintptr_t)msix_p->msix_tbl_addr + 1785 (inum * PCI_MSIX_VECTOR_SIZE); 1786 1787 /* Reset the "data" and "addr" bits */ 1788 ddi_put32(msix_p->msix_tbl_hdl, 1789 (uint32_t *)(off + PCI_MSIX_DATA_OFFSET), 0); 1790 ddi_put64(msix_p->msix_tbl_hdl, (uint64_t *)off, 0); 1791 } 1792 } 1793 1794 /* 1795 * apic_pci_msi_disable_mode: 1796 */ 1797 void 1798 apic_pci_msi_disable_mode(dev_info_t *rdip, int type) 1799 { 1800 ushort_t msi_ctrl; 1801 int cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip); 1802 ddi_acc_handle_t handle = i_ddi_get_pci_config_handle(rdip); 1803 1804 ASSERT((handle != NULL) && (cap_ptr != 0)); 1805 1806 if (type == DDI_INTR_TYPE_MSI) { 1807 msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL); 1808 if (!(msi_ctrl & PCI_MSI_ENABLE_BIT)) 1809 return; 1810 1811 msi_ctrl &= ~PCI_MSI_ENABLE_BIT; /* MSI disable */ 1812 pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl); 1813 1814 } else if (type == DDI_INTR_TYPE_MSIX) { 1815 msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL); 1816 if (msi_ctrl & PCI_MSIX_ENABLE_BIT) { 1817 msi_ctrl &= ~PCI_MSIX_ENABLE_BIT; 1818 pci_config_put16(handle, cap_ptr + PCI_MSIX_CTRL, 1819 msi_ctrl); 1820 } 1821 } 1822 } 1823 1824 uint32_t 1825 apic_get_localapicid(uint32_t cpuid) 1826 { 1827 ASSERT(cpuid < apic_nproc && apic_cpus != NULL); 1828 1829 return (apic_cpus[cpuid].aci_local_id); 1830 } 1831 1832 uchar_t 1833 apic_get_ioapicid(uchar_t ioapicindex) 1834 { 1835 ASSERT(ioapicindex < MAX_IO_APIC); 1836 1837 return (apic_io_id[ioapicindex]); 1838 } 1839