1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 /* 26 * Copyright 2018 Joyent, Inc. 27 * Copyright (c) 2016, 2017 by Delphix. All rights reserved. 28 */ 29 30 /* 31 * PSMI 1.1 extensions are supported only in 2.6 and later versions. 32 * PSMI 1.2 extensions are supported only in 2.7 and later versions. 33 * PSMI 1.3 and 1.4 extensions are supported in Solaris 10. 34 * PSMI 1.5 extensions are supported in Solaris Nevada. 35 * PSMI 1.6 extensions are supported in Solaris Nevada. 36 * PSMI 1.7 extensions are supported in Solaris Nevada. 37 */ 38 #define PSMI_1_7 39 40 #include <sys/processor.h> 41 #include <sys/time.h> 42 #include <sys/psm.h> 43 #include <sys/smp_impldefs.h> 44 #include <sys/cram.h> 45 #include <sys/acpi/acpi.h> 46 #include <sys/acpica.h> 47 #include <sys/psm_common.h> 48 #include <sys/apic.h> 49 #include <sys/pit.h> 50 #include <sys/ddi.h> 51 #include <sys/sunddi.h> 52 #include <sys/ddi_impldefs.h> 53 #include <sys/pci.h> 54 #include <sys/promif.h> 55 #include <sys/x86_archext.h> 56 #include <sys/cpc_impl.h> 57 #include <sys/uadmin.h> 58 #include <sys/panic.h> 59 #include <sys/debug.h> 60 #include <sys/archsystm.h> 61 #include <sys/trap.h> 62 #include <sys/machsystm.h> 63 #include <sys/sysmacros.h> 64 #include <sys/cpuvar.h> 65 #include <sys/rm_platter.h> 66 #include <sys/privregs.h> 67 #include <sys/note.h> 68 #include <sys/pci_intr_lib.h> 69 #include <sys/spl.h> 70 #include <sys/clock.h> 71 #include <sys/dditypes.h> 72 #include <sys/sunddi.h> 73 #include <sys/x_call.h> 74 #include <sys/reboot.h> 75 #include <sys/hpet.h> 76 #include <sys/apic_common.h> 77 #include <sys/apic_timer.h> 78 79 static void apic_record_ioapic_rdt(void *intrmap_private, 80 ioapic_rdt_t *irdt); 81 static void apic_record_msi(void *intrmap_private, msi_regs_t *mregs); 82 83 /* 84 * Common routines between pcplusmp & apix (taken from apic.c). 85 */ 86 87 int apic_clkinit(int); 88 hrtime_t apic_gethrtime(void); 89 void apic_send_ipi(int, int); 90 void apic_set_idlecpu(processorid_t); 91 void apic_unset_idlecpu(processorid_t); 92 void apic_shutdown(int, int); 93 void apic_preshutdown(int, int); 94 processorid_t apic_get_next_processorid(processorid_t); 95 96 hrtime_t apic_gettime(); 97 98 enum apic_ioapic_method_type apix_mul_ioapic_method = APIC_MUL_IOAPIC_PCPLUSMP; 99 100 /* Now the ones for Dynamic Interrupt distribution */ 101 int apic_enable_dynamic_migration = 0; 102 103 /* maximum loop count when sending Start IPIs. */ 104 int apic_sipi_max_loop_count = 0x1000; 105 106 /* 107 * These variables are frequently accessed in apic_intr_enter(), 108 * apic_intr_exit and apic_setspl, so group them together 109 */ 110 volatile uint32_t *apicadr = NULL; /* virtual addr of local APIC */ 111 int apic_setspl_delay = 1; /* apic_setspl - delay enable */ 112 int apic_clkvect; 113 114 /* vector at which error interrupts come in */ 115 int apic_errvect; 116 int apic_enable_error_intr = 1; 117 int apic_error_display_delay = 100; 118 119 /* vector at which performance counter overflow interrupts come in */ 120 int apic_cpcovf_vect; 121 int apic_enable_cpcovf_intr = 1; 122 123 /* vector at which CMCI interrupts come in */ 124 int apic_cmci_vect; 125 extern int cmi_enable_cmci; 126 extern void cmi_cmci_trap(void); 127 128 kmutex_t cmci_cpu_setup_lock; /* protects cmci_cpu_setup_registered */ 129 int cmci_cpu_setup_registered; 130 131 lock_t apic_mode_switch_lock; 132 133 int apic_pir_vect; 134 135 /* 136 * Patchable global variables. 137 */ 138 int apic_forceload = 0; 139 140 int apic_coarse_hrtime = 1; /* 0 - use accurate slow gethrtime() */ 141 142 int apic_flat_model = 0; /* 0 - clustered. 1 - flat */ 143 int apic_panic_on_nmi = 0; 144 int apic_panic_on_apic_error = 0; 145 146 int apic_verbose = 0; /* 0x1ff */ 147 148 #ifdef DEBUG 149 int apic_debug = 0; 150 int apic_restrict_vector = 0; 151 152 int apic_debug_msgbuf[APIC_DEBUG_MSGBUFSIZE]; 153 int apic_debug_msgbufindex = 0; 154 155 #endif /* DEBUG */ 156 157 uint_t apic_nticks = 0; 158 uint_t apic_skipped_redistribute = 0; 159 160 uint_t last_count_read = 0; 161 lock_t apic_gethrtime_lock; 162 volatile int apic_hrtime_stamp = 0; 163 volatile hrtime_t apic_nsec_since_boot = 0; 164 165 static hrtime_t apic_last_hrtime = 0; 166 int apic_hrtime_error = 0; 167 int apic_remote_hrterr = 0; 168 int apic_num_nmis = 0; 169 int apic_apic_error = 0; 170 int apic_num_apic_errors = 0; 171 int apic_num_cksum_errors = 0; 172 173 int apic_error = 0; 174 175 static int apic_cmos_ssb_set = 0; 176 177 /* use to make sure only one cpu handles the nmi */ 178 lock_t apic_nmi_lock; 179 /* use to make sure only one cpu handles the error interrupt */ 180 lock_t apic_error_lock; 181 182 static struct { 183 uchar_t cntl; 184 uchar_t data; 185 } aspen_bmc[] = { 186 { CC_SMS_WR_START, 0x18 }, /* NetFn/LUN */ 187 { CC_SMS_WR_NEXT, 0x24 }, /* Cmd SET_WATCHDOG_TIMER */ 188 { CC_SMS_WR_NEXT, 0x84 }, /* DataByte 1: SMS/OS no log */ 189 { CC_SMS_WR_NEXT, 0x2 }, /* DataByte 2: Power Down */ 190 { CC_SMS_WR_NEXT, 0x0 }, /* DataByte 3: no pre-timeout */ 191 { CC_SMS_WR_NEXT, 0x0 }, /* DataByte 4: timer expir. */ 192 { CC_SMS_WR_NEXT, 0xa }, /* DataByte 5: init countdown */ 193 { CC_SMS_WR_END, 0x0 }, /* DataByte 6: init countdown */ 194 195 { CC_SMS_WR_START, 0x18 }, /* NetFn/LUN */ 196 { CC_SMS_WR_END, 0x22 } /* Cmd RESET_WATCHDOG_TIMER */ 197 }; 198 199 static struct { 200 int port; 201 uchar_t data; 202 } sitka_bmc[] = { 203 { SMS_COMMAND_REGISTER, SMS_WRITE_START }, 204 { SMS_DATA_REGISTER, 0x18 }, /* NetFn/LUN */ 205 { SMS_DATA_REGISTER, 0x24 }, /* Cmd SET_WATCHDOG_TIMER */ 206 { SMS_DATA_REGISTER, 0x84 }, /* DataByte 1: SMS/OS no log */ 207 { SMS_DATA_REGISTER, 0x2 }, /* DataByte 2: Power Down */ 208 { SMS_DATA_REGISTER, 0x0 }, /* DataByte 3: no pre-timeout */ 209 { SMS_DATA_REGISTER, 0x0 }, /* DataByte 4: timer expir. */ 210 { SMS_DATA_REGISTER, 0xa }, /* DataByte 5: init countdown */ 211 { SMS_COMMAND_REGISTER, SMS_WRITE_END }, 212 { SMS_DATA_REGISTER, 0x0 }, /* DataByte 6: init countdown */ 213 214 { SMS_COMMAND_REGISTER, SMS_WRITE_START }, 215 { SMS_DATA_REGISTER, 0x18 }, /* NetFn/LUN */ 216 { SMS_COMMAND_REGISTER, SMS_WRITE_END }, 217 { SMS_DATA_REGISTER, 0x22 } /* Cmd RESET_WATCHDOG_TIMER */ 218 }; 219 220 /* Patchable global variables. */ 221 int apic_kmdb_on_nmi = 0; /* 0 - no, 1 - yes enter kmdb */ 222 uint32_t apic_divide_reg_init = 0; /* 0 - divide by 2 */ 223 224 /* default apic ops without interrupt remapping */ 225 static apic_intrmap_ops_t apic_nointrmap_ops = { 226 (int (*)(int))return_instr, 227 (void (*)(int))return_instr, 228 (void (*)(void **, dev_info_t *, uint16_t, int, uchar_t))return_instr, 229 (void (*)(void *, void *, uint16_t, int))return_instr, 230 (void (*)(void **))return_instr, 231 apic_record_ioapic_rdt, 232 apic_record_msi, 233 }; 234 235 apic_intrmap_ops_t *apic_vt_ops = &apic_nointrmap_ops; 236 apic_cpus_info_t *apic_cpus = NULL; 237 cpuset_t apic_cpumask; 238 uint_t apic_picinit_called; 239 240 /* Flag to indicate that we need to shut down all processors */ 241 static uint_t apic_shutdown_processors; 242 243 /* 244 * Probe the ioapic method for apix module. Called in apic_probe_common() 245 */ 246 int 247 apic_ioapic_method_probe() 248 { 249 if (apix_enable == 0) 250 return (PSM_SUCCESS); 251 252 /* 253 * Set IOAPIC EOI handling method. The priority from low to high is: 254 * 1. IOxAPIC: with EOI register 255 * 2. IOMMU interrupt mapping 256 * 3. Mask-Before-EOI method for systems without boot 257 * interrupt routing, such as systems with only one IOAPIC; 258 * NVIDIA CK8-04/MCP55 systems; systems with bridge solution 259 * which disables the boot interrupt routing already. 260 * 4. Directed EOI 261 */ 262 if (apic_io_ver[0] >= 0x20) 263 apix_mul_ioapic_method = APIC_MUL_IOAPIC_IOXAPIC; 264 if ((apic_io_max == 1) || (apic_nvidia_io_max == apic_io_max)) 265 apix_mul_ioapic_method = APIC_MUL_IOAPIC_MASK; 266 if (apic_directed_EOI_supported()) 267 apix_mul_ioapic_method = APIC_MUL_IOAPIC_DEOI; 268 269 /* fall back to pcplusmp */ 270 if (apix_mul_ioapic_method == APIC_MUL_IOAPIC_PCPLUSMP) { 271 /* make sure apix is after pcplusmp in /etc/mach */ 272 apix_enable = 0; /* go ahead with pcplusmp install next */ 273 return (PSM_FAILURE); 274 } 275 276 return (PSM_SUCCESS); 277 } 278 279 /* 280 * handler for APIC Error interrupt. Just print a warning and continue 281 */ 282 int 283 apic_error_intr() 284 { 285 uint_t error0, error1, error; 286 uint_t i; 287 288 /* 289 * We need to write before read as per 7.4.17 of system prog manual. 290 * We do both and or the results to be safe 291 */ 292 error0 = apic_reg_ops->apic_read(APIC_ERROR_STATUS); 293 apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0); 294 error1 = apic_reg_ops->apic_read(APIC_ERROR_STATUS); 295 error = error0 | error1; 296 297 /* 298 * Clear the APIC error status (do this on all cpus that enter here) 299 * (two writes are required due to the semantics of accessing the 300 * error status register.) 301 */ 302 apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0); 303 apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0); 304 305 /* 306 * Prevent more than 1 CPU from handling error interrupt causing 307 * double printing (interleave of characters from multiple 308 * CPU's when using prom_printf) 309 */ 310 if (lock_try(&apic_error_lock) == 0) 311 return (error ? DDI_INTR_CLAIMED : DDI_INTR_UNCLAIMED); 312 if (error) { 313 #if DEBUG 314 if (apic_debug) 315 debug_enter("pcplusmp: APIC Error interrupt received"); 316 #endif /* DEBUG */ 317 if (apic_panic_on_apic_error) 318 cmn_err(CE_PANIC, 319 "APIC Error interrupt on CPU %d. Status = %x", 320 psm_get_cpu_id(), error); 321 else { 322 if ((error & ~APIC_CS_ERRORS) == 0) { 323 /* cksum error only */ 324 apic_error |= APIC_ERR_APIC_ERROR; 325 apic_apic_error |= error; 326 apic_num_apic_errors++; 327 apic_num_cksum_errors++; 328 } else { 329 /* 330 * prom_printf is the best shot we have of 331 * something which is problem free from 332 * high level/NMI type of interrupts 333 */ 334 prom_printf("APIC Error interrupt on CPU %d. " 335 "Status 0 = %x, Status 1 = %x\n", 336 psm_get_cpu_id(), error0, error1); 337 apic_error |= APIC_ERR_APIC_ERROR; 338 apic_apic_error |= error; 339 apic_num_apic_errors++; 340 for (i = 0; i < apic_error_display_delay; i++) { 341 tenmicrosec(); 342 } 343 /* 344 * provide more delay next time limited to 345 * roughly 1 clock tick time 346 */ 347 if (apic_error_display_delay < 500) 348 apic_error_display_delay *= 2; 349 } 350 } 351 lock_clear(&apic_error_lock); 352 return (DDI_INTR_CLAIMED); 353 } else { 354 lock_clear(&apic_error_lock); 355 return (DDI_INTR_UNCLAIMED); 356 } 357 } 358 359 /* 360 * Turn off the mask bit in the performance counter Local Vector Table entry. 361 */ 362 void 363 apic_cpcovf_mask_clear(void) 364 { 365 apic_reg_ops->apic_write(APIC_PCINT_VECT, 366 (apic_reg_ops->apic_read(APIC_PCINT_VECT) & ~APIC_LVT_MASK)); 367 } 368 369 /*ARGSUSED*/ 370 static int 371 apic_cmci_enable(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3) 372 { 373 apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect); 374 return (0); 375 } 376 377 /*ARGSUSED*/ 378 static int 379 apic_cmci_disable(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3) 380 { 381 apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect | AV_MASK); 382 return (0); 383 } 384 385 /*ARGSUSED*/ 386 int 387 cmci_cpu_setup(cpu_setup_t what, int cpuid, void *arg) 388 { 389 cpuset_t cpu_set; 390 391 CPUSET_ONLY(cpu_set, cpuid); 392 393 switch (what) { 394 case CPU_ON: 395 xc_call(NULL, NULL, NULL, CPUSET2BV(cpu_set), 396 (xc_func_t)apic_cmci_enable); 397 break; 398 399 case CPU_OFF: 400 xc_call(NULL, NULL, NULL, CPUSET2BV(cpu_set), 401 (xc_func_t)apic_cmci_disable); 402 break; 403 404 default: 405 break; 406 } 407 408 return (0); 409 } 410 411 static void 412 apic_disable_local_apic(void) 413 { 414 apic_reg_ops->apic_write_task_reg(APIC_MASK_ALL); 415 apic_reg_ops->apic_write(APIC_LOCAL_TIMER, AV_MASK); 416 417 /* local intr reg 0 */ 418 apic_reg_ops->apic_write(APIC_INT_VECT0, AV_MASK); 419 420 /* disable NMI */ 421 apic_reg_ops->apic_write(APIC_INT_VECT1, AV_MASK); 422 423 /* and error interrupt */ 424 apic_reg_ops->apic_write(APIC_ERR_VECT, AV_MASK); 425 426 /* and perf counter intr */ 427 apic_reg_ops->apic_write(APIC_PCINT_VECT, AV_MASK); 428 429 apic_reg_ops->apic_write(APIC_SPUR_INT_REG, APIC_SPUR_INTR); 430 } 431 432 static void 433 apic_cpu_send_SIPI(processorid_t cpun, boolean_t start) 434 { 435 int loop_count; 436 uint32_t vector; 437 uint_t apicid; 438 ulong_t iflag; 439 440 apicid = apic_cpus[cpun].aci_local_id; 441 442 /* 443 * Interrupts on current CPU will be disabled during the 444 * steps in order to avoid unwanted side effects from 445 * executing interrupt handlers on a problematic BIOS. 446 */ 447 iflag = intr_clear(); 448 449 if (start) { 450 outb(CMOS_ADDR, SSB); 451 outb(CMOS_DATA, BIOS_SHUTDOWN); 452 } 453 454 /* 455 * According to X2APIC specification in section '2.3.5.1' of 456 * Interrupt Command Register Semantics, the semantics of 457 * programming the Interrupt Command Register to dispatch an interrupt 458 * is simplified. A single MSR write to the 64-bit ICR is required 459 * for dispatching an interrupt. Specifically, with the 64-bit MSR 460 * interface to ICR, system software is not required to check the 461 * status of the delivery status bit prior to writing to the ICR 462 * to send an IPI. With the removal of the Delivery Status bit, 463 * system software no longer has a reason to read the ICR. It remains 464 * readable only to aid in debugging. 465 */ 466 #ifdef DEBUG 467 APIC_AV_PENDING_SET(); 468 #else 469 if (apic_mode == LOCAL_APIC) { 470 APIC_AV_PENDING_SET(); 471 } 472 #endif /* DEBUG */ 473 474 /* for integrated - make sure there is one INIT IPI in buffer */ 475 /* for external - it will wake up the cpu */ 476 apic_reg_ops->apic_write_int_cmd(apicid, AV_ASSERT | AV_RESET); 477 478 /* If only 1 CPU is installed, PENDING bit will not go low */ 479 for (loop_count = apic_sipi_max_loop_count; loop_count; loop_count--) { 480 if (apic_mode == LOCAL_APIC && 481 apic_reg_ops->apic_read(APIC_INT_CMD1) & AV_PENDING) 482 apic_ret(); 483 else 484 break; 485 } 486 487 apic_reg_ops->apic_write_int_cmd(apicid, AV_DEASSERT | AV_RESET); 488 drv_usecwait(20000); /* 20 milli sec */ 489 490 if (apic_cpus[cpun].aci_local_ver >= APIC_INTEGRATED_VERS) { 491 /* integrated apic */ 492 493 vector = (rm_platter_pa >> MMU_PAGESHIFT) & 494 (APIC_VECTOR_MASK | APIC_IPL_MASK); 495 496 /* to offset the INIT IPI queue up in the buffer */ 497 apic_reg_ops->apic_write_int_cmd(apicid, vector | AV_STARTUP); 498 drv_usecwait(200); /* 20 micro sec */ 499 500 /* 501 * send the second SIPI (Startup IPI) as recommended by Intel 502 * software development manual. 503 */ 504 apic_reg_ops->apic_write_int_cmd(apicid, vector | AV_STARTUP); 505 drv_usecwait(200); /* 20 micro sec */ 506 } 507 508 intr_restore(iflag); 509 } 510 511 /*ARGSUSED1*/ 512 int 513 apic_cpu_start(processorid_t cpun, caddr_t arg) 514 { 515 ASSERT(MUTEX_HELD(&cpu_lock)); 516 517 if (!apic_cpu_in_range(cpun)) { 518 return (EINVAL); 519 } 520 521 /* 522 * Switch to apic_common_send_ipi for safety during starting other CPUs. 523 */ 524 if (apic_mode == LOCAL_X2APIC) { 525 apic_switch_ipi_callback(B_TRUE); 526 } 527 528 apic_cmos_ssb_set = 1; 529 apic_cpu_send_SIPI(cpun, B_TRUE); 530 531 return (0); 532 } 533 534 /* 535 * Put CPU into halted state with interrupts disabled. 536 */ 537 /*ARGSUSED1*/ 538 int 539 apic_cpu_stop(processorid_t cpun, caddr_t arg) 540 { 541 int rc; 542 cpu_t *cp; 543 extern cpuset_t cpu_ready_set; 544 extern void cpu_idle_intercept_cpu(cpu_t *cp); 545 546 ASSERT(MUTEX_HELD(&cpu_lock)); 547 548 if (!apic_cpu_in_range(cpun)) { 549 return (EINVAL); 550 } 551 if (apic_cpus[cpun].aci_local_ver < APIC_INTEGRATED_VERS) { 552 return (ENOTSUP); 553 } 554 555 cp = cpu_get(cpun); 556 ASSERT(cp != NULL); 557 ASSERT((cp->cpu_flags & CPU_OFFLINE) != 0); 558 ASSERT((cp->cpu_flags & CPU_QUIESCED) != 0); 559 ASSERT((cp->cpu_flags & CPU_ENABLE) == 0); 560 561 /* Clear CPU_READY flag to disable cross calls. */ 562 cp->cpu_flags &= ~CPU_READY; 563 CPUSET_ATOMIC_DEL(cpu_ready_set, cpun); 564 rc = xc_flush_cpu(cp); 565 if (rc != 0) { 566 CPUSET_ATOMIC_ADD(cpu_ready_set, cpun); 567 cp->cpu_flags |= CPU_READY; 568 return (rc); 569 } 570 571 /* Intercept target CPU at a safe point before powering it off. */ 572 cpu_idle_intercept_cpu(cp); 573 574 apic_cpu_send_SIPI(cpun, B_FALSE); 575 cp->cpu_flags &= ~CPU_RUNNING; 576 577 return (0); 578 } 579 580 int 581 apic_cpu_ops(psm_cpu_request_t *reqp) 582 { 583 if (reqp == NULL) { 584 return (EINVAL); 585 } 586 587 switch (reqp->pcr_cmd) { 588 case PSM_CPU_ADD: 589 return (apic_cpu_add(reqp)); 590 591 case PSM_CPU_REMOVE: 592 return (apic_cpu_remove(reqp)); 593 594 case PSM_CPU_STOP: 595 return (apic_cpu_stop(reqp->req.cpu_stop.cpuid, 596 reqp->req.cpu_stop.ctx)); 597 598 default: 599 return (ENOTSUP); 600 } 601 } 602 603 #ifdef DEBUG 604 int apic_break_on_cpu = 9; 605 int apic_stretch_interrupts = 0; 606 int apic_stretch_ISR = 1 << 3; /* IPL of 3 matches nothing now */ 607 #endif /* DEBUG */ 608 609 /* 610 * generates an interprocessor interrupt to another CPU. Any changes made to 611 * this routine must be accompanied by similar changes to 612 * apic_common_send_ipi(). 613 */ 614 void 615 apic_send_ipi(int cpun, int ipl) 616 { 617 int vector; 618 ulong_t flag; 619 620 vector = apic_resv_vector[ipl]; 621 622 ASSERT((vector >= APIC_BASE_VECT) && (vector <= APIC_SPUR_INTR)); 623 624 flag = intr_clear(); 625 626 APIC_AV_PENDING_SET(); 627 628 apic_reg_ops->apic_write_int_cmd(apic_cpus[cpun].aci_local_id, 629 vector); 630 631 intr_restore(flag); 632 } 633 634 void 635 apic_send_pir_ipi(processorid_t cpun) 636 { 637 const int vector = apic_pir_vect; 638 ulong_t flag; 639 640 ASSERT((vector >= APIC_BASE_VECT) && (vector <= APIC_SPUR_INTR)); 641 642 flag = intr_clear(); 643 644 /* Self-IPI for inducing PIR makes no sense. */ 645 if ((cpun != psm_get_cpu_id())) { 646 APIC_AV_PENDING_SET(); 647 apic_reg_ops->apic_write_int_cmd(apic_cpus[cpun].aci_local_id, 648 vector); 649 } 650 651 intr_restore(flag); 652 } 653 654 int 655 apic_get_pir_ipivect(void) 656 { 657 return (apic_pir_vect); 658 } 659 660 /*ARGSUSED*/ 661 void 662 apic_set_idlecpu(processorid_t cpun) 663 { 664 } 665 666 /*ARGSUSED*/ 667 void 668 apic_unset_idlecpu(processorid_t cpun) 669 { 670 } 671 672 673 void 674 apic_ret() 675 { 676 } 677 678 /* 679 * If apic_coarse_time == 1, then apic_gettime() is used instead of 680 * apic_gethrtime(). This is used for performance instead of accuracy. 681 */ 682 683 hrtime_t 684 apic_gettime() 685 { 686 int old_hrtime_stamp; 687 hrtime_t temp; 688 689 /* 690 * In one-shot mode, we do not keep time, so if anyone 691 * calls psm_gettime() directly, we vector over to 692 * gethrtime(). 693 * one-shot mode MUST NOT be enabled if this psm is the source of 694 * hrtime. 695 */ 696 697 if (apic_oneshot) 698 return (gethrtime()); 699 700 701 gettime_again: 702 while ((old_hrtime_stamp = apic_hrtime_stamp) & 1) 703 apic_ret(); 704 705 temp = apic_nsec_since_boot; 706 707 if (apic_hrtime_stamp != old_hrtime_stamp) { /* got an interrupt */ 708 goto gettime_again; 709 } 710 return (temp); 711 } 712 713 /* 714 * Here we return the number of nanoseconds since booting. Note every 715 * clock interrupt increments apic_nsec_since_boot by the appropriate 716 * amount. 717 */ 718 hrtime_t 719 apic_gethrtime(void) 720 { 721 int curr_timeval, countval, elapsed_ticks; 722 int old_hrtime_stamp, status; 723 hrtime_t temp; 724 uint32_t cpun; 725 ulong_t oflags; 726 727 /* 728 * In one-shot mode, we do not keep time, so if anyone 729 * calls psm_gethrtime() directly, we vector over to 730 * gethrtime(). 731 * one-shot mode MUST NOT be enabled if this psm is the source of 732 * hrtime. 733 */ 734 735 if (apic_oneshot) 736 return (gethrtime()); 737 738 oflags = intr_clear(); /* prevent migration */ 739 740 cpun = apic_reg_ops->apic_read(APIC_LID_REG); 741 if (apic_mode == LOCAL_APIC) 742 cpun >>= APIC_ID_BIT_OFFSET; 743 744 lock_set(&apic_gethrtime_lock); 745 746 gethrtime_again: 747 while ((old_hrtime_stamp = apic_hrtime_stamp) & 1) 748 apic_ret(); 749 750 /* 751 * Check to see which CPU we are on. Note the time is kept on 752 * the local APIC of CPU 0. If on CPU 0, simply read the current 753 * counter. If on another CPU, issue a remote read command to CPU 0. 754 */ 755 if (cpun == apic_cpus[0].aci_local_id) { 756 countval = apic_reg_ops->apic_read(APIC_CURR_COUNT); 757 } else { 758 #ifdef DEBUG 759 APIC_AV_PENDING_SET(); 760 #else 761 if (apic_mode == LOCAL_APIC) 762 APIC_AV_PENDING_SET(); 763 #endif /* DEBUG */ 764 765 apic_reg_ops->apic_write_int_cmd( 766 apic_cpus[0].aci_local_id, APIC_CURR_ADD | AV_REMOTE); 767 768 while ((status = apic_reg_ops->apic_read(APIC_INT_CMD1)) 769 & AV_READ_PENDING) { 770 apic_ret(); 771 } 772 773 if (status & AV_REMOTE_STATUS) /* 1 = valid */ 774 countval = apic_reg_ops->apic_read(APIC_REMOTE_READ); 775 else { /* 0 = invalid */ 776 apic_remote_hrterr++; 777 /* 778 * return last hrtime right now, will need more 779 * testing if change to retry 780 */ 781 temp = apic_last_hrtime; 782 783 lock_clear(&apic_gethrtime_lock); 784 785 intr_restore(oflags); 786 787 return (temp); 788 } 789 } 790 if (countval > last_count_read) 791 countval = 0; 792 else 793 last_count_read = countval; 794 795 elapsed_ticks = apic_hertz_count - countval; 796 797 curr_timeval = APIC_TICKS_TO_NSECS(elapsed_ticks); 798 temp = apic_nsec_since_boot + curr_timeval; 799 800 if (apic_hrtime_stamp != old_hrtime_stamp) { /* got an interrupt */ 801 /* we might have clobbered last_count_read. Restore it */ 802 last_count_read = apic_hertz_count; 803 goto gethrtime_again; 804 } 805 806 if (temp < apic_last_hrtime) { 807 /* return last hrtime if error occurs */ 808 apic_hrtime_error++; 809 temp = apic_last_hrtime; 810 } 811 else 812 apic_last_hrtime = temp; 813 814 lock_clear(&apic_gethrtime_lock); 815 intr_restore(oflags); 816 817 return (temp); 818 } 819 820 /* apic NMI handler */ 821 /*ARGSUSED*/ 822 void 823 apic_nmi_intr(caddr_t arg, struct regs *rp) 824 { 825 if (apic_shutdown_processors) { 826 apic_disable_local_apic(); 827 return; 828 } 829 830 apic_error |= APIC_ERR_NMI; 831 832 if (!lock_try(&apic_nmi_lock)) 833 return; 834 apic_num_nmis++; 835 836 if (apic_kmdb_on_nmi && psm_debugger()) { 837 debug_enter("NMI received: entering kmdb\n"); 838 } else if (apic_panic_on_nmi) { 839 /* Keep panic from entering kmdb. */ 840 nopanicdebug = 1; 841 panic("NMI received\n"); 842 } else { 843 /* 844 * prom_printf is the best shot we have of something which is 845 * problem free from high level/NMI type of interrupts 846 */ 847 prom_printf("NMI received\n"); 848 } 849 850 lock_clear(&apic_nmi_lock); 851 } 852 853 processorid_t 854 apic_get_next_processorid(processorid_t cpu_id) 855 { 856 857 int i; 858 859 if (cpu_id == -1) 860 return ((processorid_t)0); 861 862 for (i = cpu_id + 1; i < NCPU; i++) { 863 if (apic_cpu_in_range(i)) 864 return (i); 865 } 866 867 return ((processorid_t)-1); 868 } 869 870 int 871 apic_cpu_add(psm_cpu_request_t *reqp) 872 { 873 int i, rv = 0; 874 ulong_t iflag; 875 boolean_t first = B_TRUE; 876 uchar_t localver = 0; 877 uint32_t localid, procid; 878 processorid_t cpuid = (processorid_t)-1; 879 mach_cpu_add_arg_t *ap; 880 881 ASSERT(reqp != NULL); 882 reqp->req.cpu_add.cpuid = (processorid_t)-1; 883 884 /* Check whether CPU hotplug is supported. */ 885 if (!plat_dr_support_cpu() || apic_max_nproc == -1) { 886 return (ENOTSUP); 887 } 888 889 ap = (mach_cpu_add_arg_t *)reqp->req.cpu_add.argp; 890 switch (ap->type) { 891 case MACH_CPU_ARG_LOCAL_APIC: 892 localid = ap->arg.apic.apic_id; 893 procid = ap->arg.apic.proc_id; 894 if (localid >= 255 || procid > 255) { 895 cmn_err(CE_WARN, 896 "!apic: apicid(%u) or procid(%u) is invalid.", 897 localid, procid); 898 return (EINVAL); 899 } 900 break; 901 902 case MACH_CPU_ARG_LOCAL_X2APIC: 903 localid = ap->arg.apic.apic_id; 904 procid = ap->arg.apic.proc_id; 905 if (localid >= UINT32_MAX) { 906 cmn_err(CE_WARN, 907 "!apic: x2apicid(%u) is invalid.", localid); 908 return (EINVAL); 909 } else if (localid >= 255 && apic_mode == LOCAL_APIC) { 910 cmn_err(CE_WARN, "!apic: system is in APIC mode, " 911 "can't support x2APIC processor."); 912 return (ENOTSUP); 913 } 914 break; 915 916 default: 917 cmn_err(CE_WARN, 918 "!apic: unknown argument type %d to apic_cpu_add().", 919 ap->type); 920 return (EINVAL); 921 } 922 923 /* Use apic_ioapic_lock to sync with apic_get_next_bind_cpu. */ 924 iflag = intr_clear(); 925 lock_set(&apic_ioapic_lock); 926 927 /* Check whether local APIC id already exists. */ 928 for (i = 0; i < apic_nproc; i++) { 929 if (!CPU_IN_SET(apic_cpumask, i)) 930 continue; 931 if (apic_cpus[i].aci_local_id == localid) { 932 lock_clear(&apic_ioapic_lock); 933 intr_restore(iflag); 934 cmn_err(CE_WARN, 935 "!apic: local apic id %u already exists.", 936 localid); 937 return (EEXIST); 938 } else if (apic_cpus[i].aci_processor_id == procid) { 939 lock_clear(&apic_ioapic_lock); 940 intr_restore(iflag); 941 cmn_err(CE_WARN, 942 "!apic: processor id %u already exists.", 943 (int)procid); 944 return (EEXIST); 945 } 946 947 /* 948 * There's no local APIC version number available in MADT table, 949 * so assume that all CPUs are homogeneous and use local APIC 950 * version number of the first existing CPU. 951 */ 952 if (first) { 953 first = B_FALSE; 954 localver = apic_cpus[i].aci_local_ver; 955 } 956 } 957 ASSERT(first == B_FALSE); 958 959 /* 960 * Try to assign the same cpuid if APIC id exists in the dirty cache. 961 */ 962 for (i = 0; i < apic_max_nproc; i++) { 963 if (CPU_IN_SET(apic_cpumask, i)) { 964 ASSERT((apic_cpus[i].aci_status & APIC_CPU_FREE) == 0); 965 continue; 966 } 967 ASSERT(apic_cpus[i].aci_status & APIC_CPU_FREE); 968 if ((apic_cpus[i].aci_status & APIC_CPU_DIRTY) && 969 apic_cpus[i].aci_local_id == localid && 970 apic_cpus[i].aci_processor_id == procid) { 971 cpuid = i; 972 break; 973 } 974 } 975 976 /* Avoid the dirty cache and allocate fresh slot if possible. */ 977 if (cpuid == (processorid_t)-1) { 978 for (i = 0; i < apic_max_nproc; i++) { 979 if ((apic_cpus[i].aci_status & APIC_CPU_FREE) && 980 (apic_cpus[i].aci_status & APIC_CPU_DIRTY) == 0) { 981 cpuid = i; 982 break; 983 } 984 } 985 } 986 987 /* Try to find any free slot as last resort. */ 988 if (cpuid == (processorid_t)-1) { 989 for (i = 0; i < apic_max_nproc; i++) { 990 if (apic_cpus[i].aci_status & APIC_CPU_FREE) { 991 cpuid = i; 992 break; 993 } 994 } 995 } 996 997 if (cpuid == (processorid_t)-1) { 998 lock_clear(&apic_ioapic_lock); 999 intr_restore(iflag); 1000 cmn_err(CE_NOTE, 1001 "!apic: failed to allocate cpu id for processor %u.", 1002 procid); 1003 rv = EAGAIN; 1004 } else if (ACPI_FAILURE(acpica_map_cpu(cpuid, procid))) { 1005 lock_clear(&apic_ioapic_lock); 1006 intr_restore(iflag); 1007 cmn_err(CE_NOTE, 1008 "!apic: failed to build mapping for processor %u.", 1009 procid); 1010 rv = EBUSY; 1011 } else { 1012 ASSERT(cpuid >= 0 && cpuid < NCPU); 1013 ASSERT(cpuid < apic_max_nproc && cpuid < max_ncpus); 1014 bzero(&apic_cpus[cpuid], sizeof (apic_cpus[0])); 1015 apic_cpus[cpuid].aci_processor_id = procid; 1016 apic_cpus[cpuid].aci_local_id = localid; 1017 apic_cpus[cpuid].aci_local_ver = localver; 1018 CPUSET_ATOMIC_ADD(apic_cpumask, cpuid); 1019 if (cpuid >= apic_nproc) { 1020 apic_nproc = cpuid + 1; 1021 } 1022 lock_clear(&apic_ioapic_lock); 1023 intr_restore(iflag); 1024 reqp->req.cpu_add.cpuid = cpuid; 1025 } 1026 1027 return (rv); 1028 } 1029 1030 int 1031 apic_cpu_remove(psm_cpu_request_t *reqp) 1032 { 1033 int i; 1034 ulong_t iflag; 1035 processorid_t cpuid; 1036 1037 /* Check whether CPU hotplug is supported. */ 1038 if (!plat_dr_support_cpu() || apic_max_nproc == -1) { 1039 return (ENOTSUP); 1040 } 1041 1042 cpuid = reqp->req.cpu_remove.cpuid; 1043 1044 /* Use apic_ioapic_lock to sync with apic_get_next_bind_cpu. */ 1045 iflag = intr_clear(); 1046 lock_set(&apic_ioapic_lock); 1047 1048 if (!apic_cpu_in_range(cpuid)) { 1049 lock_clear(&apic_ioapic_lock); 1050 intr_restore(iflag); 1051 cmn_err(CE_WARN, 1052 "!apic: cpuid %d doesn't exist in apic_cpus array.", 1053 cpuid); 1054 return (ENODEV); 1055 } 1056 ASSERT((apic_cpus[cpuid].aci_status & APIC_CPU_FREE) == 0); 1057 1058 if (ACPI_FAILURE(acpica_unmap_cpu(cpuid))) { 1059 lock_clear(&apic_ioapic_lock); 1060 intr_restore(iflag); 1061 return (ENOENT); 1062 } 1063 1064 if (cpuid == apic_nproc - 1) { 1065 /* 1066 * We are removing the highest numbered cpuid so we need to 1067 * find the next highest cpuid as the new value for apic_nproc. 1068 */ 1069 for (i = apic_nproc; i > 0; i--) { 1070 if (CPU_IN_SET(apic_cpumask, i - 1)) { 1071 apic_nproc = i; 1072 break; 1073 } 1074 } 1075 /* at least one CPU left */ 1076 ASSERT(i > 0); 1077 } 1078 CPUSET_ATOMIC_DEL(apic_cpumask, cpuid); 1079 /* mark slot as free and keep it in the dirty cache */ 1080 apic_cpus[cpuid].aci_status = APIC_CPU_FREE | APIC_CPU_DIRTY; 1081 1082 lock_clear(&apic_ioapic_lock); 1083 intr_restore(iflag); 1084 1085 return (0); 1086 } 1087 1088 /* 1089 * Return the number of ticks the APIC decrements in SF nanoseconds. 1090 * The fixed-frequency PIT (aka 8254) is used for the measurement. 1091 */ 1092 static uint64_t 1093 apic_calibrate_impl() 1094 { 1095 uint8_t pit_tick_lo; 1096 uint16_t pit_tick, target_pit_tick, pit_ticks_adj; 1097 uint32_t pit_ticks; 1098 uint32_t start_apic_tick, end_apic_tick, apic_ticks; 1099 ulong_t iflag; 1100 1101 apic_reg_ops->apic_write(APIC_DIVIDE_REG, apic_divide_reg_init); 1102 apic_reg_ops->apic_write(APIC_INIT_COUNT, APIC_MAXVAL); 1103 1104 iflag = intr_clear(); 1105 1106 do { 1107 pit_tick_lo = inb(PITCTR0_PORT); 1108 pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo; 1109 } while (pit_tick < APIC_TIME_MIN || 1110 pit_tick_lo <= APIC_LB_MIN || pit_tick_lo >= APIC_LB_MAX); 1111 1112 /* 1113 * Wait for the PIT to decrement by 5 ticks to ensure 1114 * we didn't start in the middle of a tick. 1115 * Compare with 0x10 for the wrap around case. 1116 */ 1117 target_pit_tick = pit_tick - 5; 1118 do { 1119 pit_tick_lo = inb(PITCTR0_PORT); 1120 pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo; 1121 } while (pit_tick > target_pit_tick || pit_tick_lo < 0x10); 1122 1123 start_apic_tick = apic_reg_ops->apic_read(APIC_CURR_COUNT); 1124 1125 /* 1126 * Wait for the PIT to decrement by APIC_TIME_COUNT ticks 1127 */ 1128 target_pit_tick = pit_tick - APIC_TIME_COUNT; 1129 do { 1130 pit_tick_lo = inb(PITCTR0_PORT); 1131 pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo; 1132 } while (pit_tick > target_pit_tick || pit_tick_lo < 0x10); 1133 1134 end_apic_tick = apic_reg_ops->apic_read(APIC_CURR_COUNT); 1135 1136 intr_restore(iflag); 1137 1138 apic_ticks = start_apic_tick - end_apic_tick; 1139 1140 /* The PIT might have decremented by more ticks than planned */ 1141 pit_ticks_adj = target_pit_tick - pit_tick; 1142 /* total number of PIT ticks corresponding to apic_ticks */ 1143 pit_ticks = APIC_TIME_COUNT + pit_ticks_adj; 1144 1145 /* 1146 * Determine the number of nanoseconds per APIC clock tick 1147 * and then determine how many APIC ticks to interrupt at the 1148 * desired frequency 1149 * apic_ticks / (pitticks / PIT_HZ) = apic_ticks_per_s 1150 * (apic_ticks * PIT_HZ) / pitticks = apic_ticks_per_s 1151 * apic_ticks_per_ns = (apic_ticks * PIT_HZ) / (pitticks * 10^9) 1152 * apic_ticks_per_SFns = 1153 * (SF * apic_ticks * PIT_HZ) / (pitticks * 10^9) 1154 */ 1155 return ((SF * apic_ticks * PIT_HZ) / ((uint64_t)pit_ticks * NANOSEC)); 1156 } 1157 1158 /* 1159 * It was found empirically that 5 measurements seem sufficient to give a good 1160 * accuracy. Most spurious measurements are higher than the target value thus 1161 * we eliminate up to 2/5 spurious measurements. 1162 */ 1163 #define APIC_CALIBRATE_MEASUREMENTS 5 1164 1165 #define APIC_CALIBRATE_PERCENT_OFF_WARNING 10 1166 1167 /* 1168 * Return the number of ticks the APIC decrements in SF nanoseconds. 1169 * Several measurements are taken to filter out outliers. 1170 */ 1171 uint64_t 1172 apic_calibrate() 1173 { 1174 uint64_t measurements[APIC_CALIBRATE_MEASUREMENTS]; 1175 int median_idx; 1176 uint64_t median; 1177 1178 /* 1179 * When running under a virtual machine, the emulated PIT and APIC 1180 * counters do not always return the right values and can roll over. 1181 * Those spurious measurements are relatively rare but could 1182 * significantly affect the calibration. 1183 * Therefore we take several measurements and then keep the median. 1184 * The median is preferred to the average here as we only want to 1185 * discard outliers. 1186 */ 1187 for (int i = 0; i < APIC_CALIBRATE_MEASUREMENTS; i++) 1188 measurements[i] = apic_calibrate_impl(); 1189 1190 /* 1191 * sort results and retrieve median. 1192 */ 1193 for (int i = 0; i < APIC_CALIBRATE_MEASUREMENTS; i++) { 1194 for (int j = i + 1; j < APIC_CALIBRATE_MEASUREMENTS; j++) { 1195 if (measurements[j] < measurements[i]) { 1196 uint64_t tmp = measurements[i]; 1197 measurements[i] = measurements[j]; 1198 measurements[j] = tmp; 1199 } 1200 } 1201 } 1202 median_idx = APIC_CALIBRATE_MEASUREMENTS / 2; 1203 median = measurements[median_idx]; 1204 1205 #if (APIC_CALIBRATE_MEASUREMENTS >= 3) 1206 /* 1207 * Check that measurements are consistent. Post a warning 1208 * if the three middle values are not close to each other. 1209 */ 1210 uint64_t delta_warn = median * 1211 APIC_CALIBRATE_PERCENT_OFF_WARNING / 100; 1212 if ((median - measurements[median_idx - 1]) > delta_warn || 1213 (measurements[median_idx + 1] - median) > delta_warn) { 1214 cmn_err(CE_WARN, "apic_calibrate measurements lack " 1215 "precision: %llu, %llu, %llu.", 1216 (u_longlong_t)measurements[median_idx - 1], 1217 (u_longlong_t)median, 1218 (u_longlong_t)measurements[median_idx + 1]); 1219 } 1220 #endif 1221 1222 return (median); 1223 } 1224 1225 /* 1226 * Initialise the APIC timer on the local APIC of CPU 0 to the desired 1227 * frequency. Note at this stage in the boot sequence, the boot processor 1228 * is the only active processor. 1229 * hertz value of 0 indicates a one-shot mode request. In this case 1230 * the function returns the resolution (in nanoseconds) for the hardware 1231 * timer interrupt. If one-shot mode capability is not available, 1232 * the return value will be 0. apic_enable_oneshot is a global switch 1233 * for disabling the functionality. 1234 * A non-zero positive value for hertz indicates a periodic mode request. 1235 * In this case the hardware will be programmed to generate clock interrupts 1236 * at hertz frequency and returns the resolution of interrupts in 1237 * nanosecond. 1238 */ 1239 1240 int 1241 apic_clkinit(int hertz) 1242 { 1243 int ret; 1244 1245 apic_int_busy_mark = (apic_int_busy_mark * 1246 apic_sample_factor_redistribution) / 100; 1247 apic_int_free_mark = (apic_int_free_mark * 1248 apic_sample_factor_redistribution) / 100; 1249 apic_diff_for_redistribution = (apic_diff_for_redistribution * 1250 apic_sample_factor_redistribution) / 100; 1251 1252 ret = apic_timer_init(hertz); 1253 return (ret); 1254 1255 } 1256 1257 /* 1258 * apic_preshutdown: 1259 * Called early in shutdown whilst we can still access filesystems to do 1260 * things like loading modules which will be required to complete shutdown 1261 * after filesystems are all unmounted. 1262 */ 1263 void 1264 apic_preshutdown(int cmd, int fcn) 1265 { 1266 APIC_VERBOSE_POWEROFF(("apic_preshutdown(%d,%d); m=%d a=%d\n", 1267 cmd, fcn, apic_poweroff_method, apic_enable_acpi)); 1268 } 1269 1270 void 1271 apic_shutdown(int cmd, int fcn) 1272 { 1273 int restarts, attempts; 1274 int i; 1275 uchar_t byte; 1276 ulong_t iflag; 1277 1278 hpet_acpi_fini(); 1279 1280 /* Send NMI to all CPUs except self to do per processor shutdown */ 1281 iflag = intr_clear(); 1282 #ifdef DEBUG 1283 APIC_AV_PENDING_SET(); 1284 #else 1285 if (apic_mode == LOCAL_APIC) 1286 APIC_AV_PENDING_SET(); 1287 #endif /* DEBUG */ 1288 apic_shutdown_processors = 1; 1289 apic_reg_ops->apic_write(APIC_INT_CMD1, 1290 AV_NMI | AV_LEVEL | AV_SH_ALL_EXCSELF); 1291 1292 /* restore cmos shutdown byte before reboot */ 1293 if (apic_cmos_ssb_set) { 1294 outb(CMOS_ADDR, SSB); 1295 outb(CMOS_DATA, 0); 1296 } 1297 1298 ioapic_disable_redirection(); 1299 1300 /* disable apic mode if imcr present */ 1301 if (apic_imcrp) { 1302 outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT); 1303 outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_PIC); 1304 } 1305 1306 apic_disable_local_apic(); 1307 1308 intr_restore(iflag); 1309 1310 /* remainder of function is for shutdown cases only */ 1311 if (cmd != A_SHUTDOWN) 1312 return; 1313 1314 /* 1315 * Switch system back into Legacy-Mode if using ACPI and 1316 * not powering-off. Some BIOSes need to remain in ACPI-mode 1317 * for power-off to succeed (Dell Dimension 4600) 1318 * Do not disable ACPI while doing fastreboot 1319 */ 1320 if (apic_enable_acpi && fcn != AD_POWEROFF && fcn != AD_FASTREBOOT) 1321 (void) AcpiDisable(); 1322 1323 if (fcn == AD_FASTREBOOT) { 1324 apic_reg_ops->apic_write(APIC_INT_CMD1, 1325 AV_ASSERT | AV_RESET | AV_SH_ALL_EXCSELF); 1326 } 1327 1328 /* remainder of function is for shutdown+poweroff case only */ 1329 if (fcn != AD_POWEROFF) 1330 return; 1331 1332 switch (apic_poweroff_method) { 1333 case APIC_POWEROFF_VIA_RTC: 1334 1335 /* select the extended NVRAM bank in the RTC */ 1336 outb(CMOS_ADDR, RTC_REGA); 1337 byte = inb(CMOS_DATA); 1338 outb(CMOS_DATA, (byte | EXT_BANK)); 1339 1340 outb(CMOS_ADDR, PFR_REG); 1341 1342 /* for Predator must toggle the PAB bit */ 1343 byte = inb(CMOS_DATA); 1344 1345 /* 1346 * clear power active bar, wakeup alarm and 1347 * kickstart 1348 */ 1349 byte &= ~(PAB_CBIT | WF_FLAG | KS_FLAG); 1350 outb(CMOS_DATA, byte); 1351 1352 /* delay before next write */ 1353 drv_usecwait(1000); 1354 1355 /* for S40 the following would suffice */ 1356 byte = inb(CMOS_DATA); 1357 1358 /* power active bar control bit */ 1359 byte |= PAB_CBIT; 1360 outb(CMOS_DATA, byte); 1361 1362 break; 1363 1364 case APIC_POWEROFF_VIA_ASPEN_BMC: 1365 restarts = 0; 1366 restart_aspen_bmc: 1367 if (++restarts == 3) 1368 break; 1369 attempts = 0; 1370 do { 1371 byte = inb(MISMIC_FLAG_REGISTER); 1372 byte &= MISMIC_BUSY_MASK; 1373 if (byte != 0) { 1374 drv_usecwait(1000); 1375 if (attempts >= 3) 1376 goto restart_aspen_bmc; 1377 ++attempts; 1378 } 1379 } while (byte != 0); 1380 outb(MISMIC_CNTL_REGISTER, CC_SMS_GET_STATUS); 1381 byte = inb(MISMIC_FLAG_REGISTER); 1382 byte |= 0x1; 1383 outb(MISMIC_FLAG_REGISTER, byte); 1384 i = 0; 1385 for (; i < (sizeof (aspen_bmc)/sizeof (aspen_bmc[0])); 1386 i++) { 1387 attempts = 0; 1388 do { 1389 byte = inb(MISMIC_FLAG_REGISTER); 1390 byte &= MISMIC_BUSY_MASK; 1391 if (byte != 0) { 1392 drv_usecwait(1000); 1393 if (attempts >= 3) 1394 goto restart_aspen_bmc; 1395 ++attempts; 1396 } 1397 } while (byte != 0); 1398 outb(MISMIC_CNTL_REGISTER, aspen_bmc[i].cntl); 1399 outb(MISMIC_DATA_REGISTER, aspen_bmc[i].data); 1400 byte = inb(MISMIC_FLAG_REGISTER); 1401 byte |= 0x1; 1402 outb(MISMIC_FLAG_REGISTER, byte); 1403 } 1404 break; 1405 1406 case APIC_POWEROFF_VIA_SITKA_BMC: 1407 restarts = 0; 1408 restart_sitka_bmc: 1409 if (++restarts == 3) 1410 break; 1411 attempts = 0; 1412 do { 1413 byte = inb(SMS_STATUS_REGISTER); 1414 byte &= SMS_STATE_MASK; 1415 if ((byte == SMS_READ_STATE) || 1416 (byte == SMS_WRITE_STATE)) { 1417 drv_usecwait(1000); 1418 if (attempts >= 3) 1419 goto restart_sitka_bmc; 1420 ++attempts; 1421 } 1422 } while ((byte == SMS_READ_STATE) || 1423 (byte == SMS_WRITE_STATE)); 1424 outb(SMS_COMMAND_REGISTER, SMS_GET_STATUS); 1425 i = 0; 1426 for (; i < (sizeof (sitka_bmc)/sizeof (sitka_bmc[0])); 1427 i++) { 1428 attempts = 0; 1429 do { 1430 byte = inb(SMS_STATUS_REGISTER); 1431 byte &= SMS_IBF_MASK; 1432 if (byte != 0) { 1433 drv_usecwait(1000); 1434 if (attempts >= 3) 1435 goto restart_sitka_bmc; 1436 ++attempts; 1437 } 1438 } while (byte != 0); 1439 outb(sitka_bmc[i].port, sitka_bmc[i].data); 1440 } 1441 break; 1442 1443 case APIC_POWEROFF_NONE: 1444 1445 /* If no APIC direct method, we will try using ACPI */ 1446 if (apic_enable_acpi) { 1447 if (acpi_poweroff() == 1) 1448 return; 1449 } else 1450 return; 1451 1452 break; 1453 } 1454 /* 1455 * Wait a limited time here for power to go off. 1456 * If the power does not go off, then there was a 1457 * problem and we should continue to the halt which 1458 * prints a message for the user to press a key to 1459 * reboot. 1460 */ 1461 drv_usecwait(7000000); /* wait seven seconds */ 1462 1463 } 1464 1465 cyclic_id_t apic_cyclic_id; 1466 1467 /* 1468 * The following functions are in the platform specific file so that they 1469 * can be different functions depending on whether we are running on 1470 * bare metal or a hypervisor. 1471 */ 1472 1473 /* 1474 * map an apic for memory-mapped access 1475 */ 1476 uint32_t * 1477 mapin_apic(uint32_t addr, size_t len, int flags) 1478 { 1479 return ((void *)psm_map_phys(addr, len, flags)); 1480 } 1481 1482 uint32_t * 1483 mapin_ioapic(uint32_t addr, size_t len, int flags) 1484 { 1485 return (mapin_apic(addr, len, flags)); 1486 } 1487 1488 /* 1489 * unmap an apic 1490 */ 1491 void 1492 mapout_apic(caddr_t addr, size_t len) 1493 { 1494 psm_unmap_phys(addr, len); 1495 } 1496 1497 void 1498 mapout_ioapic(caddr_t addr, size_t len) 1499 { 1500 mapout_apic(addr, len); 1501 } 1502 1503 uint32_t 1504 ioapic_read(int ioapic_ix, uint32_t reg) 1505 { 1506 volatile uint32_t *ioapic; 1507 1508 ioapic = apicioadr[ioapic_ix]; 1509 ioapic[APIC_IO_REG] = reg; 1510 return (ioapic[APIC_IO_DATA]); 1511 } 1512 1513 void 1514 ioapic_write(int ioapic_ix, uint32_t reg, uint32_t value) 1515 { 1516 volatile uint32_t *ioapic; 1517 1518 ioapic = apicioadr[ioapic_ix]; 1519 ioapic[APIC_IO_REG] = reg; 1520 ioapic[APIC_IO_DATA] = value; 1521 } 1522 1523 void 1524 ioapic_write_eoi(int ioapic_ix, uint32_t value) 1525 { 1526 volatile uint32_t *ioapic; 1527 1528 ioapic = apicioadr[ioapic_ix]; 1529 ioapic[APIC_IO_EOI] = value; 1530 } 1531 1532 /* 1533 * Round-robin algorithm to find the next CPU with interrupts enabled. 1534 * It can't share the same static variable apic_next_bind_cpu with 1535 * apic_get_next_bind_cpu(), since that will cause all interrupts to be 1536 * bound to CPU1 at boot time. During boot, only CPU0 is online with 1537 * interrupts enabled when apic_get_next_bind_cpu() and apic_find_cpu() 1538 * are called. However, the pcplusmp driver assumes that there will be 1539 * boot_ncpus CPUs configured eventually so it tries to distribute all 1540 * interrupts among CPU0 - CPU[boot_ncpus - 1]. Thus to prevent all 1541 * interrupts being targetted at CPU1, we need to use a dedicated static 1542 * variable for find_next_cpu() instead of sharing apic_next_bind_cpu. 1543 */ 1544 1545 processorid_t 1546 apic_find_cpu(int flag) 1547 { 1548 int i; 1549 static processorid_t acid = 0; 1550 1551 /* Find the first CPU with the passed-in flag set */ 1552 for (i = 0; i < apic_nproc; i++) { 1553 if (++acid >= apic_nproc) { 1554 acid = 0; 1555 } 1556 if (apic_cpu_in_range(acid) && 1557 (apic_cpus[acid].aci_status & flag)) { 1558 break; 1559 } 1560 } 1561 1562 ASSERT((apic_cpus[acid].aci_status & flag) != 0); 1563 return (acid); 1564 } 1565 1566 void 1567 apic_intrmap_init(int apic_mode) 1568 { 1569 int suppress_brdcst_eoi = 0; 1570 1571 /* 1572 * Intel Software Developer's Manual 3A, 10.12.7: 1573 * 1574 * Routing of device interrupts to local APIC units operating in 1575 * x2APIC mode requires use of the interrupt-remapping architecture 1576 * specified in the Intel Virtualization Technology for Directed 1577 * I/O, Revision 1.3. Because of this, BIOS must enumerate support 1578 * for and software must enable this interrupt remapping with 1579 * Extended Interrupt Mode Enabled before it enabling x2APIC mode in 1580 * the local APIC units. 1581 * 1582 * 1583 * In other words, to use the APIC in x2APIC mode, we need interrupt 1584 * remapping. Since we don't start up the IOMMU by default, we 1585 * won't be able to do any interrupt remapping and therefore have to 1586 * use the APIC in traditional 'local APIC' mode with memory mapped 1587 * I/O. 1588 */ 1589 1590 if (psm_vt_ops != NULL) { 1591 if (((apic_intrmap_ops_t *)psm_vt_ops)-> 1592 apic_intrmap_init(apic_mode) == DDI_SUCCESS) { 1593 1594 apic_vt_ops = psm_vt_ops; 1595 1596 /* 1597 * We leverage the interrupt remapping engine to 1598 * suppress broadcast EOI; thus we must send the 1599 * directed EOI with the directed-EOI handler. 1600 */ 1601 if (apic_directed_EOI_supported() == 0) { 1602 suppress_brdcst_eoi = 1; 1603 } 1604 1605 apic_vt_ops->apic_intrmap_enable(suppress_brdcst_eoi); 1606 1607 if (apic_detect_x2apic()) { 1608 apic_enable_x2apic(); 1609 } 1610 1611 if (apic_directed_EOI_supported() == 0) { 1612 apic_set_directed_EOI_handler(); 1613 } 1614 } 1615 } 1616 } 1617 1618 /*ARGSUSED*/ 1619 static void 1620 apic_record_ioapic_rdt(void *intrmap_private, ioapic_rdt_t *irdt) 1621 { 1622 irdt->ir_hi <<= APIC_ID_BIT_OFFSET; 1623 } 1624 1625 /*ARGSUSED*/ 1626 static void 1627 apic_record_msi(void *intrmap_private, msi_regs_t *mregs) 1628 { 1629 mregs->mr_addr = MSI_ADDR_HDR | 1630 (MSI_ADDR_RH_FIXED << MSI_ADDR_RH_SHIFT) | 1631 (MSI_ADDR_DM_PHYSICAL << MSI_ADDR_DM_SHIFT) | 1632 (mregs->mr_addr << MSI_ADDR_DEST_SHIFT); 1633 mregs->mr_data = (MSI_DATA_TM_EDGE << MSI_DATA_TM_SHIFT) | 1634 mregs->mr_data; 1635 } 1636 1637 /* 1638 * Functions from apic_introp.c 1639 * 1640 * Those functions are used by apic_intr_ops(). 1641 */ 1642 1643 /* 1644 * MSI support flag: 1645 * reflects whether MSI is supported at APIC level 1646 * it can also be patched through /etc/system 1647 * 1648 * 0 = default value - don't know and need to call apic_check_msi_support() 1649 * to find out then set it accordingly 1650 * 1 = supported 1651 * -1 = not supported 1652 */ 1653 int apic_support_msi = 0; 1654 1655 /* Multiple vector support for MSI-X */ 1656 int apic_msix_enable = 1; 1657 1658 /* Multiple vector support for MSI */ 1659 int apic_multi_msi_enable = 1; 1660 1661 /* 1662 * Check whether the system supports MSI. 1663 * 1664 * MSI is required for PCI-E and for PCI versions later than 2.2, so if we find 1665 * a PCI-E bus or we find a PCI bus whose version we know is >= 2.2, then we 1666 * return PSM_SUCCESS to indicate this system supports MSI. 1667 * 1668 * (Currently the only way we check whether a given PCI bus supports >= 2.2 is 1669 * by detecting if we are running inside the KVM hypervisor, which guarantees 1670 * this version number.) 1671 */ 1672 int 1673 apic_check_msi_support() 1674 { 1675 dev_info_t *cdip; 1676 char dev_type[16]; 1677 int dev_len; 1678 1679 DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support:\n")); 1680 1681 /* 1682 * check whether the first level children of root_node have 1683 * PCI-E or PCI capability. 1684 */ 1685 for (cdip = ddi_get_child(ddi_root_node()); cdip != NULL; 1686 cdip = ddi_get_next_sibling(cdip)) { 1687 1688 DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: cdip: 0x%p," 1689 " driver: %s, binding: %s, nodename: %s\n", (void *)cdip, 1690 ddi_driver_name(cdip), ddi_binding_name(cdip), 1691 ddi_node_name(cdip))); 1692 dev_len = sizeof (dev_type); 1693 if (ddi_getlongprop_buf(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS, 1694 "device_type", (caddr_t)dev_type, &dev_len) 1695 != DDI_PROP_SUCCESS) 1696 continue; 1697 if (strcmp(dev_type, "pciex") == 0) 1698 return (PSM_SUCCESS); 1699 if (strcmp(dev_type, "pci") == 0 && get_hwenv() == HW_KVM) 1700 return (PSM_SUCCESS); 1701 } 1702 1703 /* MSI is not supported on this system */ 1704 DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: no 'pciex' " 1705 "device_type found\n")); 1706 return (PSM_FAILURE); 1707 } 1708 1709 /* 1710 * apic_pci_msi_unconfigure: 1711 * 1712 * This and next two interfaces are copied from pci_intr_lib.c 1713 * Do ensure that these two files stay in sync. 1714 * These needed to be copied over here to avoid a deadlock situation on 1715 * certain mp systems that use MSI interrupts. 1716 * 1717 * IMPORTANT regards next three interfaces: 1718 * i) are called only for MSI/X interrupts. 1719 * ii) called with interrupts disabled, and must not block 1720 */ 1721 void 1722 apic_pci_msi_unconfigure(dev_info_t *rdip, int type, int inum) 1723 { 1724 ushort_t msi_ctrl; 1725 int cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip); 1726 ddi_acc_handle_t handle = i_ddi_get_pci_config_handle(rdip); 1727 1728 ASSERT((handle != NULL) && (cap_ptr != 0)); 1729 1730 if (type == DDI_INTR_TYPE_MSI) { 1731 msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL); 1732 msi_ctrl &= (~PCI_MSI_MME_MASK); 1733 pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl); 1734 pci_config_put32(handle, cap_ptr + PCI_MSI_ADDR_OFFSET, 0); 1735 1736 if (msi_ctrl & PCI_MSI_64BIT_MASK) { 1737 pci_config_put16(handle, 1738 cap_ptr + PCI_MSI_64BIT_DATA, 0); 1739 pci_config_put32(handle, 1740 cap_ptr + PCI_MSI_ADDR_OFFSET + 4, 0); 1741 } else { 1742 pci_config_put16(handle, 1743 cap_ptr + PCI_MSI_32BIT_DATA, 0); 1744 } 1745 1746 } else if (type == DDI_INTR_TYPE_MSIX) { 1747 uintptr_t off; 1748 uint32_t mask; 1749 ddi_intr_msix_t *msix_p = i_ddi_get_msix(rdip); 1750 1751 ASSERT(msix_p != NULL); 1752 1753 /* Offset into "inum"th entry in the MSI-X table & mask it */ 1754 off = (uintptr_t)msix_p->msix_tbl_addr + (inum * 1755 PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET; 1756 1757 mask = ddi_get32(msix_p->msix_tbl_hdl, (uint32_t *)off); 1758 1759 ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, (mask | 1)); 1760 1761 /* Offset into the "inum"th entry in the MSI-X table */ 1762 off = (uintptr_t)msix_p->msix_tbl_addr + 1763 (inum * PCI_MSIX_VECTOR_SIZE); 1764 1765 /* Reset the "data" and "addr" bits */ 1766 ddi_put32(msix_p->msix_tbl_hdl, 1767 (uint32_t *)(off + PCI_MSIX_DATA_OFFSET), 0); 1768 ddi_put64(msix_p->msix_tbl_hdl, (uint64_t *)off, 0); 1769 } 1770 } 1771 1772 /* 1773 * apic_pci_msi_disable_mode: 1774 */ 1775 void 1776 apic_pci_msi_disable_mode(dev_info_t *rdip, int type) 1777 { 1778 ushort_t msi_ctrl; 1779 int cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip); 1780 ddi_acc_handle_t handle = i_ddi_get_pci_config_handle(rdip); 1781 1782 ASSERT((handle != NULL) && (cap_ptr != 0)); 1783 1784 if (type == DDI_INTR_TYPE_MSI) { 1785 msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL); 1786 if (!(msi_ctrl & PCI_MSI_ENABLE_BIT)) 1787 return; 1788 1789 msi_ctrl &= ~PCI_MSI_ENABLE_BIT; /* MSI disable */ 1790 pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl); 1791 1792 } else if (type == DDI_INTR_TYPE_MSIX) { 1793 msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL); 1794 if (msi_ctrl & PCI_MSIX_ENABLE_BIT) { 1795 msi_ctrl &= ~PCI_MSIX_ENABLE_BIT; 1796 pci_config_put16(handle, cap_ptr + PCI_MSIX_CTRL, 1797 msi_ctrl); 1798 } 1799 } 1800 } 1801 1802 uint32_t 1803 apic_get_localapicid(uint32_t cpuid) 1804 { 1805 ASSERT(cpuid < apic_nproc && apic_cpus != NULL); 1806 1807 return (apic_cpus[cpuid].aci_local_id); 1808 } 1809 1810 uchar_t 1811 apic_get_ioapicid(uchar_t ioapicindex) 1812 { 1813 ASSERT(ioapicindex < MAX_IO_APIC); 1814 1815 return (apic_io_id[ioapicindex]); 1816 } 1817