1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 /* 26 * Copyright (c) 2017, Joyent, Inc. All rights reserved. 27 * Copyright (c) 2016, 2017 by Delphix. All rights reserved. 28 */ 29 30 /* 31 * PSMI 1.1 extensions are supported only in 2.6 and later versions. 32 * PSMI 1.2 extensions are supported only in 2.7 and later versions. 33 * PSMI 1.3 and 1.4 extensions are supported in Solaris 10. 34 * PSMI 1.5 extensions are supported in Solaris Nevada. 35 * PSMI 1.6 extensions are supported in Solaris Nevada. 36 * PSMI 1.7 extensions are supported in Solaris Nevada. 
 */
#define	PSMI_1_7

#include <sys/processor.h>
#include <sys/time.h>
#include <sys/psm.h>
#include <sys/smp_impldefs.h>
#include <sys/cram.h>
#include <sys/acpi/acpi.h>
#include <sys/acpica.h>
#include <sys/psm_common.h>
#include <sys/apic.h>
#include <sys/pit.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ddi_impldefs.h>
#include <sys/pci.h>
#include <sys/promif.h>
#include <sys/x86_archext.h>
#include <sys/cpc_impl.h>
#include <sys/uadmin.h>
#include <sys/panic.h>
#include <sys/debug.h>
#include <sys/archsystm.h>
#include <sys/trap.h>
#include <sys/machsystm.h>
#include <sys/sysmacros.h>
#include <sys/cpuvar.h>
#include <sys/rm_platter.h>
#include <sys/privregs.h>
#include <sys/note.h>
#include <sys/pci_intr_lib.h>
#include <sys/spl.h>
#include <sys/clock.h>
#include <sys/dditypes.h>
#include <sys/sunddi.h>
#include <sys/x_call.h>
#include <sys/reboot.h>
#include <sys/hpet.h>
#include <sys/apic_common.h>
#include <sys/apic_timer.h>

static void	apic_record_ioapic_rdt(void *intrmap_private,
		    ioapic_rdt_t *irdt);
static void	apic_record_msi(void *intrmap_private, msi_regs_t *mregs);

/*
 * Common routines between pcplusmp & apix (taken from apic.c).
 */

int	apic_clkinit(int);
hrtime_t apic_gethrtime(void);
void	apic_send_ipi(int, int);
void	apic_set_idlecpu(processorid_t);
void	apic_unset_idlecpu(processorid_t);
void	apic_shutdown(int, int);
void	apic_preshutdown(int, int);
processorid_t	apic_get_next_processorid(processorid_t);

hrtime_t apic_gettime();

enum apic_ioapic_method_type apix_mul_ioapic_method = APIC_MUL_IOAPIC_PCPLUSMP;

/* Now the ones for Dynamic Interrupt distribution */
int	apic_enable_dynamic_migration = 0;

/* maximum loop count when sending Start IPIs. */
int apic_sipi_max_loop_count = 0x1000;

/*
 * These variables are frequently accessed in apic_intr_enter(),
 * apic_intr_exit and apic_setspl, so group them together
 */
volatile uint32_t *apicadr = NULL;	/* virtual addr of local APIC	*/
int apic_setspl_delay = 1;		/* apic_setspl - delay enable	*/
int apic_clkvect;

/* vector at which error interrupts come in */
int apic_errvect;
int apic_enable_error_intr = 1;
int apic_error_display_delay = 100;

/* vector at which performance counter overflow interrupts come in */
int apic_cpcovf_vect;
int apic_enable_cpcovf_intr = 1;

/* vector at which CMCI interrupts come in */
int apic_cmci_vect;
extern int cmi_enable_cmci;
extern void cmi_cmci_trap(void);

kmutex_t cmci_cpu_setup_lock;	/* protects cmci_cpu_setup_registered */
int cmci_cpu_setup_registered;

/* serializes LOCAL_APIC <-> LOCAL_X2APIC mode transitions */
lock_t apic_mode_switch_lock;

/*
 * Patchable global variables.
 */
int	apic_forceload = 0;

int	apic_coarse_hrtime = 1;		/* 0 - use accurate slow gethrtime() */

int apic_flat_model = 0;		/* 0 - clustered. 1 - flat */
int	apic_panic_on_nmi = 0;
int	apic_panic_on_apic_error = 0;

int	apic_verbose = 0;	/* 0x1ff */

#ifdef DEBUG
int	apic_debug = 0;
int	apic_restrict_vector = 0;

int	apic_debug_msgbuf[APIC_DEBUG_MSGBUFSIZE];
int	apic_debug_msgbufindex = 0;

#endif /* DEBUG */

uint_t apic_nticks = 0;
uint_t apic_skipped_redistribute = 0;

/* last APIC current-count value seen by apic_gethrtime() */
uint_t last_count_read = 0;
lock_t	apic_gethrtime_lock;
/* odd value means an hrtime update is in progress (see apic_gettime) */
volatile int	apic_hrtime_stamp = 0;
volatile hrtime_t apic_nsec_since_boot = 0;

static	hrtime_t	apic_last_hrtime = 0;
int		apic_hrtime_error = 0;
int		apic_remote_hrterr = 0;
int		apic_num_nmis = 0;
int		apic_apic_error = 0;
int		apic_num_apic_errors = 0;
int		apic_num_cksum_errors = 0;

int	apic_error = 0;

/* set once apic_cpu_start() has stored the CMOS shutdown byte */
static	int	apic_cmos_ssb_set = 0;

/* use to make sure only one cpu handles the nmi */
lock_t	apic_nmi_lock;
/* use to make sure only one cpu handles the error interrupt */
lock_t	apic_error_lock;

/*
 * NOTE(review): the two tables below look like IPMI-style
 * SET_WATCHDOG_TIMER / RESET_WATCHDOG_TIMER command sequences for
 * Aspen and Sitka platform BMCs; the consumer is not visible in this
 * chunk — confirm against the shutdown/poweroff path before relying
 * on this description.
 */
static	struct {
	uchar_t	cntl;
	uchar_t	data;
} aspen_bmc[] = {
	{ CC_SMS_WR_START,	0x18 },		/* NetFn/LUN */
	{ CC_SMS_WR_NEXT,	0x24 },		/* Cmd SET_WATCHDOG_TIMER */
	{ CC_SMS_WR_NEXT,	0x84 },		/* DataByte 1: SMS/OS no log */
	{ CC_SMS_WR_NEXT,	0x2 },		/* DataByte 2: Power Down */
	{ CC_SMS_WR_NEXT,	0x0 },		/* DataByte 3: no pre-timeout */
	{ CC_SMS_WR_NEXT,	0x0 },		/* DataByte 4: timer expir. */
	{ CC_SMS_WR_NEXT,	0xa },		/* DataByte 5: init countdown */
	{ CC_SMS_WR_END,	0x0 },		/* DataByte 6: init countdown */

	{ CC_SMS_WR_START,	0x18 },		/* NetFn/LUN */
	{ CC_SMS_WR_END,	0x22 }		/* Cmd RESET_WATCHDOG_TIMER */
};

static	struct {
	int	port;
	uchar_t	data;
} sitka_bmc[] = {
	{ SMS_COMMAND_REGISTER,	SMS_WRITE_START },
	{ SMS_DATA_REGISTER,	0x18 },		/* NetFn/LUN */
	{ SMS_DATA_REGISTER,	0x24 },		/* Cmd SET_WATCHDOG_TIMER */
	{ SMS_DATA_REGISTER,	0x84 },		/* DataByte 1: SMS/OS no log */
	{ SMS_DATA_REGISTER,	0x2 },		/* DataByte 2: Power Down */
	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 3: no pre-timeout */
	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 4: timer expir. */
	{ SMS_DATA_REGISTER,	0xa },		/* DataByte 5: init countdown */
	{ SMS_COMMAND_REGISTER,	SMS_WRITE_END },
	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 6: init countdown */

	{ SMS_COMMAND_REGISTER,	SMS_WRITE_START },
	{ SMS_DATA_REGISTER,	0x18 },		/* NetFn/LUN */
	{ SMS_COMMAND_REGISTER,	SMS_WRITE_END },
	{ SMS_DATA_REGISTER,	0x22 }		/* Cmd RESET_WATCHDOG_TIMER */
};

/* Patchable global variables.
*/ 219 int apic_kmdb_on_nmi = 0; /* 0 - no, 1 - yes enter kmdb */ 220 uint32_t apic_divide_reg_init = 0; /* 0 - divide by 2 */ 221 222 /* default apic ops without interrupt remapping */ 223 static apic_intrmap_ops_t apic_nointrmap_ops = { 224 (int (*)(int))return_instr, 225 (void (*)(int))return_instr, 226 (void (*)(void **, dev_info_t *, uint16_t, int, uchar_t))return_instr, 227 (void (*)(void *, void *, uint16_t, int))return_instr, 228 (void (*)(void **))return_instr, 229 apic_record_ioapic_rdt, 230 apic_record_msi, 231 }; 232 233 apic_intrmap_ops_t *apic_vt_ops = &apic_nointrmap_ops; 234 apic_cpus_info_t *apic_cpus = NULL; 235 cpuset_t apic_cpumask; 236 uint_t apic_picinit_called; 237 238 /* Flag to indicate that we need to shut down all processors */ 239 static uint_t apic_shutdown_processors; 240 241 /* 242 * Probe the ioapic method for apix module. Called in apic_probe_common() 243 */ 244 int 245 apic_ioapic_method_probe() 246 { 247 if (apix_enable == 0) 248 return (PSM_SUCCESS); 249 250 /* 251 * Set IOAPIC EOI handling method. The priority from low to high is: 252 * 1. IOxAPIC: with EOI register 253 * 2. IOMMU interrupt mapping 254 * 3. Mask-Before-EOI method for systems without boot 255 * interrupt routing, such as systems with only one IOAPIC; 256 * NVIDIA CK8-04/MCP55 systems; systems with bridge solution 257 * which disables the boot interrupt routing already. 258 * 4. 
Directed EOI 259 */ 260 if (apic_io_ver[0] >= 0x20) 261 apix_mul_ioapic_method = APIC_MUL_IOAPIC_IOXAPIC; 262 if ((apic_io_max == 1) || (apic_nvidia_io_max == apic_io_max)) 263 apix_mul_ioapic_method = APIC_MUL_IOAPIC_MASK; 264 if (apic_directed_EOI_supported()) 265 apix_mul_ioapic_method = APIC_MUL_IOAPIC_DEOI; 266 267 /* fall back to pcplusmp */ 268 if (apix_mul_ioapic_method == APIC_MUL_IOAPIC_PCPLUSMP) { 269 /* make sure apix is after pcplusmp in /etc/mach */ 270 apix_enable = 0; /* go ahead with pcplusmp install next */ 271 return (PSM_FAILURE); 272 } 273 274 return (PSM_SUCCESS); 275 } 276 277 /* 278 * handler for APIC Error interrupt. Just print a warning and continue 279 */ 280 int 281 apic_error_intr() 282 { 283 uint_t error0, error1, error; 284 uint_t i; 285 286 /* 287 * We need to write before read as per 7.4.17 of system prog manual. 288 * We do both and or the results to be safe 289 */ 290 error0 = apic_reg_ops->apic_read(APIC_ERROR_STATUS); 291 apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0); 292 error1 = apic_reg_ops->apic_read(APIC_ERROR_STATUS); 293 error = error0 | error1; 294 295 /* 296 * Clear the APIC error status (do this on all cpus that enter here) 297 * (two writes are required due to the semantics of accessing the 298 * error status register.) 299 */ 300 apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0); 301 apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0); 302 303 /* 304 * Prevent more than 1 CPU from handling error interrupt causing 305 * double printing (interleave of characters from multiple 306 * CPU's when using prom_printf) 307 */ 308 if (lock_try(&apic_error_lock) == 0) 309 return (error ? DDI_INTR_CLAIMED : DDI_INTR_UNCLAIMED); 310 if (error) { 311 #if DEBUG 312 if (apic_debug) 313 debug_enter("pcplusmp: APIC Error interrupt received"); 314 #endif /* DEBUG */ 315 if (apic_panic_on_apic_error) 316 cmn_err(CE_PANIC, 317 "APIC Error interrupt on CPU %d. 
Status = %x", 318 psm_get_cpu_id(), error); 319 else { 320 if ((error & ~APIC_CS_ERRORS) == 0) { 321 /* cksum error only */ 322 apic_error |= APIC_ERR_APIC_ERROR; 323 apic_apic_error |= error; 324 apic_num_apic_errors++; 325 apic_num_cksum_errors++; 326 } else { 327 /* 328 * prom_printf is the best shot we have of 329 * something which is problem free from 330 * high level/NMI type of interrupts 331 */ 332 prom_printf("APIC Error interrupt on CPU %d. " 333 "Status 0 = %x, Status 1 = %x\n", 334 psm_get_cpu_id(), error0, error1); 335 apic_error |= APIC_ERR_APIC_ERROR; 336 apic_apic_error |= error; 337 apic_num_apic_errors++; 338 for (i = 0; i < apic_error_display_delay; i++) { 339 tenmicrosec(); 340 } 341 /* 342 * provide more delay next time limited to 343 * roughly 1 clock tick time 344 */ 345 if (apic_error_display_delay < 500) 346 apic_error_display_delay *= 2; 347 } 348 } 349 lock_clear(&apic_error_lock); 350 return (DDI_INTR_CLAIMED); 351 } else { 352 lock_clear(&apic_error_lock); 353 return (DDI_INTR_UNCLAIMED); 354 } 355 } 356 357 /* 358 * Turn off the mask bit in the performance counter Local Vector Table entry. 
 */
void
apic_cpcovf_mask_clear(void)
{
	apic_reg_ops->apic_write(APIC_PCINT_VECT,
	    (apic_reg_ops->apic_read(APIC_PCINT_VECT) & ~APIC_LVT_MASK));
}

/*
 * Cross-call target: program the local CMCI LVT entry with the CMCI
 * vector (unmasked) on the current CPU.
 */
/*ARGSUSED*/
static int
apic_cmci_enable(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
{
	apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect);
	return (0);
}

/*
 * Cross-call target: mask the local CMCI LVT entry on the current CPU.
 */
/*ARGSUSED*/
static int
apic_cmci_disable(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
{
	apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect | AV_MASK);
	return (0);
}

/*
 * CPU state-change callback: enable CMCI on a CPU coming online and
 * disable it on a CPU going offline, via a cross call to that CPU.
 */
/*ARGSUSED*/
int
cmci_cpu_setup(cpu_setup_t what, int cpuid, void *arg)
{
	cpuset_t	cpu_set;

	CPUSET_ONLY(cpu_set, cpuid);

	switch (what) {
	case CPU_ON:
		xc_call(NULL, NULL, NULL, CPUSET2BV(cpu_set),
		    (xc_func_t)apic_cmci_enable);
		break;

	case CPU_OFF:
		xc_call(NULL, NULL, NULL, CPUSET2BV(cpu_set),
		    (xc_func_t)apic_cmci_disable);
		break;

	default:
		break;
	}

	return (0);
}

/*
 * Mask every local interrupt source (task priority, timer, LINT0/LINT1,
 * error and performance counter LVTs) on the current CPU, then rewrite
 * the spurious interrupt register.
 */
static void
apic_disable_local_apic(void)
{
	apic_reg_ops->apic_write_task_reg(APIC_MASK_ALL);
	apic_reg_ops->apic_write(APIC_LOCAL_TIMER, AV_MASK);

	/* local intr reg 0 */
	apic_reg_ops->apic_write(APIC_INT_VECT0, AV_MASK);

	/* disable NMI */
	apic_reg_ops->apic_write(APIC_INT_VECT1, AV_MASK);

	/* and error interrupt */
	apic_reg_ops->apic_write(APIC_ERR_VECT, AV_MASK);

	/* and perf counter intr */
	apic_reg_ops->apic_write(APIC_PCINT_VECT, AV_MASK);

	apic_reg_ops->apic_write(APIC_SPUR_INT_REG, APIC_SPUR_INTR);
}

/*
 * Run the INIT/SIPI sequence against CPU 'cpun'.  With start == B_TRUE
 * the CMOS shutdown byte is also set so the target resumes at the warm
 * reset vector; the rm_platter page provides the real-mode start vector.
 */
static void
apic_cpu_send_SIPI(processorid_t cpun, boolean_t start)
{
	int		loop_count;
	uint32_t	vector;
	uint_t		apicid;
	ulong_t		iflag;

	apicid = apic_cpus[cpun].aci_local_id;

	/*
	 * Interrupts on current CPU will be disabled during the
	 * steps in order to avoid unwanted side effects from
	 * executing interrupt handlers on a problematic BIOS.
	 */
	iflag = intr_clear();

	if (start) {
		outb(CMOS_ADDR, SSB);
		outb(CMOS_DATA, BIOS_SHUTDOWN);
	}

	/*
	 * According to X2APIC specification in section '2.3.5.1' of
	 * Interrupt Command Register Semantics, the semantics of
	 * programming the Interrupt Command Register to dispatch an interrupt
	 * is simplified. A single MSR write to the 64-bit ICR is required
	 * for dispatching an interrupt. Specifically, with the 64-bit MSR
	 * interface to ICR, system software is not required to check the
	 * status of the delivery status bit prior to writing to the ICR
	 * to send an IPI. With the removal of the Delivery Status bit,
	 * system software no longer has a reason to read the ICR. It remains
	 * readable only to aid in debugging.
	 */
#ifdef	DEBUG
	APIC_AV_PENDING_SET();
#else
	if (apic_mode == LOCAL_APIC) {
		APIC_AV_PENDING_SET();
	}
#endif /* DEBUG */

	/* for integrated - make sure there is one INIT IPI in buffer */
	/* for external - it will wake up the cpu */
	apic_reg_ops->apic_write_int_cmd(apicid, AV_ASSERT | AV_RESET);

	/* If only 1 CPU is installed, PENDING bit will not go low */
	for (loop_count = apic_sipi_max_loop_count; loop_count; loop_count--) {
		if (apic_mode == LOCAL_APIC &&
		    apic_reg_ops->apic_read(APIC_INT_CMD1) & AV_PENDING)
			apic_ret();
		else
			break;
	}

	apic_reg_ops->apic_write_int_cmd(apicid, AV_DEASSERT | AV_RESET);
	drv_usecwait(20000);		/* 20 milli sec */

	if (apic_cpus[cpun].aci_local_ver >= APIC_INTEGRATED_VERS) {
		/* integrated apic */

		vector = (rm_platter_pa >> MMU_PAGESHIFT) &
		    (APIC_VECTOR_MASK | APIC_IPL_MASK);

		/* to offset the INIT IPI queue up in the buffer */
		apic_reg_ops->apic_write_int_cmd(apicid, vector | AV_STARTUP);
		drv_usecwait(200);		/* 20 micro sec */

		/*
		 * send the second SIPI (Startup IPI) as recommended by Intel
		 * software development manual.
		 */
		apic_reg_ops->apic_write_int_cmd(apicid, vector | AV_STARTUP);
		drv_usecwait(200);	/* 20 micro sec */
	}

	intr_restore(iflag);
}

/*
 * PSM entry point: start CPU 'cpun' via the INIT/SIPI sequence.
 * Returns 0 on success or EINVAL for an out-of-range cpu id.
 */
/*ARGSUSED1*/
int
apic_cpu_start(processorid_t cpun, caddr_t arg)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	if (!apic_cpu_in_range(cpun)) {
		return (EINVAL);
	}

	/*
	 * Switch to apic_common_send_ipi for safety during starting other CPUs.
	 */
	if (apic_mode == LOCAL_X2APIC) {
		apic_switch_ipi_callback(B_TRUE);
	}

	apic_cmos_ssb_set = 1;
	apic_cpu_send_SIPI(cpun, B_TRUE);

	return (0);
}

/*
 * Put CPU into halted state with interrupts disabled.
 */
/*ARGSUSED1*/
int
apic_cpu_stop(processorid_t cpun, caddr_t arg)
{
	int		rc;
	cpu_t		*cp;
	extern cpuset_t cpu_ready_set;
	extern void cpu_idle_intercept_cpu(cpu_t *cp);

	ASSERT(MUTEX_HELD(&cpu_lock));

	if (!apic_cpu_in_range(cpun)) {
		return (EINVAL);
	}
	/* only an integrated APIC can accept the INIT used to stop it */
	if (apic_cpus[cpun].aci_local_ver < APIC_INTEGRATED_VERS) {
		return (ENOTSUP);
	}

	cp = cpu_get(cpun);
	ASSERT(cp != NULL);
	ASSERT((cp->cpu_flags & CPU_OFFLINE) != 0);
	ASSERT((cp->cpu_flags & CPU_QUIESCED) != 0);
	ASSERT((cp->cpu_flags & CPU_ENABLE) == 0);

	/* Clear CPU_READY flag to disable cross calls. */
	cp->cpu_flags &= ~CPU_READY;
	CPUSET_ATOMIC_DEL(cpu_ready_set, cpun);
	rc = xc_flush_cpu(cp);
	if (rc != 0) {
		/* undo the CPU_READY removal so the CPU stays usable */
		CPUSET_ATOMIC_ADD(cpu_ready_set, cpun);
		cp->cpu_flags |= CPU_READY;
		return (rc);
	}

	/* Intercept target CPU at a safe point before powering it off. */
	cpu_idle_intercept_cpu(cp);

	apic_cpu_send_SIPI(cpun, B_FALSE);
	cp->cpu_flags &= ~CPU_RUNNING;

	return (0);
}

/*
 * Dispatch a PSM CPU hotplug request (add/remove/stop) to the matching
 * handler.  Returns EINVAL for a NULL request, ENOTSUP for unknown
 * commands, otherwise the handler's return value.
 */
int
apic_cpu_ops(psm_cpu_request_t *reqp)
{
	if (reqp == NULL) {
		return (EINVAL);
	}

	switch (reqp->pcr_cmd) {
	case PSM_CPU_ADD:
		return (apic_cpu_add(reqp));

	case PSM_CPU_REMOVE:
		return (apic_cpu_remove(reqp));

	case PSM_CPU_STOP:
		return (apic_cpu_stop(reqp->req.cpu_stop.cpuid,
		    reqp->req.cpu_stop.ctx));

	default:
		return (ENOTSUP);
	}
}

#ifdef	DEBUG
int	apic_break_on_cpu = 9;
int	apic_stretch_interrupts = 0;
int	apic_stretch_ISR = 1 << 3;	/* IPL of 3 matches nothing now */
#endif /* DEBUG */

/*
 * generates an interprocessor interrupt to another CPU. Any changes made to
 * this routine must be accompanied by similar changes to
 * apic_common_send_ipi().
 */
void
apic_send_ipi(int cpun, int ipl)
{
	int vector;
	ulong_t flag;

	vector = apic_resv_vector[ipl];

	ASSERT((vector >= APIC_BASE_VECT) && (vector <= APIC_SPUR_INTR));

	flag = intr_clear();

	APIC_AV_PENDING_SET();

	apic_reg_ops->apic_write_int_cmd(apic_cpus[cpun].aci_local_id,
	    vector);

	intr_restore(flag);
}

/* PSM idle-cpu hooks: intentionally empty for this implementation */
/*ARGSUSED*/
void
apic_set_idlecpu(processorid_t cpun)
{
}

/*ARGSUSED*/
void
apic_unset_idlecpu(processorid_t cpun)
{
}

/* no-op used as a polite busy-wait body in the polling loops above */
void
apic_ret()
{
}

/*
 * If apic_coarse_time == 1, then apic_gettime() is used instead of
 * apic_gethrtime(). This is used for performance instead of accuracy.
 */
hrtime_t
apic_gettime()
{
	int old_hrtime_stamp;
	hrtime_t temp;

	/*
	 * In one-shot mode, we do not keep time, so if anyone
	 * calls psm_gettime() directly, we vector over to
	 * gethrtime().
	 * one-shot mode MUST NOT be enabled if this psm is the source of
	 * hrtime.
	 */

	if (apic_oneshot)
		return (gethrtime());


gettime_again:
	/* an odd stamp means the clock handler is mid-update; spin */
	while ((old_hrtime_stamp = apic_hrtime_stamp) & 1)
		apic_ret();

	temp = apic_nsec_since_boot;

	if (apic_hrtime_stamp != old_hrtime_stamp) {	/* got an interrupt */
		goto gettime_again;
	}
	return (temp);
}

/*
 * Here we return the number of nanoseconds since booting.  Note every
 * clock interrupt increments apic_nsec_since_boot by the appropriate
 * amount.
 */
hrtime_t
apic_gethrtime(void)
{
	int curr_timeval, countval, elapsed_ticks;
	int old_hrtime_stamp, status;
	hrtime_t temp;
	uint32_t cpun;
	ulong_t oflags;

	/*
	 * In one-shot mode, we do not keep time, so if anyone
	 * calls psm_gethrtime() directly, we vector over to
	 * gethrtime().
	 * one-shot mode MUST NOT be enabled if this psm is the source of
	 * hrtime.
	 */

	if (apic_oneshot)
		return (gethrtime());

	oflags = intr_clear();	/* prevent migration */

	cpun = apic_reg_ops->apic_read(APIC_LID_REG);
	if (apic_mode == LOCAL_APIC)
		cpun >>= APIC_ID_BIT_OFFSET;

	lock_set(&apic_gethrtime_lock);

gethrtime_again:
	/* an odd stamp means the clock handler is mid-update; spin */
	while ((old_hrtime_stamp = apic_hrtime_stamp) & 1)
		apic_ret();

	/*
	 * Check to see which CPU we are on.  Note the time is kept on
	 * the local APIC of CPU 0.  If on CPU 0, simply read the current
	 * counter.  If on another CPU, issue a remote read command to CPU 0.
	 */
	if (cpun == apic_cpus[0].aci_local_id) {
		countval = apic_reg_ops->apic_read(APIC_CURR_COUNT);
	} else {
#ifdef	DEBUG
		APIC_AV_PENDING_SET();
#else
		if (apic_mode == LOCAL_APIC)
			APIC_AV_PENDING_SET();
#endif /* DEBUG */

		apic_reg_ops->apic_write_int_cmd(
		    apic_cpus[0].aci_local_id, APIC_CURR_ADD | AV_REMOTE);

		while ((status = apic_reg_ops->apic_read(APIC_INT_CMD1))
		    & AV_READ_PENDING) {
			apic_ret();
		}

		if (status & AV_REMOTE_STATUS)	/* 1 = valid */
			countval = apic_reg_ops->apic_read(APIC_REMOTE_READ);
		else {	/* 0 = invalid */
			apic_remote_hrterr++;
			/*
			 * return last hrtime right now, will need more
			 * testing if change to retry
			 */
			temp = apic_last_hrtime;

			lock_clear(&apic_gethrtime_lock);

			intr_restore(oflags);

			return (temp);
		}
	}
	/* the count-down timer wrapped; clamp so elapsed time is sane */
	if (countval > last_count_read)
		countval = 0;
	else
		last_count_read = countval;

	elapsed_ticks = apic_hertz_count - countval;

	curr_timeval = APIC_TICKS_TO_NSECS(elapsed_ticks);
	temp = apic_nsec_since_boot + curr_timeval;

	if (apic_hrtime_stamp != old_hrtime_stamp) {	/* got an interrupt */
		/* we might have clobbered last_count_read. Restore it */
		last_count_read = apic_hertz_count;
		goto gethrtime_again;
	}

	if (temp < apic_last_hrtime) {
		/* return last hrtime if error occurs */
		apic_hrtime_error++;
		temp = apic_last_hrtime;
	}
	else
		apic_last_hrtime = temp;

	lock_clear(&apic_gethrtime_lock);
	intr_restore(oflags);

	return (temp);
}

/* apic NMI handler */
/*ARGSUSED*/
void
apic_nmi_intr(caddr_t arg, struct regs *rp)
{
	/* during shutdown the NMI is our cue to park this CPU */
	if (apic_shutdown_processors) {
		apic_disable_local_apic();
		return;
	}

	apic_error |= APIC_ERR_NMI;

	/* only one CPU reports/handles the NMI */
	if (!lock_try(&apic_nmi_lock))
		return;
	apic_num_nmis++;

	if (apic_kmdb_on_nmi && psm_debugger()) {
		debug_enter("NMI received: entering kmdb\n");
	} else if (apic_panic_on_nmi) {
		/* Keep panic from entering kmdb. */
		nopanicdebug = 1;
		panic("NMI received\n");
	} else {
		/*
		 * prom_printf is the best shot we have of something which is
		 * problem free from high level/NMI type of interrupts
		 */
		prom_printf("NMI received\n");
	}

	lock_clear(&apic_nmi_lock);
}

/*
 * Return the next configured cpu id after 'cpu_id', 0 when called with
 * -1, or -1 when no higher-numbered CPU exists.
 */
processorid_t
apic_get_next_processorid(processorid_t cpu_id)
{

	int i;

	if (cpu_id == -1)
		return ((processorid_t)0);

	for (i = cpu_id + 1; i < NCPU; i++) {
		if (apic_cpu_in_range(i))
			return (i);
	}

	return ((processorid_t)-1);
}

/*
 * Hotplug-add a CPU described by reqp.  Validates the APIC/processor
 * ids, allocates a cpu id slot (preferring a matching "dirty" slot,
 * then a fresh one), maps it via ACPI and records it in apic_cpus.
 * On success the allocated cpu id is returned in reqp->req.cpu_add.cpuid.
 */
int
apic_cpu_add(psm_cpu_request_t *reqp)
{
	int i, rv = 0;
	ulong_t iflag;
	boolean_t first = B_TRUE;
	uchar_t localver = 0;
	uint32_t localid, procid;
	processorid_t cpuid = (processorid_t)-1;
	mach_cpu_add_arg_t *ap;

	ASSERT(reqp != NULL);
	reqp->req.cpu_add.cpuid = (processorid_t)-1;

	/* Check whether CPU hotplug is supported. */
	if (!plat_dr_support_cpu() || apic_max_nproc == -1) {
		return (ENOTSUP);
	}

	ap = (mach_cpu_add_arg_t *)reqp->req.cpu_add.argp;
	switch (ap->type) {
	case MACH_CPU_ARG_LOCAL_APIC:
		localid = ap->arg.apic.apic_id;
		procid = ap->arg.apic.proc_id;
		if (localid >= 255 || procid > 255) {
			cmn_err(CE_WARN,
			    "!apic: apicid(%u) or procid(%u) is invalid.",
			    localid, procid);
			return (EINVAL);
		}
		break;

	case MACH_CPU_ARG_LOCAL_X2APIC:
		localid = ap->arg.apic.apic_id;
		procid = ap->arg.apic.proc_id;
		if (localid >= UINT32_MAX) {
			cmn_err(CE_WARN,
			    "!apic: x2apicid(%u) is invalid.", localid);
			return (EINVAL);
		} else if (localid >= 255 && apic_mode == LOCAL_APIC) {
			cmn_err(CE_WARN, "!apic: system is in APIC mode, "
			    "can't support x2APIC processor.");
			return (ENOTSUP);
		}
		break;

	default:
		cmn_err(CE_WARN,
		    "!apic: unknown argument type %d to apic_cpu_add().",
		    ap->type);
		return (EINVAL);
	}

	/* Use apic_ioapic_lock to sync with apic_get_next_bind_cpu. */
	iflag = intr_clear();
	lock_set(&apic_ioapic_lock);

	/* Check whether local APIC id already exists. */
	for (i = 0; i < apic_nproc; i++) {
		if (!CPU_IN_SET(apic_cpumask, i))
			continue;
		if (apic_cpus[i].aci_local_id == localid) {
			lock_clear(&apic_ioapic_lock);
			intr_restore(iflag);
			cmn_err(CE_WARN,
			    "!apic: local apic id %u already exists.",
			    localid);
			return (EEXIST);
		} else if (apic_cpus[i].aci_processor_id == procid) {
			lock_clear(&apic_ioapic_lock);
			intr_restore(iflag);
			cmn_err(CE_WARN,
			    "!apic: processor id %u already exists.",
			    (int)procid);
			return (EEXIST);
		}

		/*
		 * There's no local APIC version number available in MADT table,
		 * so assume that all CPUs are homogeneous and use local APIC
		 * version number of the first existing CPU.
		 */
		if (first) {
			first = B_FALSE;
			localver = apic_cpus[i].aci_local_ver;
		}
	}
	ASSERT(first == B_FALSE);

	/*
	 * Try to assign the same cpuid if APIC id exists in the dirty cache.
	 */
	for (i = 0; i < apic_max_nproc; i++) {
		if (CPU_IN_SET(apic_cpumask, i)) {
			ASSERT((apic_cpus[i].aci_status & APIC_CPU_FREE) == 0);
			continue;
		}
		ASSERT(apic_cpus[i].aci_status & APIC_CPU_FREE);
		if ((apic_cpus[i].aci_status & APIC_CPU_DIRTY) &&
		    apic_cpus[i].aci_local_id == localid &&
		    apic_cpus[i].aci_processor_id == procid) {
			cpuid = i;
			break;
		}
	}

	/* Avoid the dirty cache and allocate fresh slot if possible. */
	if (cpuid == (processorid_t)-1) {
		for (i = 0; i < apic_max_nproc; i++) {
			if ((apic_cpus[i].aci_status & APIC_CPU_FREE) &&
			    (apic_cpus[i].aci_status & APIC_CPU_DIRTY) == 0) {
				cpuid = i;
				break;
			}
		}
	}

	/* Try to find any free slot as last resort. */
	if (cpuid == (processorid_t)-1) {
		for (i = 0; i < apic_max_nproc; i++) {
			if (apic_cpus[i].aci_status & APIC_CPU_FREE) {
				cpuid = i;
				break;
			}
		}
	}

	if (cpuid == (processorid_t)-1) {
		lock_clear(&apic_ioapic_lock);
		intr_restore(iflag);
		cmn_err(CE_NOTE,
		    "!apic: failed to allocate cpu id for processor %u.",
		    procid);
		rv = EAGAIN;
	} else if (ACPI_FAILURE(acpica_map_cpu(cpuid, procid))) {
		lock_clear(&apic_ioapic_lock);
		intr_restore(iflag);
		cmn_err(CE_NOTE,
		    "!apic: failed to build mapping for processor %u.",
		    procid);
		rv = EBUSY;
	} else {
		ASSERT(cpuid >= 0 && cpuid < NCPU);
		ASSERT(cpuid < apic_max_nproc && cpuid < max_ncpus);
		bzero(&apic_cpus[cpuid], sizeof (apic_cpus[0]));
		apic_cpus[cpuid].aci_processor_id = procid;
		apic_cpus[cpuid].aci_local_id = localid;
		apic_cpus[cpuid].aci_local_ver = localver;
		CPUSET_ATOMIC_ADD(apic_cpumask, cpuid);
		if (cpuid >= apic_nproc) {
			apic_nproc = cpuid + 1;
		}
		lock_clear(&apic_ioapic_lock);
		intr_restore(iflag);
		reqp->req.cpu_add.cpuid = cpuid;
	}

	return (rv);
}

/*
 * Hotplug-remove the CPU identified by reqp->req.cpu_remove.cpuid:
 * unmap it from ACPI, shrink apic_nproc if it was the highest id, and
 * park the slot in the "dirty" cache for possible re-add.
 */
int
apic_cpu_remove(psm_cpu_request_t *reqp)
{
	int i;
	ulong_t iflag;
	processorid_t cpuid;

	/* Check whether CPU hotplug is supported. */
	if (!plat_dr_support_cpu() || apic_max_nproc == -1) {
		return (ENOTSUP);
	}

	cpuid = reqp->req.cpu_remove.cpuid;

	/* Use apic_ioapic_lock to sync with apic_get_next_bind_cpu. */
	iflag = intr_clear();
	lock_set(&apic_ioapic_lock);

	if (!apic_cpu_in_range(cpuid)) {
		lock_clear(&apic_ioapic_lock);
		intr_restore(iflag);
		cmn_err(CE_WARN,
		    "!apic: cpuid %d doesn't exist in apic_cpus array.",
		    cpuid);
		return (ENODEV);
	}
	ASSERT((apic_cpus[cpuid].aci_status & APIC_CPU_FREE) == 0);

	if (ACPI_FAILURE(acpica_unmap_cpu(cpuid))) {
		lock_clear(&apic_ioapic_lock);
		intr_restore(iflag);
		return (ENOENT);
	}

	if (cpuid == apic_nproc - 1) {
		/*
		 * We are removing the highest numbered cpuid so we need to
		 * find the next highest cpuid as the new value for apic_nproc.
		 */
		for (i = apic_nproc; i > 0; i--) {
			if (CPU_IN_SET(apic_cpumask, i - 1)) {
				apic_nproc = i;
				break;
			}
		}
		/* at least one CPU left */
		ASSERT(i > 0);
	}
	CPUSET_ATOMIC_DEL(apic_cpumask, cpuid);
	/* mark slot as free and keep it in the dirty cache */
	apic_cpus[cpuid].aci_status = APIC_CPU_FREE | APIC_CPU_DIRTY;

	lock_clear(&apic_ioapic_lock);
	intr_restore(iflag);

	return (0);
}

/*
 * Return the number of ticks the APIC decrements in SF nanoseconds.
 * The fixed-frequency PIT (aka 8254) is used for the measurement.
 */
static uint64_t
apic_calibrate_impl()
{
	uint8_t		pit_tick_lo;
	uint16_t	pit_tick, target_pit_tick, pit_ticks_adj;
	uint32_t	pit_ticks;
	uint32_t	start_apic_tick, end_apic_tick, apic_ticks;
	ulong_t		iflag;

	/* start the APIC count-down timer from its maximum value */
	apic_reg_ops->apic_write(APIC_DIVIDE_REG, apic_divide_reg_init);
	apic_reg_ops->apic_write(APIC_INIT_COUNT, APIC_MAXVAL);

	iflag = intr_clear();

	/*
	 * Latch a PIT counter 0 reading (low byte then high byte) and
	 * retry until it falls in a range safe for the loops below.
	 */
	do {
		pit_tick_lo = inb(PITCTR0_PORT);
		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
	} while (pit_tick < APIC_TIME_MIN ||
	    pit_tick_lo <= APIC_LB_MIN || pit_tick_lo >= APIC_LB_MAX);

	/*
	 * Wait for the PIT to decrement by 5 ticks to ensure
	 * we didn't start in the middle of a tick.
	 * Compare with 0x10 for the wrap around case.
	 */
	target_pit_tick = pit_tick - 5;
	do {
		pit_tick_lo = inb(PITCTR0_PORT);
		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
	} while (pit_tick > target_pit_tick || pit_tick_lo < 0x10);

	start_apic_tick = apic_reg_ops->apic_read(APIC_CURR_COUNT);

	/*
	 * Wait for the PIT to decrement by APIC_TIME_COUNT ticks
	 */
	target_pit_tick = pit_tick - APIC_TIME_COUNT;
	do {
		pit_tick_lo = inb(PITCTR0_PORT);
		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
	} while (pit_tick > target_pit_tick || pit_tick_lo < 0x10);

	end_apic_tick = apic_reg_ops->apic_read(APIC_CURR_COUNT);

	intr_restore(iflag);

	apic_ticks = start_apic_tick - end_apic_tick;

	/* The PIT might have decremented by more ticks than planned */
	pit_ticks_adj = target_pit_tick - pit_tick;
	/* total number of PIT ticks corresponding to apic_ticks */
	pit_ticks = APIC_TIME_COUNT + pit_ticks_adj;

	/*
	 * Determine the number of nanoseconds per APIC clock tick
	 * and then determine how many APIC ticks to interrupt at the
	 * desired frequency
	 * apic_ticks / (pitticks / PIT_HZ) = apic_ticks_per_s
	 * (apic_ticks * PIT_HZ) / pitticks = apic_ticks_per_s
	 * apic_ticks_per_ns = (apic_ticks * PIT_HZ) / (pitticks * 10^9)
	 * apic_ticks_per_SFns =
	 * (SF * apic_ticks * PIT_HZ) / (pitticks * 10^9)
	 */
	return ((SF * apic_ticks * PIT_HZ) / ((uint64_t)pit_ticks * NANOSEC));
}

/*
 * It was found empirically that 5 measurements seem sufficient to give a good
 * accuracy. Most spurious measurements are higher than the target value thus
 * we eliminate up to 2/5 spurious measurements.
 */
#define	APIC_CALIBRATE_MEASUREMENTS		5

#define	APIC_CALIBRATE_PERCENT_OFF_WARNING	10

/*
 * Return the number of ticks the APIC decrements in SF nanoseconds.
 * Several measurements are taken to filter out outliers.
 */
uint64_t
apic_calibrate()
{
	uint64_t measurements[APIC_CALIBRATE_MEASUREMENTS];
	int median_idx;
	uint64_t median;

	/*
	 * When running under a virtual machine, the emulated PIT and APIC
	 * counters do not always return the right values and can roll over.
	 * Those spurious measurements are relatively rare but could
	 * significantly affect the calibration.
	 * Therefore we take several measurements and then keep the median.
	 * The median is preferred to the average here as we only want to
	 * discard outliers.
	 */
	for (int i = 0; i < APIC_CALIBRATE_MEASUREMENTS; i++)
		measurements[i] = apic_calibrate_impl();

	/*
	 * sort results and retrieve median (simple in-place selection
	 * sort; the array only has APIC_CALIBRATE_MEASUREMENTS entries).
	 */
	for (int i = 0; i < APIC_CALIBRATE_MEASUREMENTS; i++) {
		for (int j = i + 1; j < APIC_CALIBRATE_MEASUREMENTS; j++) {
			if (measurements[j] < measurements[i]) {
				uint64_t tmp = measurements[i];
				measurements[i] = measurements[j];
				measurements[j] = tmp;
			}
		}
	}
	median_idx = APIC_CALIBRATE_MEASUREMENTS / 2;
	median = measurements[median_idx];

#if (APIC_CALIBRATE_MEASUREMENTS >= 3)
	/*
	 * Check that measurements are consistent. Post a warning
	 * if the three middle values are not close to each other.
	 */
	uint64_t delta_warn = median *
	    APIC_CALIBRATE_PERCENT_OFF_WARNING / 100;
	if ((median - measurements[median_idx - 1]) > delta_warn ||
	    (measurements[median_idx + 1] - median) > delta_warn) {
		cmn_err(CE_WARN, "apic_calibrate measurements lack "
		    "precision: %llu, %llu, %llu.",
		    (u_longlong_t)measurements[median_idx - 1],
		    (u_longlong_t)median,
		    (u_longlong_t)measurements[median_idx + 1]);
	}
#endif

	return (median);
}

/*
 * Initialise the APIC timer on the local APIC of CPU 0 to the desired
 * frequency.  Note at this stage in the boot sequence, the boot processor
 * is the only active processor.
 * hertz value of 0 indicates a one-shot mode request.  In this case
 * the function returns the resolution (in nanoseconds) for the hardware
 * timer interrupt.  If one-shot mode capability is not available,
 * the return value will be 0. apic_enable_oneshot is a global switch
 * for disabling the functionality.
 * A non-zero positive value for hertz indicates a periodic mode request.
 * In this case the hardware will be programmed to generate clock interrupts
 * at hertz frequency and returns the resolution of interrupts in
 * nanosecond.
 */

int
apic_clkinit(int hertz)
{
	int ret;

	/*
	 * Scale the interrupt-load watermarks and the redistribution
	 * threshold by the sampling factor (expressed in percent).
	 */
	apic_int_busy_mark = (apic_int_busy_mark *
	    apic_sample_factor_redistribution) / 100;
	apic_int_free_mark = (apic_int_free_mark *
	    apic_sample_factor_redistribution) / 100;
	apic_diff_for_redistribution = (apic_diff_for_redistribution *
	    apic_sample_factor_redistribution) / 100;

	ret = apic_timer_init(hertz);
	return (ret);

}

/*
 * apic_preshutdown:
 * Called early in shutdown whilst we can still access filesystems to do
 * things like loading modules which will be required to complete shutdown
 * after filesystems are all unmounted.
 */
void
apic_preshutdown(int cmd, int fcn)
{
	APIC_VERBOSE_POWEROFF(("apic_preshutdown(%d,%d); m=%d a=%d\n",
	    cmd, fcn, apic_poweroff_method, apic_enable_acpi));
}

/*
 * Shut down interrupt delivery and, for the A_SHUTDOWN/AD_POWEROFF cases,
 * attempt to power the machine off.  cmd/fcn are the uadmin A_*/AD_* codes
 * (compared against A_SHUTDOWN, AD_POWEROFF, AD_FASTREBOOT below).
 */
void
apic_shutdown(int cmd, int fcn)
{
	int restarts, attempts;
	int i;
	uchar_t	byte;
	ulong_t iflag;

	hpet_acpi_fini();

	/* Send NMI to all CPUs except self to do per processor shutdown */
	iflag = intr_clear();
#ifdef	DEBUG
	APIC_AV_PENDING_SET();
#else
	if (apic_mode == LOCAL_APIC)
		APIC_AV_PENDING_SET();
#endif /* DEBUG */
	apic_shutdown_processors = 1;
	apic_reg_ops->apic_write(APIC_INT_CMD1,
	    AV_NMI | AV_LEVEL | AV_SH_ALL_EXCSELF);

	/* restore cmos shutdown byte before reboot */
	if (apic_cmos_ssb_set) {
		outb(CMOS_ADDR, SSB);
		outb(CMOS_DATA, 0);
	}

	/* Mask/disable all I/O APIC redirection entries */
	ioapic_disable_redirection();

	/* disable apic mode if imcr present */
	if (apic_imcrp) {
		outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT);
		outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_PIC);
	}

	apic_disable_local_apic();

	intr_restore(iflag);

	/* remainder of function is for shutdown cases only */
	if (cmd != A_SHUTDOWN)
		return;

	/*
	 * Switch
 * system back into Legacy-Mode if using ACPI and
	 * not powering-off. Some BIOSes need to remain in ACPI-mode
	 * for power-off to succeed (Dell Dimension 4600)
	 * Do not disable ACPI while doing fastreboot
	 */
	if (apic_enable_acpi && fcn != AD_POWEROFF && fcn != AD_FASTREBOOT)
		(void) AcpiDisable();

	/* For fastreboot, send INIT to all CPUs except self */
	if (fcn == AD_FASTREBOOT) {
		apic_reg_ops->apic_write(APIC_INT_CMD1,
		    AV_ASSERT | AV_RESET | AV_SH_ALL_EXCSELF);
	}

	/* remainder of function is for shutdown+poweroff case only */
	if (fcn != AD_POWEROFF)
		return;

	switch (apic_poweroff_method) {
	case APIC_POWEROFF_VIA_RTC:

		/* select the extended NVRAM bank in the RTC */
		outb(CMOS_ADDR, RTC_REGA);
		byte = inb(CMOS_DATA);
		outb(CMOS_DATA, (byte | EXT_BANK));

		outb(CMOS_ADDR, PFR_REG);

		/* for Predator must toggle the PAB bit */
		byte = inb(CMOS_DATA);

		/*
		 * clear power active bar, wakeup alarm and
		 * kickstart
		 */
		byte &= ~(PAB_CBIT | WF_FLAG | KS_FLAG);
		outb(CMOS_DATA, byte);

		/* delay before next write */
		drv_usecwait(1000);

		/* for S40 the following would suffice */
		byte = inb(CMOS_DATA);

		/* power active bar control bit */
		byte |= PAB_CBIT;
		outb(CMOS_DATA, byte);

		break;

	case APIC_POWEROFF_VIA_ASPEN_BMC:
		/*
		 * Poll the BMC busy flag before each command; after 3
		 * busy polls, restart the whole sequence, and give up
		 * entirely after 3 restarts.
		 */
		restarts = 0;
restart_aspen_bmc:
		if (++restarts == 3)
			break;
		attempts = 0;
		do {
			byte = inb(MISMIC_FLAG_REGISTER);
			byte &= MISMIC_BUSY_MASK;
			if (byte != 0) {
				drv_usecwait(1000);
				if (attempts >= 3)
					goto restart_aspen_bmc;
				++attempts;
			}
		} while (byte != 0);
		outb(MISMIC_CNTL_REGISTER, CC_SMS_GET_STATUS);
		byte = inb(MISMIC_FLAG_REGISTER);
		byte |= 0x1;
		outb(MISMIC_FLAG_REGISTER, byte);
		/* Issue the canned power-off command sequence */
		i = 0;
		for (; i < (sizeof (aspen_bmc)/sizeof (aspen_bmc[0]));
		    i++) {
			attempts = 0;
			do {
				byte = inb(MISMIC_FLAG_REGISTER);
				byte &= MISMIC_BUSY_MASK;
				if (byte != 0) {
					drv_usecwait(1000);
					if (attempts >= 3)
						goto restart_aspen_bmc;
					++attempts;
				}
			} while (byte != 0);
			outb(MISMIC_CNTL_REGISTER, aspen_bmc[i].cntl);
			outb(MISMIC_DATA_REGISTER, aspen_bmc[i].data);
			byte = inb(MISMIC_FLAG_REGISTER);
			byte |= 0x1;
			outb(MISMIC_FLAG_REGISTER, byte);
		}
		break;

	case APIC_POWEROFF_VIA_SITKA_BMC:
		/* Same retry structure as the Aspen BMC case above */
		restarts = 0;
restart_sitka_bmc:
		if (++restarts == 3)
			break;
		attempts = 0;
		do {
			byte = inb(SMS_STATUS_REGISTER);
			byte &= SMS_STATE_MASK;
			if ((byte == SMS_READ_STATE) ||
			    (byte == SMS_WRITE_STATE)) {
				drv_usecwait(1000);
				if (attempts >= 3)
					goto restart_sitka_bmc;
				++attempts;
			}
		} while ((byte == SMS_READ_STATE) ||
		    (byte == SMS_WRITE_STATE));
		outb(SMS_COMMAND_REGISTER, SMS_GET_STATUS);
		i = 0;
		for (; i < (sizeof (sitka_bmc)/sizeof (sitka_bmc[0]));
		    i++) {
			attempts = 0;
			do {
				byte = inb(SMS_STATUS_REGISTER);
				byte &= SMS_IBF_MASK;
				if (byte != 0) {
					drv_usecwait(1000);
					if (attempts >= 3)
						goto restart_sitka_bmc;
					++attempts;
				}
			} while (byte != 0);
			outb(sitka_bmc[i].port, sitka_bmc[i].data);
		}
		break;

	case APIC_POWEROFF_NONE:

		/* If no APIC direct method, we will try using ACPI */
		if (apic_enable_acpi) {
			if (acpi_poweroff() == 1)
				return;
		} else
			return;

		break;
	}
	/*
	 * Wait a limited time here for power to go off.
	 * If the power does not go off, then there was a
	 * problem and we should continue to the halt which
	 * prints a message for the user to press a key to
	 * reboot.
 */
	drv_usecwait(7000000); /* wait seven seconds */

}

cyclic_id_t apic_cyclic_id;

/*
 * The following functions are in the platform specific file so that they
 * can be different functions depending on whether we are running on
 * bare metal or a hypervisor.
 */

/*
 * map an apic for memory-mapped access
 */
uint32_t *
mapin_apic(uint32_t addr, size_t len, int flags)
{
	return ((void *)psm_map_phys(addr, len, flags));
}

/* Same mapping routine for I/O APICs */
uint32_t *
mapin_ioapic(uint32_t addr, size_t len, int flags)
{
	return (mapin_apic(addr, len, flags));
}

/*
 * unmap an apic
 */
void
mapout_apic(caddr_t addr, size_t len)
{
	psm_unmap_phys(addr, len);
}

void
mapout_ioapic(caddr_t addr, size_t len)
{
	mapout_apic(addr, len);
}

/*
 * Read an I/O APIC register: select it via the index register, then
 * read the data window.  Caller is expected to serialize access to the
 * index/data pair.
 */
uint32_t
ioapic_read(int ioapic_ix, uint32_t reg)
{
	volatile uint32_t *ioapic;

	ioapic = apicioadr[ioapic_ix];
	ioapic[APIC_IO_REG] = reg;
	return (ioapic[APIC_IO_DATA]);
}

/*
 * Write an I/O APIC register: select via the index register, then
 * store through the data window.
 */
void
ioapic_write(int ioapic_ix, uint32_t reg, uint32_t value)
{
	volatile uint32_t *ioapic;

	ioapic = apicioadr[ioapic_ix];
	ioapic[APIC_IO_REG] = reg;
	ioapic[APIC_IO_DATA] = value;
}

/* Write the I/O APIC EOI register directly (no index select needed) */
void
ioapic_write_eoi(int ioapic_ix, uint32_t value)
{
	volatile uint32_t *ioapic;

	ioapic = apicioadr[ioapic_ix];
	ioapic[APIC_IO_EOI] = value;
}

/*
 * Round-robin algorithm to find the next CPU with interrupts enabled.
 * It can't share the same static variable apic_next_bind_cpu with
 * apic_get_next_bind_cpu(), since that will cause all interrupts to be
 * bound to CPU1 at boot time. During boot, only CPU0 is online with
 * interrupts enabled when apic_get_next_bind_cpu() and apic_find_cpu()
 * are called.
 * However, the pcplusmp driver assumes that there will be
 * boot_ncpus CPUs configured eventually so it tries to distribute all
 * interrupts among CPU0 - CPU[boot_ncpus - 1]. Thus to prevent all
 * interrupts being targeted at CPU1, we need to use a dedicated static
 * variable for find_next_cpu() instead of sharing apic_next_bind_cpu.
 */

processorid_t
apic_find_cpu(int flag)
{
	int i;
	/* round-robin cursor, persists across calls */
	static processorid_t acid = 0;

	/* Find the first CPU with the passed-in flag set */
	for (i = 0; i < apic_nproc; i++) {
		if (++acid >= apic_nproc) {
			acid = 0;
		}
		if (apic_cpu_in_range(acid) &&
		    (apic_cpus[acid].aci_status & flag)) {
			break;
		}
	}

	/* Caller expects at least one CPU to satisfy the flag */
	ASSERT((apic_cpus[acid].aci_status & flag) != 0);
	return (acid);
}

/*
 * Enable interrupt remapping (and possibly x2APIC mode) if the
 * platform's interrupt-remapping ops (psm_vt_ops) are present and
 * initialize successfully.
 */
void
apic_intrmap_init(int apic_mode)
{
	int suppress_brdcst_eoi = 0;

	/*
	 * Intel Software Developer's Manual 3A, 10.12.7:
	 *
	 * Routing of device interrupts to local APIC units operating in
	 * x2APIC mode requires use of the interrupt-remapping architecture
	 * specified in the Intel Virtualization Technology for Directed
	 * I/O, Revision 1.3. Because of this, BIOS must enumerate support
	 * for and software must enable this interrupt remapping with
	 * Extended Interrupt Mode Enabled before enabling x2APIC mode in
	 * the local APIC units.
	 *
	 *
	 * In other words, to use the APIC in x2APIC mode, we need interrupt
	 * remapping. Since we don't start up the IOMMU by default, we
	 * won't be able to do any interrupt remapping and therefore have to
	 * use the APIC in traditional 'local APIC' mode with memory mapped
	 * I/O.
	 */

	if (psm_vt_ops != NULL) {
		if (((apic_intrmap_ops_t *)psm_vt_ops)->
		    apic_intrmap_init(apic_mode) == DDI_SUCCESS) {

			apic_vt_ops = psm_vt_ops;

			/*
			 * We leverage the interrupt remapping engine to
			 * suppress broadcast EOI; thus we must send the
			 * directed EOI with the directed-EOI handler.
			 */
			if (apic_directed_EOI_supported() == 0) {
				suppress_brdcst_eoi = 1;
			}

			apic_vt_ops->apic_intrmap_enable(suppress_brdcst_eoi);

			/* With remapping active, x2APIC mode is usable */
			if (apic_detect_x2apic()) {
				apic_enable_x2apic();
			}

			if (apic_directed_EOI_supported() == 0) {
				apic_set_directed_EOI_handler();
			}
		}
	}
}

/*
 * Record an I/O APIC RDT entry when no interrupt remapping is active:
 * shift the destination APIC ID into the high RDT word.
 */
/*ARGSUSED*/
static void
apic_record_ioapic_rdt(void *intrmap_private, ioapic_rdt_t *irdt)
{
	irdt->ir_hi <<= APIC_ID_BIT_OFFSET;
}

/*
 * Compose the MSI address/data registers (fixed redirection hint,
 * physical destination mode, edge trigger) from the recorded values.
 */
/*ARGSUSED*/
static void
apic_record_msi(void *intrmap_private, msi_regs_t *mregs)
{
	mregs->mr_addr = MSI_ADDR_HDR |
	    (MSI_ADDR_RH_FIXED << MSI_ADDR_RH_SHIFT) |
	    (MSI_ADDR_DM_PHYSICAL << MSI_ADDR_DM_SHIFT) |
	    (mregs->mr_addr << MSI_ADDR_DEST_SHIFT);
	mregs->mr_data = (MSI_DATA_TM_EDGE << MSI_DATA_TM_SHIFT) |
	    mregs->mr_data;
}

/*
 * Functions from apic_introp.c
 *
 * Those functions are used by apic_intr_ops().
 */

/*
 * MSI support flag:
 * reflects whether MSI is supported at APIC level
 * it can also be patched through /etc/system
 *
 *	0 = default value - don't know and need to call apic_check_msi_support()
 *	    to find out then set it accordingly
 *	1 = supported
 *	-1 = not supported
 */
int	apic_support_msi = 0;

/* Multiple vector support for MSI-X */
int	apic_msix_enable = 1;

/* Multiple vector support for MSI */
int	apic_multi_msi_enable = 1;

/*
 * Check whether the system supports MSI.
1636 * 1637 * MSI is required for PCI-E and for PCI versions later than 2.2, so if we find 1638 * a PCI-E bus or we find a PCI bus whose version we know is >= 2.2, then we 1639 * return PSM_SUCCESS to indicate this system supports MSI. 1640 * 1641 * (Currently the only way we check whether a given PCI bus supports >= 2.2 is 1642 * by detecting if we are running inside the KVM hypervisor, which guarantees 1643 * this version number.) 1644 */ 1645 int 1646 apic_check_msi_support() 1647 { 1648 dev_info_t *cdip; 1649 char dev_type[16]; 1650 int dev_len; 1651 1652 DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support:\n")); 1653 1654 /* 1655 * check whether the first level children of root_node have 1656 * PCI-E or PCI capability. 1657 */ 1658 for (cdip = ddi_get_child(ddi_root_node()); cdip != NULL; 1659 cdip = ddi_get_next_sibling(cdip)) { 1660 1661 DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: cdip: 0x%p," 1662 " driver: %s, binding: %s, nodename: %s\n", (void *)cdip, 1663 ddi_driver_name(cdip), ddi_binding_name(cdip), 1664 ddi_node_name(cdip))); 1665 dev_len = sizeof (dev_type); 1666 if (ddi_getlongprop_buf(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS, 1667 "device_type", (caddr_t)dev_type, &dev_len) 1668 != DDI_PROP_SUCCESS) 1669 continue; 1670 if (strcmp(dev_type, "pciex") == 0) 1671 return (PSM_SUCCESS); 1672 if (strcmp(dev_type, "pci") == 0 && get_hwenv() == HW_KVM) 1673 return (PSM_SUCCESS); 1674 } 1675 1676 /* MSI is not supported on this system */ 1677 DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: no 'pciex' " 1678 "device_type found\n")); 1679 return (PSM_FAILURE); 1680 } 1681 1682 /* 1683 * apic_pci_msi_unconfigure: 1684 * 1685 * This and next two interfaces are copied from pci_intr_lib.c 1686 * Do ensure that these two files stay in sync. 1687 * These needed to be copied over here to avoid a deadlock situation on 1688 * certain mp systems that use MSI interrupts. 
1689 * 1690 * IMPORTANT regards next three interfaces: 1691 * i) are called only for MSI/X interrupts. 1692 * ii) called with interrupts disabled, and must not block 1693 */ 1694 void 1695 apic_pci_msi_unconfigure(dev_info_t *rdip, int type, int inum) 1696 { 1697 ushort_t msi_ctrl; 1698 int cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip); 1699 ddi_acc_handle_t handle = i_ddi_get_pci_config_handle(rdip); 1700 1701 ASSERT((handle != NULL) && (cap_ptr != 0)); 1702 1703 if (type == DDI_INTR_TYPE_MSI) { 1704 msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL); 1705 msi_ctrl &= (~PCI_MSI_MME_MASK); 1706 pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl); 1707 pci_config_put32(handle, cap_ptr + PCI_MSI_ADDR_OFFSET, 0); 1708 1709 if (msi_ctrl & PCI_MSI_64BIT_MASK) { 1710 pci_config_put16(handle, 1711 cap_ptr + PCI_MSI_64BIT_DATA, 0); 1712 pci_config_put32(handle, 1713 cap_ptr + PCI_MSI_ADDR_OFFSET + 4, 0); 1714 } else { 1715 pci_config_put16(handle, 1716 cap_ptr + PCI_MSI_32BIT_DATA, 0); 1717 } 1718 1719 } else if (type == DDI_INTR_TYPE_MSIX) { 1720 uintptr_t off; 1721 uint32_t mask; 1722 ddi_intr_msix_t *msix_p = i_ddi_get_msix(rdip); 1723 1724 ASSERT(msix_p != NULL); 1725 1726 /* Offset into "inum"th entry in the MSI-X table & mask it */ 1727 off = (uintptr_t)msix_p->msix_tbl_addr + (inum * 1728 PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET; 1729 1730 mask = ddi_get32(msix_p->msix_tbl_hdl, (uint32_t *)off); 1731 1732 ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, (mask | 1)); 1733 1734 /* Offset into the "inum"th entry in the MSI-X table */ 1735 off = (uintptr_t)msix_p->msix_tbl_addr + 1736 (inum * PCI_MSIX_VECTOR_SIZE); 1737 1738 /* Reset the "data" and "addr" bits */ 1739 ddi_put32(msix_p->msix_tbl_hdl, 1740 (uint32_t *)(off + PCI_MSIX_DATA_OFFSET), 0); 1741 ddi_put64(msix_p->msix_tbl_hdl, (uint64_t *)off, 0); 1742 } 1743 } 1744 1745 /* 1746 * apic_pci_msi_disable_mode: 1747 */ 1748 void 1749 apic_pci_msi_disable_mode(dev_info_t *rdip, int type) 1750 
{ 1751 ushort_t msi_ctrl; 1752 int cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip); 1753 ddi_acc_handle_t handle = i_ddi_get_pci_config_handle(rdip); 1754 1755 ASSERT((handle != NULL) && (cap_ptr != 0)); 1756 1757 if (type == DDI_INTR_TYPE_MSI) { 1758 msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL); 1759 if (!(msi_ctrl & PCI_MSI_ENABLE_BIT)) 1760 return; 1761 1762 msi_ctrl &= ~PCI_MSI_ENABLE_BIT; /* MSI disable */ 1763 pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl); 1764 1765 } else if (type == DDI_INTR_TYPE_MSIX) { 1766 msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL); 1767 if (msi_ctrl & PCI_MSIX_ENABLE_BIT) { 1768 msi_ctrl &= ~PCI_MSIX_ENABLE_BIT; 1769 pci_config_put16(handle, cap_ptr + PCI_MSIX_CTRL, 1770 msi_ctrl); 1771 } 1772 } 1773 } 1774 1775 uint32_t 1776 apic_get_localapicid(uint32_t cpuid) 1777 { 1778 ASSERT(cpuid < apic_nproc && apic_cpus != NULL); 1779 1780 return (apic_cpus[cpuid].aci_local_id); 1781 } 1782 1783 uchar_t 1784 apic_get_ioapicid(uchar_t ioapicindex) 1785 { 1786 ASSERT(ioapicindex < MAX_IO_APIC); 1787 1788 return (apic_io_id[ioapicindex]); 1789 } 1790