1 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 2 3 #include <linux/errno.h> 4 #include <linux/kernel.h> 5 #include <linux/mm.h> 6 #include <linux/smp.h> 7 #include <linux/prctl.h> 8 #include <linux/slab.h> 9 #include <linux/sched.h> 10 #include <linux/module.h> 11 #include <linux/pm.h> 12 #include <linux/clockchips.h> 13 #include <linux/random.h> 14 #include <linux/user-return-notifier.h> 15 #include <linux/dmi.h> 16 #include <linux/utsname.h> 17 #include <linux/stackprotector.h> 18 #include <linux/tick.h> 19 #include <linux/cpuidle.h> 20 #include <trace/events/power.h> 21 #include <linux/hw_breakpoint.h> 22 #include <asm/cpu.h> 23 #include <asm/apic.h> 24 #include <asm/syscalls.h> 25 #include <asm/idle.h> 26 #include <asm/uaccess.h> 27 #include <asm/i387.h> 28 #include <asm/fpu-internal.h> 29 #include <asm/debugreg.h> 30 #include <asm/nmi.h> 31 32 /* 33 * per-CPU TSS segments. Threads are completely 'soft' on Linux, 34 * no more per-task TSS's. The TSS size is kept cacheline-aligned 35 * so they are allowed to end up in the .data..cacheline_aligned 36 * section. Since TSS's are completely CPU-local, we want them 37 * on exact cacheline boundaries, to eliminate cacheline ping-pong. 38 */ 39 DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS; 40 41 #ifdef CONFIG_X86_64 42 static DEFINE_PER_CPU(unsigned char, is_idle); 43 static ATOMIC_NOTIFIER_HEAD(idle_notifier); 44 45 void idle_notifier_register(struct notifier_block *n) 46 { 47 atomic_notifier_chain_register(&idle_notifier, n); 48 } 49 EXPORT_SYMBOL_GPL(idle_notifier_register); 50 51 void idle_notifier_unregister(struct notifier_block *n) 52 { 53 atomic_notifier_chain_unregister(&idle_notifier, n); 54 } 55 EXPORT_SYMBOL_GPL(idle_notifier_unregister); 56 #endif 57 58 struct kmem_cache *task_xstate_cachep; 59 EXPORT_SYMBOL_GPL(task_xstate_cachep); 60 61 /* 62 * this gets called so that we can store lazy state into memory and copy the 63 * current task into the new thread. 64 */ 65 int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) 66 { 67 int ret; 68 69 *dst = *src; 70 if (fpu_allocated(&src->thread.fpu)) { 71 memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu)); 72 ret = fpu_alloc(&dst->thread.fpu); 73 if (ret) 74 return ret; 75 fpu_copy(dst, src); 76 } 77 return 0; 78 } 79 80 void free_thread_xstate(struct task_struct *tsk) 81 { 82 fpu_free(&tsk->thread.fpu); 83 } 84 85 void arch_release_task_struct(struct task_struct *tsk) 86 { 87 free_thread_xstate(tsk); 88 } 89 90 void arch_task_cache_init(void) 91 { 92 task_xstate_cachep = 93 kmem_cache_create("task_xstate", xstate_size, 94 __alignof__(union thread_xstate), 95 SLAB_PANIC | SLAB_NOTRACK, NULL); 96 } 97 98 /* 99 * Free current thread data structures etc.. 100 */ 101 void exit_thread(void) 102 { 103 struct task_struct *me = current; 104 struct thread_struct *t = &me->thread; 105 unsigned long *bp = t->io_bitmap_ptr; 106 107 if (bp) { 108 struct tss_struct *tss = &per_cpu(init_tss, get_cpu()); 109 110 t->io_bitmap_ptr = NULL; 111 clear_thread_flag(TIF_IO_BITMAP); 112 /* 113 * Careful, clear this in the TSS too: 114 */ 115 memset(tss->io_bitmap, 0xff, t->io_bitmap_max); 116 t->io_bitmap_max = 0; 117 put_cpu(); 118 kfree(bp); 119 } 120 121 drop_fpu(me); 122 } 123 124 void flush_thread(void) 125 { 126 struct task_struct *tsk = current; 127 128 flush_ptrace_hw_breakpoint(tsk); 129 memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); 130 drop_init_fpu(tsk); 131 /* 132 * Free the FPU state for non xsave platforms. They get reallocated 133 * lazily at the first use. 134 */ 135 if (!use_eager_fpu()) 136 free_thread_xstate(tsk); 137 } 138 139 static void hard_disable_TSC(void) 140 { 141 write_cr4(read_cr4() | X86_CR4_TSD); 142 } 143 144 void disable_TSC(void) 145 { 146 preempt_disable(); 147 if (!test_and_set_thread_flag(TIF_NOTSC)) 148 /* 149 * Must flip the CPU state synchronously with 150 * TIF_NOTSC in the current running context. 151 */ 152 hard_disable_TSC(); 153 preempt_enable(); 154 } 155 156 static void hard_enable_TSC(void) 157 { 158 write_cr4(read_cr4() & ~X86_CR4_TSD); 159 } 160 161 static void enable_TSC(void) 162 { 163 preempt_disable(); 164 if (test_and_clear_thread_flag(TIF_NOTSC)) 165 /* 166 * Must flip the CPU state synchronously with 167 * TIF_NOTSC in the current running context. 168 */ 169 hard_enable_TSC(); 170 preempt_enable(); 171 } 172 173 int get_tsc_mode(unsigned long adr) 174 { 175 unsigned int val; 176 177 if (test_thread_flag(TIF_NOTSC)) 178 val = PR_TSC_SIGSEGV; 179 else 180 val = PR_TSC_ENABLE; 181 182 return put_user(val, (unsigned int __user *)adr); 183 } 184 185 int set_tsc_mode(unsigned int val) 186 { 187 if (val == PR_TSC_SIGSEGV) 188 disable_TSC(); 189 else if (val == PR_TSC_ENABLE) 190 enable_TSC(); 191 else 192 return -EINVAL; 193 194 return 0; 195 } 196 197 void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, 198 struct tss_struct *tss) 199 { 200 struct thread_struct *prev, *next; 201 202 prev = &prev_p->thread; 203 next = &next_p->thread; 204 205 if (test_tsk_thread_flag(prev_p, TIF_BLOCKSTEP) ^ 206 test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) { 207 unsigned long debugctl = get_debugctlmsr(); 208 209 debugctl &= ~DEBUGCTLMSR_BTF; 210 if (test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) 211 debugctl |= DEBUGCTLMSR_BTF; 212 213 update_debugctlmsr(debugctl); 214 } 215 216 if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ 217 test_tsk_thread_flag(next_p, TIF_NOTSC)) { 218 /* prev and next are different */ 219 if (test_tsk_thread_flag(next_p, TIF_NOTSC)) 220 hard_disable_TSC(); 221 else 222 hard_enable_TSC(); 223 } 224 225 if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { 226 /* 227 * Copy the relevant range of the IO bitmap. 228 * Normally this is 128 bytes or less: 229 */ 230 memcpy(tss->io_bitmap, next->io_bitmap_ptr, 231 max(prev->io_bitmap_max, next->io_bitmap_max)); 232 } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) { 233 /* 234 * Clear any possible leftover bits: 235 */ 236 memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); 237 } 238 propagate_user_return_notify(prev_p, next_p); 239 } 240 241 /* 242 * Idle related variables and functions 243 */ 244 unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE; 245 EXPORT_SYMBOL(boot_option_idle_override); 246 247 static void (*x86_idle)(void); 248 249 #ifndef CONFIG_SMP 250 static inline void play_dead(void) 251 { 252 BUG(); 253 } 254 #endif 255 256 #ifdef CONFIG_X86_64 257 void enter_idle(void) 258 { 259 this_cpu_write(is_idle, 1); 260 atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); 261 } 262 263 static void __exit_idle(void) 264 { 265 if (x86_test_and_clear_bit_percpu(0, is_idle) == 0) 266 return; 267 atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL); 268 } 269 270 /* Called from interrupts to signify idle end */ 271 void exit_idle(void) 272 { 273 /* idle loop has pid 0 */ 274 if (current->pid) 275 return; 276 __exit_idle(); 277 } 278 #endif 279 280 void arch_cpu_idle_prepare(void) 281 { 282 /* 283 * If we're the non-boot CPU, nothing set the stack canary up 284 * for us. CPU0 already has it initialized but no harm in 285 * doing it again. This is a good place for updating it, as 286 * we wont ever return from this function (so the invalid 287 * canaries already on the stack wont ever trigger). 288 */ 289 boot_init_stack_canary(); 290 } 291 292 void arch_cpu_idle_enter(void) 293 { 294 local_touch_nmi(); 295 enter_idle(); 296 } 297 298 void arch_cpu_idle_exit(void) 299 { 300 __exit_idle(); 301 } 302 303 void arch_cpu_idle_dead(void) 304 { 305 play_dead(); 306 } 307 308 /* 309 * Called from the generic idle code. 310 */ 311 void arch_cpu_idle(void) 312 { 313 if (cpuidle_idle_call()) 314 x86_idle(); 315 } 316 317 /* 318 * We use this if we don't have any better idle routine.. 319 */ 320 void default_idle(void) 321 { 322 trace_cpu_idle_rcuidle(1, smp_processor_id()); 323 safe_halt(); 324 trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); 325 } 326 #ifdef CONFIG_APM_MODULE 327 EXPORT_SYMBOL(default_idle); 328 #endif 329 330 #ifdef CONFIG_XEN 331 bool xen_set_default_idle(void) 332 { 333 bool ret = !!x86_idle; 334 335 x86_idle = default_idle; 336 337 return ret; 338 } 339 #endif 340 void stop_this_cpu(void *dummy) 341 { 342 local_irq_disable(); 343 /* 344 * Remove this CPU: 345 */ 346 set_cpu_online(smp_processor_id(), false); 347 disable_local_APIC(); 348 349 for (;;) 350 halt(); 351 } 352 353 bool amd_e400_c1e_detected; 354 EXPORT_SYMBOL(amd_e400_c1e_detected); 355 356 static cpumask_var_t amd_e400_c1e_mask; 357 358 void amd_e400_remove_cpu(int cpu) 359 { 360 if (amd_e400_c1e_mask != NULL) 361 cpumask_clear_cpu(cpu, amd_e400_c1e_mask); 362 } 363 364 /* 365 * AMD Erratum 400 aware idle routine. We check for C1E active in the interrupt 366 * pending message MSR. If we detect C1E, then we handle it the same 367 * way as C3 power states (local apic timer and TSC stop) 368 */ 369 static void amd_e400_idle(void) 370 { 371 if (need_resched()) 372 return; 373 374 if (!amd_e400_c1e_detected) { 375 u32 lo, hi; 376 377 rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); 378 379 if (lo & K8_INTP_C1E_ACTIVE_MASK) { 380 amd_e400_c1e_detected = true; 381 if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) 382 mark_tsc_unstable("TSC halt in AMD C1E"); 383 pr_info("System has AMD C1E enabled\n"); 384 } 385 } 386 387 if (amd_e400_c1e_detected) { 388 int cpu = smp_processor_id(); 389 390 if (!cpumask_test_cpu(cpu, amd_e400_c1e_mask)) { 391 cpumask_set_cpu(cpu, amd_e400_c1e_mask); 392 /* 393 * Force broadcast so ACPI can not interfere. 394 */ 395 clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, 396 &cpu); 397 pr_info("Switch to broadcast mode on CPU%d\n", cpu); 398 } 399 clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu); 400 401 default_idle(); 402 403 /* 404 * The switch back from broadcast mode needs to be 405 * called with interrupts disabled. 406 */ 407 local_irq_disable(); 408 clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu); 409 local_irq_enable(); 410 } else 411 default_idle(); 412 } 413 414 void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) 415 { 416 #ifdef CONFIG_SMP 417 if (boot_option_idle_override == IDLE_POLL && smp_num_siblings > 1) 418 pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n"); 419 #endif 420 if (x86_idle || boot_option_idle_override == IDLE_POLL) 421 return; 422 423 if (cpu_has_bug(c, X86_BUG_AMD_APIC_C1E)) { 424 /* E400: APIC timer interrupt does not wake up CPU from C1e */ 425 pr_info("using AMD E400 aware idle routine\n"); 426 x86_idle = amd_e400_idle; 427 } else 428 x86_idle = default_idle; 429 } 430 431 void __init init_amd_e400_c1e_mask(void) 432 { 433 /* If we're using amd_e400_idle, we need to allocate amd_e400_c1e_mask. */ 434 if (x86_idle == amd_e400_idle) 435 zalloc_cpumask_var(&amd_e400_c1e_mask, GFP_KERNEL); 436 } 437 438 static int __init idle_setup(char *str) 439 { 440 if (!str) 441 return -EINVAL; 442 443 if (!strcmp(str, "poll")) { 444 pr_info("using polling idle threads\n"); 445 boot_option_idle_override = IDLE_POLL; 446 cpu_idle_poll_ctrl(true); 447 } else if (!strcmp(str, "halt")) { 448 /* 449 * When the boot option of idle=halt is added, halt is 450 * forced to be used for CPU idle. In such case CPU C2/C3 451 * won't be used again. 452 * To continue to load the CPU idle driver, don't touch 453 * the boot_option_idle_override. 454 */ 455 x86_idle = default_idle; 456 boot_option_idle_override = IDLE_HALT; 457 } else if (!strcmp(str, "nomwait")) { 458 /* 459 * If the boot option of "idle=nomwait" is added, 460 * it means that mwait will be disabled for CPU C2/C3 461 * states. In such case it won't touch the variable 462 * of boot_option_idle_override. 463 */ 464 boot_option_idle_override = IDLE_NOMWAIT; 465 } else 466 return -1; 467 468 return 0; 469 } 470 early_param("idle", idle_setup); 471 472 unsigned long arch_align_stack(unsigned long sp) 473 { 474 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) 475 sp -= get_random_int() % 8192; 476 return sp & ~0xf; 477 } 478 479 unsigned long arch_randomize_brk(struct mm_struct *mm) 480 { 481 unsigned long range_end = mm->brk + 0x02000000; 482 return randomize_range(mm->brk, range_end, 0) ? : mm->brk; 483 } 484 485