#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/prctl.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/pm.h>
#include <linux/clockchips.h>
#include <linux/random.h>
#include <linux/user-return-notifier.h>
#include <linux/dmi.h>
#include <linux/utsname.h>
#include <linux/stackprotector.h>
#include <linux/tick.h>
#include <linux/cpuidle.h>
#include <trace/events/power.h>
#include <linux/hw_breakpoint.h>
#include <asm/cpu.h>
#include <asm/apic.h>
#include <asm/syscalls.h>
#include <asm/idle.h>
#include <asm/uaccess.h>
#include <asm/i387.h>
#include <asm/fpu-internal.h>
#include <asm/debugreg.h>
#include <asm/nmi.h>
#include <asm/tlbflush.h>

/*
 * per-CPU TSS segments. Threads are completely 'soft' on Linux,
 * no more per-task TSS's. The TSS size is kept cacheline-aligned
 * so they are allowed to end up in the .data..cacheline_aligned
 * section. Since TSS's are completely CPU-local, we want them
 * on exact cacheline boundaries, to eliminate cacheline ping-pong.
 */
__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS;

#ifdef CONFIG_X86_64
static DEFINE_PER_CPU(unsigned char, is_idle);
static ATOMIC_NOTIFIER_HEAD(idle_notifier);

void idle_notifier_register(struct notifier_block *n)
{
	atomic_notifier_chain_register(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_register);

void idle_notifier_unregister(struct notifier_block *n)
{
	atomic_notifier_chain_unregister(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_unregister);
#endif

struct kmem_cache *task_xstate_cachep;
EXPORT_SYMBOL_GPL(task_xstate_cachep);

/*
 * this gets called so that we can store lazy state into memory and copy the
 * current task into the new thread.
 */
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
	*dst = *src;

	dst->thread.fpu_counter = 0;
	dst->thread.fpu.has_fpu = 0;
	dst->thread.fpu.last_cpu = ~0;
	dst->thread.fpu.state = NULL;
	if (tsk_used_math(src)) {
		int err = fpu_alloc(&dst->thread.fpu);
		if (err)
			return err;
		fpu_copy(dst, src);
	}
	return 0;
}

void free_thread_xstate(struct task_struct *tsk)
{
	fpu_free(&tsk->thread.fpu);
}

void arch_release_task_struct(struct task_struct *tsk)
{
	free_thread_xstate(tsk);
}

void arch_task_cache_init(void)
{
	task_xstate_cachep =
		kmem_cache_create("task_xstate", xstate_size,
				  __alignof__(union thread_xstate),
				  SLAB_PANIC | SLAB_NOTRACK, NULL);
	setup_xstate_comp();
}
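
/*
 * Editor's note, a rough summary of the xstate lifecycle handled above
 * (descriptive only, no behaviour change): on fork the child's extended
 * state buffer is allocated from task_xstate_cachep only if the parent
 * ever used the FPU, and freed back to the cache when the task dies.
 * Roughly:
 *
 *	copy_process()
 *	  -> dup_task_struct()
 *	       -> arch_dup_task_struct()
 *	            -> fpu_alloc()  (buffer from task_xstate_cachep)
 *	            -> fpu_copy()   (copy the parent's saved FPU state)
 *	...
 *	arch_release_task_struct()
 *	  -> free_thread_xstate() -> fpu_free()  (back to the cache)
 *
 * fpu_alloc()/fpu_copy()/fpu_free() live in the i387/FPU code.
 */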

/*
 * Free current thread data structures etc..
 */
void exit_thread(void)
{
	struct task_struct *me = current;
	struct thread_struct *t = &me->thread;
	unsigned long *bp = t->io_bitmap_ptr;

	if (bp) {
		struct tss_struct *tss = &per_cpu(init_tss, get_cpu());

		t->io_bitmap_ptr = NULL;
		clear_thread_flag(TIF_IO_BITMAP);
		/*
		 * Careful, clear this in the TSS too:
		 */
		memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
		t->io_bitmap_max = 0;
		put_cpu();
		kfree(bp);
	}

	drop_fpu(me);
}

void flush_thread(void)
{
	struct task_struct *tsk = current;

	flush_ptrace_hw_breakpoint(tsk);
	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
	drop_init_fpu(tsk);
	/*
	 * Free the FPU state for non xsave platforms. They get reallocated
	 * lazily at the first use.
	 */
	if (!use_eager_fpu())
		free_thread_xstate(tsk);
}

static void hard_disable_TSC(void)
{
	cr4_set_bits(X86_CR4_TSD);
}

void disable_TSC(void)
{
	preempt_disable();
	if (!test_and_set_thread_flag(TIF_NOTSC))
		/*
		 * Must flip the CPU state synchronously with
		 * TIF_NOTSC in the current running context.
		 */
		hard_disable_TSC();
	preempt_enable();
}

static void hard_enable_TSC(void)
{
	cr4_clear_bits(X86_CR4_TSD);
}

static void enable_TSC(void)
{
	preempt_disable();
	if (test_and_clear_thread_flag(TIF_NOTSC))
		/*
		 * Must flip the CPU state synchronously with
		 * TIF_NOTSC in the current running context.
		 */
		hard_enable_TSC();
	preempt_enable();
}

int get_tsc_mode(unsigned long adr)
{
	unsigned int val;

	if (test_thread_flag(TIF_NOTSC))
		val = PR_TSC_SIGSEGV;
	else
		val = PR_TSC_ENABLE;

	return put_user(val, (unsigned int __user *)adr);
}

int set_tsc_mode(unsigned int val)
{
	if (val == PR_TSC_SIGSEGV)
		disable_TSC();
	else if (val == PR_TSC_ENABLE)
		enable_TSC();
	else
		return -EINVAL;

	return 0;
}
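
/*
 * Editor's note: get_tsc_mode()/set_tsc_mode() back the PR_GET_TSC and
 * PR_SET_TSC prctl() options dispatched from kernel/sys.c. Setting
 * PR_TSC_SIGSEGV sets CR4.TSD, so a later RDTSC from user mode faults
 * and the task gets SIGSEGV. A rough, hypothetical userspace sketch of
 * the interface:
 *
 *	#include <sys/prctl.h>
 *	#include <linux/prctl.h>
 *
 *	int mode;
 *	prctl(PR_GET_TSC, &mode);		(reads PR_TSC_ENABLE or PR_TSC_SIGSEGV)
 *	prctl(PR_SET_TSC, PR_TSC_SIGSEGV);	(RDTSC now faults in this task)
 */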

void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
		      struct tss_struct *tss)
{
	struct thread_struct *prev, *next;

	prev = &prev_p->thread;
	next = &next_p->thread;

	if (test_tsk_thread_flag(prev_p, TIF_BLOCKSTEP) ^
	    test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) {
		unsigned long debugctl = get_debugctlmsr();

		debugctl &= ~DEBUGCTLMSR_BTF;
		if (test_tsk_thread_flag(next_p, TIF_BLOCKSTEP))
			debugctl |= DEBUGCTLMSR_BTF;

		update_debugctlmsr(debugctl);
	}

	if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
	    test_tsk_thread_flag(next_p, TIF_NOTSC)) {
		/* prev and next are different */
		if (test_tsk_thread_flag(next_p, TIF_NOTSC))
			hard_disable_TSC();
		else
			hard_enable_TSC();
	}

	if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
		/*
		 * Copy the relevant range of the IO bitmap.
		 * Normally this is 128 bytes or less:
		 */
		memcpy(tss->io_bitmap, next->io_bitmap_ptr,
		       max(prev->io_bitmap_max, next->io_bitmap_max));
	} else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
		/*
		 * Clear any possible leftover bits:
		 */
		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
	}
	propagate_user_return_notify(prev_p, next_p);
}
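
/*
 * Editor's note: the io_bitmap handling in __switch_to_xtra() backs the
 * ioperm(2) interface. A task granted port access gets TIF_IO_BITMAP set
 * and carries a private bitmap in thread.io_bitmap_ptr, which is copied
 * into the per-CPU TSS when the task is scheduled in, and reset to
 * "all denied" (0xff) when a task without a bitmap follows. A rough,
 * hypothetical userspace sketch of what creates such a bitmap
 * (needs CAP_SYS_RAWIO):
 *
 *	#include <sys/io.h>
 *
 *	if (ioperm(0x70, 2, 1) == 0) {	(allow ports 0x70-0x71)
 *		outb(0x0a, 0x70);	(port I/O no longer faults)
 *		...
 *	}
 */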

/*
 * Idle related variables and functions
 */
unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
EXPORT_SYMBOL(boot_option_idle_override);

static void (*x86_idle)(void);

#ifndef CONFIG_SMP
static inline void play_dead(void)
{
	BUG();
}
#endif

#ifdef CONFIG_X86_64
void enter_idle(void)
{
	this_cpu_write(is_idle, 1);
	atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}

static void __exit_idle(void)
{
	if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
		return;
	atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}

/* Called from interrupts to signify idle end */
void exit_idle(void)
{
	/* idle loop has pid 0 */
	if (current->pid)
		return;
	__exit_idle();
}
#endif

void arch_cpu_idle_enter(void)
{
	local_touch_nmi();
	enter_idle();
}

void arch_cpu_idle_exit(void)
{
	__exit_idle();
}

void arch_cpu_idle_dead(void)
{
	play_dead();
}

/*
 * Called from the generic idle code.
 */
void arch_cpu_idle(void)
{
	x86_idle();
}

/*
 * We use this if we don't have any better idle routine..
 */
void default_idle(void)
{
	trace_cpu_idle_rcuidle(1, smp_processor_id());
	safe_halt();
	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
}
#ifdef CONFIG_APM_MODULE
EXPORT_SYMBOL(default_idle);
#endif

#ifdef CONFIG_XEN
bool xen_set_default_idle(void)
{
	bool ret = !!x86_idle;

	x86_idle = default_idle;

	return ret;
}
#endif
void stop_this_cpu(void *dummy)
{
	local_irq_disable();
	/*
	 * Remove this CPU:
	 */
	set_cpu_online(smp_processor_id(), false);
	disable_local_APIC();

	for (;;)
		halt();
}

bool amd_e400_c1e_detected;
EXPORT_SYMBOL(amd_e400_c1e_detected);

static cpumask_var_t amd_e400_c1e_mask;

void amd_e400_remove_cpu(int cpu)
{
	if (amd_e400_c1e_mask != NULL)
		cpumask_clear_cpu(cpu, amd_e400_c1e_mask);
}

/*
 * AMD Erratum 400 aware idle routine. We check for C1E active in the interrupt
 * pending message MSR. If we detect C1E, then we handle it the same
 * way as C3 power states (local apic timer and TSC stop)
 */
static void amd_e400_idle(void)
{
	if (!amd_e400_c1e_detected) {
		u32 lo, hi;

		rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi);

		if (lo & K8_INTP_C1E_ACTIVE_MASK) {
			amd_e400_c1e_detected = true;
			if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
				mark_tsc_unstable("TSC halt in AMD C1E");
			pr_info("System has AMD C1E enabled\n");
		}
	}

	if (amd_e400_c1e_detected) {
		int cpu = smp_processor_id();

		if (!cpumask_test_cpu(cpu, amd_e400_c1e_mask)) {
			cpumask_set_cpu(cpu, amd_e400_c1e_mask);
			/*
			 * Force broadcast so ACPI can not interfere.
			 */
			clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE,
					   &cpu);
			pr_info("Switch to broadcast mode on CPU%d\n", cpu);
		}
		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);

		default_idle();

		/*
		 * The switch back from broadcast mode needs to be
		 * called with interrupts disabled.
		 */
		local_irq_disable();
		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
		local_irq_enable();
	} else
		default_idle();
}

void select_idle_routine(const struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
	if (boot_option_idle_override == IDLE_POLL && smp_num_siblings > 1)
		pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n");
#endif
	if (x86_idle || boot_option_idle_override == IDLE_POLL)
		return;

	if (cpu_has_bug(c, X86_BUG_AMD_APIC_C1E)) {
		/* E400: APIC timer interrupt does not wake up CPU from C1e */
		pr_info("using AMD E400 aware idle routine\n");
		x86_idle = amd_e400_idle;
	} else
		x86_idle = default_idle;
}

void __init init_amd_e400_c1e_mask(void)
{
	/* If we're using amd_e400_idle, we need to allocate amd_e400_c1e_mask. */
	if (x86_idle == amd_e400_idle)
		zalloc_cpumask_var(&amd_e400_c1e_mask, GFP_KERNEL);
}

static int __init idle_setup(char *str)
{
	if (!str)
		return -EINVAL;

	if (!strcmp(str, "poll")) {
		pr_info("using polling idle threads\n");
		boot_option_idle_override = IDLE_POLL;
		cpu_idle_poll_ctrl(true);
	} else if (!strcmp(str, "halt")) {
		/*
		 * When the boot option of idle=halt is added, halt is
		 * forced to be used for CPU idle. In such case CPU C2/C3
		 * won't be used again.
		 * To continue to load the CPU idle driver, don't touch
		 * the boot_option_idle_override.
		 */
		x86_idle = default_idle;
		boot_option_idle_override = IDLE_HALT;
	} else if (!strcmp(str, "nomwait")) {
		/*
		 * If the boot option of "idle=nomwait" is added,
		 * it means that mwait will be disabled for CPU C2/C3
		 * states. In such case it won't touch the variable
		 * of boot_option_idle_override.
		 */
		boot_option_idle_override = IDLE_NOMWAIT;
	} else
		return -1;

	return 0;
}
early_param("idle", idle_setup);

unsigned long arch_align_stack(unsigned long sp)
{
	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		sp -= get_random_int() % 8192;
	return sp & ~0xf;
}

unsigned long arch_randomize_brk(struct mm_struct *mm)
{
	unsigned long range_end = mm->brk + 0x02000000;
	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
}
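
/*
 * Editor's note, for reference: the "idle=" early parameter parsed by
 * idle_setup() above is passed on the kernel command line, e.g. from the
 * bootloader configuration:
 *
 *	idle=poll	- busy-poll in the idle loop instead of halting
 *	idle=halt	- always use HLT; deeper C2/C3 states are not entered
 *	idle=nomwait	- do not use MWAIT for C2/C3 states
 *
 * This is a summary of the cases handled above; any other value is
 * rejected by idle_setup().
 */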