1 /* 2 * Copyright (C) 1995 Linus Torvalds 3 * 4 * Pentium III FXSR, SSE support 5 * Gareth Hughes <gareth@valinux.com>, May 2000 6 * 7 * X86-64 port 8 * Andi Kleen. 9 * 10 * CPU hotplug support - ashok.raj@intel.com 11 */ 12 13 /* 14 * This file handles the architecture-dependent parts of process handling.. 15 */ 16 17 #include <stdarg.h> 18 19 #include <linux/cpu.h> 20 #include <linux/errno.h> 21 #include <linux/sched.h> 22 #include <linux/fs.h> 23 #include <linux/kernel.h> 24 #include <linux/mm.h> 25 #include <linux/elfcore.h> 26 #include <linux/smp.h> 27 #include <linux/slab.h> 28 #include <linux/user.h> 29 #include <linux/interrupt.h> 30 #include <linux/utsname.h> 31 #include <linux/delay.h> 32 #include <linux/module.h> 33 #include <linux/ptrace.h> 34 #include <linux/random.h> 35 #include <linux/notifier.h> 36 #include <linux/kprobes.h> 37 #include <linux/kdebug.h> 38 #include <linux/tick.h> 39 #include <linux/prctl.h> 40 41 #include <asm/uaccess.h> 42 #include <asm/pgtable.h> 43 #include <asm/system.h> 44 #include <asm/io.h> 45 #include <asm/processor.h> 46 #include <asm/i387.h> 47 #include <asm/mmu_context.h> 48 #include <asm/pda.h> 49 #include <asm/prctl.h> 50 #include <asm/desc.h> 51 #include <asm/proto.h> 52 #include <asm/ia32.h> 53 #include <asm/idle.h> 54 55 asmlinkage extern void ret_from_fork(void); 56 57 unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED; 58 59 static ATOMIC_NOTIFIER_HEAD(idle_notifier); 60 61 void idle_notifier_register(struct notifier_block *n) 62 { 63 atomic_notifier_chain_register(&idle_notifier, n); 64 } 65 66 void enter_idle(void) 67 { 68 write_pda(isidle, 1); 69 atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); 70 } 71 72 static void __exit_idle(void) 73 { 74 if (test_and_clear_bit_pda(0, isidle) == 0) 75 return; 76 atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL); 77 } 78 79 /* Called from interrupts to signify idle end */ 80 void exit_idle(void) 81 { 82 /* idle loop has pid 0 */ 83 if (current->pid) 84 return; 85 __exit_idle(); 86 } 87 88 #ifdef CONFIG_HOTPLUG_CPU 89 DECLARE_PER_CPU(int, cpu_state); 90 91 #include <asm/nmi.h> 92 /* We halt the CPU with physical CPU hotplug */ 93 static inline void play_dead(void) 94 { 95 idle_task_exit(); 96 mb(); 97 /* Ack it */ 98 __get_cpu_var(cpu_state) = CPU_DEAD; 99 100 local_irq_disable(); 101 /* mask all interrupts, flush any and all caches, and halt */ 102 wbinvd_halt(); 103 } 104 #else 105 static inline void play_dead(void) 106 { 107 BUG(); 108 } 109 #endif /* CONFIG_HOTPLUG_CPU */ 110 111 /* 112 * The idle thread. There's no useful work to be 113 * done, so just try to conserve power and have a 114 * low exit latency (ie sit in a loop waiting for 115 * somebody to say that they'd like to reschedule) 116 */ 117 void cpu_idle(void) 118 { 119 current_thread_info()->status |= TS_POLLING; 120 /* endless idle loop with no priority at all */ 121 while (1) { 122 tick_nohz_stop_sched_tick(1); 123 while (!need_resched()) { 124 125 rmb(); 126 127 if (cpu_is_offline(smp_processor_id())) 128 play_dead(); 129 /* 130 * Idle routines should keep interrupts disabled 131 * from here on, until they go to idle. 132 * Otherwise, idle callbacks can misfire. 133 */ 134 local_irq_disable(); 135 enter_idle(); 136 /* Don't trace irqs off for idle */ 137 stop_critical_timings(); 138 pm_idle(); 139 start_critical_timings(); 140 /* In many cases the interrupt that ended idle 141 has already called exit_idle. But some idle 142 loops can be woken up without interrupt. */ 143 __exit_idle(); 144 } 145 146 tick_nohz_restart_sched_tick(); 147 preempt_enable_no_resched(); 148 schedule(); 149 preempt_disable(); 150 } 151 } 152 153 /* Prints also some state that isn't saved in the pt_regs */ 154 void __show_regs(struct pt_regs * regs) 155 { 156 unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs; 157 unsigned long d0, d1, d2, d3, d6, d7; 158 unsigned int fsindex, gsindex; 159 unsigned int ds, cs, es; 160 161 printk("\n"); 162 print_modules(); 163 printk("Pid: %d, comm: %.20s %s %s %.*s\n", 164 current->pid, current->comm, print_tainted(), 165 init_utsname()->release, 166 (int)strcspn(init_utsname()->version, " "), 167 init_utsname()->version); 168 printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); 169 printk_address(regs->ip, 1); 170 printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->sp, 171 regs->flags); 172 printk("RAX: %016lx RBX: %016lx RCX: %016lx\n", 173 regs->ax, regs->bx, regs->cx); 174 printk("RDX: %016lx RSI: %016lx RDI: %016lx\n", 175 regs->dx, regs->si, regs->di); 176 printk("RBP: %016lx R08: %016lx R09: %016lx\n", 177 regs->bp, regs->r8, regs->r9); 178 printk("R10: %016lx R11: %016lx R12: %016lx\n", 179 regs->r10, regs->r11, regs->r12); 180 printk("R13: %016lx R14: %016lx R15: %016lx\n", 181 regs->r13, regs->r14, regs->r15); 182 183 asm("movl %%ds,%0" : "=r" (ds)); 184 asm("movl %%cs,%0" : "=r" (cs)); 185 asm("movl %%es,%0" : "=r" (es)); 186 asm("movl %%fs,%0" : "=r" (fsindex)); 187 asm("movl %%gs,%0" : "=r" (gsindex)); 188 189 rdmsrl(MSR_FS_BASE, fs); 190 rdmsrl(MSR_GS_BASE, gs); 191 rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); 192 193 cr0 = read_cr0(); 194 cr2 = read_cr2(); 195 cr3 = read_cr3(); 196 cr4 = read_cr4(); 197 198 printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", 199 fs,fsindex,gs,gsindex,shadowgs); 200 printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0); 201 printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4); 202 203 get_debugreg(d0, 0); 204 get_debugreg(d1, 1); 205 get_debugreg(d2, 2); 206 printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2); 207 get_debugreg(d3, 3); 208 get_debugreg(d6, 6); 209 get_debugreg(d7, 7); 210 printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7); 211 } 212 213 void show_regs(struct pt_regs *regs) 214 { 215 printk("CPU %d:", smp_processor_id()); 216 __show_regs(regs); 217 show_trace(NULL, regs, (void *)(regs + 1), regs->bp); 218 } 219 220 /* 221 * Free current thread data structures etc.. 222 */ 223 void exit_thread(void) 224 { 225 struct task_struct *me = current; 226 struct thread_struct *t = &me->thread; 227 228 if (me->thread.io_bitmap_ptr) { 229 struct tss_struct *tss = &per_cpu(init_tss, get_cpu()); 230 231 kfree(t->io_bitmap_ptr); 232 t->io_bitmap_ptr = NULL; 233 clear_thread_flag(TIF_IO_BITMAP); 234 /* 235 * Careful, clear this in the TSS too: 236 */ 237 memset(tss->io_bitmap, 0xff, t->io_bitmap_max); 238 t->io_bitmap_max = 0; 239 put_cpu(); 240 } 241 } 242 243 void flush_thread(void) 244 { 245 struct task_struct *tsk = current; 246 247 if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) { 248 clear_tsk_thread_flag(tsk, TIF_ABI_PENDING); 249 if (test_tsk_thread_flag(tsk, TIF_IA32)) { 250 clear_tsk_thread_flag(tsk, TIF_IA32); 251 } else { 252 set_tsk_thread_flag(tsk, TIF_IA32); 253 current_thread_info()->status |= TS_COMPAT; 254 } 255 } 256 clear_tsk_thread_flag(tsk, TIF_DEBUG); 257 258 tsk->thread.debugreg0 = 0; 259 tsk->thread.debugreg1 = 0; 260 tsk->thread.debugreg2 = 0; 261 tsk->thread.debugreg3 = 0; 262 tsk->thread.debugreg6 = 0; 263 tsk->thread.debugreg7 = 0; 264 memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); 265 /* 266 * Forget coprocessor state.. 267 */ 268 tsk->fpu_counter = 0; 269 clear_fpu(tsk); 270 clear_used_math(); 271 } 272 273 void release_thread(struct task_struct *dead_task) 274 { 275 if (dead_task->mm) { 276 if (dead_task->mm->context.size) { 277 printk("WARNING: dead process %8s still has LDT? <%p/%d>\n", 278 dead_task->comm, 279 dead_task->mm->context.ldt, 280 dead_task->mm->context.size); 281 BUG(); 282 } 283 } 284 } 285 286 static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr) 287 { 288 struct user_desc ud = { 289 .base_addr = addr, 290 .limit = 0xfffff, 291 .seg_32bit = 1, 292 .limit_in_pages = 1, 293 .useable = 1, 294 }; 295 struct desc_struct *desc = t->thread.tls_array; 296 desc += tls; 297 fill_ldt(desc, &ud); 298 } 299 300 static inline u32 read_32bit_tls(struct task_struct *t, int tls) 301 { 302 return get_desc_base(&t->thread.tls_array[tls]); 303 } 304 305 /* 306 * This gets called before we allocate a new thread and copy 307 * the current task into it. 308 */ 309 void prepare_to_copy(struct task_struct *tsk) 310 { 311 unlazy_fpu(tsk); 312 } 313 314 int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, 315 unsigned long unused, 316 struct task_struct * p, struct pt_regs * regs) 317 { 318 int err; 319 struct pt_regs * childregs; 320 struct task_struct *me = current; 321 322 childregs = ((struct pt_regs *) 323 (THREAD_SIZE + task_stack_page(p))) - 1; 324 *childregs = *regs; 325 326 childregs->ax = 0; 327 childregs->sp = sp; 328 if (sp == ~0UL) 329 childregs->sp = (unsigned long)childregs; 330 331 p->thread.sp = (unsigned long) childregs; 332 p->thread.sp0 = (unsigned long) (childregs+1); 333 p->thread.usersp = me->thread.usersp; 334 335 set_tsk_thread_flag(p, TIF_FORK); 336 337 p->thread.fs = me->thread.fs; 338 p->thread.gs = me->thread.gs; 339 340 savesegment(gs, p->thread.gsindex); 341 savesegment(fs, p->thread.fsindex); 342 savesegment(es, p->thread.es); 343 savesegment(ds, p->thread.ds); 344 345 if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { 346 p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); 347 if (!p->thread.io_bitmap_ptr) { 348 p->thread.io_bitmap_max = 0; 349 return -ENOMEM; 350 } 351 memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr, 352 IO_BITMAP_BYTES); 353 set_tsk_thread_flag(p, TIF_IO_BITMAP); 354 } 355 356 /* 357 * Set a new TLS for the child thread? 358 */ 359 if (clone_flags & CLONE_SETTLS) { 360 #ifdef CONFIG_IA32_EMULATION 361 if (test_thread_flag(TIF_IA32)) 362 err = do_set_thread_area(p, -1, 363 (struct user_desc __user *)childregs->si, 0); 364 else 365 #endif 366 err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8); 367 if (err) 368 goto out; 369 } 370 err = 0; 371 out: 372 if (err && p->thread.io_bitmap_ptr) { 373 kfree(p->thread.io_bitmap_ptr); 374 p->thread.io_bitmap_max = 0; 375 } 376 return err; 377 } 378 379 void 380 start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) 381 { 382 loadsegment(fs, 0); 383 loadsegment(es, 0); 384 loadsegment(ds, 0); 385 load_gs_index(0); 386 regs->ip = new_ip; 387 regs->sp = new_sp; 388 write_pda(oldrsp, new_sp); 389 regs->cs = __USER_CS; 390 regs->ss = __USER_DS; 391 regs->flags = 0x200; 392 set_fs(USER_DS); 393 /* 394 * Free the old FP and other extended state 395 */ 396 free_thread_xstate(current); 397 } 398 EXPORT_SYMBOL_GPL(start_thread); 399 400 static void hard_disable_TSC(void) 401 { 402 write_cr4(read_cr4() | X86_CR4_TSD); 403 } 404 405 void disable_TSC(void) 406 { 407 preempt_disable(); 408 if (!test_and_set_thread_flag(TIF_NOTSC)) 409 /* 410 * Must flip the CPU state synchronously with 411 * TIF_NOTSC in the current running context. 412 */ 413 hard_disable_TSC(); 414 preempt_enable(); 415 } 416 417 static void hard_enable_TSC(void) 418 { 419 write_cr4(read_cr4() & ~X86_CR4_TSD); 420 } 421 422 static void enable_TSC(void) 423 { 424 preempt_disable(); 425 if (test_and_clear_thread_flag(TIF_NOTSC)) 426 /* 427 * Must flip the CPU state synchronously with 428 * TIF_NOTSC in the current running context. 429 */ 430 hard_enable_TSC(); 431 preempt_enable(); 432 } 433 434 int get_tsc_mode(unsigned long adr) 435 { 436 unsigned int val; 437 438 if (test_thread_flag(TIF_NOTSC)) 439 val = PR_TSC_SIGSEGV; 440 else 441 val = PR_TSC_ENABLE; 442 443 return put_user(val, (unsigned int __user *)adr); 444 } 445 446 int set_tsc_mode(unsigned int val) 447 { 448 if (val == PR_TSC_SIGSEGV) 449 disable_TSC(); 450 else if (val == PR_TSC_ENABLE) 451 enable_TSC(); 452 else 453 return -EINVAL; 454 455 return 0; 456 } 457 458 /* 459 * This special macro can be used to load a debugging register 460 */ 461 #define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r) 462 463 static inline void __switch_to_xtra(struct task_struct *prev_p, 464 struct task_struct *next_p, 465 struct tss_struct *tss) 466 { 467 struct thread_struct *prev, *next; 468 unsigned long debugctl; 469 470 prev = &prev_p->thread, 471 next = &next_p->thread; 472 473 debugctl = prev->debugctlmsr; 474 if (next->ds_area_msr != prev->ds_area_msr) { 475 /* we clear debugctl to make sure DS 476 * is not in use when we change it */ 477 debugctl = 0; 478 update_debugctlmsr(0); 479 wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr); 480 } 481 482 if (next->debugctlmsr != debugctl) 483 update_debugctlmsr(next->debugctlmsr); 484 485 if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { 486 loaddebug(next, 0); 487 loaddebug(next, 1); 488 loaddebug(next, 2); 489 loaddebug(next, 3); 490 /* no 4 and 5 */ 491 loaddebug(next, 6); 492 loaddebug(next, 7); 493 } 494 495 if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ 496 test_tsk_thread_flag(next_p, TIF_NOTSC)) { 497 /* prev and next are different */ 498 if (test_tsk_thread_flag(next_p, TIF_NOTSC)) 499 hard_disable_TSC(); 500 else 501 hard_enable_TSC(); 502 } 503 504 if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { 505 /* 506 * Copy the relevant range of the IO bitmap. 507 * Normally this is 128 bytes or less: 508 */ 509 memcpy(tss->io_bitmap, next->io_bitmap_ptr, 510 max(prev->io_bitmap_max, next->io_bitmap_max)); 511 } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) { 512 /* 513 * Clear any possible leftover bits: 514 */ 515 memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); 516 } 517 518 #ifdef X86_BTS 519 if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) 520 ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); 521 522 if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) 523 ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); 524 #endif 525 } 526 527 /* 528 * switch_to(x,y) should switch tasks from x to y. 529 * 530 * This could still be optimized: 531 * - fold all the options into a flag word and test it with a single test. 532 * - could test fs/gs bitsliced 533 * 534 * Kprobes not supported here. Set the probe on schedule instead. 535 */ 536 struct task_struct * 537 __switch_to(struct task_struct *prev_p, struct task_struct *next_p) 538 { 539 struct thread_struct *prev = &prev_p->thread; 540 struct thread_struct *next = &next_p->thread; 541 int cpu = smp_processor_id(); 542 struct tss_struct *tss = &per_cpu(init_tss, cpu); 543 unsigned fsindex, gsindex; 544 545 /* we're going to use this soon, after a few expensive things */ 546 if (next_p->fpu_counter>5) 547 prefetch(next->xstate); 548 549 /* 550 * Reload esp0, LDT and the page table pointer: 551 */ 552 load_sp0(tss, next); 553 554 /* 555 * Switch DS and ES. 556 * This won't pick up thread selector changes, but I guess that is ok. 557 */ 558 savesegment(es, prev->es); 559 if (unlikely(next->es | prev->es)) 560 loadsegment(es, next->es); 561 562 savesegment(ds, prev->ds); 563 if (unlikely(next->ds | prev->ds)) 564 loadsegment(ds, next->ds); 565 566 567 /* We must save %fs and %gs before load_TLS() because 568 * %fs and %gs may be cleared by load_TLS(). 569 * 570 * (e.g. xen_load_tls()) 571 */ 572 savesegment(fs, fsindex); 573 savesegment(gs, gsindex); 574 575 load_TLS(next, cpu); 576 577 /* 578 * Leave lazy mode, flushing any hypercalls made here. 579 * This must be done before restoring TLS segments so 580 * the GDT and LDT are properly updated, and must be 581 * done before math_state_restore, so the TS bit is up 582 * to date. 583 */ 584 arch_leave_lazy_cpu_mode(); 585 586 /* 587 * Switch FS and GS. 588 * 589 * Segment register != 0 always requires a reload. Also 590 * reload when it has changed. When prev process used 64bit 591 * base always reload to avoid an information leak. 592 */ 593 if (unlikely(fsindex | next->fsindex | prev->fs)) { 594 loadsegment(fs, next->fsindex); 595 /* 596 * Check if the user used a selector != 0; if yes 597 * clear 64bit base, since overloaded base is always 598 * mapped to the Null selector 599 */ 600 if (fsindex) 601 prev->fs = 0; 602 } 603 /* when next process has a 64bit base use it */ 604 if (next->fs) 605 wrmsrl(MSR_FS_BASE, next->fs); 606 prev->fsindex = fsindex; 607 608 if (unlikely(gsindex | next->gsindex | prev->gs)) { 609 load_gs_index(next->gsindex); 610 if (gsindex) 611 prev->gs = 0; 612 } 613 if (next->gs) 614 wrmsrl(MSR_KERNEL_GS_BASE, next->gs); 615 prev->gsindex = gsindex; 616 617 /* Must be after DS reload */ 618 unlazy_fpu(prev_p); 619 620 /* 621 * Switch the PDA and FPU contexts. 622 */ 623 prev->usersp = read_pda(oldrsp); 624 write_pda(oldrsp, next->usersp); 625 write_pda(pcurrent, next_p); 626 627 write_pda(kernelstack, 628 (unsigned long)task_stack_page(next_p) + 629 THREAD_SIZE - PDA_STACKOFFSET); 630 #ifdef CONFIG_CC_STACKPROTECTOR 631 write_pda(stack_canary, next_p->stack_canary); 632 /* 633 * Build time only check to make sure the stack_canary is at 634 * offset 40 in the pda; this is a gcc ABI requirement 635 */ 636 BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40); 637 #endif 638 639 /* 640 * Now maybe reload the debug registers and handle I/O bitmaps 641 */ 642 if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT || 643 task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV)) 644 __switch_to_xtra(prev_p, next_p, tss); 645 646 /* If the task has used fpu the last 5 timeslices, just do a full 647 * restore of the math state immediately to avoid the trap; the 648 * chances of needing FPU soon are obviously high now 649 * 650 * tsk_used_math() checks prevent calling math_state_restore(), 651 * which can sleep in the case of !tsk_used_math() 652 */ 653 if (tsk_used_math(next_p) && next_p->fpu_counter > 5) 654 math_state_restore(); 655 return prev_p; 656 } 657 658 /* 659 * sys_execve() executes a new program. 660 */ 661 asmlinkage 662 long sys_execve(char __user *name, char __user * __user *argv, 663 char __user * __user *envp, struct pt_regs *regs) 664 { 665 long error; 666 char * filename; 667 668 filename = getname(name); 669 error = PTR_ERR(filename); 670 if (IS_ERR(filename)) 671 return error; 672 error = do_execve(filename, argv, envp, regs); 673 putname(filename); 674 return error; 675 } 676 677 void set_personality_64bit(void) 678 { 679 /* inherit personality from parent */ 680 681 /* Make sure to be in 64bit mode */ 682 clear_thread_flag(TIF_IA32); 683 684 /* TBD: overwrites user setup. Should have two bits. 685 But 64bit processes have always behaved this way, 686 so it's not too bad. The main problem is just that 687 32bit childs are affected again. */ 688 current->personality &= ~READ_IMPLIES_EXEC; 689 } 690 691 asmlinkage long sys_fork(struct pt_regs *regs) 692 { 693 return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL); 694 } 695 696 asmlinkage long 697 sys_clone(unsigned long clone_flags, unsigned long newsp, 698 void __user *parent_tid, void __user *child_tid, struct pt_regs *regs) 699 { 700 if (!newsp) 701 newsp = regs->sp; 702 return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid); 703 } 704 705 /* 706 * This is trivial, and on the face of it looks like it 707 * could equally well be done in user mode. 708 * 709 * Not so, for quite unobvious reasons - register pressure. 710 * In user mode vfork() cannot have a stack frame, and if 711 * done by calling the "clone()" system call directly, you 712 * do not have enough call-clobbered registers to hold all 713 * the information you need. 714 */ 715 asmlinkage long sys_vfork(struct pt_regs *regs) 716 { 717 return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0, 718 NULL, NULL); 719 } 720 721 unsigned long get_wchan(struct task_struct *p) 722 { 723 unsigned long stack; 724 u64 fp,ip; 725 int count = 0; 726 727 if (!p || p == current || p->state==TASK_RUNNING) 728 return 0; 729 stack = (unsigned long)task_stack_page(p); 730 if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE) 731 return 0; 732 fp = *(u64 *)(p->thread.sp); 733 do { 734 if (fp < (unsigned long)stack || 735 fp > (unsigned long)stack+THREAD_SIZE) 736 return 0; 737 ip = *(u64 *)(fp+8); 738 if (!in_sched_functions(ip)) 739 return ip; 740 fp = *(u64 *)fp; 741 } while (count++ < 16); 742 return 0; 743 } 744 745 long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) 746 { 747 int ret = 0; 748 int doit = task == current; 749 int cpu; 750 751 switch (code) { 752 case ARCH_SET_GS: 753 if (addr >= TASK_SIZE_OF(task)) 754 return -EPERM; 755 cpu = get_cpu(); 756 /* handle small bases via the GDT because that's faster to 757 switch. */ 758 if (addr <= 0xffffffff) { 759 set_32bit_tls(task, GS_TLS, addr); 760 if (doit) { 761 load_TLS(&task->thread, cpu); 762 load_gs_index(GS_TLS_SEL); 763 } 764 task->thread.gsindex = GS_TLS_SEL; 765 task->thread.gs = 0; 766 } else { 767 task->thread.gsindex = 0; 768 task->thread.gs = addr; 769 if (doit) { 770 load_gs_index(0); 771 ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr); 772 } 773 } 774 put_cpu(); 775 break; 776 case ARCH_SET_FS: 777 /* Not strictly needed for fs, but do it for symmetry 778 with gs */ 779 if (addr >= TASK_SIZE_OF(task)) 780 return -EPERM; 781 cpu = get_cpu(); 782 /* handle small bases via the GDT because that's faster to 783 switch. */ 784 if (addr <= 0xffffffff) { 785 set_32bit_tls(task, FS_TLS, addr); 786 if (doit) { 787 load_TLS(&task->thread, cpu); 788 loadsegment(fs, FS_TLS_SEL); 789 } 790 task->thread.fsindex = FS_TLS_SEL; 791 task->thread.fs = 0; 792 } else { 793 task->thread.fsindex = 0; 794 task->thread.fs = addr; 795 if (doit) { 796 /* set the selector to 0 to not confuse 797 __switch_to */ 798 loadsegment(fs, 0); 799 ret = checking_wrmsrl(MSR_FS_BASE, addr); 800 } 801 } 802 put_cpu(); 803 break; 804 case ARCH_GET_FS: { 805 unsigned long base; 806 if (task->thread.fsindex == FS_TLS_SEL) 807 base = read_32bit_tls(task, FS_TLS); 808 else if (doit) 809 rdmsrl(MSR_FS_BASE, base); 810 else 811 base = task->thread.fs; 812 ret = put_user(base, (unsigned long __user *)addr); 813 break; 814 } 815 case ARCH_GET_GS: { 816 unsigned long base; 817 unsigned gsindex; 818 if (task->thread.gsindex == GS_TLS_SEL) 819 base = read_32bit_tls(task, GS_TLS); 820 else if (doit) { 821 savesegment(gs, gsindex); 822 if (gsindex) 823 rdmsrl(MSR_KERNEL_GS_BASE, base); 824 else 825 base = task->thread.gs; 826 } 827 else 828 base = task->thread.gs; 829 ret = put_user(base, (unsigned long __user *)addr); 830 break; 831 } 832 833 default: 834 ret = -EINVAL; 835 break; 836 } 837 838 return ret; 839 } 840 841 long sys_arch_prctl(int code, unsigned long addr) 842 { 843 return do_arch_prctl(current, code, addr); 844 } 845 846 unsigned long arch_align_stack(unsigned long sp) 847 { 848 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) 849 sp -= get_random_int() % 8192; 850 return sp & ~0xf; 851 } 852 853 unsigned long arch_randomize_brk(struct mm_struct *mm) 854 { 855 unsigned long range_end = mm->brk + 0x02000000; 856 return randomize_range(mm->brk, range_end, 0) ? : mm->brk; 857 } 858