/*
 * Copyright (C) 1995  Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 *
 * X86-64 port
 *	Andi Kleen.
 *
 *	CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling..
 */

#include <stdarg.h>

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/utsname.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/random.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/tick.h>
#include <linux/prctl.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/pda.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/idle.h>

asmlinkage extern void ret_from_fork(void);

unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

static ATOMIC_NOTIFIER_HEAD(idle_notifier);

void idle_notifier_register(struct notifier_block *n)
{
	atomic_notifier_chain_register(&idle_notifier, n);
}

void enter_idle(void)
{
	write_pda(isidle, 1);
	atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}

static void __exit_idle(void)
{
	if (test_and_clear_bit_pda(0, isidle) == 0)
		return;
	atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}

/* Called from interrupts to signify idle end */
void exit_idle(void)
{
	/* idle loop has pid 0 */
	if (current->pid)
		return;
	__exit_idle();
}

#ifdef CONFIG_HOTPLUG_CPU
DECLARE_PER_CPU(int, cpu_state);

#include <asm/nmi.h>
/* We halt the CPU with physical CPU hotplug */
static inline void play_dead(void)
{
	idle_task_exit();
	c1e_remove_cpu(raw_smp_processor_id());

	mb();
	/* Ack it */
	__get_cpu_var(cpu_state) = CPU_DEAD;

	local_irq_disable();
	/* mask all interrupts, flush any and all caches, and halt */
	wbinvd_halt();
}
#else
static inline void play_dead(void)
{
	BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle(void)
{
	current_thread_info()->status |= TS_POLLING;
	/* endless idle loop with no priority at all */
	while (1) {
		tick_nohz_stop_sched_tick(1);
		while (!need_resched()) {

			rmb();

			if (cpu_is_offline(smp_processor_id()))
				play_dead();
			/*
			 * Idle routines should keep interrupts disabled
			 * from here on, until they go to idle.
			 * Otherwise, idle callbacks can misfire.
			 */
			local_irq_disable();
			enter_idle();
			/* Don't trace irqs off for idle */
			stop_critical_timings();
			pm_idle();
			start_critical_timings();
			/* In many cases the interrupt that ended idle
			   has already called exit_idle.
			   But some idle loops can be woken up
			   without interrupt. */
			__exit_idle();
		}

		tick_nohz_restart_sched_tick();
		preempt_enable_no_resched();
		schedule();
		preempt_disable();
	}
}

/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs *regs)
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
	unsigned long d0, d1, d2, d3, d6, d7;
	unsigned int fsindex, gsindex;
	unsigned int ds, cs, es;

	printk("\n");
	print_modules();
	printk("Pid: %d, comm: %.20s %s %s %.*s\n",
		current->pid, current->comm, print_tainted(),
		init_utsname()->release,
		(int)strcspn(init_utsname()->version, " "),
		init_utsname()->version);
	printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
	printk_address(regs->ip, 1);
	printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->sp,
		regs->flags);
	printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
	       regs->ax, regs->bx, regs->cx);
	printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
	       regs->dx, regs->si, regs->di);
	printk("RBP: %016lx R08: %016lx R09: %016lx\n",
	       regs->bp, regs->r8, regs->r9);
	printk("R10: %016lx R11: %016lx R12: %016lx\n",
	       regs->r10, regs->r11, regs->r12);
	printk("R13: %016lx R14: %016lx R15: %016lx\n",
	       regs->r13, regs->r14, regs->r15);

	asm("movl %%ds,%0" : "=r" (ds));
	asm("movl %%cs,%0" : "=r" (cs));
	asm("movl %%es,%0" : "=r" (es));
	asm("movl %%fs,%0" : "=r" (fsindex));
	asm("movl %%gs,%0" : "=r" (gsindex));

	rdmsrl(MSR_FS_BASE, fs);
	rdmsrl(MSR_GS_BASE, gs);
	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

	cr0 = read_cr0();
	cr2 = read_cr2();
	cr3 = read_cr3();
	cr4 = read_cr4();

	printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
	       fs, fsindex, gs, gsindex, shadowgs);
	printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
	printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);

	get_debugreg(d0, 0);
	get_debugreg(d1, 1);
	get_debugreg(d2, 2);
	printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
	get_debugreg(d3, 3);
	get_debugreg(d6, 6);
	get_debugreg(d7, 7);
	printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
}

void show_regs(struct pt_regs *regs)
{
	printk("CPU %d:", smp_processor_id());
	__show_regs(regs);
	show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
}

/*
 * Free current thread data structures etc..
 */
void exit_thread(void)
{
	struct task_struct *me = current;
	struct thread_struct *t = &me->thread;

	if (me->thread.io_bitmap_ptr) {
		struct tss_struct *tss = &per_cpu(init_tss, get_cpu());

		kfree(t->io_bitmap_ptr);
		t->io_bitmap_ptr = NULL;
		clear_thread_flag(TIF_IO_BITMAP);
		/*
		 * Careful, clear this in the TSS too:
		 */
		memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
		t->io_bitmap_max = 0;
		put_cpu();
	}
}

void flush_thread(void)
{
	struct task_struct *tsk = current;

	if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
		clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
		if (test_tsk_thread_flag(tsk, TIF_IA32)) {
			clear_tsk_thread_flag(tsk, TIF_IA32);
		} else {
			set_tsk_thread_flag(tsk, TIF_IA32);
			current_thread_info()->status |= TS_COMPAT;
		}
	}
	clear_tsk_thread_flag(tsk, TIF_DEBUG);

	tsk->thread.debugreg0 = 0;
	tsk->thread.debugreg1 = 0;
	tsk->thread.debugreg2 = 0;
	tsk->thread.debugreg3 = 0;
	tsk->thread.debugreg6 = 0;
	tsk->thread.debugreg7 = 0;
	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
	/*
	 * Forget coprocessor state..
	 */
	tsk->fpu_counter = 0;
	clear_fpu(tsk);
	clear_used_math();
}

void release_thread(struct task_struct *dead_task)
{
	if (dead_task->mm) {
		if (dead_task->mm->context.size) {
			printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
					dead_task->comm,
					dead_task->mm->context.ldt,
					dead_task->mm->context.size);
			BUG();
		}
	}
}

static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
	struct user_desc ud = {
		.base_addr = addr,
		.limit = 0xfffff,
		.seg_32bit = 1,
		.limit_in_pages = 1,
		.useable = 1,
	};
	struct desc_struct *desc = t->thread.tls_array;
	desc += tls;
	fill_ldt(desc, &ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
	return get_desc_base(&t->thread.tls_array[tls]);
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
	unlazy_fpu(tsk);
}

int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
		unsigned long unused,
		struct task_struct *p, struct pt_regs *regs)
{
	int err;
	struct pt_regs *childregs;
	struct task_struct *me = current;

	childregs = ((struct pt_regs *)
			(THREAD_SIZE + task_stack_page(p))) - 1;
	*childregs = *regs;

	childregs->ax = 0;
	childregs->sp = sp;
	if (sp == ~0UL)
		childregs->sp = (unsigned long)childregs;

	p->thread.sp = (unsigned long) childregs;
	p->thread.sp0 = (unsigned long) (childregs + 1);
	p->thread.usersp = me->thread.usersp;

	set_tsk_thread_flag(p, TIF_FORK);

	p->thread.fs = me->thread.fs;
	p->thread.gs = me->thread.gs;

	savesegment(gs, p->thread.gsindex);
	savesegment(fs, p->thread.fsindex);
	savesegment(es, p->thread.es);
	savesegment(ds, p->thread.ds);

	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
				IO_BITMAP_BYTES);
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
	}

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
		if (test_thread_flag(TIF_IA32))
			err = do_set_thread_area(p, -1,
				(struct user_desc __user *)childregs->si, 0);
		else
#endif
			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
		if (err)
			goto out;
	}
	err = 0;
out:
	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}
	return err;
}

void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
	loadsegment(fs, 0);
	loadsegment(es, 0);
	loadsegment(ds, 0);
	load_gs_index(0);
	regs->ip = new_ip;
	regs->sp = new_sp;
	write_pda(oldrsp, new_sp);
	regs->cs = __USER_CS;
	regs->ss = __USER_DS;
	regs->flags = 0x200;
	set_fs(USER_DS);
	/*
	 * Free the old FP and other extended state
	 */
	free_thread_xstate(current);
}
EXPORT_SYMBOL_GPL(start_thread);

static void hard_disable_TSC(void)
{
	write_cr4(read_cr4() | X86_CR4_TSD);
}

void disable_TSC(void)
{
	preempt_disable();
	if (!test_and_set_thread_flag(TIF_NOTSC))
		/*
		 * Must flip the CPU state synchronously with
		 * TIF_NOTSC in the current running context.
		 */
		hard_disable_TSC();
	preempt_enable();
}

static void hard_enable_TSC(void)
{
	write_cr4(read_cr4() & ~X86_CR4_TSD);
}

static void enable_TSC(void)
{
	preempt_disable();
	if (test_and_clear_thread_flag(TIF_NOTSC))
		/*
		 * Must flip the CPU state synchronously with
		 * TIF_NOTSC in the current running context.
		 */
		hard_enable_TSC();
	preempt_enable();
}

int get_tsc_mode(unsigned long adr)
{
	unsigned int val;

	if (test_thread_flag(TIF_NOTSC))
		val = PR_TSC_SIGSEGV;
	else
		val = PR_TSC_ENABLE;

	return put_user(val, (unsigned int __user *)adr);
}

int set_tsc_mode(unsigned int val)
{
	if (val == PR_TSC_SIGSEGV)
		disable_TSC();
	else if (val == PR_TSC_ENABLE)
		enable_TSC();
	else
		return -EINVAL;

	return 0;
}

/*
 * This special macro can be used to load a debugging register
 */
#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)

static inline void __switch_to_xtra(struct task_struct *prev_p,
				    struct task_struct *next_p,
				    struct tss_struct *tss)
{
	struct thread_struct *prev, *next;
	unsigned long debugctl;

	prev = &prev_p->thread;
	next = &next_p->thread;

	debugctl = prev->debugctlmsr;
	if (next->ds_area_msr != prev->ds_area_msr) {
		/* we clear debugctl to make sure DS
		 * is not in use when we change it */
		debugctl = 0;
		update_debugctlmsr(0);
		wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr);
	}

	if (next->debugctlmsr != debugctl)
		update_debugctlmsr(next->debugctlmsr);

	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
		loaddebug(next, 0);
		loaddebug(next, 1);
		loaddebug(next, 2);
		loaddebug(next, 3);
		/* no 4 and 5 */
		loaddebug(next, 6);
		loaddebug(next, 7);
	}

	if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
	    test_tsk_thread_flag(next_p, TIF_NOTSC)) {
		/* prev and next are different */
		if (test_tsk_thread_flag(next_p, TIF_NOTSC))
			hard_disable_TSC();
		else
			hard_enable_TSC();
	}

	if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
		/*
		 * Copy the relevant range of the IO bitmap.
		 * Normally this is 128 bytes or less:
		 */
		memcpy(tss->io_bitmap, next->io_bitmap_ptr,
		       max(prev->io_bitmap_max, next->io_bitmap_max));
	} else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
		/*
		 * Clear any possible leftover bits:
		 */
		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
	}

#ifdef X86_BTS
	if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
		ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);

	if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
		ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
#endif
}

/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 */
struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread;
	struct thread_struct *next = &next_p->thread;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);
	unsigned fsindex, gsindex;

	/* we're going to use this soon, after a few expensive things */
	if (next_p->fpu_counter > 5)
		prefetch(next->xstate);

	/*
	 * Reload esp0, LDT and the page table pointer:
	 */
	load_sp0(tss, next);

	/*
	 * Switch DS and ES.
	 * This won't pick up thread selector changes, but I guess that is ok.
	 */
	savesegment(es, prev->es);
	if (unlikely(next->es | prev->es))
		loadsegment(es, next->es);

	savesegment(ds, prev->ds);
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);

	/* We must save %fs and %gs before load_TLS() because
	 * %fs and %gs may be cleared by load_TLS().
	 *
	 * (e.g. xen_load_tls())
	 */
	savesegment(fs, fsindex);
	savesegment(gs, gsindex);

	load_TLS(next, cpu);

	/*
	 * Leave lazy mode, flushing any hypercalls made here.
	 * This must be done before restoring TLS segments so
	 * the GDT and LDT are properly updated, and must be
	 * done before math_state_restore, so the TS bit is up
	 * to date.
	 */
	arch_leave_lazy_cpu_mode();

	/*
	 * Switch FS and GS.
	 *
	 * Segment register != 0 always requires a reload. Also
	 * reload when it has changed. When prev process used 64bit
	 * base always reload to avoid an information leak.
	 */
	if (unlikely(fsindex | next->fsindex | prev->fs)) {
		loadsegment(fs, next->fsindex);
		/*
		 * Check if the user used a selector != 0; if yes
		 * clear 64bit base, since overloaded base is always
		 * mapped to the Null selector
		 */
		if (fsindex)
			prev->fs = 0;
	}
	/* when next process has a 64bit base use it */
	if (next->fs)
		wrmsrl(MSR_FS_BASE, next->fs);
	prev->fsindex = fsindex;

	if (unlikely(gsindex | next->gsindex | prev->gs)) {
		load_gs_index(next->gsindex);
		if (gsindex)
			prev->gs = 0;
	}
	if (next->gs)
		wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
	prev->gsindex = gsindex;

	/* Must be after DS reload */
	unlazy_fpu(prev_p);

	/*
	 * Switch the PDA and FPU contexts.
	 */
	prev->usersp = read_pda(oldrsp);
	write_pda(oldrsp, next->usersp);
	write_pda(pcurrent, next_p);

	write_pda(kernelstack,
		  (unsigned long)task_stack_page(next_p) +
		  THREAD_SIZE - PDA_STACKOFFSET);
#ifdef CONFIG_CC_STACKPROTECTOR
	write_pda(stack_canary, next_p->stack_canary);
	/*
	 * Build time only check to make sure the stack_canary is at
	 * offset 40 in the pda; this is a gcc ABI requirement
	 */
	BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
#endif

	/*
	 * Now maybe reload the debug registers and handle I/O bitmaps
	 */
	if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
		__switch_to_xtra(prev_p, next_p, tss);

	/* If the task has used fpu the last 5 timeslices, just do a full
	 * restore of the math state immediately to avoid the trap; the
	 * chances of needing FPU soon are obviously high now
	 *
	 * tsk_used_math() checks prevent calling math_state_restore(),
	 * which can sleep in the case of !tsk_used_math()
	 */
	if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
		math_state_restore();
	return prev_p;
}

/*
 * sys_execve() executes a new program.
 */
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
		char __user * __user *envp, struct pt_regs *regs)
{
	long error;
	char *filename;

	filename = getname(name);
	error = PTR_ERR(filename);
	if (IS_ERR(filename))
		return error;
	error = do_execve(filename, argv, envp, regs);
	putname(filename);
	return error;
}

void set_personality_64bit(void)
{
	/* inherit personality from parent */

	/* Make sure to be in 64bit mode */
	clear_thread_flag(TIF_IA32);

	/* TBD: overwrites user setup. Should have two bits.
	   But 64bit processes have always behaved this way,
	   so it's not too bad. The main problem is just that
	   32bit childs are affected again. */
	current->personality &= ~READ_IMPLIES_EXEC;
}

asmlinkage long sys_fork(struct pt_regs *regs)
{
	return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
}

asmlinkage long
sys_clone(unsigned long clone_flags, unsigned long newsp,
	  void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
	if (!newsp)
		newsp = regs->sp;
	return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage long sys_vfork(struct pt_regs *regs)
{
	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
		    NULL, NULL);
}

/*
 * Find the kernel text address a blocked task is sleeping in, by walking
 * the saved frame-pointer chain on its kernel stack (at most 16 frames)
 * and skipping scheduler functions.
 */
unsigned long get_wchan(struct task_struct *p)
{
	unsigned long stack;
	u64 fp, ip;
	int count = 0;

	if (!p || p == current || p->state == TASK_RUNNING)
		return 0;
	stack = (unsigned long)task_stack_page(p);
	if (p->thread.sp < stack || p->thread.sp > stack + THREAD_SIZE)
		return 0;
	fp = *(u64 *)(p->thread.sp);
	do {
		if (fp < (unsigned long)stack ||
		    fp > (unsigned long)stack + THREAD_SIZE)
			return 0;
		ip = *(u64 *)(fp + 8);
		if (!in_sched_functions(ip))
			return ip;
		fp = *(u64 *)fp;
	} while (count++ < 16);
	return 0;
}

long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
	int ret = 0;
	int doit = task == current;
	int cpu;

	switch (code) {
	case ARCH_SET_GS:
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, GS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				load_gs_index(GS_TLS_SEL);
			}
			task->thread.gsindex = GS_TLS_SEL;
			task->thread.gs = 0;
		} else {
			task->thread.gsindex = 0;
			task->thread.gs = addr;
			if (doit) {
				load_gs_index(0);
				ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_SET_FS:
		/* Not strictly needed for fs, but do it for symmetry
		   with gs */
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, FS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				loadsegment(fs, FS_TLS_SEL);
			}
			task->thread.fsindex = FS_TLS_SEL;
			task->thread.fs = 0;
		} else {
			task->thread.fsindex = 0;
			task->thread.fs = addr;
			if (doit) {
				/* set the selector to 0 to not confuse
				   __switch_to */
				loadsegment(fs, 0);
				ret = checking_wrmsrl(MSR_FS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_GET_FS: {
		unsigned long base;
		if (task->thread.fsindex == FS_TLS_SEL)
			base = read_32bit_tls(task, FS_TLS);
		else if (doit)
			rdmsrl(MSR_FS_BASE, base);
		else
			base = task->thread.fs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}
	case ARCH_GET_GS: {
		unsigned long base;
		unsigned gsindex;
		if (task->thread.gsindex == GS_TLS_SEL)
			base = read_32bit_tls(task, GS_TLS);
		else if (doit) {
			savesegment(gs, gsindex);
			if (gsindex)
				rdmsrl(MSR_KERNEL_GS_BASE, base);
			else
				base = task->thread.gs;
		} else
			base = task->thread.gs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
	return do_arch_prctl(current, code, addr);
}

/* Randomize the initial stack pointer by up to 8kB, keeping 16-byte alignment. */
unsigned long arch_align_stack(unsigned long sp)
{
	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		sp -= get_random_int() % 8192;
	return sp & ~0xf;
}

/* Randomize the brk start within 32MB above the unrandomized brk. */
unsigned long arch_randomize_brk(struct mm_struct *mm)
{
	unsigned long range_end = mm->brk + 0x02000000;
	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
}