/*
 * linux/arch/i386/kernel/head.S -- the 32-bit startup code.
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 *
 * Enhanced CPU detection and feature setting code by Mike Jagdis
 * and Martin Mares, November 1997.
 */

.text
#include <linux/threads.h>
#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/desc.h>
#include <asm/cache.h>
#include <asm/thread_info.h>
#include <asm/asm-offsets.h>
#include <asm/setup.h>

/*
 * References to members of the new_cpu_data structure.
 * (Offsets come from asm-offsets.h, generated from struct cpuinfo_x86.)
 */

#define X86		new_cpu_data+CPUINFO_x86
#define X86_VENDOR	new_cpu_data+CPUINFO_x86_vendor
#define X86_MODEL	new_cpu_data+CPUINFO_x86_model
#define X86_MASK	new_cpu_data+CPUINFO_x86_mask
#define X86_HARD_MATH	new_cpu_data+CPUINFO_hard_math
#define X86_CPUID	new_cpu_data+CPUINFO_cpuid_level
#define X86_CAPABILITY	new_cpu_data+CPUINFO_x86_capability
#define X86_VENDOR_ID	new_cpu_data+CPUINFO_x86_vendor_id

/*
 * This is how much memory *in addition to the memory covered up to
 * and including _end* we need mapped initially.
 * We need:
 *  - one bit for each possible page, but only in low memory, which means
 *    2^32/4096/8 = 128K worst case (4G/4G split.)
 *  - enough space to map all low memory, which means
 *    (2^32/4096) / 1024 pages (worst case, non PAE)
 *    (2^32/4096) / 512 + 4 pages (worst case for PAE)
 *  - a few pages for allocator use before the kernel pagetable has
 *    been set up
 *
 * Modulo rounding, each megabyte assigned here requires a kilobyte of
 * memory, which is currently unreclaimed.
 *
 * This should be a multiple of a page.
 */
LOW_PAGES = 1<<(32-PAGE_SHIFT_asm)

/*
 * To preserve the DMA pool in PAGEALLOC kernels, we'll allocate
 * pagetables from above the 16MB DMA limit, so we'll have to set
 * up pagetables 16MB more (worst-case):
 */
#ifdef CONFIG_DEBUG_PAGEALLOC
LOW_PAGES = LOW_PAGES + 0x1000000
#endif

#if PTRS_PER_PMD > 1
PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PMD) + PTRS_PER_PGD
#else
PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PGD)
#endif
BOOTBITMAP_SIZE = LOW_PAGES / 8
ALLOCATOR_SLOP = 4

INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_SIZE_asm

/*
 * 32-bit kernel entrypoint; only used by the boot CPU.  On entry,
 * %esi points to the real-mode code as a 32-bit pointer.
 * CS and DS must be 4 GB flat segments, but we don't depend on
 * any particular GDT layout, because we load our own as soon as we
 * can.
 *
 * NOTE: paging is off here, so all symbol references must be adjusted
 * by -__PAGE_OFFSET to get physical addresses.
 */
.section .text.head,"ax",@progbits
ENTRY(startup_32)
	/* check to see if KEEP_SEGMENTS flag is meaningful
	   (boot protocol 2.07 introduced it; 0x207 = version 2.07) */
	cmpw $0x207, BP_version(%esi)
	jb 1f

	/* test KEEP_SEGMENTS flag to see if the bootloader is asking
	   us to not reload segments */
	testb $(1<<6), BP_loadflags(%esi)
	jnz 2f

/*
 * Set segments to known values.
 */
1:	lgdt boot_gdt_descr - __PAGE_OFFSET
	movl $(__BOOT_DS),%eax
	movl %eax,%ds
	movl %eax,%es
	movl %eax,%fs
	movl %eax,%gs
2:

/*
 * Clear BSS first so that there are no surprises...
 * (__bss_start/__bss_stop are linker symbols; sizes here are in
 *  dwords, hence the shr by 2.)
 */
	cld
	xorl %eax,%eax
	movl $__bss_start - __PAGE_OFFSET,%edi
	movl $__bss_stop - __PAGE_OFFSET,%ecx
	subl %edi,%ecx
	shrl $2,%ecx
	rep ; stosl
/*
 * Copy bootup parameters out of the way.
 * Note: %esi still has the pointer to the real-mode data.
 * With the kexec as boot loader, parameter segment might be loaded beyond
 * kernel image and might not even be addressable by early boot page tables.
 * (kexec on panic case).  Hence copy out the parameters before initializing
 * page tables.
 */
	movl $(boot_params - __PAGE_OFFSET),%edi
	movl $(PARAM_SIZE/4),%ecx
	cld
	rep
	movsl
	movl boot_params - __PAGE_OFFSET + NEW_CL_POINTER,%esi
	andl %esi,%esi
	jz 1f			# No command line
	movl $(boot_command_line - __PAGE_OFFSET),%edi
	movl $(COMMAND_LINE_SIZE/4),%ecx
	rep
	movsl
1:

#ifdef CONFIG_PARAVIRT
	/* hardware_subarch is only valid from boot protocol 2.07 onwards */
	cmpw $0x207, (boot_params + BP_version - __PAGE_OFFSET)
	jb default_entry

	/* Paravirt-compatible boot parameters.  Look to see what architecture
	   we're booting under. */
	movl (boot_params + BP_hardware_subarch - __PAGE_OFFSET), %eax
	cmpl $num_subarch_entries, %eax
	jae bad_subarch

	/* indirect jump through the (physical-address-adjusted) table below */
	movl subarch_entries - __PAGE_OFFSET(,%eax,4), %eax
	subl $__PAGE_OFFSET, %eax
	jmp *%eax

bad_subarch:
WEAK(lguest_entry)
WEAK(xen_entry)
	/* Unknown implementation; there's really
	   nothing we can do at this point. */
	ud2a

	__INITDATA

subarch_entries:
	.long default_entry		/* normal x86/PC */
	.long lguest_entry		/* lguest hypervisor */
	.long xen_entry			/* Xen hypervisor */
num_subarch_entries = (. - subarch_entries) / 4
.previous
#endif /* CONFIG_PARAVIRT */

/*
 * Initialize page tables.  This creates a PDE and a set of page
 * tables, which are located immediately beyond _end.  The variable
 * init_pg_tables_end is set up to point to the first "safe" location.
 * Mappings are created both at virtual address 0 (identity mapping)
 * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END.
 *
 * Warning: don't use %esi or the stack in this code.  However, %esp
 * can be used as a GPR if you really need it...
 */
/* byte offset into the page directory of __PAGE_OFFSET's PDE:
 * entry index is >>22, each entry is 4 bytes, hence >>20 */
page_pde_offset = (__PAGE_OFFSET >> 20);

default_entry:
	movl $(pg0 - __PAGE_OFFSET), %edi
	movl $(swapper_pg_dir - __PAGE_OFFSET), %edx
	movl $0x007, %eax			/* 0x007 = PRESENT+RW+USER */
10:
	leal 0x007(%edi),%ecx			/* Create PDE entry */
	movl %ecx,(%edx)			/* Store identity PDE entry */
	movl %ecx,page_pde_offset(%edx)		/* Store kernel PDE entry */
	addl $4,%edx
	movl $1024, %ecx			/* 1024 PTEs per page table */
11:
	stosl
	addl $0x1000,%eax			/* next 4K page */
	loop 11b
	/* End condition: we must map up to and including INIT_MAP_BEYOND_END */
	/* bytes beyond the end of our own page tables; the +0x007 is the attribute bits */
	leal (INIT_MAP_BEYOND_END+0x007)(%edi),%ebp
	cmpl %ebp,%eax
	jb 10b
	movl %edi,(init_pg_tables_end - __PAGE_OFFSET)

	/* Do an early initialization of the fixmap area: point the last
	 * PDE slot (entry 1023, byte offset 4092) at swapper_pg_pmd */
	movl $(swapper_pg_dir - __PAGE_OFFSET), %edx
	movl $(swapper_pg_pmd - __PAGE_OFFSET), %eax
	addl $0x67, %eax			/* 0x67 == _PAGE_TABLE */
	movl %eax, 4092(%edx)

	jmp 3f
/*
 * Non-boot CPU entry point; entered from trampoline.S
 * We can't lgdt here, because lgdt itself uses a data segment, but
 * we know the trampoline has already loaded the boot_gdt for us.
 *
 * If cpu hotplug is not supported then this code can go in init section
 * which will be freed later
 */

#ifndef CONFIG_HOTPLUG_CPU
.section .init.text,"ax",@progbits
#endif

#ifdef CONFIG_SMP
ENTRY(startup_32_smp)
	cld
	movl $(__BOOT_DS),%eax
	movl %eax,%ds
	movl %eax,%es
	movl %eax,%fs
	movl %eax,%gs
#endif /* CONFIG_SMP */
3:

/*
 * New page tables may be in 4Mbyte page mode and may
 * be using the global pages.
 *
 * NOTE! If we are on a 486 we may have no cr4 at all!
 * So we do not try to touch it unless we really have
 * some bits in it to set.  This won't work if the BSP
 * implements cr4 but this AP does not -- very unlikely
 * but be warned!  The same applies to the pse feature
 * if not equally supported. --macro
 *
 * NOTE! We have to correct for the fact that we're
 * not yet offset PAGE_OFFSET..
 */
#define cr4_bits mmu_cr4_features-__PAGE_OFFSET
	movl cr4_bits,%edx
	andl %edx,%edx
	jz 6f			/* no cr4 bits wanted: skip cr4/EFER setup */
	movl %cr4,%eax		# Turn on paging options (PSE,PAE,..)
	orl %edx,%eax
	movl %eax,%cr4

	btl $5, %eax		# check if PAE is enabled
	jnc 6f

	/* Check if extended functions are implemented */
	movl $0x80000000, %eax
	cpuid
	cmpl $0x80000000, %eax
	jbe 6f
	mov $0x80000001, %eax
	cpuid
	/* Execute Disable bit supported? (CPUID 0x80000001 EDX bit 20) */
	btl $20, %edx
	jnc 6f

	/* Setup EFER (Extended Feature Enable Register), MSR 0xc0000080 */
	movl $0xc0000080, %ecx
	rdmsr

	btsl $11, %eax		/* set NXE: enable Execute Disable */
	/* Make changes effective */
	wrmsr

6:

/*
 * Enable paging
 */
	movl $swapper_pg_dir-__PAGE_OFFSET,%eax
	movl %eax,%cr3		/* set the page table pointer.. */
	movl %cr0,%eax
	orl $0x80000000,%eax
	movl %eax,%cr0		/* ..and set paging (PG) bit */
	ljmp $__BOOT_CS,$1f	/* Clear prefetch and normalize %eip */
1:
	/* Set up the stack pointer (loads %ss:%esp from stack_start) */
	lss stack_start,%esp

/*
 * Initialize eflags.  Some BIOS's leave bits like NT set.  This would
 * confuse the debugger if this code is traced.
 * XXX - best to initialize before switching to protected mode.
 */
	pushl $0
	popfl

#ifdef CONFIG_SMP
	cmpb $0, ready
	jz 1f			/* Initial CPU cleans BSS */
	jmp checkCPUtype
1:
#endif /* CONFIG_SMP */

/*
 * start system 32-bit setup. We need to re-do some of the things done
 * in 16-bit mode for the "real" operations.
 */
	call setup_idt

checkCPUtype:

	movl $-1,X86_CPUID	# -1 for no CPUID initially

/* check if it is 486 or 386. */
/*
 * XXX - this does a lot of unnecessary setup.  Alignment checks don't
 * apply at our cpl of 0 and the stack ought to be aligned already, and
 * we don't need to preserve eflags.
 */

	movb $3,X86		# at least 386
	pushfl			# push EFLAGS
	popl %eax		# get EFLAGS
	movl %eax,%ecx		# save original EFLAGS
	xorl $0x240000,%eax	# flip AC and ID bits in EFLAGS
	pushl %eax		# copy to EFLAGS
	popfl			# set EFLAGS
	pushfl			# get new EFLAGS
	popl %eax		# put it in eax
	xorl %ecx,%eax		# change in flags
	pushl %ecx		# restore original EFLAGS
	popfl
	testl $0x40000,%eax	# check if AC bit changed
	je is386		# 386 can't toggle AC

	movb $4,X86		# at least 486
	testl $0x200000,%eax	# check if ID bit changed
	je is486		# no CPUID without a toggleable ID bit

	/* get vendor info */
	xorl %eax,%eax		# call CPUID with 0 -> return vendor ID
	cpuid
	movl %eax,X86_CPUID	# save CPUID level
	movl %ebx,X86_VENDOR_ID	# lo 4 chars
	movl %edx,X86_VENDOR_ID+4	# next 4 chars
	movl %ecx,X86_VENDOR_ID+8	# last 4 chars

	orl %eax,%eax		# do we have processor info as well?
	je is486

	movl $1,%eax		# Use the CPUID instruction to get CPU type
	cpuid
	movb %al,%cl		# save reg for future use
	andb $0x0f,%ah		# mask processor family
	movb %ah,X86
	andb $0xf0,%al		# mask model
	shrb $4,%al
	movb %al,X86_MODEL
	andb $0x0f,%cl		# mask mask revision
	movb %cl,X86_MASK
	movl %edx,X86_CAPABILITY

is486:	movl $0x50022,%ecx	# set AM, WP, NE and MP
	jmp 2f

is386:	movl $2,%ecx		# set MP
2:	movl %cr0,%eax
	andl $0x80000011,%eax	# Save PG,PE,ET
	orl %ecx,%eax
	movl %eax,%cr0

	call check_x87
	lgdt early_gdt_descr
	lidt idt_descr
	ljmp $(__KERNEL_CS),$1f
1:	movl $(__KERNEL_DS),%eax	# reload all the segment registers
	movl %eax,%ss		# after changing gdt.
	movl %eax,%fs		# gets reset once there's real percpu

	movl $(__USER_DS),%eax	# DS/ES contains default USER segment
	movl %eax,%ds
	movl %eax,%es

	xorl %eax,%eax		# Clear GS and LDT
	movl %eax,%gs
	lldt %ax

	cld			# gcc2 wants the direction flag cleared at all times
	pushl $0		# fake return address for unwinder
#ifdef CONFIG_SMP
	movb ready, %cl
	movb $1, ready
	cmpb $0,%cl		# the first CPU calls start_kernel
	je   1f
	movl $(__KERNEL_PERCPU), %eax
	movl %eax,%fs		# set this cpu's percpu
	jmp initialize_secondary # all other CPUs call initialize_secondary
1:
#endif /* CONFIG_SMP */
	jmp start_kernel

/*
 * We depend on ET to be correct.  This checks for 287/387.
 * Sets X86_HARD_MATH (1 = FPU present); if no FPU, sets the EM bit
 * in cr0 so FP instructions trap for emulation.
 */
check_x87:
	movb $0,X86_HARD_MATH
	clts
	fninit
	fstsw %ax
	cmpb $0,%al
	je 1f
	movl %cr0,%eax		/* no coprocessor: have to set bits */
	xorl $4,%eax		/* set EM */
	movl %eax,%cr0
	ret
	ALIGN
1:	movb $1,X86_HARD_MATH
	.byte 0xDB,0xE4		/* fsetpm for 287, ignored by 387 */
	ret

/*
 *  setup_idt
 *
 *  sets up a idt with 256 entries pointing to
 *  ignore_int, interrupt gates. It doesn't actually load
 *  idt - that can be done only after paging has been enabled
 *  and the kernel moved to PAGE_OFFSET. Interrupts
 *  are enabled elsewhere, when we can be relatively
 *  sure everything is ok.
 *
 *  Warning: %esi is live across this function.
 */
setup_idt:
	lea ignore_int,%edx
	movl $(__KERNEL_CS << 16),%eax
	movw %dx,%ax		/* selector = 0x0010 = cs */
	movw $0x8E00,%dx	/* interrupt gate - dpl=0, present */

	lea idt_table,%edi
	mov $256,%ecx
rp_sidt:
	movl %eax,(%edi)	/* low dword: selector + handler offset 15..0 */
	movl %edx,4(%edi)	/* high dword: handler offset 31..16 + attrs */
	addl $8,%edi
	dec %ecx
	jne rp_sidt

/* install a dedicated early trap handler into IDT slot \trapno */
.macro	set_early_handler handler,trapno
	lea \handler,%edx
	movl $(__KERNEL_CS << 16),%eax
	movw %dx,%ax
	movw $0x8E00,%dx	/* interrupt gate - dpl=0, present */
	lea idt_table,%edi
	movl %eax,8*\trapno(%edi)
	movl %edx,8*\trapno+4(%edi)
.endm

	set_early_handler handler=early_divide_err,trapno=0
	set_early_handler handler=early_illegal_opcode,trapno=6
	set_early_handler handler=early_protection_fault,trapno=13
	set_early_handler handler=early_page_fault,trapno=14

	ret

/* trap 0 pushes no error code; fake one so all paths share a frame shape */
early_divide_err:
	xor %edx,%edx
	pushl $0	/* fake errcode */
	jmp early_fault

early_illegal_opcode:
	movl $6,%edx
	pushl $0	/* fake errcode */
	jmp early_fault

/* #GP and #PF push a real error code; CPU already supplied it */
early_protection_fault:
	movl $13,%edx
	jmp early_fault

early_page_fault:
	movl $14,%edx
	jmp early_fault

/* common tail: %edx = trap number; prints fault_msg then halts forever */
early_fault:
	cld
#ifdef CONFIG_PRINTK
	pusha
	movl $(__KERNEL_DS),%eax
	movl %eax,%ds
	movl %eax,%es
	cmpl $2,early_recursion_flag	/* faulted twice while reporting? */
	je hlt_loop			/* give up: just halt */
	incl early_recursion_flag
	movl %cr2,%eax			/* faulting address (for #PF) */
	pushl %eax
	pushl %edx			/* trapno */
	pushl $fault_msg
#ifdef CONFIG_EARLY_PRINTK
	call early_printk
#else
	call printk
#endif
#endif
	call dump_stack
hlt_loop:
	hlt
	jmp hlt_loop

/* This is the default interrupt "handler" :-) */
	ALIGN
ignore_int:
	cld
#ifdef CONFIG_PRINTK
	pushl %eax
	pushl %ecx
	pushl %edx
	pushl %es
	pushl %ds
	movl $(__KERNEL_DS),%eax
	movl %eax,%ds
	movl %eax,%es
	cmpl $2,early_recursion_flag
	je hlt_loop
	incl early_recursion_flag
	/* re-push saved EIP/CS/EFLAGS/orig-EAX from the frame as printk args;
	 * offsets account for the 5 registers saved above and grow as we push */
	pushl 16(%esp)
	pushl 24(%esp)
	pushl 32(%esp)
	pushl 40(%esp)
	pushl $int_msg
#ifdef CONFIG_EARLY_PRINTK
	call early_printk
#else
	call printk
#endif
	addl $(5*4),%esp	/* drop the 5 printk arguments */
	popl %ds
	popl %es
	popl %edx
	popl %ecx
	popl %eax
#endif
	iret

.section .text
/*
 * Real beginning of normal "text" segment
 */
ENTRY(stext)
ENTRY(_stext)

/*
 * BSS section
 */
.section ".bss.page_aligned","wa"
	.align PAGE_SIZE_asm
ENTRY(swapper_pg_dir)
	.fill 1024,4,0
ENTRY(swapper_pg_pmd)
	.fill 1024,4,0
ENTRY(empty_zero_page)
	.fill 4096,1,0

/*
 * This starts the data section.
 */
.data
ENTRY(stack_start)
	.long init_thread_union+THREAD_SIZE	/* initial %esp */
	.long __BOOT_DS				/* initial %ss */

/* set to 1 once the first CPU has passed early init (see SMP paths above) */
ready:	.byte 0

early_recursion_flag:
	.long 0

int_msg:
	.asciz "Unknown interrupt or fault at EIP %p %p %p\n"

fault_msg:
	.ascii \
/* fault info: */	"BUG: Int %d: CR2 %p\n" \
/* pusha regs: */	" EDI %p ESI %p EBP %p ESP %p\n" \
			" EBX %p EDX %p ECX %p EAX %p\n" \
/* fault frame: */	" err %p EIP %p CS %p flg %p\n" \
			\
			"Stack: %p %p %p %p %p %p %p %p\n" \
			" %p %p %p %p %p %p %p %p\n" \
			" %p %p %p %p %p %p %p %p\n"

#include "../../x86/xen/xen-head.S"

/*
 * The IDT and GDT 'descriptors' are a strange 48-bit object
 * only used by the lidt and lgdt instructions.  They are not
 * like usual segment descriptors - they consist of a 16-bit
 * segment size, and 32-bit linear address value:
 */

.globl boot_gdt_descr
.globl idt_descr

	ALIGN
# early boot GDT descriptor (must use 1:1 address mapping)
	.word 0				# 32 bit align gdt_desc.address
boot_gdt_descr:
	.word __BOOT_DS+7		# limit
	.long boot_gdt - __PAGE_OFFSET	# physical address (paging off)

	.word 0				# 32-bit align idt_desc.address
idt_descr:
	.word IDT_ENTRIES*8-1		# idt contains 256 entries
	.long idt_table

# boot GDT descriptor (later on used by CPU#0):
	.word 0				# 32 bit align gdt_desc.address
ENTRY(early_gdt_descr)
	.word GDT_ENTRIES*8-1
	.long per_cpu__gdt_page		/* Overwritten for secondary CPUs */

/*
 * The boot_gdt must mirror the equivalent in setup.S and is
 * used only for booting.
 */
	.align L1_CACHE_BYTES
ENTRY(boot_gdt)
	.fill GDT_ENTRY_BOOT_CS,8,0
	.quad 0x00cf9a000000ffff	/* kernel 4GB code at 0x00000000 */
	.quad 0x00cf92000000ffff	/* kernel 4GB data at 0x00000000 */