/*
 *  linux/arch/i386/kernel/head.S -- the 32-bit startup code.
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  Enhanced CPU detection and feature setting code by Mike Jagdis
 *  and Martin Mares, November 1997.
 */

.text
#include <linux/threads.h>
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/desc.h>
#include <asm/cache.h>
#include <asm/thread_info.h>
#include <asm/asm-offsets.h>
#include <asm/setup.h>

/*
 * References to members of the new_cpu_data structure.
 *
 * Each macro expands to "new_cpu_data + CPUINFO_<field>", i.e. the
 * address of one field, so it can be used directly as a memory operand
 * (e.g. "movb $3,X86").
 */

#define X86		new_cpu_data+CPUINFO_x86
#define X86_VENDOR	new_cpu_data+CPUINFO_x86_vendor
#define X86_MODEL	new_cpu_data+CPUINFO_x86_model
#define X86_MASK	new_cpu_data+CPUINFO_x86_mask
#define X86_HARD_MATH	new_cpu_data+CPUINFO_hard_math
#define X86_CPUID	new_cpu_data+CPUINFO_cpuid_level
#define X86_CAPABILITY	new_cpu_data+CPUINFO_x86_capability
#define X86_VENDOR_ID	new_cpu_data+CPUINFO_x86_vendor_id

/*
 * This is how much memory *in addition to the memory covered up to
 * and including _end* we need mapped initially.
 * We need:
 *  - one bit for each possible page, but only in low memory, which means
 *     2^32/4096/8 = 128K worst case (4G/4G split.)
 *  - enough space to map all low memory, which means
 *     (2^32/4096) / 1024 pages (worst case, non PAE)
 *     (2^32/4096) / 512 + 4 pages (worst case for PAE)
 *  - a few pages for allocator use before the kernel pagetable has
 *     been set up
 *
 * Modulo rounding, each megabyte assigned here requires a kilobyte of
 * memory, which is currently unreclaimed.
 *
 * This should be a multiple of a page.
*/
LOW_PAGES = 1<<(32-PAGE_SHIFT_asm)

/*
 * To preserve the DMA pool in PAGEALLOC kernels, we'll allocate
 * pagetables from above the 16MB DMA limit, so we'll have to set
 * up pagetables 16MB more (worst-case):
 *
 * NOTE(review): LOW_PAGES is a page count (see its definition just
 * above), while 0x1000000 reads like a byte count (16MB) -- confirm the
 * intended units before relying on this value.
 */
#ifdef CONFIG_DEBUG_PAGEALLOC
LOW_PAGES = LOW_PAGES + 0x1000000
#endif

/* Number of page-table pages needed to map LOW_PAGES pages. */
#if PTRS_PER_PMD > 1
PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PMD) + PTRS_PER_PGD
#else
PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PGD)
#endif
/* One bit per page, expressed in bytes. */
BOOTBITMAP_SIZE = LOW_PAGES / 8
/* Extra pages kept for early allocator use. */
ALLOCATOR_SLOP = 4

/* Total number of bytes beyond the page tables that must be mapped initially. */
INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_SIZE_asm

/*
 * 32-bit kernel entrypoint; only used by the boot CPU.  On entry,
 * %esi points to the real-mode code as a 32-bit pointer.
 * CS and DS must be 4 GB flat segments, but we don't depend on
 * any particular GDT layout, because we load our own as soon as we
 * can.
 */
.section .text.head,"ax",@progbits
ENTRY(startup_32)
	/* check to see if KEEP_SEGMENTS flag is meaningful */
	cmpw $0x207, BP_version(%esi)
	jb 1f

	/* test KEEP_SEGMENTS flag to see if the bootloader is asking
	   us to not reload segments */
	testb $(1<<6), BP_loadflags(%esi)
	jnz 2f

/*
 * Set segments to known values.
 */
1:	lgdt boot_gdt_descr - __PAGE_OFFSET
	movl $(__BOOT_DS),%eax
	movl %eax,%ds
	movl %eax,%es
	movl %eax,%fs
	movl %eax,%gs
2:

/*
 * Clear BSS first so that there are no surprises...
 * %edi = start of BSS, %ecx = BSS length in dwords, %eax = 0 (fill value).
 */
	cld
	xorl %eax,%eax
	movl $__bss_start - __PAGE_OFFSET,%edi
	movl $__bss_stop - __PAGE_OFFSET,%ecx
	subl %edi,%ecx
	shrl $2,%ecx
	rep ; stosl
/*
 * Copy bootup parameters out of the way.
 * Note: %esi still has the pointer to the real-mode data.
 * With the kexec as boot loader, parameter segment might be loaded beyond
 * kernel image and might not even be addressable by early boot page tables.
 * (kexec on panic case).
* Hence copy out the parameters before initializing
 * page tables.
 */
	/* Copy PARAM_SIZE bytes from (%esi) into boot_params. */
	movl $(boot_params - __PAGE_OFFSET),%edi
	movl $(PARAM_SIZE/4),%ecx
	cld
	rep
	movsl
	/* Copy the command line, if the copied parameters carry a pointer to one. */
	movl boot_params - __PAGE_OFFSET + NEW_CL_POINTER,%esi
	andl %esi,%esi
	jz 1f			# No command line
	movl $(boot_command_line - __PAGE_OFFSET),%edi
	movl $(COMMAND_LINE_SIZE/4),%ecx
	rep
	movsl
1:

#ifdef CONFIG_PARAVIRT
	/* Boot parameter versions below 0x207 take the default entry. */
	cmpw $0x207, (boot_params + BP_version - __PAGE_OFFSET)
	jb default_entry

	/* Paravirt-compatible boot parameters.  Look to see what architecture
	   we're booting under. */
	movl (boot_params + BP_hardware_subarch - __PAGE_OFFSET), %eax
	cmpl $num_subarch_entries, %eax
	jae bad_subarch

	/* Index the table, convert the entry to a physical address, jump. */
	movl subarch_entries - __PAGE_OFFSET(,%eax,4), %eax
	subl $__PAGE_OFFSET, %eax
	jmp *%eax

bad_subarch:
WEAK(lguest_entry)
WEAK(xen_entry)
	/* Unknown implementation; there's really
	   nothing we can do at this point. */
	ud2a
.data
subarch_entries:
	.long default_entry		/* normal x86/PC */
	.long lguest_entry		/* lguest hypervisor */
	.long xen_entry			/* Xen hypervisor */
num_subarch_entries = (. - subarch_entries) / 4
.previous
#endif /* CONFIG_PARAVIRT */

/*
 * Initialize page tables.  This creates a PDE and a set of page
 * tables, which are located immediately beyond _end.  The variable
 * init_pg_tables_end is set up to point to the first "safe" location.
 * Mappings are created both at virtual address 0 (identity mapping)
 * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END.
 *
 * Warning: don't use %esi or the stack in this code.  However, %esp
 * can be used as a GPR if you really need it...
*/
page_pde_offset = (__PAGE_OFFSET >> 20);

/*
 * Register use in the loop below:
 *   %edi = next page-table entry to write (advanced by stosl)
 *   %edx = next page-directory entry to write
 *   %eax = next PTE value (physical address + attribute bits)
 */
default_entry:
	movl $(pg0 - __PAGE_OFFSET), %edi
	movl $(swapper_pg_dir - __PAGE_OFFSET), %edx
	movl $0x007, %eax			/* 0x007 = PRESENT+RW+USER */
10:
	leal 0x007(%edi),%ecx			/* Create PDE entry */
	movl %ecx,(%edx)			/* Store identity PDE entry */
	movl %ecx,page_pde_offset(%edx)		/* Store kernel PDE entry */
	addl $4,%edx
	movl $1024, %ecx
11:
	stosl
	addl $0x1000,%eax
	loop 11b
	/* End condition: we must map up to and including INIT_MAP_BEYOND_END */
	/* bytes beyond the end of our own page tables; the +0x007 is the attribute bits */
	leal (INIT_MAP_BEYOND_END+0x007)(%edi),%ebp
	cmpl %ebp,%eax
	jb 10b
	movl %edi,(init_pg_tables_end - __PAGE_OFFSET)

	/* Do an early initialization of the fixmap area */
	movl $(swapper_pg_dir - __PAGE_OFFSET), %edx
	movl $(swapper_pg_pmd - __PAGE_OFFSET), %eax
	addl $0x67, %eax			/* 0x67 == _PAGE_TABLE */
	movl %eax, 4092(%edx)			/* last (1024th) directory slot */

	xorl %ebx,%ebx				/* This is the boot CPU (BSP) */
	jmp 3f
/*
 * Non-boot CPU entry point; entered from trampoline.S
 * We can't lgdt here, because lgdt itself uses a data segment, but
 * we know the trampoline has already loaded the boot_gdt for us.
 *
 * If cpu hotplug is not supported then this code can go in init section
 * which will be freed later
 */

#ifndef CONFIG_HOTPLUG_CPU
.section .init.text,"ax",@progbits
#endif

#ifdef CONFIG_SMP
ENTRY(startup_32_smp)
	cld
	movl $(__BOOT_DS),%eax
	movl %eax,%ds
	movl %eax,%es
	movl %eax,%fs
	movl %eax,%gs

/*
 *	New page tables may be in 4Mbyte page mode and may
 *	be using the global pages.
 *
 *	NOTE! If we are on a 486 we may have no cr4 at all!
 *	So we do not try to touch it unless we really have
 *	some bits in it to set.  This won't work if the BSP
 *	implements cr4 but this AP does not -- very unlikely
 *	but be warned!  The same applies to the pse feature
 *	if not equally supported. --macro
 *
 *	NOTE! We have to correct for the fact that we're
 *	not yet offset PAGE_OFFSET..
 */
#define cr4_bits mmu_cr4_features-__PAGE_OFFSET
	movl cr4_bits,%edx
	andl %edx,%edx
	jz 6f
	movl %cr4,%eax		# Turn on paging options (PSE,PAE,..)
	orl %edx,%eax
	movl %eax,%cr4

	btl $5, %eax		# check if PAE is enabled
	jnc 6f

	/* Check if extended functions are implemented */
	movl $0x80000000, %eax
	cpuid
	cmpl $0x80000000, %eax
	jbe 6f
	mov $0x80000001, %eax
	cpuid
	/* Execute Disable bit supported? */
	btl $20, %edx
	jnc 6f

	/* Setup EFER (Extended Feature Enable Register) */
	movl $0xc0000080, %ecx
	rdmsr

	btsl $11, %eax
	/* Make changes effective */
	wrmsr

6:
	/* This is a secondary processor (AP) */
	xorl %ebx,%ebx
	incl %ebx

#endif /* CONFIG_SMP */
3:

/*
 * Enable paging
 */
	movl $swapper_pg_dir-__PAGE_OFFSET,%eax
	movl %eax,%cr3		/* set the page table pointer.. */
	movl %cr0,%eax
	orl $0x80000000,%eax
	movl %eax,%cr0		/* ..and set paging (PG) bit */
	ljmp $__BOOT_CS,$1f	/* Clear prefetch and normalize %eip */
1:
	/* Set up the stack pointer */
	lss stack_start,%esp

/*
 * Initialize eflags.  Some BIOS's leave bits like NT set.  This would
 * confuse the debugger if this code is traced.
 * XXX - best to initialize before switching to protected mode.
 */
	pushl $0
	popfl

#ifdef CONFIG_SMP
	/* %ebx is 0 on the boot CPU and 1 on a secondary CPU (set above). */
	andl %ebx,%ebx
	jz  1f				/* Initial CPU cleans BSS */
	jmp checkCPUtype
1:
#endif /* CONFIG_SMP */

/*
 * start system 32-bit setup. We need to re-do some of the things done
 * in 16-bit mode for the "real" operations.
 */
	call setup_idt

checkCPUtype:

	movl $-1,X86_CPUID		#  -1 for no CPUID initially

/* check if it is 486 or 386.
*/
/*
 * XXX - this does a lot of unnecessary setup.  Alignment checks don't
 * apply at our cpl of 0 and the stack ought to be aligned already, and
 * we don't need to preserve eflags.
 */

	/*
	 * Try to toggle the AC and ID bits in EFLAGS; afterwards %eax holds
	 * the set of bits that actually changed, and the original EFLAGS
	 * value (kept in %ecx) has been restored.
	 */
	movb $3,X86		# at least 386
	pushfl			# push EFLAGS
	popl %eax		# get EFLAGS
	movl %eax,%ecx		# save original EFLAGS
	xorl $0x240000,%eax	# flip AC and ID bits in EFLAGS
	pushl %eax		# copy to EFLAGS
	popfl			# set EFLAGS
	pushfl			# get new EFLAGS
	popl %eax		# put it in eax
	xorl %ecx,%eax		# change in flags
	pushl %ecx		# restore original EFLAGS
	popfl
	testl $0x40000,%eax	# check if AC bit changed
	je is386

	movb $4,X86		# at least 486
	testl $0x200000,%eax	# check if ID bit changed
	je is486

	/* get vendor info */
	xorl %eax,%eax			# call CPUID with 0 -> return vendor ID
	cpuid
	movl %eax,X86_CPUID		# save CPUID level
	movl %ebx,X86_VENDOR_ID		# lo 4 chars
	movl %edx,X86_VENDOR_ID+4	# next 4 chars
	movl %ecx,X86_VENDOR_ID+8	# last 4 chars

	orl %eax,%eax			# do we have processor info as well?
	je is486

	movl $1,%eax		# Use the CPUID instruction to get CPU type
	cpuid
	movb %al,%cl		# save reg for future use
	andb $0x0f,%ah		# mask processor family
	movb %ah,X86
	andb $0xf0,%al		# mask model
	shrb $4,%al
	movb %al,X86_MODEL
	andb $0x0f,%cl		# mask mask revision
	movb %cl,X86_MASK
	movl %edx,X86_CAPABILITY

	/* %ecx = CR0 bits to add for this CPU class; merged into CR0 at 2: */
is486:	movl $0x50022,%ecx	# set AM, WP, NE and MP
	jmp 2f

is386:	movl $2,%ecx		# set MP
2:	movl %cr0,%eax
	andl $0x80000011,%eax	# Save PG,PE,ET
	orl %ecx,%eax
	movl %eax,%cr0

	call check_x87
	lgdt early_gdt_descr
	lidt idt_descr
	ljmp $(__KERNEL_CS),$1f
1:	movl $(__KERNEL_DS),%eax	# reload all the segment registers
	movl %eax,%ss			# after changing gdt.
/* Segment reload continues: %eax still holds __KERNEL_DS at this point. */
	movl %eax,%fs			# gets reset once there's real percpu

	movl $(__USER_DS),%eax		# DS/ES contains default USER segment
	movl %eax,%ds
	movl %eax,%es

	xorl %eax,%eax			# Clear GS and LDT
	movl %eax,%gs
	lldt %ax

	cld			# gcc2 wants the direction flag cleared at all times
	pushl $0		# fake return address for unwinder
#ifdef CONFIG_SMP
	/* "ready" is 0 only for the first CPU to get here; mark it taken. */
	movb ready, %cl
	movb $1, ready
	cmpb $0,%cl		# the first CPU calls start_kernel
	je   1f
	movl $(__KERNEL_PERCPU), %eax
	movl %eax,%fs		# set this cpu's percpu
	jmp initialize_secondary # all other CPUs call initialize_secondary
1:
#endif /* CONFIG_SMP */
	jmp start_kernel

/*
 * We depend on ET to be correct. This checks for 287/387.
 *
 * Writes X86_HARD_MATH: 1 if the low byte of the status word reads
 * back as 0 after fninit, 0 otherwise (in which case CR0 bit 2 is
 * flipped to mark the coprocessor as absent).  Clobbers %eax.
 */
check_x87:
	movb $0,X86_HARD_MATH
	clts
	fninit
	fstsw %ax
	cmpb $0,%al
	je 1f
	movl %cr0,%eax		/* no coprocessor: have to set bits */
	xorl $4,%eax		/* set EM */
	movl %eax,%cr0
	ret
	ALIGN
1:	movb $1,X86_HARD_MATH
	.byte 0xDB,0xE4		/* fsetpm for 287, ignored by 387 */
	ret

/*
 *  setup_idt
 *
 *  sets up a idt with 256 entries pointing to
 *  ignore_int, interrupt gates. It doesn't actually load
 *  idt - that can be done only after paging has been enabled
 *  and the kernel moved to PAGE_OFFSET. Interrupts
 *  are enabled elsewhere, when we can be relatively
 *  sure everything is ok.
 *
 *  Warning: %esi is live across this function.
*/
setup_idt:
	/*
	 * Build one gate descriptor in %edx:%eax:
	 *   %eax = (__KERNEL_CS << 16) | low 16 bits of the handler address
	 *   %edx = high 16 bits of the handler address | 0x8E00
	 */
	lea ignore_int,%edx
	movl $(__KERNEL_CS << 16),%eax
	movw %dx,%ax		/* selector = 0x0010 = cs */
	movw $0x8E00,%dx	/* interrupt gate - dpl=0, present */

	/* Store that descriptor into all 256 slots of idt_table. */
	lea idt_table,%edi
	mov $256,%ecx
rp_sidt:
	movl %eax,(%edi)
	movl %edx,4(%edi)
	addl $8,%edi
	dec %ecx
	jne rp_sidt

/*
 * set_early_handler handler,trapno
 *
 * Overwrite IDT slot \trapno with an interrupt gate for \handler.
 * Clobbers %eax, %edx and %edi.
 */
.macro	set_early_handler handler,trapno
	lea \handler,%edx
	movl $(__KERNEL_CS << 16),%eax
	movw %dx,%ax
	movw $0x8E00,%dx	/* interrupt gate - dpl=0, present */
	lea idt_table,%edi
	movl %eax,8*\trapno(%edi)
	movl %edx,8*\trapno+4(%edi)
.endm

	set_early_handler handler=early_divide_err,trapno=0
	set_early_handler handler=early_illegal_opcode,trapno=6
	set_early_handler handler=early_protection_fault,trapno=13
	set_early_handler handler=early_page_fault,trapno=14

	ret

/*
 * Early exception stubs.  Each one loads its trap number into %edx and
 * jumps to early_fault.  The divide-error and illegal-opcode stubs push
 * a zero first so that all four reach early_fault with one extra word
 * (the "err" slot of fault_msg) on the stack.
 */
early_divide_err:
	xor %edx,%edx
	pushl $0	/* fake errcode */
	jmp early_fault

early_illegal_opcode:
	movl $6,%edx
	pushl $0	/* fake errcode */
	jmp early_fault

early_protection_fault:
	movl $13,%edx
	jmp early_fault

early_page_fault:
	movl $14,%edx
	jmp early_fault

/*
 * Common tail of the early exception stubs: print fault_msg (when
 * CONFIG_PRINTK is set) and halt forever.  The arguments consumed by
 * fault_msg are the trap number, %cr2, the pusha register image and
 * whatever already sits above it on the stack.
 */
early_fault:
	cld
#ifdef CONFIG_PRINTK
	pusha
	movl $(__KERNEL_DS),%eax
	movl %eax,%ds
	movl %eax,%es
	cmpl $2,early_recursion_flag	/* stop reporting once the flag reaches 2 */
	je hlt_loop
	incl early_recursion_flag
	movl %cr2,%eax
	pushl %eax
	pushl %edx		/* trapno */
	pushl $fault_msg
#ifdef CONFIG_EARLY_PRINTK
	call early_printk
#else
	call printk
#endif
#endif
hlt_loop:
	hlt
	jmp hlt_loop

/* This is the default interrupt "handler" :-) */
	ALIGN
ignore_int:
	cld
#ifdef CONFIG_PRINTK
	pushl %eax
	pushl %ecx
	pushl %edx
	pushl %es
	pushl %ds
	movl $(__KERNEL_DS),%eax
	movl %eax,%ds
	movl %eax,%es
	cmpl $2,early_recursion_flag	/* stop reporting once the flag reaches 2 */
	je hlt_loop
	incl early_recursion_flag
	/* Re-push four words from further up the stack as printk arguments. */
	pushl 16(%esp)
	pushl 24(%esp)
	pushl 32(%esp)
	pushl 40(%esp)
	pushl $int_msg
#ifdef CONFIG_EARLY_PRINTK
	call early_printk
#else
	call printk
#endif
	addl $(5*4),%esp	/* drop the five words pushed for the call */
	popl %ds
	popl %es
	popl %edx
	popl %ecx
	popl %eax
#endif
	iret

.section .text
/*
 * Real beginning of normal "text" segment
 */
ENTRY(stext)
ENTRY(_stext)

/*
 * BSS section
 */
.section ".bss.page_aligned","wa"
	.align PAGE_SIZE_asm
ENTRY(swapper_pg_dir)
	.fill 1024,4,0
ENTRY(swapper_pg_pmd)
	.fill 1024,4,0
ENTRY(empty_zero_page)
	.fill 4096,1,0

/*
 * This starts the data section.
 */
.data
/* Offset:selector pair loaded into %ss:%esp by "lss stack_start,%esp". */
ENTRY(stack_start)
	.long init_thread_union+THREAD_SIZE
	.long __BOOT_DS

/* 0 until the first CPU has passed through the startup path, then 1. */
ready:	.byte 0

/* Counts reports made by early_fault/ignore_int; both halt at 2. */
early_recursion_flag:
	.long 0

int_msg:
	.asciz "Unknown interrupt or fault at EIP %p %p %p\n"

/*
 * printk format used by early_fault.  The fragments are emitted back to
 * back to form a single string, so only the last one is NUL-terminated
 * (.asciz); without that terminator printk would keep reading into
 * whatever data follows.
 */
fault_msg:
/* fault info: */
	.ascii "BUG: Int %d: CR2 %p\n"
/* pusha regs: */
	.ascii " EDI %p ESI %p EBP %p ESP %p\n"
	.ascii " EBX %p EDX %p ECX %p EAX %p\n"
/* fault frame: */
	.ascii " err %p EIP %p CS %p flg %p\n"
	.ascii "Stack: %p %p %p %p %p %p %p %p\n"
	.ascii " %p %p %p %p %p %p %p %p\n"
	.asciz " %p %p %p %p %p %p %p %p\n"

#include "../../x86/xen/xen-head.S"

/*
 * The IDT and GDT 'descriptors' are a strange 48-bit object
 * only used by the lidt and lgdt instructions.
* They are not
 * like usual segment descriptors - they consist of a 16-bit
 * segment size, and 32-bit linear address value:
 */

.globl boot_gdt_descr
.globl idt_descr

	ALIGN
# early boot GDT descriptor (must use 1:1 address mapping)
	.word 0				# 32 bit align gdt_desc.address
boot_gdt_descr:
	.word __BOOT_DS+7		/* limit: last byte of the __BOOT_DS entry */
	.long boot_gdt - __PAGE_OFFSET

	.word 0				# 32-bit align idt_desc.address
idt_descr:
	.word IDT_ENTRIES*8-1		# idt contains 256 entries
	.long idt_table

# boot GDT descriptor (later on used by CPU#0):
	.word 0				# 32 bit align gdt_desc.address
ENTRY(early_gdt_descr)
	.word GDT_ENTRIES*8-1
	.long per_cpu__gdt_page		/* Overwritten for secondary CPUs */

/*
 * The boot_gdt must mirror the equivalent in setup.S and is
 * used only for booting.
 *
 * GDT_ENTRY_BOOT_CS zeroed 8-byte entries come first, followed by the
 * flat code descriptor and the flat data descriptor.
 */
	.align L1_CACHE_BYTES
ENTRY(boot_gdt)
	.fill GDT_ENTRY_BOOT_CS,8,0
	.quad 0x00cf9a000000ffff	/* kernel 4GB code at 0x00000000 */
	.quad 0x00cf92000000ffff	/* kernel 4GB data at 0x00000000 */