xref: /linux/arch/x86/kernel/head_32.S (revision 643d1f7fe3aa12c8bdea6fa5b4ba874ff6dd601d)
1/*
2 *  linux/arch/i386/kernel/head.S -- the 32-bit startup code.
3 *
4 *  Copyright (C) 1991, 1992  Linus Torvalds
5 *
6 *  Enhanced CPU detection and feature setting code by Mike Jagdis
7 *  and Martin Mares, November 1997.
8 */
9
10.text
11#include <linux/threads.h>
12#include <linux/init.h>
13#include <linux/linkage.h>
14#include <asm/segment.h>
15#include <asm/page.h>
16#include <asm/pgtable.h>
17#include <asm/desc.h>
18#include <asm/cache.h>
19#include <asm/thread_info.h>
20#include <asm/asm-offsets.h>
21#include <asm/setup.h>
22
23/*
24 * References to members of the new_cpu_data structure.
25 */
26
/*
 * Each macro below expands to the symbol new_cpu_data plus a CPUINFO_*
 * offset, so it can be used directly as a memory operand.
 * (The CPUINFO_* values are defined outside this file -- presumably
 * generated into asm-offsets.h; verify.)
 */
27#define X86		new_cpu_data+CPUINFO_x86
28#define X86_VENDOR	new_cpu_data+CPUINFO_x86_vendor
29#define X86_MODEL	new_cpu_data+CPUINFO_x86_model
30#define X86_MASK	new_cpu_data+CPUINFO_x86_mask
31#define X86_HARD_MATH	new_cpu_data+CPUINFO_hard_math
32#define X86_CPUID	new_cpu_data+CPUINFO_cpuid_level
33#define X86_CAPABILITY	new_cpu_data+CPUINFO_x86_capability
34#define X86_VENDOR_ID	new_cpu_data+CPUINFO_x86_vendor_id
35
36/*
37 * This is how much memory *in addition to the memory covered up to
38 * and including _end* we need mapped initially.
39 * We need:
40 *  - one bit for each possible page, but only in low memory, which means
41 *     2^32/4096/8 = 128K worst case (4G/4G split.)
42 *  - enough space to map all low memory, which means
43 *     (2^32/4096) / 1024 pages (worst case, non PAE)
44 *     (2^32/4096) / 512 + 4 pages (worst case for PAE)
45 *  - a few pages for allocator use before the kernel pagetable has
46 *     been set up
47 *
48 * Modulo rounding, each megabyte assigned here requires a kilobyte of
49 * memory, which is currently unreclaimed.
50 *
51 * This should be a multiple of a page.
52 */
/* LOW_PAGES: 2^(32 - PAGE_SHIFT_asm), a count of pages (not bytes). */
53LOW_PAGES = 1<<(32-PAGE_SHIFT_asm)
54
55/*
56 * To preserve the DMA pool in PAGEALLOC kernels, we'll allocate
57 * pagetables from above the 16MB DMA limit, so we'll have to set
58 * up pagetables 16MB more (worst-case):
59 */
/*
 * NOTE(review): LOW_PAGES is a page count, while 0x1000000 reads like a
 * byte count (16MB).  As written this adds 0x1000000 *pages* to the
 * total -- confirm that these are the intended units.
 */
60#ifdef CONFIG_DEBUG_PAGEALLOC
61LOW_PAGES = LOW_PAGES + 0x1000000
62#endif
63
/*
 * PAGE_TABLE_SIZE: LOW_PAGES divided by the entries-per-table constant
 * (PTRS_PER_PMD when there is more than one PMD entry, plus PTRS_PER_PGD
 * extra; otherwise PTRS_PER_PGD).  It is multiplied by PAGE_SIZE_asm
 * below, so it is a number of pages.
 */
64#if PTRS_PER_PMD > 1
65PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PMD) + PTRS_PER_PGD
66#else
67PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PGD)
68#endif
/* BOOTBITMAP_SIZE: one bit per page in LOW_PAGES, expressed in bytes. */
69BOOTBITMAP_SIZE = LOW_PAGES / 8
/* ALLOCATOR_SLOP: extra pages (scaled by PAGE_SIZE_asm below). */
70ALLOCATOR_SLOP = 4
71
/*
 * INIT_MAP_BEYOND_END: total in bytes -- the bitmap bytes plus
 * (page-table pages + slop pages) * PAGE_SIZE_asm.  Used by the
 * page-table loop at default_entry as its end condition.
 */
72INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_SIZE_asm
73
74/*
75 * 32-bit kernel entrypoint; only used by the boot CPU.  On entry,
76 * %esi points to the real-mode code as a 32-bit pointer.
77 * CS and DS must be 4 GB flat segments, but we don't depend on
78 * any particular GDT layout, because we load our own as soon as we
79 * can.
80 */
/*
 * startup_32 lives in .text.head.  It first decides whether to load its
 * own GDT and data segments or to keep the ones it was entered with:
 *   - BP_version(%esi) below 0x207: always reload (label 1);
 *   - otherwise, if bit 6 of BP_loadflags(%esi) is set: skip the reload
 *     (label 2).
 * %esi is only read here, never written.
 */
81.section .text.head,"ax",@progbits
82ENTRY(startup_32)
83	/* check to see if KEEP_SEGMENTS flag is meaningful */
84	cmpw $0x207, BP_version(%esi)
	/* unsigned compare: version < 0x207 -> go reload the segments */
85	jb 1f
86
87	/* test KEEP_SEGMENTS flag to see if the bootloader is asking
88		us to not reload segments */
89	testb $(1<<6), BP_loadflags(%esi)
	/* flag set -> jump past the lgdt/segment loads */
90	jnz 2f
91
92/*
93 * Set segments to known values.
94 */
	/*
	 * The descriptor is addressed as (symbol - __PAGE_OFFSET) because
	 * paging is only switched on further down in this file.
	 * %ds, %es, %fs and %gs all receive __BOOT_DS; %cs and %ss are not
	 * touched here.
	 */
951:	lgdt boot_gdt_descr - __PAGE_OFFSET
96	movl $(__BOOT_DS),%eax
97	movl %eax,%ds
98	movl %eax,%es
99	movl %eax,%fs
100	movl %eax,%gs
1012:
102
103/*
104 * Clear BSS first so that there are no surprises...
105 */
	/*
	 * Zero-fill from __bss_start up to __bss_stop.  %eax is the fill
	 * value (0), %edi the destination, %ecx the dword count.  Both
	 * symbols are adjusted by -__PAGE_OFFSET because paging is not yet
	 * enabled at this point.
	 */
106	cld
107	xorl %eax,%eax
108	movl $__bss_start - __PAGE_OFFSET,%edi
109	movl $__bss_stop - __PAGE_OFFSET,%ecx
	/* %ecx = length in bytes, then converted to a count of dwords */
110	subl %edi,%ecx
111	shrl $2,%ecx
112	rep ; stosl
113/*
114 * Copy bootup parameters out of the way.
115 * Note: %esi still has the pointer to the real-mode data.
116 * With the kexec as boot loader, parameter segment might be loaded beyond
117 * kernel image and might not even be addressable by early boot page tables.
118 * (kexec on panic case). Hence copy out the parameters before initializing
119 * page tables.
120 */
	/* Copy PARAM_SIZE/4 dwords from (%esi) into boot_params. */
121	movl $(boot_params - __PAGE_OFFSET),%edi
122	movl $(PARAM_SIZE/4),%ecx
123	cld
124	rep
125	movsl
	/*
	 * Fetch the command-line pointer from the copy just made (field at
	 * offset NEW_CL_POINTER).  A zero pointer means there is nothing to
	 * copy; otherwise copy COMMAND_LINE_SIZE/4 dwords from it into
	 * boot_command_line.  %esi is overwritten here.
	 */
126	movl boot_params - __PAGE_OFFSET + NEW_CL_POINTER,%esi
127	andl %esi,%esi
128	jz 1f			# No command line
129	movl $(boot_command_line - __PAGE_OFFSET),%edi
130	movl $(COMMAND_LINE_SIZE/4),%ecx
131	rep
132	movsl
1331:
134
/*
 * CONFIG_PARAVIRT only: choose the entry point from the hardware_subarch
 * field of the copied boot_params.  The field is only consulted when the
 * boot_params version is at least 0x207; older versions go straight to
 * default_entry.
 */
135#ifdef CONFIG_PARAVIRT
136	cmpw $0x207, (boot_params + BP_version - __PAGE_OFFSET)
137	jb default_entry
138
139	/* Paravirt-compatible boot parameters.  Look to see what architecture
140		we're booting under. */
141	movl (boot_params + BP_hardware_subarch - __PAGE_OFFSET), %eax
	/* unsigned bounds check against the number of table entries */
142	cmpl $num_subarch_entries, %eax
143	jae bad_subarch
144
	/*
	 * Load table entry [%eax] (4 bytes each), subtract __PAGE_OFFSET
	 * from the stored address, and jump to the result.
	 */
145	movl subarch_entries - __PAGE_OFFSET(,%eax,4), %eax
146	subl $__PAGE_OFFSET, %eax
147	jmp *%eax
148
	/*
	 * bad_subarch shares its address with the WEAK lguest_entry and
	 * xen_entry labels, all of which lead to the ud2a below.
	 * (Presumably a strong definition elsewhere overrides the weak
	 * label when that hypervisor support is built in -- verify.)
	 */
149bad_subarch:
150WEAK(lguest_entry)
151WEAK(xen_entry)
152	/* Unknown implementation; there's really
153	   nothing we can do at this point. */
154	ud2a
155
156	__INITDATA
157
	/* Jump table indexed by hardware_subarch; one .long per entry. */
158subarch_entries:
159	.long default_entry		/* normal x86/PC */
160	.long lguest_entry		/* lguest hypervisor */
161	.long xen_entry			/* Xen hypervisor */
	/* entry count = table size in bytes / 4 */
162num_subarch_entries = (. - subarch_entries) / 4
163.previous
164#endif /* CONFIG_PARAVIRT */
165
166/*
167 * Initialize page tables.  This creates a PDE and a set of page
168 * tables, which are located immediately beyond _end.  The variable
169 * init_pg_tables_end is set up to point to the first "safe" location.
170 * Mappings are created both at virtual address 0 (identity mapping)
171 * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END.
172 *
173 * Warning: don't use %esi or the stack in this code.  However, %esp
174 * can be used as a GPR if you really need it...
175 */
/*
 * page_pde_offset: byte offset, within the page directory, of the entry
 * that covers __PAGE_OFFSET.  It is added to the identity-mapping slot
 * pointer in %edx below so that every page table is entered twice.
 */
176page_pde_offset = (__PAGE_OFFSET >> 20);
177
/*
 * Register roles in the loop below:
 *   %edi - where the next page-table entry is written (starts at pg0)
 *   %edx - current page-directory slot for the identity mapping
 *   %eax - value of the next page-table entry: address | 0x007,
 *          advanced by 0x1000 per entry
 *   %ecx - scratch: PDE value, then the 1024-entry inner loop counter
 *   %ebp - scratch: end-condition value
 */
178default_entry:
179	movl $(pg0 - __PAGE_OFFSET), %edi
180	movl $(swapper_pg_dir - __PAGE_OFFSET), %edx
181	movl $0x007, %eax			/* 0x007 = PRESENT+RW+USER */
	/* Outer loop: one iteration per page table (one PDE pair). */
18210:
183	leal 0x007(%edi),%ecx			/* Create PDE entry */
184	movl %ecx,(%edx)			/* Store identity PDE entry */
185	movl %ecx,page_pde_offset(%edx)		/* Store kernel PDE entry */
186	addl $4,%edx
187	movl $1024, %ecx
	/* Inner loop: write 1024 entries, each mapping the next 0x1000 bytes. */
18811:
189	stosl
190	addl $0x1000,%eax
191	loop 11b
192	/* End condition: we must map up to and including INIT_MAP_BEYOND_END */
193	/* bytes beyond the end of our own page tables; the +0x007 is the attribute bits */
194	leal (INIT_MAP_BEYOND_END+0x007)(%edi),%ebp
	/* unsigned compare: keep going while mapped-so-far < required end */
195	cmpl %ebp,%eax
196	jb 10b
	/* Record where the page tables written above end. */
197	movl %edi,(init_pg_tables_end - __PAGE_OFFSET)
198
199	/* Do an early initialization of the fixmap area */
200	movl $(swapper_pg_dir - __PAGE_OFFSET), %edx
201	movl $(swapper_pg_pmd - __PAGE_OFFSET), %eax
202	addl $0x67, %eax			/* 0x67 == _PAGE_TABLE */
	/* byte offset 4092 = the last of the 1024 4-byte directory entries */
203	movl %eax, 4092(%edx)
204
	/* Skip over the secondary-CPU entry point to the common path at 3:. */
205	jmp 3f
206/*
207 * Non-boot CPU entry point; entered from trampoline.S
208 * We can't lgdt here, because lgdt itself uses a data segment, but
209 * we know the trampoline has already loaded the boot_gdt for us.
210 *
211 * If cpu hotplug is not supported then this code can go in init section
212 * which will be freed later
213 */
214
/*
 * Without CONFIG_HOTPLUG_CPU everything from here on is placed in
 * .init.text; with it, the code stays in the section selected earlier.
 */
215#ifndef CONFIG_HOTPLUG_CPU
216.section .init.text,"ax",@progbits
217#endif
218
/*
 * startup_32_smp (CONFIG_SMP only): clears the direction flag and loads
 * __BOOT_DS into %ds/%es/%fs/%gs, then falls through to label 3.
 * No lgdt is done here.
 */
219#ifdef CONFIG_SMP
220ENTRY(startup_32_smp)
221	cld
222	movl $(__BOOT_DS),%eax
223	movl %eax,%ds
224	movl %eax,%es
225	movl %eax,%fs
226	movl %eax,%gs
227#endif /* CONFIG_SMP */
/* Common path: reached by fall-through above and by "jmp 3f" from default_entry. */
2283:
229
230/*
231 *	New page tables may be in 4Mbyte page mode and may
232 *	be using the global pages.
233 *
234 *	NOTE! If we are on a 486 we may have no cr4 at all!
235 *	So we do not try to touch it unless we really have
236 *	some bits in it to set.  This won't work if the BSP
237 *	implements cr4 but this AP does not -- very unlikely
238 *	but be warned!  The same applies to the pse feature
239 *	if not equally supported. --macro
240 *
241 *	NOTE! We have to correct for the fact that we're
242 *	not yet offset PAGE_OFFSET..
243 */
/* cr4_bits: mmu_cr4_features addressed without the PAGE_OFFSET bias. */
244#define cr4_bits mmu_cr4_features-__PAGE_OFFSET
245	movl cr4_bits,%edx
	/* Nothing requested -> do not touch %cr4 at all (see the note above). */
246	andl %edx,%edx
247	jz 6f
248	movl %cr4,%eax		# Turn on paging options (PSE,PAE,..)
249	orl %edx,%eax
250	movl %eax,%cr4
251
	/* Bit 5 of the value just written decides whether EFER is considered. */
252	btl $5, %eax		# check if PAE is enabled
253	jnc 6f
254
255	/* Check if extended functions are implemented */
256	movl $0x80000000, %eax
257	cpuid
	/* Need a maximum extended leaf strictly above 0x80000000. */
258	cmpl $0x80000000, %eax
259	jbe 6f
260	mov $0x80000001, %eax
261	cpuid
262	/* Execute Disable bit supported? */
263	btl $20, %edx
264	jnc 6f
265
266	/* Setup EFER (Extended Feature Enable Register) */
	/* MSR 0xc0000080: read into %edx:%eax, set bit 11, write back. */
267	movl $0xc0000080, %ecx
268	rdmsr
269
270	btsl $11, %eax
271	/* Make changes effective */
272	wrmsr
273
/* 6: join point for every "skip" branch above. */
2746:
275
276/*
277 * Enable paging
278 */
	/* Point %cr3 at swapper_pg_dir (address without the PAGE_OFFSET bias). */
279	movl $swapper_pg_dir-__PAGE_OFFSET,%eax
280	movl %eax,%cr3		/* set the page table pointer.. */
281	movl %cr0,%eax
282	orl $0x80000000,%eax
283	movl %eax,%cr0		/* ..and set paging (PG) bit */
	/* From the far jump on, symbols are used without subtracting __PAGE_OFFSET. */
284	ljmp $__BOOT_CS,$1f	/* Clear prefetch and normalize %eip */
2851:
286	/* Set up the stack pointer */
	/* Loads %ss:%esp from the offset/selector pair stored at stack_start. */
287	lss stack_start,%esp
288
289/*
290 * Initialize eflags.  Some BIOS's leave bits like NT set.  This would
291 * confuse the debugger if this code is traced.
292 * XXX - best to initialize before switching to protected mode.
293 */
294	pushl $0
295	popfl
296
	/*
	 * With CONFIG_SMP, "ready" distinguishes the first CPU through here
	 * (ready == 0, which goes on to setup_idt) from later ones, which
	 * skip setup_idt and go straight to checkCPUtype.
	 * NOTE(review): the "cleans BSS" remark below looks stale -- the BSS
	 * is cleared near startup_32, not on this path.  Confirm.
	 */
297#ifdef CONFIG_SMP
298	cmpb $0, ready
299	jz  1f				/* Initial CPU cleans BSS */
300	jmp checkCPUtype
3011:
302#endif /* CONFIG_SMP */
303
304/*
305 * start system 32-bit setup. We need to re-do some of the things done
306 * in 16-bit mode for the "real" operations.
307 */
308	call setup_idt
309
/*
 * checkCPUtype: fill in the new_cpu_data fields (via the X86* macros),
 * program %cr0, probe the FPU, switch to the final GDT/IDT and segment
 * registers, then jump to start_kernel (or, with CONFIG_SMP, to
 * initialize_secondary on every CPU after the first).  Never returns.
 */
310checkCPUtype:
311
312	movl $-1,X86_CPUID		#  -1 for no CPUID initially
313
314/* check if it is 486 or 386. */
315/*
316 * XXX - this does a lot of unnecessary setup.  Alignment checks don't
317 * apply at our cpl of 0 and the stack ought to be aligned already, and
318 * we don't need to preserve eflags.
319 */
320
	/*
	 * Flip EFLAGS bits 0x40000 (AC) and 0x200000 (ID), read EFLAGS back,
	 * and leave in %eax the set of bits that actually changed.
	 * %ecx holds the original EFLAGS, which is restored afterwards.
	 */
321	movb $3,X86		# at least 386
322	pushfl			# push EFLAGS
323	popl %eax		# get EFLAGS
324	movl %eax,%ecx		# save original EFLAGS
325	xorl $0x240000,%eax	# flip AC and ID bits in EFLAGS
326	pushl %eax		# copy to EFLAGS
327	popfl			# set EFLAGS
328	pushfl			# get new EFLAGS
329	popl %eax		# put it in eax
330	xorl %ecx,%eax		# change in flags
331	pushl %ecx		# restore original EFLAGS
332	popfl
333	testl $0x40000,%eax	# check if AC bit changed
334	je is386
335
336	movb $4,X86		# at least 486
337	testl $0x200000,%eax	# check if ID bit changed
338	je is486
339
340	/* get vendor info */
341	xorl %eax,%eax			# call CPUID with 0 -> return vendor ID
342	cpuid
343	movl %eax,X86_CPUID		# save CPUID level
344	movl %ebx,X86_VENDOR_ID		# lo 4 chars
345	movl %edx,X86_VENDOR_ID+4	# next 4 chars
346	movl %ecx,X86_VENDOR_ID+8	# last 4 chars
347
348	orl %eax,%eax			# do we have processor info as well?
349	je is486
350
	/*
	 * CPUID leaf 1: from %eax, bits 8-11 go to X86, bits 4-7 to
	 * X86_MODEL, bits 0-3 to X86_MASK; %edx goes to X86_CAPABILITY.
	 */
351	movl $1,%eax		# Use the CPUID instruction to get CPU type
352	cpuid
353	movb %al,%cl		# save reg for future use
354	andb $0x0f,%ah		# mask processor family
355	movb %ah,X86
356	andb $0xf0,%al		# mask model
357	shrb $4,%al
358	movb %al,X86_MODEL
359	andb $0x0f,%cl		# mask mask revision
360	movb %cl,X86_MASK
361	movl %edx,X86_CAPABILITY
362
	/*
	 * %ecx = %cr0 bits to turn on.  The 486-and-later path falls through
	 * from above into is486; the 386 path arrives at is386 by branch.
	 */
363is486:	movl $0x50022,%ecx	# set AM, WP, NE and MP
364	jmp 2f
365
366is386:	movl $2,%ecx		# set MP
	/* Keep only PG/PE/ET from the current %cr0, then OR in %ecx. */
3672:	movl %cr0,%eax
368	andl $0x80000011,%eax	# Save PG,PE,ET
369	orl %ecx,%eax
370	movl %eax,%cr0
371
	/* check_x87 is entered with %eax still holding the %cr0 value. */
372	call check_x87
373	lgdt early_gdt_descr
374	lidt idt_descr
	/* Far jump reloads %cs with __KERNEL_CS from the GDT just loaded. */
375	ljmp $(__KERNEL_CS),$1f
3761:	movl $(__KERNEL_DS),%eax	# reload all the segment registers
377	movl %eax,%ss			# after changing gdt.
378	movl %eax,%fs			# gets reset once there's real percpu
379
380	movl $(__USER_DS),%eax		# DS/ES contains default USER segment
381	movl %eax,%ds
382	movl %eax,%es
383
384	xorl %eax,%eax			# Clear GS and LDT
385	movl %eax,%gs
386	lldt %ax
387
388	cld			# gcc2 wants the direction flag cleared at all times
389	pushl $0		# fake return address for unwinder
	/*
	 * With CONFIG_SMP: read the old value of "ready" into %cl and set
	 * it to 1.  Only the CPU that saw 0 goes on to start_kernel; any
	 * other loads __KERNEL_PERCPU into %fs and jumps to
	 * initialize_secondary.
	 */
390#ifdef CONFIG_SMP
391	movb ready, %cl
392	movb $1, ready
393	cmpb $0,%cl		# the first CPU calls start_kernel
394	je   1f
395	movl $(__KERNEL_PERCPU), %eax
396	movl %eax,%fs		# set this cpu's percpu
397	jmp initialize_secondary # all other CPUs call initialize_secondary
3981:
399#endif /* CONFIG_SMP */
400	jmp start_kernel
401
/*
 * check_x87 - detect an x87 coprocessor (287/387) and record the result.
 *
 * We depend on ET to be correct.
 *
 * Stores 0 in X86_HARD_MATH, clears TS (clts), issues fninit and then
 * fstsw %ax.  If the low byte of %ax is zero afterwards a coprocessor is
 * assumed present: X86_HARD_MATH is set to 1 and the fsetpm opcode bytes
 * are executed.  Otherwise the EM bit (value 4) is set in %cr0.
 *
 * In:    nothing
 * Out:   X86_HARD_MATH = 0 or 1; %cr0.EM set when no coprocessor is found
 * Clobb: %eax, flags
 */
check_x87:
	movb $0,X86_HARD_MATH
	clts
	fninit
	fstsw %ax
	testb %al,%al		/* low status byte zero => coprocessor answered */
	je 1f
	movl %cr0,%eax		/* no coprocessor: have to set bits */
	/*
	 * Use OR rather than XOR: XOR would toggle the bit and clear EM if
	 * it ever happened to be set already; OR always sets it.
	 */
	orl $4,%eax		/* set EM */
	movl %eax,%cr0
	ret
	ALIGN
1:	movb $1,X86_HARD_MATH
	.byte 0xDB,0xE4		/* fsetpm for 287, ignored by 387 */
	ret
420
421/*
422 *  setup_idt
423 *
424 *  sets up a idt with 256 entries pointing to
425 *  ignore_int, interrupt gates. It doesn't actually load
426 *  idt - that can be done only after paging has been enabled
427 *  and the kernel moved to PAGE_OFFSET. Interrupts
428 *  are enabled elsewhere, when we can be relatively
429 *  sure everything is ok.
430 *
431 *  Warning: %esi is live across this function.
432 */
/*
 * Gate layout built below (two dwords per IDT entry):
 *   low  dword (%eax): __KERNEL_CS in the upper 16 bits, low 16 bits of
 *                      the handler address in the lower 16 bits
 *   high dword (%edx): upper 16 bits of the handler address, with the
 *                      lower 16 bits replaced by 0x8E00
 * Registers written: %eax, %ecx, %edx, %edi.  %esi is not touched.
 */
433setup_idt:
434	lea ignore_int,%edx
435	movl $(__KERNEL_CS << 16),%eax
436	movw %dx,%ax		/* selector = 0x0010 = cs */
437	movw $0x8E00,%dx	/* interrupt gate - dpl=0, present */
438
	/* Fill all 256 entries (8 bytes each) with the ignore_int gate. */
439	lea idt_table,%edi
440	mov $256,%ecx
441rp_sidt:
442	movl %eax,(%edi)
443	movl %edx,4(%edi)
444	addl $8,%edi
445	dec %ecx
446	jne rp_sidt
447
/*
 * set_early_handler handler, trapno
 * Overwrites IDT entry number \trapno with a gate for \handler, using
 * the same layout as above.  Clobbers %eax, %edx and %edi.
 */
448.macro	set_early_handler handler,trapno
449	lea \handler,%edx
450	movl $(__KERNEL_CS << 16),%eax
451	movw %dx,%ax
452	movw $0x8E00,%dx	/* interrupt gate - dpl=0, present */
453	lea idt_table,%edi
454	movl %eax,8*\trapno(%edi)
455	movl %edx,8*\trapno+4(%edi)
456.endm
457
	/* Vectors 0, 6, 13 and 14 get dedicated early handlers. */
458	set_early_handler handler=early_divide_err,trapno=0
459	set_early_handler handler=early_illegal_opcode,trapno=6
460	set_early_handler handler=early_protection_fault,trapno=13
461	set_early_handler handler=early_page_fault,trapno=14
462
463	ret
464
/*
 * Early exception stubs.  Each one loads its vector number into %edx and
 * jumps to early_fault.  The two stubs for vectors 0 and 6 first push a
 * zero in the error-code position so that all four present the same
 * stack layout; the stubs for vectors 13 and 14 push nothing.
 */
465early_divide_err:
466	xor %edx,%edx
467	pushl $0	/* fake errcode */
468	jmp early_fault
469
470early_illegal_opcode:
471	movl $6,%edx
472	pushl $0	/* fake errcode */
473	jmp early_fault
474
475early_protection_fault:
476	movl $13,%edx
477	jmp early_fault
478
479early_page_fault:
480	movl $14,%edx
481	jmp early_fault
482
/*
 * early_fault: common tail for the stubs above.  Never returns -- it
 * always ends in hlt_loop.
 *
 * With CONFIG_PRINTK it pushes, in order: the eight pusha registers,
 * %cr2, the trap number from %edx, and the address of fault_msg, then
 * calls early_printk (CONFIG_EARLY_PRINTK) or printk.  Because the
 * stubs load %edx before pusha runs, the EDX slot of the pusha frame
 * holds the trap number rather than the interrupted code's %edx.
 *
 * early_recursion_flag is incremented on each pass; once it has reached
 * 2 the handler goes directly to hlt_loop without printing.
 */
483early_fault:
484	cld
485#ifdef CONFIG_PRINTK
486	pusha
487	movl $(__KERNEL_DS),%eax
488	movl %eax,%ds
489	movl %eax,%es
490	cmpl $2,early_recursion_flag
491	je hlt_loop
492	incl early_recursion_flag
493	movl %cr2,%eax
494	pushl %eax
495	pushl %edx		/* trapno */
496	pushl $fault_msg
497#ifdef CONFIG_EARLY_PRINTK
498	call early_printk
499#else
500	call printk
501#endif
502#endif
	/* The pushed arguments are not popped: execution halts below. */
503	call dump_stack
/* hlt_loop is also the bail-out target used by ignore_int. */
504hlt_loop:
505	hlt
506	jmp hlt_loop
507
508/* This is the default interrupt "handler" :-) */
/*
 * ignore_int: the gate installed in all 256 IDT entries by setup_idt.
 *
 * Without CONFIG_PRINTK it is just "cld; iret".
 * With CONFIG_PRINTK it saves %eax/%ecx/%edx/%es/%ds, loads __KERNEL_DS
 * into %ds and %es, prints int_msg through early_printk or printk,
 * restores the saved registers and returns with iret.
 * early_recursion_flag is incremented on each pass; once it has reached
 * 2 the handler jumps to hlt_loop instead of printing.
 */
509	ALIGN
510ignore_int:
511	cld
512#ifdef CONFIG_PRINTK
513	pushl %eax
514	pushl %ecx
515	pushl %edx
516	pushl %es
517	pushl %ds
518	movl $(__KERNEL_DS),%eax
519	movl %eax,%ds
520	movl %eax,%es
521	cmpl $2,early_recursion_flag
522	je hlt_loop
523	incl early_recursion_flag
	/*
	 * Each push lowers %esp by 4, so the offsets 16/24/32/40 below pick
	 * up four consecutive dwords that sat at 16, 20, 24 and 28 above
	 * %esp before the first of these pushes: the saved %eax followed by
	 * the first three dwords of the frame this handler was entered with
	 * (EIP, CS, EFLAGS when no error code is present -- verify for
	 * vectors that do push one).
	 * Four values are pushed, while int_msg contains three %p
	 * conversions.
	 */
524	pushl 16(%esp)
525	pushl 24(%esp)
526	pushl 32(%esp)
527	pushl 40(%esp)
528	pushl $int_msg
529#ifdef CONFIG_EARLY_PRINTK
530	call early_printk
531#else
532	call printk
533#endif
	/* Drop the five dwords pushed for the call (4 values + format). */
534	addl $(5*4),%esp
535	popl %ds
536	popl %es
537	popl %edx
538	popl %ecx
539	popl %eax
540#endif
541	iret
542
543.section .text
544/*
545 * Real beginning of normal "text" segment
546 */
/* Both labels mark the same address; no code is emitted at this point. */
547ENTRY(stext)
548ENTRY(_stext)
549
550/*
551 * BSS section
552 */
553.section ".bss.page_aligned","wa"
554	.align PAGE_SIZE_asm
/* Page directory loaded into %cr3 above: 1024 zeroed 4-byte entries. */
555ENTRY(swapper_pg_dir)
556	.fill 1024,4,0
/* Table hooked into the last swapper_pg_dir entry for the early fixmap. */
557ENTRY(swapper_pg_pmd)
558	.fill 1024,4,0
/* 4096 zero bytes. */
559ENTRY(empty_zero_page)
560	.fill 4096,1,0
561
562/*
563 * This starts the data section.
564 */
565.data
/*
 * stack_start: the offset/selector pair read by "lss stack_start,%esp".
 * Offset = init_thread_union + THREAD_SIZE, selector = __BOOT_DS.
 */
566ENTRY(stack_start)
567	.long init_thread_union+THREAD_SIZE
568	.long __BOOT_DS
569
/*
 * ready: 0 until the first CPU passes through checkCPUtype, which sets
 * it to 1.  Tested to tell the first CPU from later ones.
 */
570ready:	.byte 0
571
/* Count of early handler entries; handlers stop printing once it is 2. */
572early_recursion_flag:
573	.long 0
574
/* NUL-terminated format string printed by ignore_int. */
575int_msg:
576	.asciz "Unknown interrupt or fault at EIP %p %p %p\n"
577
/*
 * fault_msg: format string passed to printk/early_printk by early_fault.
 *
 * The conversions line up with what early_fault leaves on the stack
 * above the format pointer: trap number, %cr2, the eight pusha
 * registers, the fault frame (error code, EIP, CS, EFLAGS), and then
 * 24 further stack dwords.
 *
 * The pieces are emitted with .ascii so they form one contiguous
 * string; the last piece uses .asciz so that the string is
 * NUL-terminated.  Without the terminator the consumer would keep
 * reading into whatever data follows.
 */
fault_msg:
/* fault info: */
	.ascii "BUG: Int %d: CR2 %p\n"
/* pusha regs: */
	.ascii "     EDI %p  ESI %p  EBP %p  ESP %p\n"
	.ascii "     EBX %p  EDX %p  ECX %p  EAX %p\n"
/* fault frame: */
	.ascii "     err %p  EIP %p   CS %p  flg %p\n"
	.ascii "Stack: %p %p %p %p %p %p %p %p\n"
	.ascii "       %p %p %p %p %p %p %p %p\n"
	.asciz "       %p %p %p %p %p %p %p %p\n"
588
589#include "../../x86/xen/xen-head.S"
590
591/*
592 * The IDT and GDT 'descriptors' are a strange 48-bit object
593 * only used by the lidt and lgdt instructions. They are not
594 * like usual segment descriptors - they consist of a 16-bit
595 * segment size, and 32-bit linear address value:
596 */
597
598.globl boot_gdt_descr
599.globl idt_descr
600
/*
 * Each descriptor below is a 16-bit limit followed by a 32-bit address.
 * The ".word 0" emitted just before each label is padding so that the
 * 32-bit address field lands on a 4-byte boundary.
 */
601	ALIGN
602# early boot GDT descriptor (must use 1:1 address mapping)
603	.word 0				# 32 bit align gdt_desc.address
604boot_gdt_descr:
	/*
	 * Limit = __BOOT_DS + 7.  Presumably the offset of the last byte of
	 * the boot data descriptor, assuming __BOOT_DS is that descriptor's
	 * byte offset in boot_gdt -- verify against asm/segment.h.
	 * The address is boot_gdt without the PAGE_OFFSET bias.
	 */
605	.word __BOOT_DS+7
606	.long boot_gdt - __PAGE_OFFSET
607
608	.word 0				# 32-bit align idt_desc.address
/* Loaded with lidt in checkCPUtype; points at idt_table. */
609idt_descr:
610	.word IDT_ENTRIES*8-1		# idt contains 256 entries
611	.long idt_table
612
613# boot GDT descriptor (later on used by CPU#0):
614	.word 0				# 32 bit align gdt_desc.address
/* Loaded with lgdt in checkCPUtype; limit covers GDT_ENTRIES 8-byte slots. */
615ENTRY(early_gdt_descr)
616	.word GDT_ENTRIES*8-1
617	.long per_cpu__gdt_page		/* Overwritten for secondary CPUs */
618
619/*
620 * The boot_gdt must mirror the equivalent in setup.S and is
621 * used only for booting.
622 */
/*
 * boot_gdt: GDT_ENTRY_BOOT_CS zeroed 8-byte slots, followed by the boot
 * code descriptor and then the boot data descriptor.  Referenced by
 * boot_gdt_descr.
 */
623	.align L1_CACHE_BYTES
624ENTRY(boot_gdt)
625	.fill GDT_ENTRY_BOOT_CS,8,0
626	.quad 0x00cf9a000000ffff	/* kernel 4GB code at 0x00000000 */
627	.quad 0x00cf92000000ffff	/* kernel 4GB data at 0x00000000 */
628