/*-
 * Copyright (C) 2007-2009 Semihalf, Rafal Jaworowski <raj@semihalf.com>
 * Copyright (C) 2006 Semihalf, Marian Balakowicz <m8@semihalf.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN
 * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include "assym.inc"

#include "opt_hwpmc_hooks.h"

#include <machine/asm.h>
#include <machine/hid.h>
#include <machine/param.h>
#include <machine/spr.h>
#include <machine/pte.h>
#include <machine/trap.h>
#include <machine/vmparam.h>
#include <machine/tlb.h>

#ifdef _CALL_ELF
.abiversion _CALL_ELF
#endif

/* Size in bytes of the temporary boot stack reserved at tmpstack. */
#define TMPSTACKSZ	16384

/*
 * Width-dependent helpers.  Each name expands to the doubleword flavour on
 * 64-bit builds and to the word flavour on 32-bit builds, so that code
 * written in terms of them assembles for either target.
 *
 * CMPI, CMPL, LOADX, STOREX, STU, CALLSIZE, REDZONE, THREAD_REG and
 * WORD_SIZE are not used by the startup code below; presumably they exist
 * for the trap_subr.S included at the bottom of this file -- TODO confirm.
 */
#ifdef __powerpc64__
/* Read the TOC base (kept in SPRG8 by the startup code) into register r. */
#define GET_TOCBASE(r)  \
	mfspr	r, SPR_SPRG8
/* Emitted after each bl to a C function in the startup code. */
#define	TOC_RESTORE	nop
#define	CMPI	cmpdi
#define	CMPL	cmpld
#define	LOAD	ld
#define	LOADX	ldarx
#define	STORE	std
#define	STOREX	stdcx.
#define	STU	stdu
#define	CALLSIZE	48
#define	REDZONE		288
#define	THREAD_REG	%r13
/* Emit one pointer-sized data word holding x. */
#define	ADDR(x)	\
	.llong	x
#define	WORD_SIZE	8
#else
/* No TOC on 32-bit builds: both TOC helpers expand to nothing. */
#define	GET_TOCBASE(r)
#define	TOC_RESTORE
#define	CMPI	cmpwi
#define	CMPL	cmplw
#define	LOAD	lwz
#define	LOADX	lwarx
#define	STOREX	stwcx.
#define	STORE	stw
#define	STU	stwu
#define	CALLSIZE	8
#define	REDZONE		0
#define	THREAD_REG	%r2
/* Emit one pointer-sized data word holding x. */
#define	ADDR(x)	\
	.long	x
#define	WORD_SIZE	4
#endif

#ifdef __powerpc64__
	/*
	 * Placate lld by creating a kboot stub.  The stub lives in its own
	 * executable section and consists of a single branch to __start.
	 */
        .section ".text.kboot", "x", @progbits
        b __start
#endif

	/* btext labels the very beginning of the .text section. */
	.text
	.globl	btext
btext:

/*
 * This symbol is here for the benefit of kvm_mkdb, and is supposed to
 * mark the start of kernel text.  It resolves to the same address as
 * btext, since nothing is emitted between the two labels.
 */
	.globl	kernel_text
kernel_text:

/*
 * Startup entry.  Note, this must be the first thing in the text segment!
 */
	.text
	.globl	__start
__start:

/*
 * Assumptions on the boot loader:
 *  - System memory starts from physical address 0
 *  - It's mapped by a single TLB1 entry
 *  - TLB1 mapping is 1:1 pa to va
 *  - Kernel is loaded at 64MB boundary
 *  - All PID registers are set to the same value
 *  - CPU is running in AS=0
 *
 * Register contents provided by the loader(8):
 *	r1	: stack pointer
 *	r3	: metadata pointer
 *
 * We rearrange the TLB1 layout as follows:
 *  - Find TLB1 entry we started in
 *  - Make sure it's protected, invalidate other entries
 *  - Create temp entry in the second AS (make sure it's not TLB1[0])
 *  - Switch to temp mapping
 *  - Map 64MB of RAM in TLB1[0]
 *  - Use AS=0, set EPN to VM_MIN_KERNEL_ADDRESS and RPN to kernel load address
 *  - Switch to TLB1[0] mapping
 *  - Invalidate temp mapping
 *
 * locore registers use:
 *	r1	: stack pointer
 *	r2	: trace pointer (AP only, for early diagnostics)
 *	r3-r27	: scratch registers
 *	r28	: temp TLB1 entry
 *	r29	: initial TLB1 entry we started in
 *	r30-r31	: arguments (metadata pointer)
 */

/*
 * Keep arguments in r30 & r31 for later use.
 */
	mr	%r30, %r3
	mr	%r31, %r4

/*
 * Initial cleanup: load a known MSR value (only PSL_DE, plus PSL_CM on
 * 64-bit builds).
 */
	li	%r3, PSL_DE	/* Keep debug exceptions for CodeWarrior. */
#ifdef __powerpc64__
	oris	%r3, %r3, PSL_CM@h
#endif
	mtmsr	%r3
	isync

/*
 * Initial HIDs configuration
 */
1:
	mfpvr	%r3
	rlwinm	%r3, %r3, 16, 16, 31	/* r3 = upper 16 bits of the PVR */

	/* Start from the plain e500 HID0 value; override per core below. */
	lis	%r4, HID0_E500_DEFAULT_SET@h
	ori	%r4, %r4, HID0_E500_DEFAULT_SET@l

	/* Check for e500mc and e5500 */
	cmpli	0, 0, %r3, FSL_E500mc
	bne	2f

	lis	%r4, HID0_E500MC_DEFAULT_SET@h
	ori	%r4, %r4, HID0_E500MC_DEFAULT_SET@l
	b	3f
2:
	cmpli	0, 0, %r3, FSL_E5500
	bne	3f

	lis	%r4, HID0_E5500_DEFAULT_SET@h
	ori	%r4, %r4, HID0_E5500_DEFAULT_SET@l

3:
	mtspr	SPR_HID0, %r4
	isync

/*
 * E500mc and E5500 do not have HID1 register, so skip HID1 setup on
 * this core.  (The same skip is applied to E6500 below.)
 */
	cmpli	0, 0, %r3, FSL_E500mc
	beq	1f
	cmpli	0, 0, %r3, FSL_E5500
	beq	1f
	cmpli	0, 0, %r3, FSL_E6500
	beq	1f

	lis	%r3, HID1_E500_DEFAULT_SET@h
	ori	%r3, %r3, HID1_E500_DEFAULT_SET@l
	mtspr	SPR_HID1, %r3
	isync
1:
	/* Invalidate all entries in TLB0 */
	li	%r3, 0
	bl	tlb_inval_all

	/*
	 * A zero first argument (saved in r30 above) skips the whole TLB1
	 * rearrangement.  NOTE(review): presumably this covers a loader
	 * that has already set up the final mapping -- confirm.
	 */
	cmpwi	%r30, 0
	beq	done_mapping

/*
 * Locate the TLB1 entry that maps this code
 */
	bl	1f			/* bl/mflr pair: r3 = current PC */
1:	mflr	%r3
	bl	tlb1_find_current	/* the entry found is returned in r29 */

	bl	tlb1_inval_all_but_current

/*
 * Create temporary mapping in AS=1 and switch to it
 */
	bl	tlb1_temp_mapping_as1

	/*
	 * Build the new MSR (current value with IS and DS set) and the
	 * run-time address of label 3, then let rfi load both at once.
	 */
	mfmsr	%r3
	ori	%r3, %r3, (PSL_IS | PSL_DS)
	bl	2f
2:	mflr	%r4
	addi	%r4, %r4, (3f - 2b)
	mtspr	SPR_SRR0, %r4
	mtspr	SPR_SRR1, %r3
	rfi				/* Switch context */

/*
 * Invalidate initial entry
 */
3:
	mr	%r3, %r29
	bl	tlb1_inval_entry

/*
 * Setup final mapping in TLB1[0] and switch to it
 */
	/* Final kernel mapping, map in 64 MB of RAM */
	lis	%r3, MAS0_TLBSEL1@h	/* Select TLB1 */
	li	%r4, 0			/* Entry 0 */
	rlwimi	%r3, %r4, 16, 10, 15
	mtspr	SPR_MAS0, %r3
	isync

	li	%r3, (TLB_SIZE_64M << MAS1_TSIZE_SHIFT)@l
	oris	%r3, %r3, (MAS1_VALID | MAS1_IPROT)@h
	mtspr	SPR_MAS1, %r3		/* note TS was not filled, so it's TS=0 */
	isync

	/* r3 keeps this MAS2 value; it is reused for the jump target below. */
	LOAD_ADDR(%r3, VM_MIN_KERNEL_ADDRESS)
	ori	%r3, %r3, (_TLB_ENTRY_SHARED | MAS2_M)@l /* WIMGE = 0b00100 */
	mtspr	SPR_MAS2, %r3
	isync

	/* Discover phys load address */
	bl	3f
3:	mflr	%r4			/* Use current address */
	rlwinm	%r4, %r4, 0, 0, 5	/* 64MB alignment mask */
	ori	%r4, %r4, (MAS3_SX | MAS3_SW | MAS3_SR)@l
	mtspr	SPR_MAS3, %r4		/* Set RPN and protection */
	isync
	li	%r4, 0
	mtspr	SPR_MAS7, %r4
	isync
	tlbwe
	isync
	msync

	/* Switch to the above TLB1[0] mapping */
	bl	4f
4:	mflr	%r4
#ifdef __powerpc64__
	clrldi	%r4, %r4, 38		/* Keep the low 26 bits: offset within 64MB */
	clrrdi	%r3, %r3, 12		/* Strip the low 12 bits from the MAS2 value */
#else
	rlwinm	%r4, %r4, 0, 6, 31	/* Current offset from kernel load address */
	rlwinm	%r3, %r3, 0, 0, 19	/* Strip the low 12 bits from the MAS2 value */
#endif
	add	%r4, %r4, %r3		/* Convert to kernel virtual address */
	addi	%r4, %r4, (5f - 4b)
	li	%r3, PSL_DE		/* Note AS=0 */
#ifdef __powerpc64__
	oris	%r3, %r3, PSL_CM@h
#endif
	mtspr   SPR_SRR0, %r4
	mtspr   SPR_SRR1, %r3
	rfi

/*
 * Invalidate temp mapping
 */
5:
	mr	%r3, %r28
	bl	tlb1_inval_entry
/*
 * Common tail of the BSP startup path: set up the TOC (64-bit only) and a
 * temporary stack, apply the kernel's own relocations, install the
 * exception vector offsets, and hand over to C.
 */
done_mapping:

#ifdef __powerpc64__
	/* Set up the TOC pointer */
	b	0f
	.align 3			/* Keep the .llong below 8-byte aligned */
0:	nop
	bl	1f
	.llong	__tocbase + 0x8000 - .
1:	mflr	%r2			/* r2 = address of the .llong above */
	ld	%r1,0(%r2)		/* r1 is only a scratch register here */
	add	%r2,%r1,%r2		/* r2 = run-time __tocbase + 0x8000 */
	mtspr	SPR_SPRG8, %r2		/* GET_TOCBASE() reads it back from here */
	nop

	/* Get load offset */
	ld	%r31,-0x8000(%r2) /* First TOC entry is TOC base */
	subf    %r31,%r31,%r2	/* Subtract from real TOC base to get base */

	/* Set up the stack pointer */
	bl	1f
	.llong	tmpstack + TMPSTACKSZ - 96 - .
1:	mflr	%r3
	ld	%r1,0(%r3)
	add	%r1,%r1,%r3		/* r1 = 96 bytes below the top of tmpstack */
/*
 * Relocate kernel
 */
	bl	1f
	.llong _DYNAMIC-.
1:	mflr	%r3
	ld	%r4,0(%r3)
	add	%r3,%r4,%r3		/* arg 1: run-time address of _DYNAMIC */
	mr	%r4,%r31		/* arg 2: load offset computed above */
#else
/*
 * Setup a temporary stack
 */
	bl	1f
	.long tmpstack-.
1:	mflr	%r1
	lwz	%r2,0(%r1)		/* r2 is only a scratch register here */
	add	%r1,%r1,%r2		/* r1 = run-time address of tmpstack */
	addi	%r1, %r1, (TMPSTACKSZ - 16)

/*
 * Relocate kernel
 */
	bl      1f
	.long   _DYNAMIC-.
	.long   _GLOBAL_OFFSET_TABLE_-.
1:	mflr    %r5
	lwz	%r3,0(%r5)	/* _DYNAMIC in %r3 */
	add	%r3,%r3,%r5
	/*
	 * The second .long sits at r5 + 4, so adding r5 (not r5 + 4) yields
	 * the GOT address minus 4; the 4(%r4) displacement below makes up
	 * for that.
	 */
	lwz	%r4,4(%r5)	/* GOT pointer */
	add	%r4,%r4,%r5
	lwz	%r4,4(%r4)	/* got[0] is _DYNAMIC link addr */
	subf	%r4,%r4,%r3	/* subtract to calculate relocbase */
#endif
	bl	CNAME(elf_reloc_self)
	TOC_RESTORE

/*
 * Initialise exception vector offsets
 */
	bl	CNAME(ivor_setup)
	TOC_RESTORE

/*
 * Set up arguments and jump to system initialization code
 *
 * NOTE(review): on 64-bit builds %r31 no longer holds the loader's second
 * argument at this point; it was overwritten with the load offset above.
 * Confirm that booke_init() does not depend on its second argument.
 */
	mr	%r3, %r30
	mr	%r4, %r31

	/* Prepare core */
	bl	CNAME(booke_init)
	TOC_RESTORE

	/* Switch to thread0.td_kstack now */
	mr	%r1, %r3		/* booke_init() returned the new stack pointer */
	li	%r3, 0
	STORE	%r3, 0(%r1)		/* Zero the word the new stack pointer points at */

	/* Machine independent part, does not return */
	bl	CNAME(mi_startup)
	TOC_RESTORE
	/* NOT REACHED */
5:	b	5b


#ifdef SMP
/************************************************************************/
/* AP Boot page */
/************************************************************************/
	.text
	.globl	__boot_page
	.align	12
__boot_page:
	/*
	 * The boot page is a special page of memory used during AP bringup.
	 * Before the AP comes out of reset, the physical 4K page holding this
	 * code is arranged to be mapped at 0xfffff000 by use of
	 * platform-dependent registers.
	 *
	 * Alternatively, this page may be executed using an ePAPR-standardized
	 * method -- writing to the address specified in "cpu-release-addr".
	 *
	 * In either case, execution begins at the last instruction of the
	 * page, which is a branch back to the start of the page.
	 *
	 * The code in the page must do initial MMU setup and normalize the
	 * TLBs for regular operation in the correct address space before
	 * reading outside the page.
	 *
	 * This implementation accomplishes this by:
	 * 1) Wiping TLB0 and all TLB1 entries but the one currently in use.
	 * 2) Establishing a temporary 4K TLB1 mapping in AS=1, and switching
	 *    to it with rfi. This entry must NOT be in TLB1 slot 0.
	 *    (This is needed to give the code freedom to clean up AS=0.)
	 * 3) Removing the initial TLB1 entry, leaving us with a single valid
	 *    TLB1 entry, NOT in slot 0.
	 * 4) Installing an AS0 entry in TLB1 slot 0 mapping the 64MB kernel
	 *    segment at its final virtual address. A second rfi is done to
	 *    switch to the final address space. At this point we can finally
	 *    access the rest of the kernel segment safely.
	 * 5) The temporary TLB1 AS=1 entry is removed, finally leaving us in
	 *    a consistent (but minimal) state.
	 * 6) Set up TOC, stack, and pcpu registers.
	 * 7) Now that we can finally call C code, call pmap_bootstrap_ap(),
	 *    which finishes copying in the shared TLB1 entries.
	 *
	 * At this point, the MMU is fully set up, and we can proceed with
	 * running the actual AP bootstrap code.
	 *
	 * Pieces of this code are also used for UP kernel, but in this case
	 * the sections specific to boot page functionality are dropped by
	 * the preprocessor.
	 */
	/*
	 * On 64-bit builds the extra nop pushes the bl to offset 4, so the
	 * data words that follow it start at offset 8 of the page.
	 */
#ifdef __powerpc64__
	nop			/* PPC64 alignment word. 64-bit target. */
#endif
	bl	1f		/* 32-bit target. */

	/*
	 * Parameter words embedded in the page.  They are zero here; the
	 * code below reads bp_kernload and bp_virtaddr, so they have to be
	 * filled in before an AP is released.
	 */
	.globl	bp_trace
bp_trace:
	ADDR(0)			/* Trace pointer (%r31). */

	.globl	bp_kernload
bp_kernload:
	.llong 0		/* Kern phys. load address. */

	.globl	bp_virtaddr
bp_virtaddr:
	ADDR(0)			/* Virt. address of __boot_page. */

/*
 * Initial configuration
 */
1:
	mflr    %r31		/* r31 holds the address of bp_trace */

	/* Set HIDs */
	mfpvr	%r3
	rlwinm	%r3, %r3, 16, 16, 31	/* r3 = upper 16 bits of the PVR */

	/* HID0 for E500 is default */
	lis	%r4, HID0_E500_DEFAULT_SET@h
	ori	%r4, %r4, HID0_E500_DEFAULT_SET@l

	cmpli	0, 0, %r3, FSL_E500mc
	bne	2f
	lis	%r4, HID0_E500MC_DEFAULT_SET@h
	ori	%r4, %r4, HID0_E500MC_DEFAULT_SET@l
	b	3f
2:
	cmpli	0, 0, %r3, FSL_E5500
	bne	3f
	lis	%r4, HID0_E5500_DEFAULT_SET@h
	ori	%r4, %r4, HID0_E5500_DEFAULT_SET@l
3:
	mtspr	SPR_HID0, %r4
	isync

	/* Enable branch prediction */
	li	%r3, BUCSR_BPEN
	mtspr	SPR_BUCSR, %r3
	isync

	/* Invalidate all entries in TLB0 */
	li	%r3, 0
	bl	tlb_inval_all

/*
 * Find TLB1 entry which is translating us now
 */
	bl	2f			/* bl/mflr pair: r3 = current PC */
2:	mflr	%r3
	bl	tlb1_find_current	/* the entry number found is in r29 */

	bl	tlb1_inval_all_but_current

/*
 * Create temporary translation in AS=1 and switch to it
 */

	bl	tlb1_temp_mapping_as1

	mfmsr	%r3
	ori	%r3, %r3, (PSL_IS | PSL_DS)
#ifdef __powerpc64__
	oris	%r3, %r3, PSL_CM@h	/* Ensure we're in 64-bit after RFI */
#endif
	bl	3f
3:	mflr	%r4
	addi	%r4, %r4, (4f - 3b)	/* Run-time address of label 4 */
	mtspr	SPR_SRR0, %r4
	mtspr	SPR_SRR1, %r3
	rfi				/* Switch context */

/*
 * Invalidate initial entry
 */
4:
	mr	%r3, %r29
	bl	tlb1_inval_entry

/*
 * Setup final mapping in TLB1[0] and switch to it
 */
	/* Final kernel mapping, map in 64 MB of RAM */
	lis	%r3, MAS0_TLBSEL1@h	/* Select TLB1 */
	li	%r4, 0			/* Entry 0 */
	/*
	 * NOTE(review): the BSP path inserts into bits 10-15 here, this one
	 * into bits 4-15.  With %r4 = 0 the wider mask only matters if
	 * MAS0_TLBSEL1 has bits set in 4-9 -- confirm.
	 */
	rlwimi	%r3, %r4, 16, 4, 15
	mtspr	SPR_MAS0, %r3
	isync

	li	%r3, (TLB_SIZE_64M << MAS1_TSIZE_SHIFT)@l
	oris	%r3, %r3, (MAS1_VALID | MAS1_IPROT)@h
	mtspr	SPR_MAS1, %r3		/* note TS was not filled, so it's TS=0 */
	isync

	LOAD_ADDR(%r3, VM_MIN_KERNEL_ADDRESS)
	ori	%r3, %r3, (_TLB_ENTRY_SHARED | MAS2_M)@l /* WIMGE = 0b00100 */
	mtspr	SPR_MAS2, %r3
	isync

	/* Retrieve kernel load [physical] address from bp_kernload */
5:
	/*
	 * LR still holds the return address of the bl tlb1_inval_entry
	 * above, i.e. an address inside the boot page.
	 */
	mflr	%r3
#ifdef __powerpc64__
	clrrdi	%r3, %r3, PAGE_SHIFT	/* trunc_page(%r3) */
#else
	clrrwi	%r3, %r3, PAGE_SHIFT	/* trunc_page(%r3) */
#endif
	/* Load lower half of the kernel loadaddr. */
	lwz	%r4, (bp_kernload - __boot_page + 4)(%r3)
	LOAD	%r5, (bp_virtaddr - __boot_page)(%r3)	/* r5 = bp_virtaddr value */

	/* Set RPN and protection */
	ori	%r4, %r4, (MAS3_SX | MAS3_SW | MAS3_SR)@l
	mtspr	SPR_MAS3, %r4
	isync
	/* The first word of bp_kernload goes to MAS7. */
	lwz	%r4, (bp_kernload - __boot_page)(%r3)
	mtspr	SPR_MAS7, %r4
	isync
	tlbwe
	isync
	msync

	/* Switch to the final mapping */
	bl	6f
6:	mflr	%r3
	rlwinm	%r3, %r3, 0, 0xfff	/* Offset from boot page start */
	add	%r3, %r3, %r5		/* Make this a virtual address */
	addi	%r3, %r3, (7f - 6b)	/* And figure out return address. */
#ifdef __powerpc64__
	lis	%r4, PSL_CM@h		/* Note AS=0 */
#else
	li	%r4, 0			/* Note AS=0 */
#endif
	mtspr	SPR_SRR0, %r3
	mtspr	SPR_SRR1, %r4
	rfi
7:

/*
 * At this point we're running at virtual addresses VM_MIN_KERNEL_ADDRESS and
 * beyond so it's allowed to directly access all locations the kernel was linked
 * against.
 */

/*
 * Invalidate temp mapping
 */
	mr	%r3, %r28
	bl	tlb1_inval_entry

#ifdef __powerpc64__
	/* Set up the TOC pointer */
	b	0f
	.align 3			/* Keep the .llong below 8-byte aligned */
0:	nop
	bl	1f
	.llong	__tocbase + 0x8000 - .
1:	mflr	%r2
	ld	%r1,0(%r2)		/* r1 is only a scratch register here */
	add	%r2,%r1,%r2
	mtspr	SPR_SPRG8, %r2

	/* Set up the stack pointer */
	addis	%r1,%r2,TOC_REF(tmpstack)@ha
	ld	%r1,TOC_REF(tmpstack)@l(%r1)	/* r1 = tmpstack, via its TOC entry */
	addi	%r1,%r1,TMPSTACKSZ-96
#else
/*
 * Setup a temporary stack
 */
	bl	1f
	.long tmpstack-.
1:	mflr	%r1
	lwz	%r2,0(%r1)		/* r2 is only a scratch register here */
	add	%r1,%r1,%r2		/* r1 = run-time address of tmpstack */
	/*
	 * NOTE(review): this stores tmpstack's own address into its first
	 * word; the purpose is not evident from this file -- confirm.
	 */
	stw	%r1, 0(%r1)
	addi	%r1, %r1, (TMPSTACKSZ - 16)
#endif

/*
 * Initialise exception vector offsets
 */
	bl	CNAME(ivor_setup)
	TOC_RESTORE

	/*
	 * Assign our pcpu instance
	 *
	 * NOTE(review): the offset to ap_pcpu is stored as a 32-bit .long and
	 * fetched with lwz, which zero-extends; on 64-bit builds this
	 * assumes ap_pcpu lies above this code and within 4GB -- confirm.
	 */
	bl	1f
	.long ap_pcpu-.
1:	mflr	%r4
	lwz	%r3, 0(%r4)
	add	%r3, %r3, %r4		/* r3 = address of ap_pcpu */
	LOAD	%r3, 0(%r3)		/* r3 = pointer stored in ap_pcpu */
	mtsprg0	%r3

	bl	CNAME(pmap_bootstrap_ap)
	TOC_RESTORE

	bl	CNAME(cpudep_ap_bootstrap)
	TOC_RESTORE
	/* Switch to the idle thread's kstack */
	mr	%r1, %r3		/* cpudep_ap_bootstrap() returned it in r3 */
	
	bl	CNAME(machdep_ap_bootstrap)
	TOC_RESTORE

	/* NOT REACHED */
6:	b	6b
#endif /* SMP */

#if defined (BOOKE_E500)
/*
 * Invalidate all entries in the given TLB.
 *
 * r3	TLBSEL
 *
 * Only the low bit of r3 is used; it is moved to bit position 3 of the
 * tlbivax operand and combined with the INVALL flag.  r3 is clobbered.
 * Leaf routine, returns with blr.
 */
tlb_inval_all:
	rlwinm	%r3, %r3, 3, (1 << 3)	/* TLBSEL */
	ori	%r3, %r3, (1 << 2)	/* INVALL */
	tlbivax	0, %r3
	isync
	msync

	tlbsync
	msync
	blr

/*
 * expects address to look up in r3, returns entry number in r29
 *
 * Besides looking the entry up, this also rewrites it with MAS1[IPROT]
 * set.  r17 is used as a scratch register.
 *
 * FIXME: the hidden assumption is we are now running in AS=0, but we should
 * retrieve actual AS from MSR[IS|DS] and put it in MAS6[SAS]
 *
 * NOTE(review): the result of tlbsx is used without checking that the
 * search actually hit a valid entry -- confirm this cannot miss here.
 */
tlb1_find_current:
	mfspr	%r17, SPR_PID0
	slwi	%r17, %r17, MAS6_SPID0_SHIFT
	mtspr	SPR_MAS6, %r17		/* Search with the PID taken from PID0 */
	isync
	tlbsx	0, %r3
	mfspr	%r17, SPR_MAS0
	rlwinm	%r29, %r17, 16, 26, 31		/* MAS0[ESEL] -> r29 */

	/* Make sure we have IPROT set on the entry */
	mfspr	%r17, SPR_MAS1
	oris	%r17, %r17, MAS1_IPROT@h
	mtspr	SPR_MAS1, %r17
	isync
	tlbwe
	isync
	msync
	blr

/*
 * Invalidates a single entry in TLB1.
 *
 * r3		ESEL
 * r4-r5	scratched
 *
 * The entry is read back with tlbre and rewritten with MAS1 set to zero.
 * Leaf routine: LR is left untouched.
 */
tlb1_inval_entry:
	lis	%r4, MAS0_TLBSEL1@h	/* Select TLB1 */
	rlwimi	%r4, %r3, 16, 10, 15	/* Select our entry */
	mtspr	SPR_MAS0, %r4
	isync
	tlbre
	li	%r5, 0			/* MAS1[V] = 0 */
	mtspr	SPR_MAS1, %r5
	isync
	tlbwe
	isync
	msync
	blr

/*
 * Duplicates the TLB1 entry we are currently running from into a second
 * TLB1 slot, with TS=1 (AS=1), TID=0, VALID and IPROT set and MAS7 zeroed.
 * The caller switches to the copy by setting MSR[IS|DS].
 *
 * The temp entry normally goes into the slot right after the current one.
 * When the current entry is the last one in TLB1, the slot right before it
 * is used instead: letting the index run past the end would wrap around to
 * slot 0 in the 6-bit ESEL field, and slot 0 is where the callers install
 * the final kernel mapping while still executing from the temp entry.
 *
 * r29		current entry number
 * r28		returned temp entry
 * r3-r5	scratched
 * cr0		scratched
 *
 * Leaf routine: LR is left untouched.
 */
tlb1_temp_mapping_as1:
	/* Read our current translation */
	lis	%r3, MAS0_TLBSEL1@h	/* Select TLB1 */
	rlwimi	%r3, %r29, 16, 10, 15	/* Select our current entry */
	mtspr	SPR_MAS0, %r3
	isync
	tlbre

	/*
	 * Choose the slot for the temp entry (see above).  Reading TLB1CFG
	 * does not disturb the MAS registers filled in by tlbre.
	 */
	mfspr	%r4, SPR_TLB1CFG	/* Get number of entries */
	andi.	%r4, %r4, TLBCFG_NENTRY_MASK@l
	addi	%r28, %r29, 1		/* Use next entry... */
	cmpw	%r28, %r4
	blt	1f
	subi	%r28, %r29, 1		/* ...or the previous one at the end */
1:
	/* Prepare and write temp entry */
	lis	%r3, MAS0_TLBSEL1@h	/* Select TLB1 */
	rlwimi	%r3, %r28, 16, 10, 15	/* Select temp entry */
	mtspr	SPR_MAS0, %r3
	isync
	mfspr	%r5, SPR_MAS1
	li	%r4, 1			/* AS=1 */
	rlwimi	%r5, %r4, 12, 19, 19
	li	%r4, 0			/* Global mapping, TID=0 */
	rlwimi	%r5, %r4, 16, 8, 15
	oris	%r5, %r5, (MAS1_VALID | MAS1_IPROT)@h
	mtspr	SPR_MAS1, %r5
	isync
	li	%r4, 0
	mtspr	SPR_MAS7, %r4
	isync
	tlbwe
	isync
	msync
	blr

/*
 * Loops over TLB1, invalidates all entries skipping the one which currently
 * maps this code.
 *
 * r29		current entry
 * r3-r5	scratched
 *
 * The entry count is taken from TLB1CFG.  Each entry other than r29 is read
 * back and rewritten with MAS1[VALID] and MAS1[IPROT] cleared.  cr0 is
 * clobbered as well.  Leaf routine: LR is left untouched.
 */
tlb1_inval_all_but_current:
	mfspr	%r3, SPR_TLB1CFG	/* Get number of entries */
	andi.	%r3, %r3, TLBCFG_NENTRY_MASK@l
	li	%r4, 0			/* Start from Entry 0 */
1:	lis	%r5, MAS0_TLBSEL1@h
	rlwimi	%r5, %r4, 16, 10, 15	/* Select entry r4 of TLB1 */
	mtspr	SPR_MAS0, %r5
	isync
	tlbre
	mfspr	%r5, SPR_MAS1
	cmpw	%r4, %r29		/* our current entry? */
	beq	2f
	rlwinm	%r5, %r5, 0, 2, 31	/* clear VALID and IPROT bits */
	mtspr	SPR_MAS1, %r5
	isync
	tlbwe
	isync
	msync
2:	addi	%r4, %r4, 1
	cmpw	%r4, %r3		/* Check if this is the last entry */
	bne	1b
	blr
#endif

#ifdef SMP
.globl __boot_tlb1
	/*
	 * The __boot_tlb1 table is used to hold BSP TLB1 entries
	 * marked with _TLB_ENTRY_SHARED flag during AP bootstrap.
	 * The BSP fills in the table in tlb_ap_prep() function. Next,
	 * AP loads its contents to TLB1 hardware in pmap_bootstrap_ap().
	 *
	 * The table is emitted inside the boot page, ahead of the padding
	 * below, so its size counts against the 4K page.
	 */
__boot_tlb1:
	.space TLB1_MAX_ENTRIES * TLB_ENTRY_SIZE

__boot_page_padding:
	/*
	 * Boot page needs to be exactly 4K, with the last word of this page
	 * acting as the reset vector, so we need to stuff the remainder.
	 * Upon release from holdoff CPU fetches the last word of the boot
	 * page.
	 *
	 * The .space below pads from __boot_page_padding up to offset 4092
	 * of the page, which puts the branch in the page's last word.
	 */
	.space	4092 - (__boot_page_padding - __boot_page)
	b	__boot_page
	/*
	 * This is the end of the boot page.
	 * During AP startup, the previous instruction is at 0xfffffffc
	 * virtual (i.e. the reset vector.)
	 */
#endif /* SMP */

/************************************************************************/
/* locore subroutines */
/************************************************************************/

/*
 * L1 cache control helpers (disable / enable / invalidate).  The
 * instruction sequences follow section 2.16 of the E500CORE RM.
 */

/*
 * dcache_inval: set DCFI and DCLFR in L1CSR0, then wait for DCFI to read
 * back as zero.  Takes no arguments; r3 and cr0 are clobbered.
 */
ENTRY(dcache_inval)
	mfspr	%r3, SPR_L1CSR0
	ori	%r3, %r3, (L1CSR0_DCFI | L1CSR0_DCLFR)@l
	msync
	isync
	mtspr	SPR_L1CSR0, %r3		/* Start the invalidate */
	isync
.Ldcache_inval_poll:
	mfspr	%r3, SPR_L1CSR0
	andi.	%r3, %r3, L1CSR0_DCFI	/* Still set? Keep polling. */
	bne	.Ldcache_inval_poll
	blr
END(dcache_inval)

/*
 * dcache_disable: clear L1CSR0[DCE] and leave every other bit of the
 * register as it was.  Takes no arguments; r3 and r4 are clobbered.
 */
ENTRY(dcache_disable)
	li	%r4, L1CSR0_DCE@l
	mfspr	%r3, SPR_L1CSR0
	nor	%r4, %r4, %r4		/* r4 = ~L1CSR0_DCE */
	and	%r3, %r4, %r3
	msync
	isync
	mtspr	SPR_L1CSR0, %r3
	isync
	blr
END(dcache_disable)

/*
 * dcache_enable: set DCPE and DCE in L1CSR0, keeping the other bits.
 * Takes no arguments; r3 is clobbered.
 */
ENTRY(dcache_enable)
	mfspr	%r3, SPR_L1CSR0
	/* OR in both halves of the mask; the two halves are independent. */
	ori	%r3, %r3, (L1CSR0_DCPE | L1CSR0_DCE)@l
	oris	%r3, %r3, (L1CSR0_DCPE | L1CSR0_DCE)@h
	msync
	isync
	mtspr	SPR_L1CSR0, %r3
	isync
	blr
END(dcache_enable)

/*
 * icache_inval: set ICFI and ICLFR in L1CSR1, then wait for ICFI to read
 * back as zero.  Takes no arguments; r3 and cr0 are clobbered.
 */
ENTRY(icache_inval)
	mfspr	%r3, SPR_L1CSR1
	ori	%r3, %r3, (L1CSR1_ICFI | L1CSR1_ICLFR)@l
	isync
	mtspr	SPR_L1CSR1, %r3		/* Start the invalidate */
	isync
.Licache_inval_poll:
	mfspr	%r3, SPR_L1CSR1
	andi.	%r3, %r3, L1CSR1_ICFI	/* Still set? Keep polling. */
	bne	.Licache_inval_poll
	blr
END(icache_inval)

/*
 * icache_disable: clear L1CSR1[ICE] and leave every other bit of the
 * register as it was.  Takes no arguments; r3 and r4 are clobbered.
 */
ENTRY(icache_disable)
	li	%r4, L1CSR1_ICE@l
	mfspr	%r3, SPR_L1CSR1
	nor	%r4, %r4, %r4		/* r4 = ~L1CSR1_ICE */
	and	%r3, %r4, %r3
	isync
	mtspr	SPR_L1CSR1, %r3
	isync
	blr
END(icache_disable)

/*
 * icache_enable: set ICPE and ICE in L1CSR1, keeping the other bits.
 * Takes no arguments; r3 is clobbered.
 */
ENTRY(icache_enable)
	mfspr	%r3, SPR_L1CSR1
	/* OR in both halves of the mask; the two halves are independent. */
	ori	%r3, %r3, (L1CSR1_ICPE | L1CSR1_ICE)@l
	oris	%r3, %r3, (L1CSR1_ICPE | L1CSR1_ICE)@h
	isync
	mtspr	SPR_L1CSR1, %r3
	isync
	blr
END(icache_enable)

/*
 * L2 cache control helpers (invalidate / enable) for E500mc.
 */

/*
 * l2cache_inval: set L2FI and L2LFC in L2CSR0, then wait for L2FI to read
 * back as zero.  Takes no arguments; r3 and cr0 are clobbered.
 */
ENTRY(l2cache_inval)
	mfspr	%r3, SPR_L2CSR0
	/* OR in both halves of the mask; the two halves are independent. */
	ori	%r3, %r3, (L2CSR0_L2FI | L2CSR0_L2LFC)@l
	oris	%r3, %r3, (L2CSR0_L2FI | L2CSR0_L2LFC)@h
	isync
	mtspr	SPR_L2CSR0, %r3		/* Start the invalidate */
	isync
.Ll2cache_inval_poll:
	mfspr	%r3, SPR_L2CSR0
	andis.	%r3, %r3, L2CSR0_L2FI@h	/* Still set? Keep polling. */
	bne	.Ll2cache_inval_poll
	blr
END(l2cache_inval)

/*
 * l2cache_enable: set L2E and L2PE in L2CSR0, keeping the other bits.
 * Only the upper halfword of the mask is applied.  Takes no arguments;
 * r3 is clobbered.
 */
ENTRY(l2cache_enable)
	mfspr	%r3, SPR_L2CSR0
	oris	%r3, %r3, (L2CSR0_L2E | L2CSR0_L2PE)@h
	isync
	mtspr	SPR_L2CSR0, %r3
	isync
	blr
END(l2cache_enable)

/*
 * Branch predictor setup.
 *
 * Done as two writes to BUCSR: the first with BBFI added to the current
 * value, the second with BPEN added on top of that (BBFI is still set in
 * the value written the second time).  Takes no arguments; r3 is clobbered.
 */
ENTRY(bpred_enable)
	mfspr	%r3, SPR_BUCSR
	ori	%r3, %r3, BUCSR_BBFI
	isync
	mtspr	SPR_BUCSR, %r3		/* First write: BBFI */
	isync
	ori	%r3, %r3, BUCSR_BPEN
	isync
	mtspr	SPR_BUCSR, %r3		/* Second write: BBFI | BPEN */
	isync
	blr
END(bpred_enable)

/*
 * XXX: This should be moved to a shared AIM/booke asm file, if one ever is
 * created.
 *
 * Returns the value of a special purpose register in r3.  The SPR number
 * encoded below (0) is only a placeholder, so the mfspr instruction must
 * stay the first instruction of the routine and keep this exact form.
 */
ENTRY(get_spr)
	/* Note: The spr number is patched at runtime */
	mfspr	%r3, 0
	blr
END(get_spr)

/************************************************************************/
/* Data section								*/
/************************************************************************/
	.data
	.align 3
/* Pointer-sized words holding the addresses of the symbols begin and end. */
GLOBAL(__startkernel)
	ADDR(begin)
GLOBAL(__endkernel)
	ADDR(end)
	.align	4
/*
 * Temporary stack used by the startup code on both the BSP and the APs,
 * which point r1 a little below tmpstack + TMPSTACKSZ.
 */
tmpstack:
	.space	TMPSTACKSZ
tmpstackbound:
	.space 10240	/* XXX: this really should not be necessary */
#ifdef __powerpc64__
/* TOC entry referenced through TOC_REF(tmpstack) by the AP startup code. */
TOC_ENTRY(tmpstack)
#ifdef SMP
TOC_ENTRY(bp_kernload)
#endif
#endif

/*
 * Compiled KERNBASE locations
 */
	.globl	kernbase
	.set	kernbase, KERNBASE

#include <powerpc/booke/trap_subr.S>