/*
 * This file contains miscellaneous low-level functions.
 *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 * Largely rewritten by Cort Dougan (cort@cs.nmt.edu)
 * and Paul Mackerras.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */

#include <linux/config.h>
#include <linux/sys.h>
#include <asm/unistd.h>
#include <asm/errno.h>
#include <asm/reg.h>
#include <asm/page.h>
#include <asm/cache.h>
#include <asm/cputable.h>
#include <asm/mmu.h>
#include <asm/ppc_asm.h>
#include <asm/thread_info.h>
#include <asm/asm-offsets.h>

	.text

/*
 * This returns the high 64 bits of the product of two 64-bit numbers.
 */
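/*
 * Rough C equivalent, as an illustrative sketch only (u32/u64 stand for
 * the kernel's fixed-width types; the 64-bit operands arrive split
 * across register pairs, high word first: A in r3/r4, B in r5/r6):
 *
 *	u64 mulhdu(u64 a, u64 b)
 *	{
 *		u32 ah = a >> 32, al = a, bh = b >> 32, bl = b;
 *		u64 mid  = (u64)ah * bl + ((u64)al * bl >> 32);
 *		u64 mid2 = (u64)al * bh + (u32)mid;
 *
 *		return (u64)ah * bh + (mid >> 32) + (mid2 >> 32);
 *	}
 *
 * The assembly below builds the same sum from mulhwu/mullw partial
 * products, skipping the terms that vanish when the low word of B or
 * the high word of A is zero.
 */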
_GLOBAL(mulhdu)
	cmpwi	r6,0
	cmpwi	cr1,r3,0
	mr	r10,r4
	mulhwu	r4,r4,r5
	beq	1f
	mulhwu	r0,r10,r6
	mullw	r7,r10,r5
	addc	r7,r0,r7
	addze	r4,r4
1:	beqlr	cr1		/* all done if high part of A is 0 */
	mr	r10,r3
	mullw	r9,r3,r5
	mulhwu	r3,r3,r5
	beq	2f
	mullw	r0,r10,r6
	mulhwu	r8,r10,r6
	addc	r7,r0,r7
	adde	r4,r4,r8
	addze	r3,r3
2:	addc	r4,r4,r9
	addze	r3,r3
	blr

/*
 * Returns (address we're running at) - (address we were linked at)
 * for use before the text and data are mapped to KERNELBASE.
 */
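/*
 * Hedged usage sketch (illustrative only; some_table is a hypothetical
 * symbol): before the kernel runs at its link address, a symbol's
 * link-time address has to be adjusted by this delta before it can be
 * dereferenced, e.g.
 *
 *	p = (void *)add_reloc_offset((unsigned long)&some_table);
 *
 * add_reloc_offset() and sub_reloc_offset() below package that
 * adjustment in each direction.
 */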
_GLOBAL(reloc_offset)
	mflr	r0
	bl	1f
1:	mflr	r3
	LOADADDR(r4,1b)
	subf	r3,r4,r3
	mtlr	r0
	blr

/*
 * add_reloc_offset(x) returns x + reloc_offset().
 */
_GLOBAL(add_reloc_offset)
	mflr	r0
	bl	1f
1:	mflr	r5
	LOADADDR(r4,1b)
	subf	r5,r4,r5
	add	r3,r3,r5
	mtlr	r0
	blr

/*
 * sub_reloc_offset(x) returns x - reloc_offset().
 */
_GLOBAL(sub_reloc_offset)
	mflr	r0
	bl	1f
1:	mflr	r5
	lis	r4,1b@ha
	addi	r4,r4,1b@l
	subf	r5,r4,r5
	subf	r3,r5,r3
	mtlr	r0
	blr

/*
 * reloc_got2 runs through the .got2 section adding an offset
 * to each entry.
 */
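/*
 * Roughly, as an illustrative C sketch only (__got2_start/__got2_end
 * are the linker-provided bounds referenced below):
 *
 *	void reloc_got2(unsigned long offset)
 *	{
 *		unsigned long *p;
 *
 *		for (p = __got2_start; p < __got2_end; p++)
 *			*p += offset;
 *	}
 *
 * with the table address itself first adjusted by the current run-time
 * relocation, since this can run before the kernel is relocated.
 */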
_GLOBAL(reloc_got2)
	mflr	r11
	lis	r7,__got2_start@ha
	addi	r7,r7,__got2_start@l
	lis	r8,__got2_end@ha
	addi	r8,r8,__got2_end@l
	subf	r8,r7,r8
	srwi.	r8,r8,2
	beqlr
	mtctr	r8
	bl	1f
1:	mflr	r0
	lis	r4,1b@ha
	addi	r4,r4,1b@l
	subf	r0,r4,r0
	add	r7,r0,r7
2:	lwz	r0,0(r7)
	add	r0,r0,r3
	stw	r0,0(r7)
	addi	r7,r7,4
	bdnz	2b
	mtlr	r11
	blr

/*
 * identify_cpu,
 * called with r3 = data offset and r4 = CPU number
 * doesn't change r3
 */
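/*
 * In C terms this is roughly (illustrative sketch only; the field
 * names follow the CPU_SPEC_* asm-offsets used below):
 *
 *	unsigned int pvr = mfspr(SPRN_PVR);
 *	struct cpu_spec *s = cpu_specs;
 *
 *	while ((pvr & s->pvr_mask) != s->pvr_value)
 *		s++;
 *	cur_cpu_spec = s;	(stored as a link-time address)
 *
 * All memory accesses are adjusted by the data offset in r3 because
 * this runs before the kernel is mapped at its final address; the
 * table is expected to end with a catch-all entry so the loop
 * terminates.
 */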
_GLOBAL(identify_cpu)
	addis	r8,r3,cpu_specs@ha
	addi	r8,r8,cpu_specs@l
	mfpvr	r7
1:
	lwz	r5,CPU_SPEC_PVR_MASK(r8)
	and	r5,r5,r7
	lwz	r6,CPU_SPEC_PVR_VALUE(r8)
	cmplw	0,r6,r5
	beq	1f
	addi	r8,r8,CPU_SPEC_ENTRY_SIZE
	b	1b
1:
	addis	r6,r3,cur_cpu_spec@ha
	addi	r6,r6,cur_cpu_spec@l
	sub	r8,r8,r3
	stw	r8,0(r6)
	blr

/*
 * do_cpu_ftr_fixups - goes through the list of CPU feature fixups
 * and writes nops over sections of code that don't apply to this cpu.
 * r3 = data offset (not changed)
 */
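/*
 * Each 16-byte fixup record holds { mask, value, start, end } (see the
 * loads at -16/-12/-8/-4 below).  Illustrative C sketch only:
 *
 *	for (f = __start___ftr_fixup; f < __stop___ftr_fixup; f++) {
 *		if ((cur_cpu_spec->cpu_features & f->mask) == f->value)
 *			continue;		feature present, keep code
 *		for (p = f->start; p < f->end; p++)
 *			*p = 0x60000000;	overwrite with nop
 *	}
 *
 * followed by the dcbst/icbi/sync sequence needed to make the patched
 * instructions visible to the instruction fetcher.
 */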
_GLOBAL(do_cpu_ftr_fixups)
	/* Get CPU 0 features */
	addis	r6,r3,cur_cpu_spec@ha
	addi	r6,r6,cur_cpu_spec@l
	lwz	r4,0(r6)
	add	r4,r4,r3
	lwz	r4,CPU_SPEC_FEATURES(r4)

	/* Get the fixup table */
	addis	r6,r3,__start___ftr_fixup@ha
	addi	r6,r6,__start___ftr_fixup@l
	addis	r7,r3,__stop___ftr_fixup@ha
	addi	r7,r7,__stop___ftr_fixup@l

	/* Do the fixup */
1:	cmplw	0,r6,r7
	bgelr
	addi	r6,r6,16
	lwz	r8,-16(r6)	/* mask */
	and	r8,r8,r4
	lwz	r9,-12(r6)	/* value */
	cmplw	0,r8,r9
	beq	1b
	lwz	r8,-8(r6)	/* section begin */
	lwz	r9,-4(r6)	/* section end */
	subf.	r9,r8,r9
	beq	1b
	/* write nops over the section of code */
	/* todo: if large section, add a branch at the start of it */
	srwi	r9,r9,2
	mtctr	r9
	add	r8,r8,r3
	lis	r0,0x60000000@h	/* nop */
3:	stw	r0,0(r8)
	andi.	r10,r4,CPU_FTR_SPLIT_ID_CACHE@l
	beq	2f
	dcbst	0,r8		/* suboptimal, but simpler */
	sync
	icbi	0,r8
2:	addi	r8,r8,4
	bdnz	3b
	sync			/* additional sync needed on g4 */
	isync
	b	1b

/*
 * call_setup_cpu - call the setup_cpu function for this cpu
 * r3 = data offset, r24 = cpu number
 *
 * Setup function is called with:
 *   r3 = data offset
 *   r4 = ptr to CPU spec (relocated)
 */
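/*
 * Roughly (illustrative sketch only):
 *
 *	if (cur_cpu_spec->cpu_setup)
 *		cur_cpu_spec->cpu_setup(offset, cur_cpu_spec);
 *
 * with both the function pointer and the spec pointer adjusted by the
 * data offset, since this may run before the final mapping is up.
 */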
_GLOBAL(call_setup_cpu)
	addis	r4,r3,cur_cpu_spec@ha
	addi	r4,r4,cur_cpu_spec@l
	lwz	r4,0(r4)
	add	r4,r4,r3
	lwz	r5,CPU_SPEC_SETUP(r4)
	cmpi	0,r5,0
	add	r5,r5,r3
	beqlr
	mtctr	r5
	bctr

#if defined(CONFIG_CPU_FREQ_PMAC) && defined(CONFIG_6xx)

/* This gets called by via-pmu.c to switch the PLL selection
 * on 750fx CPUs. This function should really be moved to some
 * other place (as should most of the cpufreq code in via-pmu).
 */
_GLOBAL(low_choose_750fx_pll)
	/* Clear MSR:EE */
	mfmsr	r7
	rlwinm	r0,r7,0,17,15
	mtmsr	r0

	/* If switching to PLL1, disable HID0:BTIC */
	cmplwi	cr0,r3,0
	beq	1f
	mfspr	r5,SPRN_HID0
	rlwinm	r5,r5,0,27,25
	sync
	mtspr	SPRN_HID0,r5
	isync
	sync

1:
	/* Calc new HID1 value */
	mfspr	r4,SPRN_HID1	/* Read the current HID1 value */
	rlwinm	r5,r3,16,15,15	/* Build a HID1:PS bit from the parameter */
	rlwinm	r4,r4,0,16,14	/* Clear HID1:PS in the value read (rlwimi could merge these steps) */
	or	r4,r4,r5
	mtspr	SPRN_HID1,r4

	/* Store new HID1 image */
	rlwinm	r6,r1,0,0,18
	lwz	r6,TI_CPU(r6)
	slwi	r6,r6,2
	addis	r6,r6,nap_save_hid1@ha
	stw	r4,nap_save_hid1@l(r6)

	/* If switching to PLL0, enable HID0:BTIC */
	cmplwi	cr0,r3,0
	bne	1f
	mfspr	r5,SPRN_HID0
	ori	r5,r5,HID0_BTIC
	sync
	mtspr	SPRN_HID0,r5
	isync
	sync

1:
	/* Return */
	mtmsr	r7
	blr

_GLOBAL(low_choose_7447a_dfs)
	/* Clear MSR:EE */
	mfmsr	r7
	rlwinm	r0,r7,0,17,15
	mtmsr	r0

	/* Calc new HID1 value */
	mfspr	r4,SPRN_HID1
	insrwi	r4,r3,1,9	/* insert parameter into bit 9 */
	sync
	mtspr	SPRN_HID1,r4
	sync
	isync

	/* Return */
	mtmsr	r7
	blr

#endif /* CONFIG_CPU_FREQ_PMAC && CONFIG_6xx */

/*
 * Clear the bits given in nmask from the MSR, then "or" the given value on:
 *     _nmask_and_or_msr(nmask, value_to_or)
 */
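/*
 * Equivalent to the C expression (illustrative sketch only):
 *
 *	mtmsr((mfmsr() & ~nmask) | value_to_or);
 *
 * bracketed by the SYNC/isync some chip revisions need around an MSR
 * update.
 */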
_GLOBAL(_nmask_and_or_msr)
	mfmsr	r0		/* Get current msr */
	andc	r0,r0,r3	/* And off the bits set in r3 (first parm) */
	or	r0,r0,r4	/* Or on the bits in r4 (second parm) */
	SYNC			/* Some chip revs have problems here... */
	mtmsr	r0		/* Update machine state */
	isync
	blr			/* Done */


/*
 * Flush MMU TLB
 */
_GLOBAL(_tlbia)
#if defined(CONFIG_40x)
	sync			/* Flush to memory before changing mapping */
	tlbia
	isync			/* Flush shadow TLB */
#elif defined(CONFIG_44x)
	li	r3,0
	sync

	/* Load high watermark */
	lis	r4,tlb_44x_hwater@ha
	lwz	r5,tlb_44x_hwater@l(r4)

1:	tlbwe	r3,r3,PPC44x_TLB_PAGEID
	addi	r3,r3,1
	cmpw	0,r3,r5
	ble	1b

	isync
#elif defined(CONFIG_FSL_BOOKE)
	/* Invalidate all entries in TLB0 */
	li	r3, 0x04
	tlbivax	0,3
	/* Invalidate all entries in TLB1 */
	li	r3, 0x0c
	tlbivax	0,3
	/* Invalidate all entries in TLB2 */
	li	r3, 0x14
	tlbivax	0,3
	/* Invalidate all entries in TLB3 */
	li	r3, 0x1c
	tlbivax	0,3
	msync
#ifdef CONFIG_SMP
	tlbsync
#endif /* CONFIG_SMP */
#else /* !(CONFIG_40x || CONFIG_44x || CONFIG_FSL_BOOKE) */
#if defined(CONFIG_SMP)
	rlwinm	r8,r1,0,0,18
	lwz	r8,TI_CPU(r8)
	oris	r8,r8,10
	mfmsr	r10
	SYNC
	rlwinm	r0,r10,0,17,15		/* clear bit 16 (MSR_EE) */
	rlwinm	r0,r0,0,28,26		/* clear DR */
	mtmsr	r0
	SYNC_601
	isync
	lis	r9,mmu_hash_lock@h
	ori	r9,r9,mmu_hash_lock@l
	tophys(r9,r9)
10:	lwarx	r7,0,r9
	cmpwi	0,r7,0
	bne-	10b
	stwcx.	r8,0,r9
	bne-	10b
	sync
	tlbia
	sync
	TLBSYNC
	li	r0,0
	stw	r0,0(r9)		/* clear mmu_hash_lock */
	mtmsr	r10
	SYNC_601
	isync
#else /* CONFIG_SMP */
	sync
	tlbia
	sync
#endif /* CONFIG_SMP */
#endif /* ! defined(CONFIG_40x) */
	blr

/*
 * Flush MMU TLB for a particular address
 */
_GLOBAL(_tlbie)
#if defined(CONFIG_40x)
	tlbsx.	r3, 0, r3
	bne	10f
	sync
	/* There are only 64 TLB entries, so r3 < 64, which means bit 25 is clear.
	 * Since 25 is the V bit in the TLB_TAG, loading this value will invalidate
	 * the TLB entry. */
	tlbwe	r3, r3, TLB_TAG
	isync
10:
#elif defined(CONFIG_44x)
	mfspr	r4,SPRN_MMUCR
	mfspr	r5,SPRN_PID			/* Get PID */
	rlwimi	r4,r5,0,24,31			/* Set TID */
	mtspr	SPRN_MMUCR,r4

	tlbsx.	r3, 0, r3
	bne	10f
	sync
	/* There are only 64 TLB entries, so r3 < 64,
	 * which means bit 22 is clear.  Since 22 is
	 * the V bit in the TLB_PAGEID, loading this
	 * value will invalidate the TLB entry.
	 */
	tlbwe	r3, r3, PPC44x_TLB_PAGEID
	isync
10:
#elif defined(CONFIG_FSL_BOOKE)
	rlwinm	r4, r3, 0, 0, 19
	ori	r5, r4, 0x08	/* TLBSEL = 1 */
	ori	r6, r4, 0x10	/* TLBSEL = 2 */
	ori	r7, r4, 0x18	/* TLBSEL = 3 */
	tlbivax	0, r4
	tlbivax	0, r5
	tlbivax	0, r6
	tlbivax	0, r7
	msync
#if defined(CONFIG_SMP)
	tlbsync
#endif /* CONFIG_SMP */
#else /* !(CONFIG_40x || CONFIG_44x || CONFIG_FSL_BOOKE) */
#if defined(CONFIG_SMP)
	rlwinm	r8,r1,0,0,18
	lwz	r8,TI_CPU(r8)
	oris	r8,r8,11
	mfmsr	r10
	SYNC
	rlwinm	r0,r10,0,17,15		/* clear bit 16 (MSR_EE) */
	rlwinm	r0,r0,0,28,26		/* clear DR */
	mtmsr	r0
	SYNC_601
	isync
	lis	r9,mmu_hash_lock@h
	ori	r9,r9,mmu_hash_lock@l
	tophys(r9,r9)
10:	lwarx	r7,0,r9
	cmpwi	0,r7,0
	bne-	10b
	stwcx.	r8,0,r9
	bne-	10b
	eieio
	tlbie	r3
	sync
	TLBSYNC
	li	r0,0
	stw	r0,0(r9)		/* clear mmu_hash_lock */
	mtmsr	r10
	SYNC_601
	isync
#else /* CONFIG_SMP */
	tlbie	r3
	sync
#endif /* CONFIG_SMP */
#endif /* ! CONFIG_40x */
	blr

/*
 * Flush instruction cache.
 * This is a no-op on the 601.
 */
_GLOBAL(flush_instruction_cache)
#if defined(CONFIG_8xx)
	isync
	lis	r5, IDC_INVALL@h
	mtspr	SPRN_IC_CST, r5
#elif defined(CONFIG_4xx)
#ifdef CONFIG_403GCX
	li	r3, 512
	mtctr	r3
	lis	r4, KERNELBASE@h
1:	iccci	0, r4
	addi	r4, r4, 16
	bdnz	1b
#else
	lis	r3, KERNELBASE@h
	iccci	0,r3
#endif
#elif defined(CONFIG_FSL_BOOKE)
BEGIN_FTR_SECTION
	mfspr	r3,SPRN_L1CSR0
	ori	r3,r3,L1CSR0_CFI|L1CSR0_CLFC
	/* msync; isync recommended here */
	mtspr	SPRN_L1CSR0,r3
	isync
	blr
END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
	mfspr	r3,SPRN_L1CSR1
	ori	r3,r3,L1CSR1_ICFI|L1CSR1_ICLFR
	mtspr	SPRN_L1CSR1,r3
#else
	mfspr	r3,SPRN_PVR
	rlwinm	r3,r3,16,16,31
	cmpwi	0,r3,1
	beqlr			/* for 601, do nothing */
	/* 603/604 processor - use invalidate-all bit in HID0 */
	mfspr	r3,SPRN_HID0
	ori	r3,r3,HID0_ICFI
	mtspr	SPRN_HID0,r3
#endif /* CONFIG_8xx/4xx */
	isync
	blr

/*
 * Write any modified data cache blocks out to memory
 * and invalidate the corresponding instruction cache blocks.
 * This is a no-op on the 601.
 *
 * flush_icache_range(unsigned long start, unsigned long stop)
 */
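/*
 * The algorithm, as an illustrative C sketch only (dcbst()/icbi()/sync()
 * stand for the corresponding instructions):
 *
 *	for (p = start & ~(L1_CACHE_BYTES - 1); p < stop; p += L1_CACHE_BYTES)
 *		dcbst(p);		push dirty data to memory
 *	sync();
 *	for (p = start & ~(L1_CACHE_BYTES - 1); p < stop; p += L1_CACHE_BYTES)
 *		icbi(p);		discard stale instructions
 *	sync();
 *	isync();
 */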
_GLOBAL(__flush_icache_range)
BEGIN_FTR_SECTION
	blr				/* for 601, do nothing */
END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
	li	r5,L1_CACHE_BYTES-1
	andc	r3,r3,r5
	subf	r4,r3,r4
	add	r4,r4,r5
	srwi.	r4,r4,L1_CACHE_SHIFT
	beqlr
	mtctr	r4
	mr	r6,r3
1:	dcbst	0,r3
	addi	r3,r3,L1_CACHE_BYTES
	bdnz	1b
	sync				/* wait for dcbst's to get to ram */
	mtctr	r4
2:	icbi	0,r6
	addi	r6,r6,L1_CACHE_BYTES
	bdnz	2b
	sync				/* additional sync needed on g4 */
	isync
	blr
/*
 * Write any modified data cache blocks out to memory.
 * Does not invalidate the corresponding cache lines (especially for
 * any corresponding instruction cache).
 *
 * clean_dcache_range(unsigned long start, unsigned long stop)
 */
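/*
 * The line count used by this and the next two routines is computed as
 * (illustrative sketch only):
 *
 *	start &= ~(L1_CACHE_BYTES - 1);
 *	nr = (stop - start + L1_CACHE_BYTES - 1) >> L1_CACHE_SHIFT;
 *
 * i.e. start is rounded down and stop rounded up to cache-line
 * boundaries, and nothing is done if the range is empty.
 */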
_GLOBAL(clean_dcache_range)
	li	r5,L1_CACHE_BYTES-1
	andc	r3,r3,r5
	subf	r4,r3,r4
	add	r4,r4,r5
	srwi.	r4,r4,L1_CACHE_SHIFT
	beqlr
	mtctr	r4

1:	dcbst	0,r3
	addi	r3,r3,L1_CACHE_BYTES
	bdnz	1b
	sync				/* wait for dcbst's to get to ram */
	blr

/*
 * Write any modified data cache blocks out to memory and invalidate them.
 * Does not invalidate the corresponding instruction cache blocks.
 *
 * flush_dcache_range(unsigned long start, unsigned long stop)
 */
_GLOBAL(flush_dcache_range)
	li	r5,L1_CACHE_BYTES-1
	andc	r3,r3,r5
	subf	r4,r3,r4
	add	r4,r4,r5
	srwi.	r4,r4,L1_CACHE_SHIFT
	beqlr
	mtctr	r4

1:	dcbf	0,r3
	addi	r3,r3,L1_CACHE_BYTES
	bdnz	1b
	sync				/* wait for dcbf's to get to ram */
	blr

/*
 * Like above, but invalidate the D-cache.  This is used by the 8xx
 * to invalidate the cache so the PPC core doesn't get stale data
 * from the CPM (no cache snooping here :-).
 *
 * invalidate_dcache_range(unsigned long start, unsigned long stop)
 */
_GLOBAL(invalidate_dcache_range)
	li	r5,L1_CACHE_BYTES-1
	andc	r3,r3,r5
	subf	r4,r3,r4
	add	r4,r4,r5
	srwi.	r4,r4,L1_CACHE_SHIFT
	beqlr
	mtctr	r4

1:	dcbi	0,r3
	addi	r3,r3,L1_CACHE_BYTES
	bdnz	1b
	sync				/* wait for the dcbi's to take effect */
	blr

/*
 * Flush a particular page from the data cache to RAM.
 * Note: this is necessary because the instruction cache does *not*
 * snoop from the data cache.
 * This is a no-op on the 601 which has a unified cache.
 *
 *	void __flush_dcache_icache(void *page)
 */
_GLOBAL(__flush_dcache_icache)
BEGIN_FTR_SECTION
	blr					/* for 601, do nothing */
END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
	rlwinm	r3,r3,0,0,19			/* Get page base address */
	li	r4,4096/L1_CACHE_BYTES	/* Number of lines in a page */
	mtctr	r4
	mr	r6,r3
0:	dcbst	0,r3				/* Write line to ram */
	addi	r3,r3,L1_CACHE_BYTES
	bdnz	0b
	sync
	mtctr	r4
1:	icbi	0,r6
	addi	r6,r6,L1_CACHE_BYTES
	bdnz	1b
	sync
	isync
	blr

/*
 * Flush a particular page from the data cache to RAM, identified
 * by its physical address.  We turn off the MMU so we can just use
 * the physical address (this may be a highmem page without a kernel
 * mapping).
 *
 *	void __flush_dcache_icache_phys(unsigned long physaddr)
 */
_GLOBAL(__flush_dcache_icache_phys)
BEGIN_FTR_SECTION
	blr					/* for 601, do nothing */
END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
	mfmsr	r10
	rlwinm	r0,r10,0,28,26			/* clear DR */
	mtmsr	r0
	isync
	rlwinm	r3,r3,0,0,19			/* Get page base address */
	li	r4,4096/L1_CACHE_BYTES	/* Number of lines in a page */
	mtctr	r4
	mr	r6,r3
0:	dcbst	0,r3				/* Write line to ram */
	addi	r3,r3,L1_CACHE_BYTES
	bdnz	0b
	sync
	mtctr	r4
1:	icbi	0,r6
	addi	r6,r6,L1_CACHE_BYTES
	bdnz	1b
	sync
	mtmsr	r10				/* restore DR */
	isync
	blr

/*
 * Clear pages using the dcbz instruction, which doesn't cause any
 * memory traffic (except to write out any cache lines which get
 * displaced).  This only works on cacheable memory.
 *
 * void clear_pages(void *page, int order) ;
 */
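/*
 * Illustrative sketch only: this zeroes (4096 / L1_CACHE_BYTES) << order
 * cache lines, i.e. 4096 << order bytes,
 *
 *	for (i = 0; i < (4096 / L1_CACHE_BYTES) << order; i++)
 *		dcbz(page + i * L1_CACHE_BYTES);
 *
 * using one dcbz per line (plain stw stores on the 8xx).
 */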
_GLOBAL(clear_pages)
	li	r0,4096/L1_CACHE_BYTES
	slw	r0,r0,r4
	mtctr	r0
#ifdef CONFIG_8xx
	li	r4, 0
1:	stw	r4, 0(r3)
	stw	r4, 4(r3)
	stw	r4, 8(r3)
	stw	r4, 12(r3)
#else
1:	dcbz	0,r3
#endif
	addi	r3,r3,L1_CACHE_BYTES
	bdnz	1b
	blr

/*
 * Copy a whole page.  We use the dcbz instruction on the destination
 * to reduce memory traffic (it eliminates the unnecessary reads of
 * the destination into cache).  This requires that the destination
 * is cacheable.
 */
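/*
 * Roughly, as an illustrative sketch only (dcbt()/dcbz() stand for the
 * corresponding instructions), the non-8xx path does:
 *
 *	for (line = 0; line < 4096 / L1_CACHE_BYTES; line++) {
 *		dcbt(from + (line + MAX_COPY_PREFETCH) * L1_CACHE_BYTES);
 *		dcbz(to + line * L1_CACHE_BYTES);
 *		copy L1_CACHE_BYTES bytes, 16 at a time (COPY_16_BYTES);
 *	}
 *
 * keeping MAX_COPY_PREFETCH source lines prefetched ahead of the copy
 * and establishing each destination line with dcbz so it is never read
 * in from memory.
 */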
#define COPY_16_BYTES		\
	lwz	r6,4(r4);	\
	lwz	r7,8(r4);	\
	lwz	r8,12(r4);	\
	lwzu	r9,16(r4);	\
	stw	r6,4(r3);	\
	stw	r7,8(r3);	\
	stw	r8,12(r3);	\
	stwu	r9,16(r3)

_GLOBAL(copy_page)
	addi	r3,r3,-4
	addi	r4,r4,-4

#ifdef CONFIG_8xx
	/* don't use prefetch on 8xx */
	li	r0,4096/L1_CACHE_BYTES
	mtctr	r0
1:	COPY_16_BYTES
	bdnz	1b
	blr

#else	/* not 8xx, we can prefetch */
	li	r5,4

#if MAX_COPY_PREFETCH > 1
	li	r0,MAX_COPY_PREFETCH
	li	r11,4
	mtctr	r0
11:	dcbt	r11,r4
	addi	r11,r11,L1_CACHE_BYTES
	bdnz	11b
#else /* MAX_COPY_PREFETCH == 1 */
	dcbt	r5,r4
	li	r11,L1_CACHE_BYTES+4
#endif /* MAX_COPY_PREFETCH */
	li	r0,4096/L1_CACHE_BYTES - MAX_COPY_PREFETCH
	crclr	4*cr0+eq
2:
	mtctr	r0
1:
	dcbt	r11,r4
	dcbz	r5,r3
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
#endif
#endif
#endif
	bdnz	1b
	beqlr
	crnot	4*cr0+eq,4*cr0+eq
	li	r0,MAX_COPY_PREFETCH
	li	r11,4
	b	2b
#endif	/* CONFIG_8xx */

/*
 * void atomic_clear_mask(atomic_t mask, atomic_t *addr)
 * void atomic_set_mask(atomic_t mask, atomic_t *addr);
 */
_GLOBAL(atomic_clear_mask)
10:	lwarx	r5,0,r4
	andc	r5,r5,r3
	PPC405_ERR77(0,r4)
	stwcx.	r5,0,r4
	bne-	10b
	blr
_GLOBAL(atomic_set_mask)
10:	lwarx	r5,0,r4
	or	r5,r5,r3
	PPC405_ERR77(0,r4)
	stwcx.	r5,0,r4
	bne-	10b
	blr

/*
 * I/O string operations
 *
 * insb(port, buf, len)
 * outsb(port, buf, len)
 * insw(port, buf, len)
 * outsw(port, buf, len)
 * insl(port, buf, len)
 * outsl(port, buf, len)
 * insw_ns(port, buf, len)
 * outsw_ns(port, buf, len)
 * insl_ns(port, buf, len)
 * outsl_ns(port, buf, len)
 *
 * The *_ns versions don't do byte-swapping.
 */
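/*
 * For example, _insw() uses lhbrx so little-endian device data lands
 * in native byte order, while _insw_ns() copies halfwords verbatim.
 * Illustrative C sketch only:
 *
 *	void _insw(volatile u16 *port, u16 *buf, long count)
 *	{
 *		while (count-- > 0)
 *			*buf++ = le16_to_cpu(*port);	(lhbrx + eieio)
 *	}
 */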
_GLOBAL(_insb)
	cmpwi	0,r5,0
	mtctr	r5
	subi	r4,r4,1
	blelr-
00:	lbz	r5,0(r3)
	eieio
	stbu	r5,1(r4)
	bdnz	00b
	blr

_GLOBAL(_outsb)
	cmpwi	0,r5,0
	mtctr	r5
	subi	r4,r4,1
	blelr-
00:	lbzu	r5,1(r4)
	stb	r5,0(r3)
	eieio
	bdnz	00b
	blr

_GLOBAL(_insw)
	cmpwi	0,r5,0
	mtctr	r5
	subi	r4,r4,2
	blelr-
00:	lhbrx	r5,0,r3
	eieio
	sthu	r5,2(r4)
	bdnz	00b
	blr

_GLOBAL(_outsw)
	cmpwi	0,r5,0
	mtctr	r5
	subi	r4,r4,2
	blelr-
00:	lhzu	r5,2(r4)
	eieio
	sthbrx	r5,0,r3
	bdnz	00b
	blr

_GLOBAL(_insl)
	cmpwi	0,r5,0
	mtctr	r5
	subi	r4,r4,4
	blelr-
00:	lwbrx	r5,0,r3
	eieio
	stwu	r5,4(r4)
	bdnz	00b
	blr

_GLOBAL(_outsl)
	cmpwi	0,r5,0
	mtctr	r5
	subi	r4,r4,4
	blelr-
00:	lwzu	r5,4(r4)
	stwbrx	r5,0,r3
	eieio
	bdnz	00b
	blr

_GLOBAL(__ide_mm_insw)
_GLOBAL(_insw_ns)
	cmpwi	0,r5,0
	mtctr	r5
	subi	r4,r4,2
	blelr-
00:	lhz	r5,0(r3)
	eieio
	sthu	r5,2(r4)
	bdnz	00b
	blr

_GLOBAL(__ide_mm_outsw)
_GLOBAL(_outsw_ns)
	cmpwi	0,r5,0
	mtctr	r5
	subi	r4,r4,2
	blelr-
00:	lhzu	r5,2(r4)
	sth	r5,0(r3)
	eieio
	bdnz	00b
	blr

_GLOBAL(__ide_mm_insl)
_GLOBAL(_insl_ns)
	cmpwi	0,r5,0
	mtctr	r5
	subi	r4,r4,4
	blelr-
00:	lwz	r5,0(r3)
	eieio
	stwu	r5,4(r4)
	bdnz	00b
	blr

_GLOBAL(__ide_mm_outsl)
_GLOBAL(_outsl_ns)
	cmpwi	0,r5,0
	mtctr	r5
	subi	r4,r4,4
	blelr-
00:	lwzu	r5,4(r4)
	stw	r5,0(r3)
	eieio
	bdnz	00b
	blr

/*
 * Extended precision shifts.
 *
 * Updated to be valid for shift counts from 0 to 63 inclusive.
 * -- Gabriel
 *
 * R3/R4 has 64 bit value
 * R5    has shift count
 * result in R3/R4
 *
 *  ashrdi3: arithmetic right shift (sign propagation)
 *  lshrdi3: logical right shift
 *  ashldi3: left shift
 */
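/*
 * The trick used below, written out for the logical right shift
 * (pseudo-C sketch only, not valid ISO C for every count; it relies on
 * the PPC srw/slw semantics where a 6-bit shift amount of 32..63
 * yields zero):
 *
 *	u32 msw = v >> 32, lsw = v;
 *	u32 lo  = (lsw >> count)		count < 32 contribution
 *		| (msw << (32 - count))		bits crossing the halves
 *		| (msw >> (count - 32));	count >= 32 contribution
 *	u32 hi  = msw >> count;
 *	return ((u64)hi << 32) | lo;
 *
 * __ashldi3 mirrors this for left shifts; __ashrdi3 additionally masks
 * the (count - 32) term because sraw propagates the sign bit instead
 * of producing zero for large counts.
 */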
_GLOBAL(__ashrdi3)
	subfic	r6,r5,32
	srw	r4,r4,r5	# LSW = count > 31 ? 0 : LSW >> count
	addi	r7,r5,32	# could be xori, or addi with -32
	slw	r6,r3,r6	# t1 = count > 31 ? 0 : MSW << (32-count)
	rlwinm	r8,r7,0,32	# t3 = (count < 32) ? 32 : 0
	sraw	r7,r3,r7	# t2 = MSW >> (count-32)
	or	r4,r4,r6	# LSW |= t1
	slw	r7,r7,r8	# t2 = (count < 32) ? 0 : t2
	sraw	r3,r3,r5	# MSW = MSW >> count
	or	r4,r4,r7	# LSW |= t2
	blr

_GLOBAL(__ashldi3)
	subfic	r6,r5,32
	slw	r3,r3,r5	# MSW = count > 31 ? 0 : MSW << count
	addi	r7,r5,32	# could be xori, or addi with -32
	srw	r6,r4,r6	# t1 = count > 31 ? 0 : LSW >> (32-count)
	slw	r7,r4,r7	# t2 = count < 32 ? 0 : LSW << (count-32)
	or	r3,r3,r6	# MSW |= t1
	slw	r4,r4,r5	# LSW = LSW << count
	or	r3,r3,r7	# MSW |= t2
	blr

_GLOBAL(__lshrdi3)
	subfic	r6,r5,32
	srw	r4,r4,r5	# LSW = count > 31 ? 0 : LSW >> count
	addi	r7,r5,32	# could be xori, or addi with -32
	slw	r6,r3,r6	# t1 = count > 31 ? 0 : MSW << (32-count)
	srw	r7,r3,r7	# t2 = count < 32 ? 0 : MSW >> (count-32)
	or	r4,r4,r6	# LSW |= t1
	srw	r3,r3,r5	# MSW = MSW >> count
	or	r4,r4,r7	# LSW |= t2
	blr

_GLOBAL(abs)
	srawi	r4,r3,31
	xor	r3,r3,r4
	sub	r3,r3,r4
	blr

_GLOBAL(_get_SP)
	mr	r3,r1		/* Close enough */
	blr

/*
 * Create a kernel thread
 *   kernel_thread(fn, arg, flags)
 */
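/*
 * Illustrative sketch only: this is essentially
 *
 *	pid = clone(CLONE_VM | CLONE_UNTRACED | flags, 0);
 *	if (pid == 0) {			in the child
 *		fn(arg);
 *		_exit(0);
 *	}
 *	return pid;
 *
 * done with the sc instruction directly, keeping fn and arg in the
 * callee-saved r30/r31 across the system call.
 */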
_GLOBAL(kernel_thread)
	stwu	r1,-16(r1)
	stw	r30,8(r1)
	stw	r31,12(r1)
	mr	r30,r3		/* function */
	mr	r31,r4		/* argument */
	ori	r3,r5,CLONE_VM	/* flags */
	oris	r3,r3,CLONE_UNTRACED>>16
	li	r4,0		/* new sp (unused) */
	li	r0,__NR_clone
	sc
	cmpwi	0,r3,0		/* parent or child? */
	bne	1f		/* return if parent */
	li	r0,0		/* make top-level stack frame */
	stwu	r0,-16(r1)
	mtlr	r30		/* fn addr in lr */
	mr	r3,r31		/* load arg and call fn */
	PPC440EP_ERR42
	blrl
	li	r0,__NR_exit	/* exit if function returns */
	li	r3,0
	sc
1:	lwz	r30,8(r1)
	lwz	r31,12(r1)
	addi	r1,r1,16
	blr

_GLOBAL(execve)
	li	r0,__NR_execve
	sc
	bnslr
	neg	r3,r3
	blr

/*
 * This routine is just here to keep GCC happy - sigh...
 */
_GLOBAL(__main)
	blr
