xref: /titanic_41/usr/src/uts/intel/ia32/ml/i86_subr.s (revision 05b96de272314c66a0e27fe7df41c9a9e1038c5a)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22/*
23 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27/*
28 *  Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.
29 *  Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T
30 *    All Rights Reserved
31 */
32
33#pragma ident	"%Z%%M%	%I%	%E% SMI"
34
35/*
36 * General assembly language routines.
37 * It is the intent of this file to contain routines that are
38 * independent of the specific kernel architecture, and those that are
39 * common across kernel architectures.
40 * As architectures diverge, and implementations of specific
41 * architecture-dependent routines change, the routines should be moved
42 * from this file into the respective ../`arch -k`/subr.s file.
43 */
44
45#include <sys/asm_linkage.h>
46#include <sys/asm_misc.h>
47#include <sys/panic.h>
48#include <sys/ontrap.h>
49#include <sys/regset.h>
50#include <sys/privregs.h>
51#include <sys/reboot.h>
52#include <sys/psw.h>
53#include <sys/x86_archext.h>
54
55#if defined(__lint)
56#include <sys/types.h>
57#include <sys/systm.h>
58#include <sys/thread.h>
59#include <sys/archsystm.h>
60#include <sys/byteorder.h>
61#include <sys/dtrace.h>
62#else	/* __lint */
63#include "assym.h"
64#endif	/* __lint */
65#include <sys/dditypes.h>
66
67/*
68 * on_fault()
69 * Catch lofault faults. Like setjmp except it returns one
70 * if code following causes uncorrectable fault. Turned off
71 * by calling no_fault().
72 */
73
#if defined(__lint)

/* ARGSUSED */
int
on_fault(label_t *ljb)
{
	return (0);
}

void
no_fault(void)
{
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * int on_fault(label_t *ljb)		ljb arrives in %rdi
	 *
	 * Record ljb in curthread->t_onfault and the address of catch_fault
	 * in curthread->t_lofault, then tail-jump to setjmp() with %rdi
	 * untouched so setjmp() operates on the same buffer.
	 */
	ENTRY(on_fault)
	movq	%gs:CPU_THREAD, %rsi		/* %rsi = curthread */
	movq	%rdi, T_ONFAULT(%rsi)		/* t_onfault = ljb */
	leaq	catch_fault(%rip), %rdx
	movq	%rdx, T_LOFAULT(%rsi)		/* t_lofault = catch_fault */
	jmp	setjmp				/* tail call: setjmp(ljb) */

	/*
	 * Address stored in t_lofault above.  Clears both per-thread
	 * fields and longjmp()s through the buffer that was registered.
	 */
catch_fault:
	movq	%gs:CPU_THREAD, %rsi		/* %rsi = curthread */
	xorl	%eax, %eax			/* %rax = 0 */
	movq	T_ONFAULT(%rsi), %rdi		/* %rdi = registered ljb */
	movq	%rax, T_ONFAULT(%rsi)		/* t_onfault = 0 */
	movq	%rax, T_LOFAULT(%rsi)		/* t_lofault = 0 */
	jmp	longjmp				/* tail call: longjmp(ljb) */
	SET_SIZE(on_fault)

	/*
	 * void no_fault(void)
	 *
	 * Zero curthread->t_onfault and curthread->t_lofault.
	 */
	ENTRY(no_fault)
	xorl	%eax, %eax			/* %rax = 0 */
	movq	%gs:CPU_THREAD, %rsi		/* %rsi = curthread */
	movq	%rax, T_ONFAULT(%rsi)
	movq	%rax, T_LOFAULT(%rsi)
	ret
	SET_SIZE(no_fault)

#elif defined(__i386)

	/*
	 * int on_fault(label_t *ljb)		ljb is at 4(%esp)
	 *
	 * Same as the amd64 flavour; the argument stays on the stack, so
	 * the tail-jump to setjmp() sees it in the usual place.
	 */
	ENTRY(on_fault)
	movl	4(%esp), %eax			/* %eax = ljb */
	movl	%gs:CPU_THREAD, %edx		/* %edx = curthread */
	movl	%eax, T_ONFAULT(%edx)		/* t_onfault = ljb */
	leal	catch_fault, %ecx
	movl	%ecx, T_LOFAULT(%edx)		/* t_lofault = catch_fault */
	jmp	setjmp				/* tail call: setjmp(ljb) */

catch_fault:
	movl	%gs:CPU_THREAD, %edx		/* %edx = curthread */
	movl	T_ONFAULT(%edx), %ecx		/* %ecx = registered ljb */
	xorl	%eax, %eax
	movl	%eax, T_ONFAULT(%edx)		/* t_onfault = 0 */
	movl	%eax, T_LOFAULT(%edx)		/* t_lofault = 0 */
	pushl	%ecx				/* argument for longjmp */
	call	longjmp				/* resumes via the saved pc */
	SET_SIZE(on_fault)

	ENTRY(no_fault)
	xorl	%eax, %eax
	movl	%gs:CPU_THREAD, %edx		/* %edx = curthread */
	movl	%eax, T_ONFAULT(%edx)
	movl	%eax, T_LOFAULT(%edx)
	ret
	SET_SIZE(no_fault)

#endif	/* __i386 */
#endif	/* __lint */
143
144/*
145 * Default trampoline code for on_trap() (see <sys/ontrap.h>).  We just
146 * do a longjmp(&curthread->t_ontrap->ot_jmpbuf) if this is ever called.
147 */
148
#if defined(__lint)

void
on_trap_trampoline(void)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void on_trap_trampoline(void)
	 *
	 * Computes &curthread->t_ontrap->ot_jmpbuf and tail-jumps to
	 * longjmp() with that address as its argument.
	 */
	ENTRY(on_trap_trampoline)
	movq	%gs:CPU_THREAD, %rsi		/* %rsi = curthread */
	movq	T_ONTRAP(%rsi), %rdi		/* %rdi = curthread->t_ontrap */
	addq	$OT_JMPBUF, %rdi		/* %rdi = &t_ontrap->ot_jmpbuf */
	jmp	longjmp
	SET_SIZE(on_trap_trampoline)

#elif defined(__i386)

	/*
	 * Same as above, with the jump buffer address passed on the stack.
	 */
	ENTRY(on_trap_trampoline)
	movl	%gs:CPU_THREAD, %eax		/* %eax = curthread */
	movl	T_ONTRAP(%eax), %eax		/* %eax = curthread->t_ontrap */
	addl	$OT_JMPBUF, %eax		/* %eax = &t_ontrap->ot_jmpbuf */
	pushl	%eax				/* argument for longjmp */
	call	longjmp				/* resumes via the saved pc */
	SET_SIZE(on_trap_trampoline)

#endif	/* __i386 */
#endif	/* __lint */
178
179/*
180 * Push a new element on to the t_ontrap stack.  Refer to <sys/ontrap.h> for
181 * more information about the on_trap() mechanism.  If the on_trap_data is the
182 * same as the topmost stack element, we just modify that element.
183 */
#if defined(__lint)

/*ARGSUSED*/
int
on_trap(on_trap_data_t *otp, uint_t prot)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * int on_trap(on_trap_data_t *otp, uint_t prot)
	 *	%rdi = otp, %esi = prot
	 *
	 * Initializes *otp, links it on curthread->t_ontrap unless it is
	 * already the topmost element, then tail-jumps to setjmp() on
	 * &otp->ot_jmpbuf.
	 */
	ENTRY(on_trap)
	movw	%si, OT_PROT(%rdi)		/* ot_prot = prot (16 bits) */
	movw	$0, OT_TRAP(%rdi)		/* ot_trap = 0 */
	leaq	on_trap_trampoline(%rip), %rdx	/* rdx = &on_trap_trampoline */
	movq	%rdx, OT_TRAMPOLINE(%rdi)	/* ot_trampoline = rdx */
	xorl	%ecx, %ecx			/* %rcx = 0 */
	movq	%rcx, OT_HANDLE(%rdi)		/* ot_handle = NULL */
	movq	%rcx, OT_PAD1(%rdi)		/* ot_pad1 = NULL */
	movq	%gs:CPU_THREAD, %rdx		/* rdx = curthread */
	movq	T_ONTRAP(%rdx), %rcx		/* rcx = curthread->t_ontrap */
	cmpq	%rdi, %rcx			/* if (otp == %rcx)	*/
	je	0f				/*	don't modify t_ontrap */

	movq	%rcx, OT_PREV(%rdi)		/* ot_prev = t_ontrap */
	movq	%rdi, T_ONTRAP(%rdx)		/* curthread->t_ontrap = otp */

0:	addq	$OT_JMPBUF, %rdi		/* &ot_jmpbuf */
	jmp	setjmp				/* let setjmp do the rest */
	SET_SIZE(on_trap)

#elif defined(__i386)

	/*
	 * int on_trap(on_trap_data_t *otp, uint_t prot)
	 *	4(%esp) = otp, 8(%esp) = prot
	 *
	 * Same logic as the amd64 flavour.  The first stack argument is
	 * overwritten with &otp->ot_jmpbuf so that the tail-jump to
	 * setjmp() finds its argument at 4(%esp).
	 */
	ENTRY(on_trap)
	movl	4(%esp), %eax			/* %eax = otp */
	movl	8(%esp), %edx			/* %edx = prot */

	movw	%dx, OT_PROT(%eax)		/* ot_prot = prot (16 bits) */
	movw	$0, OT_TRAP(%eax)		/* ot_trap = 0 */
	leal	on_trap_trampoline, %edx	/* %edx = &on_trap_trampoline */
	movl	%edx, OT_TRAMPOLINE(%eax)	/* ot_trampoline = %edx */
	movl	$0, OT_HANDLE(%eax)		/* ot_handle = NULL */
	movl	$0, OT_PAD1(%eax)		/* ot_pad1 = NULL */
	movl	%gs:CPU_THREAD, %edx		/* %edx = curthread */
	movl	T_ONTRAP(%edx), %ecx		/* %ecx = curthread->t_ontrap */
	cmpl	%eax, %ecx			/* if (otp == %ecx) */
	je	0f				/*    don't modify t_ontrap */

	movl	%ecx, OT_PREV(%eax)		/* ot_prev = t_ontrap */
	movl	%eax, T_ONTRAP(%edx)		/* curthread->t_ontrap = otp */

0:	addl	$OT_JMPBUF, %eax		/* %eax = &ot_jmpbuf */
	movl	%eax, 4(%esp)			/* put %eax back on the stack */
	jmp	setjmp				/* let setjmp do the rest */
	SET_SIZE(on_trap)

#endif	/* __i386 */
#endif	/* __lint */
242
243/*
244 * Setjmp and longjmp implement non-local gotos using state vectors
245 * type label_t.
246 */
247
#if defined(__lint)

/* ARGSUSED */
int
setjmp(label_t *lp)
{
	return (0);
}

/* ARGSUSED */
void
longjmp(label_t *lp)
{
}

#else	/* __lint */

#if LABEL_PC != 0
#error LABEL_PC MUST be defined as 0 for setjmp/longjmp to work as coded
#endif	/* LABEL_PC != 0 */

#if defined(__amd64)

	/*
	 * int setjmp(label_t *lp)		lp arrives in %rdi
	 *
	 * Saves the return address, %rsp, %rbp, %rbx and %r12-%r15 in *lp
	 * and returns 0.
	 */
	ENTRY(setjmp)
	movq	(%rsp), %rdx		/* %rdx = our return address */
	movq	%rdx, (%rdi)		/* pc slot; LABEL_PC is 0 */
	movq	%rsp, LABEL_SP(%rdi)
	movq	%rbp, LABEL_RBP(%rdi)
	movq	%rbx, LABEL_RBX(%rdi)
	movq	%r12, LABEL_R12(%rdi)
	movq	%r13, LABEL_R13(%rdi)
	movq	%r14, LABEL_R14(%rdi)
	movq	%r15, LABEL_R15(%rdi)
	xorl	%eax, %eax		/* return (0) */
	ret
	SET_SIZE(setjmp)

	/*
	 * void longjmp(label_t *lp)		lp arrives in %rdi
	 *
	 * Reloads the registers saved by setjmp(), plants the saved pc in
	 * the return-address slot of the restored stack and returns 1
	 * to that pc.
	 */
	ENTRY(longjmp)
	movq	(%rdi), %rdx		/* %rdx = saved pc; LABEL_PC is 0 */
	movq	LABEL_SP(%rdi), %rsp
	movq	LABEL_RBP(%rdi), %rbp
	movq	LABEL_RBX(%rdi), %rbx
	movq	LABEL_R12(%rdi), %r12
	movq	LABEL_R13(%rdi), %r13
	movq	LABEL_R14(%rdi), %r14
	movq	LABEL_R15(%rdi), %r15
	movq	%rdx, (%rsp)		/* "ret" below goes to the saved pc */
	movl	$1, %eax		/* return (1) */
	ret
	SET_SIZE(longjmp)

#elif defined(__i386)

	/*
	 * int setjmp(label_t *lp)		lp is at 4(%esp)
	 *
	 * Saves the return address, %esp, %ebp, %ebx, %esi and %edi in *lp
	 * and returns 0.
	 */
	ENTRY(setjmp)
	movl	4(%esp), %edx		/* %edx = lp */
	movl	(%esp), %ecx		/* %ecx = our return address */
	movl	%ecx, (%edx)		/* pc slot; LABEL_PC is 0 */
	movl	%esp, 4(%edx)		/* stack pointer slot */
	movl	%ebp, LABEL_EBP(%edx)
	movl	%ebx, LABEL_EBX(%edx)
	movl	%esi, LABEL_ESI(%edx)
	movl	%edi, LABEL_EDI(%edx)
	xorl	%eax, %eax		/* return (0) */
	ret
	SET_SIZE(setjmp)

	/*
	 * void longjmp(label_t *lp)		lp is at 4(%esp)
	 *
	 * Reloads the registers saved by setjmp(), discards the return
	 * address slot on the restored stack and jumps to the saved pc
	 * with %eax == 1.
	 */
	ENTRY(longjmp)
	movl	4(%esp), %edx		/* %edx = lp */
	movl	(%edx), %ecx		/* %ecx = saved pc; LABEL_PC is 0 */
	movl	LABEL_EBP(%edx), %ebp
	movl	LABEL_EBX(%edx), %ebx
	movl	LABEL_ESI(%edx), %esi
	movl	LABEL_EDI(%edx), %edi
	movl	4(%edx), %esp		/* saved stack pointer */
	addl	$4, %esp		/* pop the return address slot */
	movl	$1, %eax		/* return (1) */
	jmp	*%ecx			/* resume at the saved pc */
	SET_SIZE(longjmp)

#endif	/* __i386 */
#endif	/* __lint */
327
328/*
329 * if a() calls b() calls caller(),
330 * caller() returns return address in a().
331 * (Note: We assume a() and b() are C routines which do the normal entry/exit
332 *  sequence.)
333 */
334
#if defined(__lint)

caddr_t
caller(void)
{
	return (0);
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * caddr_t caller(void)
	 *
	 * This routine builds no frame, so the frame pointer still belongs
	 * to b(); the word just above it is b()'s return address (assuming
	 * b() performed the standard frame setup).
	 */
	ENTRY(caller)
	movq	8(%rbp), %rax		/* return pc stored above b()'s %rbp */
	ret
	SET_SIZE(caller)

#elif defined(__i386)

	ENTRY(caller)
	movl	4(%ebp), %eax		/* return pc stored above b()'s %ebp */
	ret
	SET_SIZE(caller)

#endif	/* __i386 */
#endif	/* __lint */
359
360/*
361 * if a() calls callee(), callee() returns the
362 * return address in a();
363 */
364
#if defined(__lint)

caddr_t
callee(void)
{
	return (0);
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * caddr_t callee(void)
	 *
	 * Nothing has been pushed since the call, so the top of the stack
	 * is our own return address.
	 */
	ENTRY(callee)
	movq	(%rsp), %rax		/* %rax = our return address */
	ret
	SET_SIZE(callee)

#elif defined(__i386)

	ENTRY(callee)
	movl	(%esp), %eax		/* %eax = our return address */
	ret
	SET_SIZE(callee)

#endif	/* __i386 */
#endif	/* __lint */
389
390/*
391 * return the current frame pointer
392 */
393
#if defined(__lint)

greg_t
getfp(void)
{
	return (0);
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * greg_t getfp(void)
	 *
	 * Returns the value %rbp holds on entry; no frame is built here.
	 */
	ENTRY(getfp)
	movq	%rbp, %rax		/* return (%rbp) */
	ret
	SET_SIZE(getfp)

#elif defined(__i386)

	ENTRY(getfp)
	movl	%ebp, %eax		/* return (%ebp) */
	ret
	SET_SIZE(getfp)

#endif	/* __i386 */
#endif	/* __lint */
418
419/*
420 * Invalidate a single page table entry in the TLB
421 */
422
#if defined(__lint)

/* ARGSUSED */
void
mmu_tlbflush_entry(caddr_t m)
{
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void mmu_tlbflush_entry(caddr_t m)		m arrives in %rdi
	 *
	 * Issues invlpg on the address m.
	 */
	ENTRY(mmu_tlbflush_entry)
	invlpg	(%rdi)			/* invalidate the entry mapping m */
	ret
	SET_SIZE(mmu_tlbflush_entry)

#elif defined(__i386)

	ENTRY(mmu_tlbflush_entry)
	movl	4(%esp), %eax		/* %eax = m */
	invlpg	(%eax)			/* invalidate the entry mapping m */
	ret
	SET_SIZE(mmu_tlbflush_entry)

#endif	/* __i386 */
#endif	/* __lint */
449
450
451/*
452 * Get/Set the value of various control registers
453 */
454
455#if defined(__lint)
456
457ulong_t
458getcr0(void)
459{ return (0); }
460
461/* ARGSUSED */
462void
463setcr0(ulong_t value)
464{}
465
466ulong_t
467getcr2(void)
468{ return (0); }
469
470ulong_t
471getcr3(void)
472{ return (0); }
473
474/* ARGSUSED */
475void
476setcr3(ulong_t val)
477{}
478
479void
480reload_cr3(void)
481{}
482
483ulong_t
484getcr4(void)
485{ return (0); }
486
487/* ARGSUSED */
488void
489setcr4(ulong_t val)
490{}
491
492#if defined(__amd64)
493
494ulong_t
495getcr8(void)
496{ return (0); }
497
498/* ARGSUSED */
499void
500setcr8(ulong_t val)
501{}
502
503#endif	/* __amd64 */
504
505#else	/* __lint */
506
507#if defined(__amd64)
508
	/*
	 * amd64 control register accessors.  The getters return the
	 * register in %rax; the setters take the new value in %rdi.
	 */
	ENTRY(getcr0)
	movq	%cr0, %rax		/* return (%cr0) */
	ret
	SET_SIZE(getcr0)

	ENTRY(setcr0)
	movq	%rdi, %cr0		/* %cr0 = value */
	ret
	SET_SIZE(setcr0)

	ENTRY(getcr2)
	movq	%cr2, %rax		/* return (%cr2) */
	ret
	SET_SIZE(getcr2)

	ENTRY(getcr3)
	movq	%cr3, %rax		/* return (%cr3) */
	ret
	SET_SIZE(getcr3)

	ENTRY(setcr3)
	movq	%rdi, %cr3		/* %cr3 = val */
	ret
	SET_SIZE(setcr3)

	/* Write %cr3 back with the value it already holds. */
	ENTRY(reload_cr3)
	movq	%cr3, %rdi
	movq	%rdi, %cr3
	ret
	SET_SIZE(reload_cr3)

	ENTRY(getcr4)
	movq	%cr4, %rax		/* return (%cr4) */
	ret
	SET_SIZE(getcr4)

	ENTRY(setcr4)
	movq	%rdi, %cr4		/* %cr4 = val */
	ret
	SET_SIZE(setcr4)

	ENTRY(getcr8)
	movq	%cr8, %rax		/* return (%cr8) */
	ret
	SET_SIZE(getcr8)

	ENTRY(setcr8)
	movq	%rdi, %cr8		/* %cr8 = val */
	ret
	SET_SIZE(setcr8)
559
560#elif defined(__i386)
561
562        ENTRY(getcr0)
563        movl    %cr0, %eax
564        ret
565	SET_SIZE(getcr0)
566
567        ENTRY(setcr0)
568        movl    4(%esp), %eax
569        movl    %eax, %cr0
570        ret
571	SET_SIZE(setcr0)
572
573        ENTRY(getcr2)
574        movl    %cr2, %eax
575        ret
576	SET_SIZE(getcr2)
577
578	ENTRY(getcr3)
579	movl    %cr3, %eax
580	ret
581	SET_SIZE(getcr3)
582
583        ENTRY(setcr3)
584        movl    4(%esp), %eax
585        movl    %eax, %cr3
586        ret
587	SET_SIZE(setcr3)
588
589	ENTRY(reload_cr3)
590	movl    %cr3, %eax
591	movl    %eax, %cr3
592	ret
593	SET_SIZE(reload_cr3)
594
595	ENTRY(getcr4)
596	movl    %cr4, %eax
597	ret
598	SET_SIZE(getcr4)
599
600        ENTRY(setcr4)
601        movl    4(%esp), %eax
602        movl    %eax, %cr4
603        ret
604	SET_SIZE(setcr4)
605
606#endif	/* __i386 */
607#endif	/* __lint */
608
#if defined(__lint)

/*ARGSUSED*/
uint32_t
__cpuid_insn(struct cpuid_regs *regs)
{
	return (0);
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * uint32_t __cpuid_insn(struct cpuid_regs *regs)	regs in %rdi
	 *
	 * Loads %eax/%ebx/%ecx/%edx from the four 32-bit words at *regs,
	 * executes cpuid, and stores the four results back in the same
	 * order.  %rbx, %rcx and %rdx are parked in %r8, %r9 and %r11
	 * around the instruction and put back before returning; %eax
	 * still holds the cpuid output on return.
	 */
	ENTRY(__cpuid_insn)
	movq	%rdx, %r11		/* park %rdx */
	movq	%rcx, %r9		/* park %rcx */
	movq	%rbx, %r8		/* park %rbx */
	movl	0xc(%rdi), %edx		/* %edx = regs->cp_edx */
	movl	0x8(%rdi), %ecx		/* %ecx = regs->cp_ecx */
	movl	0x4(%rdi), %ebx		/* %ebx = regs->cp_ebx */
	movl	(%rdi), %eax		/* %eax = regs->cp_eax */
	cpuid
	movl	%edx, 0xc(%rdi)		/* regs->cp_edx = %edx */
	movl	%ecx, 0x8(%rdi)		/* regs->cp_ecx = %ecx */
	movl	%ebx, 0x4(%rdi)		/* regs->cp_ebx = %ebx */
	movl	%eax, (%rdi)		/* regs->cp_eax = %eax */
	movq	%r11, %rdx		/* put %rdx back */
	movq	%r9, %rcx		/* put %rcx back */
	movq	%r8, %rbx		/* put %rbx back */
	ret
	SET_SIZE(__cpuid_insn)

#elif defined(__i386)

	/*
	 * uint32_t __cpuid_insn(struct cpuid_regs *regs)   regs at 4(%esp)
	 *
	 * Same operation as the amd64 flavour.  %ebp is borrowed as the
	 * pointer to *regs; %ebp, %ebx, %ecx and %edx are saved on the
	 * stack and restored before returning.
	 */
	ENTRY(__cpuid_insn)
	pushl	%ebp
	movl	0x8(%esp), %ebp		/* %ebp = regs (4 + one push) */
	pushl	%ebx
	pushl	%ecx
	pushl	%edx
	movl	0xc(%ebp), %edx		/* %edx = regs->cp_edx */
	movl	0x8(%ebp), %ecx		/* %ecx = regs->cp_ecx */
	movl	0x4(%ebp), %ebx		/* %ebx = regs->cp_ebx */
	movl	(%ebp), %eax		/* %eax = regs->cp_eax */
	cpuid
	movl	%edx, 0xc(%ebp)		/* regs->cp_edx = %edx */
	movl	%ecx, 0x8(%ebp)		/* regs->cp_ecx = %ecx */
	movl	%ebx, 0x4(%ebp)		/* regs->cp_ebx = %ebx */
	movl	%eax, (%ebp)		/* regs->cp_eax = %eax */
	popl	%edx
	popl	%ecx
	popl	%ebx
	popl	%ebp
	ret
	SET_SIZE(__cpuid_insn)

#endif	/* __i386 */
#endif	/* __lint */
665
666/*
667 * Insert entryp after predp in a doubly linked list.
668 */
669
#if defined(__lint)

/*ARGSUSED*/
void
_insque(caddr_t entryp, caddr_t predp)
{
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void _insque(caddr_t entryp, caddr_t predp)
	 *	%rdi = entryp, %rsi = predp
	 *
	 * Each element starts with a forward pointer at offset 0 followed
	 * by a back pointer at offset CPTRSIZE.
	 */
	ENTRY(_insque)
	movq	(%rsi), %rax		/* %rax = predp->forw (old successor) */
	movq	%rax, (%rdi)		/* entryp->forw = old successor	*/
	movq	%rsi, CPTRSIZE(%rdi)	/* entryp->back = predp		*/
	movq	%rdi, (%rsi)		/* predp->forw = entryp		*/
	movq	%rdi, CPTRSIZE(%rax)	/* old successor->back = entryp	*/
	ret
	SET_SIZE(_insque)

#elif defined(__i386)

	/*
	 * void _insque(caddr_t entryp, caddr_t predp)
	 *	4(%esp) = entryp, 8(%esp) = predp
	 */
	ENTRY(_insque)
	movl	4(%esp), %ecx		/* %ecx = entryp */
	movl	8(%esp), %edx		/* %edx = predp */
	movl	(%edx), %eax		/* %eax = predp->forw (old successor) */
	movl	%eax, (%ecx)		/* entryp->forw = old successor	*/
	movl	%edx, CPTRSIZE(%ecx)	/* entryp->back = predp		*/
	movl	%ecx, (%edx)		/* predp->forw = entryp		*/
	movl	%ecx, CPTRSIZE(%eax)	/* old successor->back = entryp	*/
	ret
	SET_SIZE(_insque)

#endif	/* __i386 */
#endif	/* __lint */
705
706/*
707 * Remove entryp from a doubly linked list
708 */
709
#if defined(__lint)

/*ARGSUSED*/
void
_remque(caddr_t entryp)
{
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void _remque(caddr_t entryp)		entryp arrives in %rdi
	 *
	 * Points the neighbours of entryp at each other.  The pointers
	 * inside entryp itself are left as they were.
	 */
	ENTRY(_remque)
	movq	CPTRSIZE(%rdi), %rdx	/* %rdx = entryp->back */
	movq	(%rdi), %rax		/* %rax = entryp->forw */
	movq	%rax, (%rdx)		/* back->forw = forw */
	movq	%rdx, CPTRSIZE(%rax)	/* forw->back = back */
	ret
	SET_SIZE(_remque)

#elif defined(__i386)

	/*
	 * void _remque(caddr_t entryp)		entryp is at 4(%esp)
	 */
	ENTRY(_remque)
	movl	4(%esp), %ecx		/* %ecx = entryp */
	movl	CPTRSIZE(%ecx), %edx	/* %edx = entryp->back */
	movl	(%ecx), %eax		/* %eax = entryp->forw */
	movl	%eax, (%edx)		/* back->forw = forw */
	movl	%edx, CPTRSIZE(%eax)	/* forw->back = back */
	ret
	SET_SIZE(_remque)

#endif	/* __i386 */
#endif	/* __lint */
742
743/*
 * Returns the number of bytes in the string argument that precede
 * its terminating NUL byte.
746 */
747
#if defined(__lint)

/* ARGSUSED */
size_t
strlen(const char *str)
{
	return (0);
}

#else	/* __lint */

#if defined(__amd64)

/*
 * The amd64 version is little more than a hand translation of the C
 * routine below.  Either it should simply become C, or it should be made
 * fast enough to justify staying in assembler.
 *
 * size_t
 * strlen(const char *s)
 * {
 *	const char *s0;
 * #if defined(DEBUG)
 *	if ((uintptr_t)s < KERNELBASE)
 *		panic(.str_panic_msg);
 * #endif
 *	for (s0 = s; *s; s++)
 *		;
 *	return (s - s0);
 * }
 */

	ENTRY(strlen)
#ifdef DEBUG
	/* DEBUG kernels panic on a string address below kernelbase. */
	movq	kernelbase(%rip), %rax
	cmpq	%rax, %rdi
	jae	str_valid
	pushq	%rbp
	movq	%rsp, %rbp
	leaq	.str_panic_msg(%rip), %rdi
	xorl	%eax, %eax
	call	panic
#endif	/* DEBUG */
str_valid:
	movq	%rdi, %rax		/* %rax = s0, start of string */
	cmpb	$0, (%rdi)		/* empty string? */
	je	.null_found
	.align	4
.strlen_next:
	incq	%rdi			/* s++ */
	cmpb	$0, (%rdi)
	jne	.strlen_next		/* until *s == 0 */
.null_found:
	subq	%rax, %rdi		/* %rdi = s - s0 */
	movq	%rdi, %rax		/* return (s - s0) */
	ret
	SET_SIZE(strlen)

#elif defined(__i386)

	/*
	 * The i386 version scans a byte at a time until the pointer is
	 * 4-byte aligned, then a 32-bit word at a time.  A word holds no
	 * zero byte exactly when
	 *	(((w & 0x7f7f7f7f) + 0x7f7f7f7f) | w) & 0x80808080
	 * equals 0x80808080.  Once a word fails that test the byte loop
	 * locates the terminator inside it.
	 */
	ENTRY(strlen)
#ifdef DEBUG
	/* DEBUG kernels panic on a string address below kernelbase. */
	movl	kernelbase, %eax
	cmpl	%eax, 4(%esp)
	jae	str_valid
	pushl	%ebp
	movl	%esp, %ebp
	pushl	$.str_panic_msg
	call	panic
#endif /* DEBUG */

str_valid:
	movl	4(%esp), %eax		/* %eax = cursor, starts at str */
	testl	$3, %eax
	jnz	.byte_scan		/* unaligned: go byte by byte */
	.align	4
.word_scan:
	movl	(%eax), %edx		/* %edx = w, the next aligned word */
	addl	$4, %eax		/* advance past it */
	movl	$0x7f7f7f7f, %ecx
	andl	%edx, %ecx		/* %ecx = w & 0x7f7f7f7f */
	addl	$0x7f7f7f7f, %ecx	/* bit 7 set where low 7 bits != 0 */
	orl	%edx, %ecx		/* ... or where bit 7 of w is set */
	andl	$0x80808080, %ecx	/* keep one flag bit per byte */
	cmpl	$0x80808080, %ecx	/* all four bytes non-zero? */
	je	.word_scan		/* yes: try the next word */
	subl	$4, %eax		/* no: back up to this word */
.byte_scan:
	cmpb	$0, (%eax)		/* terminator at the cursor? */
	je	.null_found
	incl	%eax			/* next byte */
	testl	$3, %eax
	jnz	.byte_scan		/* still unaligned */
	jmp	.word_scan		/* aligned again: back to words */
	.align	4
.null_found:
	subl	4(%esp), %eax		/* return (cursor - str) */
	ret
	SET_SIZE(strlen)

#endif	/* __i386 */

#ifdef DEBUG
	.text
.str_panic_msg:
	.string "strlen: argument below kernelbase"
#endif /* DEBUG */

#endif	/* __lint */
855
856	/*
	 * Berkeley 4.3 introduced symbolically named interrupt levels
	 * as a way to deal with priority in a machine independent fashion.
859	 * Numbered priorities are machine specific, and should be
860	 * discouraged where possible.
861	 *
862	 * Note, for the machine specific priorities there are
863	 * examples listed for devices that use a particular priority.
864	 * It should not be construed that all devices of that
	 * type should be at that priority.  They merely show where
	 * the current devices fit into the priority scheme based
	 * upon time criticality.
868	 *
869	 * The underlying assumption of these assignments is that
870	 * IPL 10 is the highest level from which a device
871	 * routine can call wakeup.  Devices that interrupt from higher
872	 * levels are restricted in what they can do.  If they need
873	 * kernels services they should schedule a routine at a lower
874	 * level (via software interrupt) to do the required
875	 * processing.
876	 *
877	 * Examples of this higher usage:
878	 *	Level	Usage
879	 *	14	Profiling clock (and PROM uart polling clock)
880	 *	12	Serial ports
881	 *
882	 * The serial ports request lower level processing on level 6.
883	 *
884	 * Also, almost all splN routines (where N is a number or a
885	 * mnemonic) will do a RAISE(), on the assumption that they are
886	 * never used to lower our priority.
887	 * The exceptions are:
888	 *	spl8()		Because you can't be above 15 to begin with!
889	 *	splzs()		Because this is used at boot time to lower our
890	 *			priority, to allow the PROM to poll the uart.
891	 *	spl0()		Used to lower priority to 0.
892	 */
893
894#if defined(__lint)
895
/* lint-only stubs for the spl entry points; each reports level 0. */

int
spl0(void)
{
	return (0);
}

int
spl6(void)
{
	return (0);
}

int
spl7(void)
{
	return (0);
}

int
spl8(void)
{
	return (0);
}

int
splhigh(void)
{
	return (0);
}

int
splhi(void)
{
	return (0);
}

int
splzs(void)
{
	return (0);
}
903
904#else	/* __lint */
905
/*
 * Accessors for the current interrupt priority level, cpu->cpu_m.cpu_pri.
 * The _NOGS forms take the cpu pointer in a register argument; the plain
 * forms address the field through %gs.
 */

/* reg = cpup->cpu_m.cpu_pri; */
#define	GETIPL_NOGS(reg, cpup)	movl	CPU_PRI(cpup), reg;

/* cpup->cpu_m.cpu_pri = val; */
#define	SETIPL_NOGS(val, cpup)	movl	val, CPU_PRI(cpup);

/* reg = cpu->cpu_m.cpu_pri; */
#define	GETIPL(reg)		movl	%gs:CPU_PRI, reg;

/* cpu->cpu_m.cpu_pri = val; */
#define	SETIPL(val)		movl	val, %gs:CPU_PRI;
921
922/*
923 * Macro to raise processor priority level.
924 * Avoid dropping processor priority if already at high level.
925 * Also avoid going below CPU->cpu_base_spl, which could've just been set by
926 * a higher-level interrupt thread that just blocked.
927 */
#if defined(__amd64)

/*
 * RAISE(level): with interrupts off, compare level against the current
 * ipl.  If level is greater, jump to spl with level in %edi and the cpu
 * pointer in %rcx; otherwise jump to setsplhisti with the current ipl
 * in %eax.
 */
#define	RAISE(level) \
	cli;				\
	LOADCPU(%rcx);			\
	GETIPL_NOGS(%eax, %rcx);	\
	movl	$/**/level, %edi;	\
	cmpl 	%eax, %edi;		\
	jg	spl;			\
	jmp	setsplhisti

#elif defined(__i386)

/*
 * RAISE(level): as for amd64, with level handed to spl in %edx and the
 * cpu pointer in %ecx.
 */
#define	RAISE(level) \
	cli;				\
	LOADCPU(%ecx);			\
	GETIPL_NOGS(%eax, %ecx);	\
	movl	$/**/level, %edx;	\
	cmpl 	%eax, %edx;		\
	jg	spl;			\
	jmp	setsplhisti

#endif	/* __i386 */

/*
 * Macro to set the priority to a specified level.
 * Avoid dropping the priority below CPU->cpu_base_spl.
 * (The clamp itself is done by spl, which this macro jumps to.)
 */
#if defined(__amd64)

#define	SETPRI(level) \
	cli;				\
	LOADCPU(%rcx);			\
	movl	$/**/level, %edi;	\
	jmp	spl

#elif defined(__i386)

#define	SETPRI(level) \
	cli;				\
	LOADCPU(%ecx);			\
	movl	$/**/level, %edx;	\
	jmp	spl

#endif	/* __i386 */
973
	/*
	 * locks out all interrupts, including memory errors
	 * (unconditionally sets the priority to 15 via SETPRI)
	 */
	ENTRY(spl8)
	SETPRI(15)
	SET_SIZE(spl8)

	/*
	 * just below the level that profiling runs
	 * (RAISE only changes the priority if 13 is above the current one)
	 */
	ENTRY(spl7)
	RAISE(13)
	SET_SIZE(spl7)

	/* sun specific - highest priority onboard serial i/o asy ports */
	ENTRY(splzs)
	SETPRI(12)	/* Can't be a RAISE, as it's used to lower us */
	SET_SIZE(splzs)
988
989	/*
990	 * should lock out clocks and all interrupts,
991	 * as you can see, there are exceptions
992	 */
993
994#if defined(__amd64)
995
996	.align	16
997	ENTRY(splhi)
998	ALTENTRY(splhigh)
999	ALTENTRY(spl6)
1000	ALTENTRY(i_ddi_splhigh)
1001	cli
1002	LOADCPU(%rcx)
1003	movl	$DISP_LEVEL, %edi
1004	movl	CPU_PRI(%rcx), %eax
1005	cmpl	%eax, %edi
1006	jle	setsplhisti
1007	SETIPL_NOGS(%edi, %rcx)
1008	/*
1009	 * If we aren't using cr8 to control ipl then we patch this
1010	 * with a jump to slow_setsplhi
1011	 */
1012	ALTENTRY(setsplhi_patch)
1013	movq	CPU_PRI_DATA(%rcx), %r11 /* get pri data ptr */
1014	movzb	(%r11, %rdi, 1), %rdx	/* get apic mask for this ipl */
1015	movq	%rdx, %cr8		/* set new apic priority */
1016	/*
1017	 * enable interrupts
1018	 */
1019setsplhisti:
1020	nop	/* patch this to a sti when a proper setspl routine appears */
1021	ret
1022
1023	ALTENTRY(slow_setsplhi)
1024	pushq	%rbp
1025	movq	%rsp, %rbp
1026	subq	$16, %rsp
1027	movl	%eax, -4(%rbp)		/* save old ipl */
1028	call	*setspl(%rip)
1029	movl	-4(%rbp), %eax		/* return old ipl */
1030	leave
1031	jmp	setsplhisti
1032
1033	SET_SIZE(i_ddi_splhigh)
1034	SET_SIZE(spl6)
1035	SET_SIZE(splhigh)
1036	SET_SIZE(splhi)
1037
1038#elif defined(__i386)
1039
1040	.align	16
1041	ENTRY(splhi)
1042	ALTENTRY(splhigh)
1043	ALTENTRY(spl6)
1044	ALTENTRY(i_ddi_splhigh)
1045	cli
1046	LOADCPU(%ecx)
1047	movl	$DISP_LEVEL, %edx
1048	movl	CPU_PRI(%ecx), %eax
1049	cmpl	%eax, %edx
1050	jle	setsplhisti
1051	SETIPL_NOGS(%edx, %ecx)		/* set new ipl */
1052
1053	pushl   %eax                    /* save old ipl */
1054	pushl	%edx			/* pass new ipl */
1055	call	*setspl
1056	popl	%ecx			/* dummy pop */
1057	popl    %eax                    /* return old ipl */
1058	/*
1059	 * enable interrupts
1060	 *
1061	 * (we patch this to an sti once a proper setspl routine
1062	 * is installed)
1063	 */
1064setsplhisti:
1065	nop	/* patch this to a sti when a proper setspl routine appears */
1066	ret
1067	SET_SIZE(i_ddi_splhigh)
1068	SET_SIZE(spl6)
1069	SET_SIZE(splhigh)
1070	SET_SIZE(splhi)
1071
1072#endif	/* __i386 */
1073
	/*
	 * allow all interrupts
	 * (requests priority 0 through SETPRI, which jumps to spl)
	 */
	ENTRY(spl0)
	SETPRI(0)
	SET_SIZE(spl0)
1078
1079#endif	/* __lint */
1080
1081/*
1082 * splr is like splx but will only raise the priority and never drop it
1083 */
#if defined(__lint)

/* ARGSUSED */
int
splr(int level)
{
	return (0);
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * int splr(int level)		level arrives in %edi
	 *
	 * If level is above the current ipl, continue at spl with %edi
	 * and %rcx (cpu pointer) set up as spl expects.  Otherwise return
	 * the current ipl, which is already in %eax.
	 */
	ENTRY(splr)
	cli
	LOADCPU(%rcx)
	GETIPL_NOGS(%eax, %rcx)		/* %eax = current ipl */
	cmpl	%eax, %edi
	jg	spl			/* level > current: go set it */
splr_setsti:
	nop	/* patch this to a sti when a proper setspl routine appears */
	ret				/* level <= current: no change */
	SET_SIZE(splr)

#elif defined(__i386)

	/*
	 * int splr(int level)		level is at 4(%esp)
	 *
	 * As above, with level handed to spl in %edx and the cpu pointer
	 * in %ecx.
	 */
	ENTRY(splr)
	cli
	LOADCPU(%ecx)
	GETIPL_NOGS(%eax, %ecx)		/* %eax = current ipl */
	movl	4(%esp), %edx		/* %edx = requested level */
	cmpl 	%eax, %edx
	jg	spl			/* level > current: go set it */
splr_setsti:
	nop	/* patch this to a sti when a proper setspl routine appears */
	ret				/* level <= current: no change */
	SET_SIZE(splr)

#endif	/* __i386 */
#endif	/* __lint */
1122
1123
1124
1125/*
1126 * splx - set PIL back to that indicated by the level passed as an argument,
1127 * or to the CPU's base priority, whichever is higher.
1128 * Needs to be fall through to spl to save cycles.
1129 * Algorithm for spl:
1130 *
1131 *      turn off interrupts
1132 *
1133 *	if (CPU->cpu_base_spl > newipl)
1134 *		newipl = CPU->cpu_base_spl;
1135 *      oldipl = CPU->cpu_pridata->c_ipl;
1136 *      CPU->cpu_pridata->c_ipl = newipl;
1137 *
 *	// indirectly call function to set spl values (usually setpicmasks)
1139 *      setspl();  // load new masks into pics
1140 *
 * Be careful not to set priority lower than CPU->cpu_base_spl,
 * even though it seems we're raising the priority, it could be set
 * higher at any time by an interrupt routine, so we must block interrupts
 * and look at CPU->cpu_base_spl
1145 */
1146#if defined(__lint)
1147
/* lint-only stub */
/* ARGSUSED */
void
splx(int level)
{
}
1152
1153#else	/* __lint */
1154
1155#if defined(__amd64)
1156
1157	ENTRY(splx)
1158	ALTENTRY(i_ddi_splx)
1159	cli				/* disable interrupts */
1160	LOADCPU(%rcx)
1161	/*FALLTHRU*/
1162	.align	4
1163spl:
1164	/*
1165	 * New priority level is in %edi, cpu struct pointer is in %rcx
1166	 */
1167	GETIPL_NOGS(%eax, %rcx)		/* get current ipl */
1168	cmpl   %edi, CPU_BASE_SPL(%rcx) /* if (base spl > new ipl) */
1169	ja     set_to_base_spl		/* then use base_spl */
1170
1171setprilev:
1172	SETIPL_NOGS(%edi, %rcx)		/* set new ipl */
1173	/*
1174	 * If we aren't using cr8 to control ipl then we patch this
1175	 * with a jump to slow_spl
1176	 */
1177	ALTENTRY(spl_patch)
1178	movq	CPU_PRI_DATA(%rcx), %r11 /* get pri data ptr */
1179	movzb	(%r11, %rdi, 1), %rdx	/* get apic mask for this ipl */
1180	movq	%rdx, %cr8		/* set new apic priority */
1181	xorl	%edx, %edx
1182	bsrl	CPU_SOFTINFO(%rcx), %edx /* fls(cpu->cpu_softinfo.st_pending) */
1183	cmpl	%edi, %edx		/* new ipl vs. st_pending */
1184	jle	setsplsti
1185
1186	pushq	%rbp
1187	movq	%rsp, %rbp
1188	/* stack now 16-byte aligned */
1189	pushq	%rax			/* save old spl */
1190	pushq	%rdi			/* save new ipl too */
1191	jmp	fakesoftint
1192
1193setsplsti:
1194	nop	/* patch this to a sti when a proper setspl routine appears */
1195	ret
1196
1197	ALTENTRY(slow_spl)
1198	pushq	%rbp
1199	movq	%rsp, %rbp
1200	/* stack now 16-byte aligned */
1201
1202	pushq	%rax			/* save old spl */
1203	pushq	%rdi			/* save new ipl too */
1204
1205	call	*setspl(%rip)
1206
1207	LOADCPU(%rcx)
1208	movl	CPU_SOFTINFO(%rcx), %eax
1209	orl	%eax, %eax
1210	jz	slow_setsplsti
1211
1212	bsrl	%eax, %edx		/* fls(cpu->cpu_softinfo.st_pending) */
1213	cmpl	0(%rsp), %edx		/* new ipl vs. st_pending */
1214	jg	fakesoftint
1215
1216	ALTENTRY(fakesoftint_return)
1217	/*
1218	 * enable interrupts
1219	 */
1220slow_setsplsti:
1221	nop	/* patch this to a sti when a proper setspl routine appears */
1222	popq	%rdi
1223	popq	%rax			/* return old ipl */
1224	leave
1225	ret
1226	SET_SIZE(fakesoftint_return)
1227
1228set_to_base_spl:
1229	movl	CPU_BASE_SPL(%rcx), %edi
1230	jmp	setprilev
1231	SET_SIZE(spl)
1232	SET_SIZE(i_ddi_splx)
1233	SET_SIZE(splx)
1234
1235#elif defined(__i386)
1236
1237	ENTRY(splx)
1238	ALTENTRY(i_ddi_splx)
1239	cli                             /* disable interrupts */
1240	LOADCPU(%ecx)
1241	movl	4(%esp), %edx		/* get new spl level */
1242	/*FALLTHRU*/
1243
1244	.align	4
1245	ALTENTRY(spl)
1246	/*
1247	 * New priority level is in %edx
1248	 * (doing this early to avoid an AGI in the next instruction)
1249	 */
1250	GETIPL_NOGS(%eax, %ecx)		/* get current ipl */
1251	cmpl	%edx, CPU_BASE_SPL(%ecx) /* if ( base spl > new ipl) */
1252	ja	set_to_base_spl		/* then use base_spl */
1253
1254setprilev:
1255	SETIPL_NOGS(%edx, %ecx)		/* set new ipl */
1256
1257	pushl   %eax                    /* save old ipl */
1258	pushl	%edx			/* pass new ipl */
1259	call	*setspl
1260
1261	LOADCPU(%ecx)
1262	movl	CPU_SOFTINFO(%ecx), %eax
1263	orl	%eax, %eax
1264	jz	setsplsti
1265
1266	/*
1267	 * Before dashing off, check that setsplsti has been patched.
1268	 */
1269	cmpl	$NOP_INSTR, setsplsti
1270	je	setsplsti
1271
1272	bsrl	%eax, %edx
1273	cmpl	0(%esp), %edx
1274	jg	fakesoftint
1275
1276	ALTENTRY(fakesoftint_return)
1277	/*
1278	 * enable interrupts
1279	 */
1280setsplsti:
1281	nop	/* patch this to a sti when a proper setspl routine appears */
1282	popl	%eax
1283	popl    %eax			/ return old ipl
1284	ret
1285	SET_SIZE(fakesoftint_return)
1286
1287set_to_base_spl:
1288	movl	CPU_BASE_SPL(%ecx), %edx
1289	jmp	setprilev
1290	SET_SIZE(spl)
1291	SET_SIZE(i_ddi_splx)
1292	SET_SIZE(splx)
1293
1294#endif	/* __i386 */
1295#endif	/* __lint */
1296
#if defined(__lint)

void
install_spl(void)
{
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void install_spl(void)
	 *
	 * With CR0_WP cleared for the duration, rewrite the "nop" patch
	 * points in the spl routines to STI_INSTR.  If intpri_use_cr8 has
	 * bit 0 clear, additionally overwrite setsplhi_patch and spl_patch
	 * with two-byte jumps to slow_setsplhi and slow_spl and zero %cr8.
	 * The original %cr0 is kept in %rdx and written back on exit.
	 */
	ENTRY_NP(install_spl)
	movq	%cr0, %rax
	movq	%rax, %rdx			/* %rdx = %cr0 to restore */
	movl	$_BITNOT(CR0_WP), %ecx
	movslq	%ecx, %rcx			/* sign-extend the mask */
	andq	%rcx, %rax			/* drop CR0_WP so stores */
	movq	%rax, %cr0			/* to text don't fault */
	jmp	1f
1:	movb	$STI_INSTR, setsplsti(%rip)
	movb	$STI_INSTR, slow_setsplsti(%rip)
	movb	$STI_INSTR, setsplhisti(%rip)
	movb	$STI_INSTR, splr_setsti(%rip)
	testl	$1, intpri_use_cr8(%rip)	/* using %cr8 for the ipl? */
	jnz	2f				/* yes, nothing more to patch */

	/*
	 * Not using %cr8: build "jmp rel8" as a 16-bit value (opcode in
	 * the low byte, displacement above it) for each patch point.
	 * The displacement is target - (patch point + 2).
	 */
	leaq	slow_setsplhi(%rip), %rax	/* jmp target */
	leaq	setsplhi_patch(%rip), %rdi	/* patch point */
	subq	%rdi, %rax			/* distance */
	subq	$2, %rax			/* less the jmp itself */
	shlq	$8, %rax			/* displacement byte */
	addq	$JMP_INSTR, %rax		/* plus opcode byte */
	movw	%ax, setsplhi_patch(%rip)	/* patch in the jmp */

	leaq	slow_spl(%rip), %rax		/* jmp target */
	leaq	spl_patch(%rip), %rdi		/* patch point */
	subq	%rdi, %rax			/* distance */
	subq	$2, %rax			/* less the jmp itself */
	shlq	$8, %rax			/* displacement byte */
	addq	$JMP_INSTR, %rax		/* plus opcode byte */
	movw	%ax, spl_patch(%rip)		/* patch in the jmp */

	/*
	 * Ensure %cr8 is zero since we aren't using it
	 */
	xorl	%eax, %eax
	movq	%rax, %cr8
2:
	movq	%rdx, %cr0			/* restore original %cr0 */
	ret
	SET_SIZE(install_spl)

#elif defined(__i386)

	/*
	 * void install_spl(void)
	 *
	 * With CR0_WP cleared for the duration, rewrite the "nop" patch
	 * points in the spl routines to STI_INSTR.  The original %cr0 is
	 * kept in %edx and written back on exit.
	 */
	ENTRY_NP(install_spl)
	movl	%cr0, %eax
	movl	%eax, %edx		/* %edx = %cr0 to restore */
	andl	$_BITNOT(CR0_WP), %eax	/* drop CR0_WP so stores to text */
	movl	%eax, %cr0		/* don't fault */
	jmp	1f
1:	movb	$STI_INSTR, setsplsti
	movb	$STI_INSTR, setsplhisti
	movb	$STI_INSTR, splr_setsti
	movl	%edx, %cr0		/* restore original %cr0 */
	ret
	SET_SIZE(install_spl)

#endif	/* __i386 */
#endif	/* __lint */
1367
1368
1369/*
1370 * Get current processor interrupt level
 *
 * int getpil(void): the level is fetched with the GETIPL macro and
 * returned in %eax.  Both architectures use the same sequence.
1371 */
1372
1373#if defined(__lint)
1374
1375int
1376getpil(void)
1377{ return (0); }
1378
1379#else	/* __lint */
1380
1381#if defined(__amd64)
1382
1383	ENTRY(getpil)
1384	GETIPL(%eax)			/* priority level into %eax */
1385	ret
1386	SET_SIZE(getpil)
1387
1388#elif defined(__i386)
1389
1390	ENTRY(getpil)
1391	GETIPL(%eax)			/* priority level into %eax */
1392	ret
1393	SET_SIZE(getpil)
1394
1395#endif	/* __i386 */
1396#endif	/* __lint */
1397
1398#if defined(__i386)
1399
1400/*
1401 * Read and write the %gs register
 *
 * getgs() returns the 16-bit selector zero-extended in %eax.
 * setgs(sel) loads %gs from the first stack argument.
 * These routines exist only in the i386 build.
1402 */
1403
1404#if defined(__lint)
1405
1406/*ARGSUSED*/
1407uint16_t
1408getgs(void)
1409{ return (0); }
1410
1411/*ARGSUSED*/
1412void
1413setgs(uint16_t sel)
1414{}
1415
1416#else	/* __lint */
1417
1418	ENTRY(getgs)
1419	clr	%eax			/* clear the upper half before the 16-bit move */
1420	movw	%gs, %ax
1421	ret
1422	SET_SIZE(getgs)
1423
1424	ENTRY(setgs)
1425	movw	4(%esp), %gs		/* sel is the first stack argument */
1426	ret
1427	SET_SIZE(setgs)
1428
1429#endif	/* __lint */
1430#endif	/* __i386 */
1431
/*
 * pc_reset(): try to reset the machine.  The methods enabled in the
 * pc_reset_methods bitmask are attempted in order (keyboard controller,
 * port 0x92, PCI port 0xcf9), each followed by a call to wait_500ms.
 * If none takes effect, a zero-limit IDT is loaded and an interrupt is
 * raised to force a triple fault.
 */
1432#if defined(__lint)
1433
1434void
1435pc_reset(void)
1436{}
1437
1438#else	/* __lint */
1439
	/*
	 * wait_500ms: call tenmicrosec 50000 times.  %ebx holds the loop
	 * counter and is saved/restored around the loop.
	 */
1440	ENTRY(wait_500ms)
1441	push	%ebx
1442	movl	$50000, %ebx
14431:
1444	call	tenmicrosec
1445	decl	%ebx
1446	jnz	1b
1447	pop	%ebx
1448	ret
1449	SET_SIZE(wait_500ms)
1450
/* Bits of pc_reset_methods; all three are enabled in the initial value. */
1451#define	RESET_METHOD_KBC	1
1452#define	RESET_METHOD_PORT92	2
1453#define RESET_METHOD_PCI	4
1454
1455	DGDEF3(pc_reset_methods, 4, 8)
1456	.long RESET_METHOD_KBC|RESET_METHOD_PORT92|RESET_METHOD_PCI;
1457
1458	ENTRY(pc_reset)
1459
1460#if defined(__i386)
1461	testl	$RESET_METHOD_KBC, pc_reset_methods
1462#elif defined(__amd64)
1463	testl	$RESET_METHOD_KBC, pc_reset_methods(%rip)
1464#endif
1465	jz	1f
1466
1467	/
1468	/ Try the classic keyboard controller-triggered reset.
1469	/
1470	movw	$0x64, %dx
1471	movb	$0xfe, %al
1472	outb	(%dx)
1473
1474	/ Wait up to 500 milliseconds here for the keyboard controller
1475	/ to pull the reset line.  On some systems where the keyboard
1476	/ controller is slow to pull the reset line, the next reset method
1477	/ may be executed (which may be bad if those systems hang when the
1478	/ next reset method is used, e.g. Ferrari 3400 (doesn't like port 92),
1479	/ and Ferrari 4000 (doesn't like the cf9 reset method))
1480
1481	call	wait_500ms
1482
14831:
1484#if defined(__i386)
1485	testl	$RESET_METHOD_PORT92, pc_reset_methods
1486#elif defined(__amd64)
1487	testl	$RESET_METHOD_PORT92, pc_reset_methods(%rip)
1488#endif
1489	jz	3f
1490
1491	/
1492	/ Try port 0x92 fast reset
1493	/
1494	movw	$0x92, %dx
1495	inb	(%dx)
1496	cmpb	$0xff, %al	/ If port's not there, we should get back 0xFF
1497	je	1f
1498	testb	$1, %al		/ If bit 0
1499	jz	2f		/ is clear, jump to perform the reset
1500	andb	$0xfe, %al	/ otherwise,
1501	outb	(%dx)		/ clear bit 0 first, then
15022:
1503	orb	$1, %al		/ Set bit 0
1504	outb	(%dx)		/ and reset the system
15051:
1506
1507	call	wait_500ms
1508
15093:
1510#if defined(__i386)
1511	testl	$RESET_METHOD_PCI, pc_reset_methods
1512#elif defined(__amd64)
1513	testl	$RESET_METHOD_PCI, pc_reset_methods(%rip)
1514#endif
1515	jz	4f
1516
1517	/ Try the PCI (soft) reset vector (should work on all modern systems,
1518	/ but has been shown to cause problems on 450NX systems, and some newer
1519	/ systems (e.g. ATI IXP400-equipped systems))
1520	/ When resetting via this method, 2 writes are required.  The first
1521	/ targets bit 1 (0=hard reset without power cycle, 1=hard reset with
1522	/ power cycle).
1523	/ The reset occurs on the second write, during bit 2's transition from
1524	/ 0->1.
1525	movw	$0xcf9, %dx
1526	movb	$0x2, %al	/ Reset mode = hard, no power cycle
1527	outb	(%dx)
1528	movb	$0x6, %al
1529	outb	(%dx)
1530
1531	call	wait_500ms
1532
15334:
1534	/
1535	/ port 0xcf9 failed also.  Last-ditch effort is to
1536	/ triple-fault the CPU.
1537	/
1538#if defined(__amd64)
1539	pushq	$0x0
1540	pushq	$0x0		/ IDT base of 0, limit of 0 + 2 unused bytes
1541	lidt	(%rsp)
1542#elif defined(__i386)
1543	pushl	$0x0
1544	pushl	$0x0		/ IDT base of 0, limit of 0 + 2 unused bytes
1545	lidt	(%esp)
1546#endif
1547	int	$0x0		/ Trigger interrupt, generate triple-fault
1548
1549	cli
1550	hlt			/ Wait forever
1551	/*NOTREACHED*/
1552	SET_SIZE(pc_reset)
1553
1554#endif	/* __lint */
1555
1556/*
1557 * C callable in and out routines
1558 */
1559
/*
 * void outl(int port_address, uint32_t val)
 * Write the 32-bit value val to the I/O port port_address.
 */
1560#if defined(__lint)
1561
1562/* ARGSUSED */
1563void
1564outl(int port_address, uint32_t val)
1565{}
1566
1567#else	/* __lint */
1568
1569#if defined(__amd64)
1570
1571	ENTRY(outl)
1572	movw	%di, %dx		/* port number in %dx */
1573	movl	%esi, %eax		/* value in %eax */
1574	outl	(%dx)
1575	ret
1576	SET_SIZE(outl)
1577
1578#elif defined(__i386)
1579
	/*
	 * Stack offsets of the arguments at function entry (nothing is
	 * pushed).  PORT and VAL are also used by the other single-value
	 * in/out routines that follow.
	 */
1580	.set	PORT, 4
1581	.set	VAL, 8
1582
1583	ENTRY(outl)
1584	movw	PORT(%esp), %dx
1585	movl	VAL(%esp), %eax
1586	outl	(%dx)
1587	ret
1588	SET_SIZE(outl)
1589
1590#endif	/* __i386 */
1591#endif	/* __lint */
1592
/*
 * void outw(int port_address, uint16_t val)
 * Write the 16-bit value val to the I/O port port_address.
 * NOTE(review): the D16 macro presumably emits an operand-size prefix so
 * that the outl becomes a 16-bit out -- confirm against asm_linkage.h.
 */
1593#if defined(__lint)
1594
1595/* ARGSUSED */
1596void
1597outw(int port_address, uint16_t val)
1598{}
1599
1600#else	/* __lint */
1601
1602#if defined(__amd64)
1603
1604	ENTRY(outw)
1605	movw	%di, %dx		/* port number in %dx */
1606	movw	%si, %ax		/* value in %ax */
1607	D16 outl (%dx)		/* XX64 why not outw? */
1608	ret
1609	SET_SIZE(outw)
1610
1611#elif defined(__i386)
1612
1613	ENTRY(outw)
1614	movw	PORT(%esp), %dx
1615	movw	VAL(%esp), %ax
1616	D16 outl (%dx)
1617	ret
1618	SET_SIZE(outw)
1619
1620#endif	/* __i386 */
1621#endif	/* __lint */
1622
/*
 * void outb(int port_address, uint8_t val)
 * Write the byte val to the I/O port port_address.
 */
1623#if defined(__lint)
1624
1625/* ARGSUSED */
1626void
1627outb(int port_address, uint8_t val)
1628{}
1629
1630#else	/* __lint */
1631
1632#if defined(__amd64)
1633
1634	ENTRY(outb)
1635	movw	%di, %dx		/* port number in %dx */
1636	movb	%sil, %al		/* value in %al */
1637	outb	(%dx)
1638	ret
1639	SET_SIZE(outb)
1640
1641#elif defined(__i386)
1642
1643	ENTRY(outb)
1644	movw	PORT(%esp), %dx
1645	movb	VAL(%esp), %al
1646	outb	(%dx)
1647	ret
1648	SET_SIZE(outb)
1649
1650#endif	/* __i386 */
1651#endif	/* __lint */
1652
/*
 * uint32_t inl(int port_address)
 * Read a 32-bit value from the I/O port port_address; result in %eax.
 */
1653#if defined(__lint)
1654
1655/* ARGSUSED */
1656uint32_t
1657inl(int port_address)
1658{ return (0); }
1659
1660#else	/* __lint */
1661
1662#if defined(__amd64)
1663
1664	ENTRY(inl)
1665	xorl	%eax, %eax
1666	movw	%di, %dx		/* port number in %dx */
1667	inl	(%dx)
1668	ret
1669	SET_SIZE(inl)
1670
1671#elif defined(__i386)
1672
1673	ENTRY(inl)
1674	movw	PORT(%esp), %dx
1675	inl	(%dx)
1676	ret
1677	SET_SIZE(inl)
1678
1679#endif	/* __i386 */
1680#endif	/* __lint */
1681
/*
 * uint16_t inw(int port_address)
 * Read a 16-bit value from the I/O port port_address.  %eax is zeroed
 * first so the upper half of the return register is clean.
 * NOTE(review): D16 presumably turns the inl into a 16-bit in -- confirm.
 */
1682#if defined(__lint)
1683
1684/* ARGSUSED */
1685uint16_t
1686inw(int port_address)
1687{ return (0); }
1688
1689#else	/* __lint */
1690
1691#if defined(__amd64)
1692
1693	ENTRY(inw)
1694	xorl	%eax, %eax
1695	movw	%di, %dx		/* port number in %dx */
1696	D16 inl	(%dx)
1697	ret
1698	SET_SIZE(inw)
1699
1700#elif defined(__i386)
1701
1702	ENTRY(inw)
1703	subl	%eax, %eax		/* zero %eax */
1704	movw	PORT(%esp), %dx
1705	D16 inl	(%dx)
1706	ret
1707	SET_SIZE(inw)
1708
1709#endif	/* __i386 */
1710#endif	/* __lint */
1711
1712
/*
 * uint8_t inb(int port_address)
 * Read one byte from the I/O port port_address.  %eax is zeroed first
 * so only %al carries data on return.
 */
1713#if defined(__lint)
1714
1715/* ARGSUSED */
1716uint8_t
1717inb(int port_address)
1718{ return (0); }
1719
1720#else	/* __lint */
1721
1722#if defined(__amd64)
1723
1724	ENTRY(inb)
1725	xorl	%eax, %eax
1726	movw	%di, %dx		/* port number in %dx */
1727	inb	(%dx)
1728	ret
1729	SET_SIZE(inb)
1730
1731#elif defined(__i386)
1732
1733	ENTRY(inb)
1734	subl    %eax, %eax		/* zero %eax */
1735	movw	PORT(%esp), %dx
1736	inb	(%dx)
1737	ret
1738	SET_SIZE(inb)
1739
1740#endif	/* __i386 */
1741#endif	/* __lint */
1742
1743
/*
 * void repoutsw(int port, uint16_t *addr, int cnt)
 * Output cnt 16-bit words starting at addr to the I/O port, using a
 * rep-prefixed string output.
 * NOTE(review): D16 presumably makes the outsl operate on 16-bit units.
 */
1744#if defined(__lint)
1745
1746/* ARGSUSED */
1747void
1748repoutsw(int port, uint16_t *addr, int cnt)
1749{}
1750
1751#else	/* __lint */
1752
1753#if defined(__amd64)
1754
1755	ENTRY(repoutsw)
1756	movl	%edx, %ecx		/* cnt -> rep counter */
1757	movw	%di, %dx		/* port -> %dx; addr is already in %rsi */
1758	rep
1759	  D16 outsl
1760	ret
1761	SET_SIZE(repoutsw)
1762
1763#elif defined(__i386)
1764
1765	/*
1766	 * The arguments and saved registers are on the stack in the
1767	 *  following order:
1768	 *      |  cnt  |  +16
1769	 *      | *addr |  +12
1770	 *      | port  |  +8
1771	 *      |  eip  |  +4
1772	 *      |  esi  |  <-- %esp
1773	 * If additional values are pushed onto the stack, make sure
1774	 * to adjust the following constants accordingly.
1775	 */
1776	.set	PORT, 8
1777	.set	ADDR, 12
1778	.set	COUNT, 16
1779
1780	ENTRY(repoutsw)
1781	pushl	%esi			/* %esi is used as the source pointer */
1782	movl	PORT(%esp), %edx
1783	movl	ADDR(%esp), %esi
1784	movl	COUNT(%esp), %ecx
1785	rep
1786	  D16 outsl
1787	popl	%esi
1788	ret
1789	SET_SIZE(repoutsw)
1790
1791#endif	/* __i386 */
1792#endif	/* __lint */
1793
1794
/*
 * void repinsw(int port_addr, uint16_t *addr, int cnt)
 * Input cnt 16-bit words from the I/O port into the buffer at addr,
 * using a rep-prefixed string input.
 * NOTE(review): D16 presumably makes the insl operate on 16-bit units.
 */
1795#if defined(__lint)
1796
1797/* ARGSUSED */
1798void
1799repinsw(int port_addr, uint16_t *addr, int cnt)
1800{}
1801
1802#else	/* __lint */
1803
1804#if defined(__amd64)
1805
1806	ENTRY(repinsw)
1807	movl	%edx, %ecx		/* cnt -> rep counter */
1808	movw	%di, %dx		/* port -> %dx */
	/*
	 * The string input instruction stores through %rdi, but addr
	 * arrives in %rsi (second argument).  Without this move the data
	 * would be written to the address formed by the port number.
	 * repinsb and repinsd perform the same move.
	 */
	movq	%rsi, %rdi
1809	rep
1810	  D16 insl
1811	ret
1812	SET_SIZE(repinsw)
1813
1814#elif defined(__i386)
1815
	/*
	 * PORT, ADDR and COUNT are the .set offsets (8, 12, 16) defined
	 * for repoutsw; they account for the single register pushed here.
	 */
1816	ENTRY(repinsw)
1817	pushl	%edi			/* %edi is used as the destination pointer */
1818	movl	PORT(%esp), %edx
1819	movl	ADDR(%esp), %edi
1820	movl	COUNT(%esp), %ecx
1821	rep
1822	  D16 insl
1823	popl	%edi
1824	ret
1825	SET_SIZE(repinsw)
1826
1827#endif	/* __i386 */
1828#endif	/* __lint */
1829
1830
/*
 * void repinsb(int port, uint8_t *addr, int count)
 * Input count bytes from the I/O port into the buffer at addr.
 */
1831#if defined(__lint)
1832
1833/* ARGSUSED */
1834void
1835repinsb(int port, uint8_t *addr, int count)
1836{}
1837
1838#else	/* __lint */
1839
1840#if defined(__amd64)
1841
1842	ENTRY(repinsb)
1843	movl	%edx, %ecx		/* count -> rep counter */
1844	movw	%di, %dx		/* port -> %dx */
1845	movq	%rsi, %rdi		/* addr -> destination pointer */
1846	rep
1847	  insb
1848	ret
1849	SET_SIZE(repinsb)
1850
1851#elif defined(__i386)
1852
1853	/*
1854	 * The arguments and saved registers are on the stack in the
1855	 *  following order:
1856	 *      |  cnt  |  +16
1857	 *      | *addr |  +12
1858	 *      | port  |  +8
1859	 *      |  eip  |  +4
1860	 *      |  edi  |  <-- %esp   (or esi, for the output routines)
1861	 * If additional values are pushed onto the stack, make sure
1862	 * to adjust the following constants accordingly.
1863	 */
1864	.set	IO_PORT, 8
1865	.set	IO_ADDR, 12
1866	.set	IO_COUNT, 16
1867
1868	ENTRY(repinsb)
1869	pushl	%edi
1870	movl	IO_ADDR(%esp), %edi
1871	movl	IO_COUNT(%esp), %ecx
1872	movl	IO_PORT(%esp), %edx
1873	rep
1874	  insb
1875	popl	%edi
1876	ret
1877	SET_SIZE(repinsb)
1878
1879#endif	/* __i386 */
1880#endif	/* __lint */
1881
1882
1883/*
1884 * Input a stream of 32-bit words.
1885 * NOTE: count is a DWORD count.
 *
 * void repinsd(int port, uint32_t *addr, int count)
 * The i386 variant uses the IO_PORT/IO_ADDR/IO_COUNT stack offsets
 * defined for repinsb.
1886 */
1887#if defined(__lint)
1888
1889/* ARGSUSED */
1890void
1891repinsd(int port, uint32_t *addr, int count)
1892{}
1893
1894#else	/* __lint */
1895
1896#if defined(__amd64)
1897
1898	ENTRY(repinsd)
1899	movl	%edx, %ecx		/* count -> rep counter */
1900	movw	%di, %dx		/* port -> %dx */
1901	movq	%rsi, %rdi		/* addr -> destination pointer */
1902	rep
1903	  insl
1904	ret
1905	SET_SIZE(repinsd)
1906
1907#elif defined(__i386)
1908
1909	ENTRY(repinsd)
1910	pushl	%edi
1911	movl	IO_ADDR(%esp), %edi
1912	movl	IO_COUNT(%esp), %ecx
1913	movl	IO_PORT(%esp), %edx
1914	rep
1915	  insl
1916	popl	%edi
1917	ret
1918	SET_SIZE(repinsd)
1919
1920#endif	/* __i386 */
1921#endif	/* __lint */
1922
1923/*
1924 * Output a stream of bytes
1925 * NOTE: count is a byte count
 *
 * void repoutsb(int port, uint8_t *addr, int count)
 * The i386 variant uses the IO_PORT/IO_ADDR/IO_COUNT stack offsets
 * defined for repinsb.
1926 */
1927#if defined(__lint)
1928
1929/* ARGSUSED */
1930void
1931repoutsb(int port, uint8_t *addr, int count)
1932{}
1933
1934#else	/* __lint */
1935
1936#if defined(__amd64)
1937
1938	ENTRY(repoutsb)
1939	movl	%edx, %ecx		/* count -> rep counter */
1940	movw	%di, %dx		/* port -> %dx; addr is already in %rsi */
1941	rep
1942	  outsb
1943	ret
1944	SET_SIZE(repoutsb)
1945
1946#elif defined(__i386)
1947
1948	ENTRY(repoutsb)
1949	pushl	%esi
1950	movl	IO_ADDR(%esp), %esi
1951	movl	IO_COUNT(%esp), %ecx
1952	movl	IO_PORT(%esp), %edx
1953	rep
1954	  outsb
1955	popl	%esi
1956	ret
1957	SET_SIZE(repoutsb)
1958
1959#endif	/* __i386 */
1960#endif	/* __lint */
1961
1962/*
1963 * Output a stream of 32-bit words
1964 * NOTE: count is a DWORD count
 *
 * void repoutsd(int port, uint32_t *addr, int count)
 * The i386 variant uses the IO_PORT/IO_ADDR/IO_COUNT stack offsets
 * defined for repinsb.
1965 */
1966#if defined(__lint)
1967
1968/* ARGSUSED */
1969void
1970repoutsd(int port, uint32_t *addr, int count)
1971{}
1972
1973#else	/* __lint */
1974
1975#if defined(__amd64)
1976
1977	ENTRY(repoutsd)
1978	movl	%edx, %ecx		/* count -> rep counter */
1979	movw	%di, %dx		/* port -> %dx; addr is already in %rsi */
1980	rep
1981	  outsl
1982	ret
1983	SET_SIZE(repoutsd)
1984
1985#elif defined(__i386)
1986
1987	ENTRY(repoutsd)
1988	pushl	%esi
1989	movl	IO_ADDR(%esp), %esi
1990	movl	IO_COUNT(%esp), %ecx
1991	movl	IO_PORT(%esp), %edx
1992	rep
1993	  outsl
1994	popl	%esi
1995	ret
1996	SET_SIZE(repoutsd)
1997
1998#endif	/* __i386 */
1999#endif	/* __lint */
2000
2001/*
2002 * void int20(void)
 *
 * Raise software interrupt 20, but only when the RB_DEBUG bit is set
 * in boothowto; otherwise return immediately.
2003 */
2004
2005#if defined(__lint)
2006
2007void
2008int20(void)
2009{}
2010
2011#else	/* __lint */
2012
2013	ENTRY(int20)
2014	movl	boothowto, %eax
2015	andl	$RB_DEBUG, %eax
2016	jz	1f			/* RB_DEBUG not set: skip the interrupt */
2017
2018	int	$20
20191:
2020	rep;	ret	/* use 2 byte return instruction when branch target */
2021			/* AMD Software Optimization Guide - Section 6.2 */
2022	SET_SIZE(int20)
2023
2024#endif	/* __lint */
2025
/*
 * int scanc(size_t size, uchar_t *cp, uchar_t *table, uchar_t mask)
 *
 * Walk cp[0 .. size-1] until a byte is found for which
 * (table[*cp] & mask) != 0.  Returns the number of bytes from that byte
 * to the end of the buffer (end - cp), or 0 if no byte matched.
 */
2026#if defined(__lint)
2027
2028/* ARGSUSED */
2029int
2030scanc(size_t size, uchar_t *cp, uchar_t *table, uchar_t mask)
2031{ return (0); }
2032
2033#else	/* __lint */
2034
2035#if defined(__amd64)
2036
2037	ENTRY(scanc)
2038					/* rdi == size */
2039					/* rsi == cp */
2040					/* rdx == table */
2041					/* rcx == mask */
2042	addq	%rsi, %rdi		/* end = &cp[size] */
2043.scanloop:
2044	cmpq	%rdi, %rsi		/* while (cp < end */
2045	jnb	.scandone
2046	movzbq	(%rsi), %r8		/* %r8 = *cp */
2047	incq	%rsi			/* cp++ */
2048	testb	%cl, (%r8, %rdx)
2049	jz	.scanloop		/*  && (table[*cp] & mask) == 0) */
2050	decq	%rsi			/* (fix post-increment) */
2051.scandone:
2052	movl	%edi, %eax
2053	subl	%esi, %eax		/* return (end - cp) */
2054	ret
2055	SET_SIZE(scanc)
2056
2057#elif defined(__i386)
2058
	/*
	 * After the two pushes the arguments sit at 12(%esp) size,
	 * 16(%esp) cp, 20(%esp) table and 24(%esp) mask.
	 */
2059	ENTRY(scanc)
2060	pushl	%edi
2061	pushl	%esi
2062	movb	24(%esp), %cl		/* mask = %cl */
2063	movl	16(%esp), %esi		/* cp = %esi */
2064	movl	20(%esp), %edx		/* table = %edx */
2065	movl	%esi, %edi
2066	addl	12(%esp), %edi		/* end = &cp[size]; */
2067.scanloop:
2068	cmpl	%edi, %esi		/* while (cp < end */
2069	jnb	.scandone
2070	movzbl	(%esi),  %eax		/* %al = *cp */
2071	incl	%esi			/* cp++ */
2072	movb	(%edx,  %eax), %al	/* %al = table[*cp] */
2073	testb	%al, %cl
2074	jz	.scanloop		/*   && (table[*cp] & mask) == 0) */
2075	dec	%esi			/* post-incremented */
2076.scandone:
2077	movl	%edi, %eax
2078	subl	%esi, %eax		/* return (end - cp) */
2079	popl	%esi
2080	popl	%edi
2081	ret
2082	SET_SIZE(scanc)
2083
2084#endif	/* __i386 */
2085#endif	/* __lint */
2086
2087/*
2088 * Replacement functions for ones that are normally inlined.
2089 * In addition to the copy in i86.il, they are defined here just in case.
 *
 * intr_clear() and clear_int_flag() share one body: push the flags
 * register, disable interrupts with cli, and return the saved flags
 * (as they were before the cli) in the return register.
2090 */
2091
2092#if defined(__lint)
2093
2094int
2095intr_clear(void)
2096{ return 0; }
2097
2098int
2099clear_int_flag(void)
2100{ return 0; }
2101
2102#else	/* __lint */
2103
2104#if defined(__amd64)
2105
2106	ENTRY(intr_clear)
2107	ENTRY(clear_int_flag)
2108	pushfq				/* save flags before disabling */
2109	cli
2110	popq	%rax			/* return the saved flags */
2111	ret
2112	SET_SIZE(clear_int_flag)
2113	SET_SIZE(intr_clear)
2114
2115#elif defined(__i386)
2116
2117	ENTRY(intr_clear)
2118	ENTRY(clear_int_flag)
2119	pushfl				/* save flags before disabling */
2120	cli
2121	popl	%eax			/* return the saved flags */
2122	ret
2123	SET_SIZE(clear_int_flag)
2124	SET_SIZE(intr_clear)
2125
2126#endif	/* __i386 */
2127#endif	/* __lint */
2128
/*
 * struct cpu *curcpup(void)
 * Return the pointer stored at offset CPU_SELF in the %gs segment.
 * NOTE(review): presumably %gs addresses the current CPU's cpu_t -- confirm.
 */
2129#if defined(__lint)
2130
2131struct cpu *
2132curcpup(void)
2133{ return 0; }
2134
2135#else	/* __lint */
2136
2137#if defined(__amd64)
2138
2139	ENTRY(curcpup)
2140	movq	%gs:CPU_SELF, %rax
2141	ret
2142	SET_SIZE(curcpup)
2143
2144#elif defined(__i386)
2145
2146	ENTRY(curcpup)
2147	movl	%gs:CPU_SELF, %eax
2148	ret
2149	SET_SIZE(curcpup)
2150
2151#endif	/* __i386 */
2152#endif	/* __lint */
2153
/*
 * uint32_t htonl(uint32_t i) / uint32_t ntohl(uint32_t i)
 * Both names share one body that reverses the byte order of the 32-bit
 * argument with bswap and returns it in %eax.
 */
2154#if defined(__lint)
2155
2156/* ARGSUSED */
2157uint32_t
2158htonl(uint32_t i)
2159{ return (0); }
2160
2161/* ARGSUSED */
2162uint32_t
2163ntohl(uint32_t i)
2164{ return (0); }
2165
2166#else	/* __lint */
2167
2168#if defined(__amd64)
2169
2170	/* XX64 there must be shorter sequences for this */
2171	ENTRY(htonl)
2172	ALTENTRY(ntohl)
2173	movl	%edi, %eax
2174	bswap	%eax
2175	ret
2176	SET_SIZE(ntohl)
2177	SET_SIZE(htonl)
2178
2179#elif defined(__i386)
2180
2181	ENTRY(htonl)
2182	ALTENTRY(ntohl)
2183	movl	4(%esp), %eax
2184	bswap	%eax
2185	ret
2186	SET_SIZE(ntohl)
2187	SET_SIZE(htonl)
2188
2189#endif	/* __i386 */
2190#endif	/* __lint */
2191
/*
 * uint16_t htons(uint16_t i) / uint16_t ntohs(uint16_t i)
 * Both names share one body: byte-swap the full 32-bit register, then
 * shift right by 16 so the two swapped low-order bytes of the argument
 * end up in the low half of %eax with the upper half zero.
 */
2192#if defined(__lint)
2193
2194/* ARGSUSED */
2195uint16_t
2196htons(uint16_t i)
2197{ return (0); }
2198
2199/* ARGSUSED */
2200uint16_t
2201ntohs(uint16_t i)
2202{ return (0); }
2203
2204
2205#else	/* __lint */
2206
2207#if defined(__amd64)
2208
2209	/* XX64 there must be better sequences for this */
2210	ENTRY(htons)
2211	ALTENTRY(ntohs)
2212	movl	%edi, %eax
2213	bswap	%eax
2214	shrl	$16, %eax
2215	ret
2216	SET_SIZE(ntohs)
2217	SET_SIZE(htons)
2218
2219#elif defined(__i386)
2220
2221	ENTRY(htons)
2222	ALTENTRY(ntohs)
2223	movl	4(%esp), %eax
2224	bswap	%eax
2225	shrl	$16, %eax
2226	ret
2227	SET_SIZE(ntohs)
2228	SET_SIZE(htons)
2229
2230#endif	/* __i386 */
2231#endif	/* __lint */
2232
2233
/*
 * void intr_restore(uint_t i) / void restore_int_flag(int i)
 * Both names share one body that loads the flags register from the
 * argument (push the value, then popf).
 */
2234#if defined(__lint)
2235
2236/* ARGSUSED */
2237void
2238intr_restore(uint_t i)
2239{ return; }
2240
2241/* ARGSUSED */
2242void
2243restore_int_flag(int i)
2244{ return; }
2245
2246#else	/* __lint */
2247
2248#if defined(__amd64)
2249
2250	ENTRY(intr_restore)
2251	ENTRY(restore_int_flag)
2252	pushq	%rdi			/* argument -> stack */
2253	popfq				/* stack -> flags */
2254	ret
2255	SET_SIZE(restore_int_flag)
2256	SET_SIZE(intr_restore)
2257
2258#elif defined(__i386)
2259
2260	ENTRY(intr_restore)
2261	ENTRY(restore_int_flag)
2262	pushl	4(%esp)			/* copy the argument to the top of stack */
2263	popfl				/* stack -> flags */
2264	ret
2265	SET_SIZE(restore_int_flag)
2266	SET_SIZE(intr_restore)
2267
2268#endif	/* __i386 */
2269#endif	/* __lint */
2270
/*
 * void sti(void)
 * C-callable wrapper around the sti instruction (enable interrupts).
 */
2271#if defined(__lint)
2272
2273void
2274sti(void)
2275{}
2276
2277#else	/* __lint */
2278
2279	ENTRY(sti)
2280	sti
2281	ret
2282	SET_SIZE(sti)
2283
2284#endif	/* __lint */
2285
/*
 * dtrace_icookie_t dtrace_interrupt_disable(void)
 * Return the current flags register as the cookie and disable
 * interrupts.  The flags are captured before the cli executes.
 */
2286#if defined(__lint)
2287
2288dtrace_icookie_t
2289dtrace_interrupt_disable(void)
2290{ return (0); }
2291
2292#else   /* __lint */
2293
2294#if defined(__amd64)
2295
2296	ENTRY(dtrace_interrupt_disable)
2297	pushfq
2298	popq	%rax			/* cookie = flags before cli */
2299	cli
2300	ret
2301	SET_SIZE(dtrace_interrupt_disable)
2302
2303#elif defined(__i386)
2304
2305	ENTRY(dtrace_interrupt_disable)
2306	pushfl
2307	popl	%eax			/* cookie = flags before cli */
2308	cli
2309	ret
2310	SET_SIZE(dtrace_interrupt_disable)
2311
2312#endif	/* __i386 */
2313#endif	/* __lint */
2314
/*
 * void dtrace_interrupt_enable(dtrace_icookie_t cookie)
 * Load the flags register from cookie (push the value, then popf).
 */
2315#if defined(__lint)
2316
2317/*ARGSUSED*/
2318void
2319dtrace_interrupt_enable(dtrace_icookie_t cookie)
2320{}
2321
2322#else	/* __lint */
2323
2324#if defined(__amd64)
2325
2326	ENTRY(dtrace_interrupt_enable)
2327	pushq	%rdi			/* cookie -> stack */
2328	popfq				/* stack -> flags */
2329	ret
2330	SET_SIZE(dtrace_interrupt_enable)
2331
2332#elif defined(__i386)
2333
2334	ENTRY(dtrace_interrupt_enable)
2335	movl	4(%esp), %eax		/* cookie */
2336	pushl	%eax
2337	popfl				/* stack -> flags */
2338	ret
2339	SET_SIZE(dtrace_interrupt_enable)
2340
2341#endif	/* __i386 */
2342#endif	/* __lint */
2343
2344
/*
 * void dtrace_membar_producer(void) / void dtrace_membar_consumer(void)
 * Both routines consist of a bare return; no barrier instruction is
 * emitted here.
 *
 * The guard tests __lint, like every other block in this file and like
 * the trailing comments on this block's own #else / #endif.
 */
2345#if defined(__lint)
2346
2347void
2348dtrace_membar_producer(void)
2349{}
2350
2351void
2352dtrace_membar_consumer(void)
2353{}
2354
2355#else	/* __lint */
2356
2357	ENTRY(dtrace_membar_producer)
2358	rep;	ret	/* use 2 byte return instruction when branch target */
2359			/* AMD Software Optimization Guide - Section 6.2 */
2360	SET_SIZE(dtrace_membar_producer)
2361
2362	ENTRY(dtrace_membar_consumer)
2363	rep;	ret	/* use 2 byte return instruction when branch target */
2364			/* AMD Software Optimization Guide - Section 6.2 */
2365	SET_SIZE(dtrace_membar_consumer)
2366
2367#endif	/* __lint */
2368
/*
 * kthread_id_t threadp(void)
 * Return the pointer stored at offset CPU_THREAD in the %gs segment.
 * NOTE(review): presumably the thread currently running on this CPU --
 * confirm against the cpu_t layout.
 */
2369#if defined(__lint)
2370
2371kthread_id_t
2372threadp(void)
2373{ return ((kthread_id_t)0); }
2374
2375#else	/* __lint */
2376
2377#if defined(__amd64)
2378
2379	ENTRY(threadp)
2380	movq	%gs:CPU_THREAD, %rax
2381	ret
2382	SET_SIZE(threadp)
2383
2384#elif defined(__i386)
2385
2386	ENTRY(threadp)
2387	movl	%gs:CPU_THREAD, %eax
2388	ret
2389	SET_SIZE(threadp)
2390
2391#endif	/* __i386 */
2392#endif	/* __lint */
2393
2394/*
2395 *   Checksum routine for Internet Protocol Headers
 *
 *   The lint version below is a plain C rendering: it sums
 *   halfword_count 16-bit words starting at address, folds the carries
 *   above bit 15 back into the low 16 bits, adds the caller's partial
 *   checksum, folds again and returns the 16-bit result.
2396 */
2397
2398#if defined(__lint)
2399
2400/* ARGSUSED */
2401unsigned int
2402ip_ocsum(
2403	ushort_t *address,	/* ptr to 1st message buffer */
2404	int halfword_count,	/* length of data */
2405	unsigned int sum)	/* partial checksum */
2406{
2407	int		i;
2408	unsigned int	psum = 0;	/* partial sum */
2409
	/* accumulate the 16-bit words into a 32-bit sum */
2410	for (i = 0; i < halfword_count; i++, address++) {
2411		psum += *address;
2412	}
2413
	/* fold the upper 16 bits back in until nothing remains above bit 15 */
2414	while ((psum >> 16) != 0) {
2415		psum = (psum & 0xffff) + (psum >> 16);
2416	}
2417
2418	psum += sum;
2419
	/* adding sum may have carried out of 16 bits again */
2420	while ((psum >> 16) != 0) {
2421		psum = (psum & 0xffff) + (psum >> 16);
2422	}
2423
2424	return (psum);
2425}
2426
2427#else	/* __lint */
2428
2429#if defined(__amd64)
2430
	/*
	 * amd64 ip_ocsum(address, halfword_count, sum)
	 *
	 * On entry %rdi = address, %esi = halfword_count, %edx = sum.
	 * Register use in the body: %rsi = data pointer, %ecx = halfwords
	 * remaining, %edx and %eax = the two alternating 32-bit
	 * accumulators (combined at .ip_ocsum_done), %rdi = scratch.
	 *
	 * The main loop at .next_iter consumes 32 halfwords (64 bytes) per
	 * pass as one add/adc carry chain.  A tail of fewer than 32
	 * halfwords is handled at .less_than_32: an odd trailing halfword
	 * is added separately, %rsi is biased so that the remaining dwords
	 * end at offset 64, and control enters the middle of the unrolled
	 * chain through .ip_ocsum_jmptbl with the carry flag cleared.
	 */
2431	ENTRY(ip_ocsum)
2432	pushq	%rbp
2433	movq	%rsp, %rbp
2434#ifdef DEBUG
	/* DEBUG only: panic if address is below kernelbase */
2435	movq	kernelbase(%rip), %rax
2436	cmpq	%rax, %rdi
2437	jnb	1f
2438	xorl	%eax, %eax
2439	movq	%rdi, %rsi
2440	leaq	.ip_ocsum_panic_msg(%rip), %rdi
2441	call	panic
2442	/*NOTREACHED*/
2443.ip_ocsum_panic_msg:
2444	.string	"ip_ocsum: address 0x%p below kernelbase\n"
24451:
2446#endif
2447	movl	%esi, %ecx	/* halfword_count */
2448	movq	%rdi, %rsi	/* address */
2449				/* partial sum in %edx */
2450	xorl	%eax, %eax
2451	testl	%ecx, %ecx
2452	jz	.ip_ocsum_done
2453	testq	$3, %rsi
2454	jnz	.ip_csum_notaligned
2455.ip_csum_aligned:	/* XX64 opportunities for 8-byte operations? */
2456.next_iter:
2457	/* XX64 opportunities for prefetch? */
2458	/* XX64 compute csum with 64 bit quantities? */
2459	subl	$32, %ecx
2460	jl	.less_than_32
2461
2462	addl	0(%rsi), %edx
2463.only60:
2464	adcl	4(%rsi), %eax
2465.only56:
2466	adcl	8(%rsi), %edx
2467.only52:
2468	adcl	12(%rsi), %eax
2469.only48:
2470	adcl	16(%rsi), %edx
2471.only44:
2472	adcl	20(%rsi), %eax
2473.only40:
2474	adcl	24(%rsi), %edx
2475.only36:
2476	adcl	28(%rsi), %eax
2477.only32:
2478	adcl	32(%rsi), %edx
2479.only28:
2480	adcl	36(%rsi), %eax
2481.only24:
2482	adcl	40(%rsi), %edx
2483.only20:
2484	adcl	44(%rsi), %eax
2485.only16:
2486	adcl	48(%rsi), %edx
2487.only12:
2488	adcl	52(%rsi), %eax
2489.only8:
2490	adcl	56(%rsi), %edx
2491.only4:
2492	adcl	60(%rsi), %eax	/* could be adding -1 and -1 with a carry */
2493.only0:
2494	adcl	$0, %eax	/* could be adding -1 in eax with a carry */
2495	adcl	$0, %eax
2496
2497	addq	$64, %rsi
2498	testl	%ecx, %ecx
2499	jnz	.next_iter
2500
2501.ip_ocsum_done:
2502	addl	%eax, %edx
2503	adcl	$0, %edx
2504	movl	%edx, %eax	/* form a 16 bit checksum by */
2505	shrl	$16, %eax	/* adding two halves of 32 bit checksum */
2506	addw	%dx, %ax
2507	adcw	$0, %ax
2508	andl	$0xffff, %eax
2509	leave
2510	ret
2511
	/* address not 4-byte aligned: add one leading halfword, then proceed */
2512.ip_csum_notaligned:
2513	xorl	%edi, %edi
2514	movw	(%rsi), %di
2515	addl	%edi, %edx
2516	adcl	$0, %edx
2517	addq	$2, %rsi
2518	decl	%ecx
2519	jmp	.ip_csum_aligned
2520
2521.less_than_32:
2522	addl	$32, %ecx	/* undo the subtraction: %ecx = halfwords left */
2523	testl	$1, %ecx
2524	jz	.size_aligned
2525	andl	$0xfe, %ecx	/* odd count: add the last halfword by itself */
2526	movzwl	(%rsi, %rcx, 2), %edi
2527	addl	%edi, %edx
2528	adcl	$0, %edx
2529.size_aligned:
2530	movl	%ecx, %edi
2531	shrl	$1, %ecx	/* %ecx = dwords left = jump table index */
2532	shl	$1, %edi	/* %edi = bytes left */
2533	subq	$64, %rdi
2534	addq	%rdi, %rsi	/* bias %rsi so the data ends at 64(%rsi) */
2535	leaq    .ip_ocsum_jmptbl(%rip), %rdi
2536	leaq	(%rdi, %rcx, 8), %rdi
2537	xorl	%ecx, %ecx	/* nothing left after this pass */
2538	clc			/* enter the adc chain with carry clear */
2539	jmp 	*(%rdi)
2540
2541	.align	8
2542.ip_ocsum_jmptbl:
2543	.quad	.only0, .only4, .only8, .only12, .only16, .only20
2544	.quad	.only24, .only28, .only32, .only36, .only40, .only44
2545	.quad	.only48, .only52, .only56, .only60
2546	SET_SIZE(ip_ocsum)
2547
2548#elif defined(__i386)
2549
	/*
	 * i386 ip_ocsum(address, halfword_count, sum)
	 *
	 * Arguments are read from the frame: 8(%ebp) address,
	 * 12(%ebp) halfword_count, 16(%ebp) sum.
	 * Register use: %esi = data pointer, %ecx = halfwords remaining,
	 * %edx and %eax = the two alternating 32-bit accumulators
	 * (combined at .ip_ocsum_done), %edi = scratch.
	 *
	 * The main loop at .next_iter consumes 32 halfwords (64 bytes) per
	 * pass as one add/adc carry chain.  A tail of fewer than 32
	 * halfwords is handled at .less_than_32: an odd trailing halfword
	 * is added separately, %esi is biased so that the remaining dwords
	 * end at offset 64, and control enters the middle of the unrolled
	 * chain through .ip_ocsum_jmptbl with the carry flag cleared.
	 */
2550	ENTRY(ip_ocsum)
2551	pushl	%ebp
2552	movl	%esp, %ebp
2553	pushl	%ebx
2554	pushl	%esi
2555	pushl	%edi
2556	movl	12(%ebp), %ecx	/* count of half words */
2557	movl	16(%ebp), %edx	/* partial checksum */
2558	movl	8(%ebp), %esi
2559	xorl	%eax, %eax
2560	testl	%ecx, %ecx
2561	jz	.ip_ocsum_done
2562
2563	testl	$3, %esi
2564	jnz	.ip_csum_notaligned
2565.ip_csum_aligned:
2566.next_iter:
2567	subl	$32, %ecx
2568	jl	.less_than_32
2569
2570	addl	0(%esi), %edx
2571.only60:
2572	adcl	4(%esi), %eax
2573.only56:
2574	adcl	8(%esi), %edx
2575.only52:
2576	adcl	12(%esi), %eax
2577.only48:
2578	adcl	16(%esi), %edx
2579.only44:
2580	adcl	20(%esi), %eax
2581.only40:
2582	adcl	24(%esi), %edx
2583.only36:
2584	adcl	28(%esi), %eax
2585.only32:
2586	adcl	32(%esi), %edx
2587.only28:
2588	adcl	36(%esi), %eax
2589.only24:
2590	adcl	40(%esi), %edx
2591.only20:
2592	adcl	44(%esi), %eax
2593.only16:
2594	adcl	48(%esi), %edx
2595.only12:
2596	adcl	52(%esi), %eax
2597.only8:
2598	adcl	56(%esi), %edx
2599.only4:
2600	adcl	60(%esi), %eax	/* We could be adding -1 and -1 with a carry */
2601.only0:
2602	adcl	$0, %eax	/* we could be adding -1 in eax with a carry */
2603	adcl	$0, %eax
2604
2605	addl	$64, %esi
2606	andl	%ecx, %ecx	/* sets ZF when no halfwords remain */
2607	jnz	.next_iter
2608
2609.ip_ocsum_done:
2610	addl	%eax, %edx
2611	adcl	$0, %edx
2612	movl	%edx, %eax	/* form a 16 bit checksum by */
2613	shrl	$16, %eax	/* adding two halves of 32 bit checksum */
2614	addw	%dx, %ax
2615	adcw	$0, %ax
2616	andl	$0xffff, %eax
2617	popl	%edi		/* restore registers */
2618	popl	%esi
2619	popl	%ebx
2620	leave
2621	ret
2622
	/* address not 4-byte aligned: add one leading halfword, then proceed */
2623.ip_csum_notaligned:
2624	xorl	%edi, %edi
2625	movw	(%esi), %di
2626	addl	%edi, %edx
2627	adcl	$0, %edx
2628	addl	$2, %esi
2629	decl	%ecx
2630	jmp	.ip_csum_aligned
2631
2632.less_than_32:
2633	addl	$32, %ecx	/* undo the subtraction: %ecx = halfwords left */
2634	testl	$1, %ecx
2635	jz	.size_aligned
2636	andl	$0xfe, %ecx	/* odd count: add the last halfword by itself */
2637	movzwl	(%esi, %ecx, 2), %edi
2638	addl	%edi, %edx
2639	adcl	$0, %edx
2640.size_aligned:
2641	movl	%ecx, %edi
2642	shrl	$1, %ecx	/* %ecx = dwords left = jump table index */
2643	shl	$1, %edi	/* %edi = bytes left */
2644	subl	$64, %edi
2645	addl	%edi, %esi	/* bias %esi so the data ends at 64(%esi) */
2646	movl	$.ip_ocsum_jmptbl, %edi
2647	lea	(%edi, %ecx, 4), %edi
2648	xorl	%ecx, %ecx	/* nothing left after this pass */
2649	clc			/* enter the adc chain with carry clear */
2650	jmp 	*(%edi)
2651	SET_SIZE(ip_ocsum)
2652
2653	.data
2654	.align	4
2655
	/* entry N jumps to the point in the chain that sums the last N dwords */
2656.ip_ocsum_jmptbl:
2657	.long	.only0, .only4, .only8, .only12, .only16, .only20
2658	.long	.only24, .only28, .only32, .only36, .only40, .only44
2659	.long	.only48, .only52, .only56, .only60
2660
2661
2662#endif	/* __i386 */
2663#endif	/* __lint */
2664
2665/*
2666 * multiply two long numbers and yield a u_longlong_t result, callable from C.
2667 * Provided to manipulate hrtime_t values.
 *
 * unsigned long long mul32(uint_t a, uint_t b)
 * The unsigned 32x32 multiply leaves the 64-bit product in %edx:%eax.
 * On i386 that register pair is returned as is; on amd64 the two halves
 * are merged into %rax.
2668 */
2669#if defined(__lint)
2670
2671/* result = a * b; */
2672
2673/* ARGSUSED */
2674unsigned long long
2675mul32(uint_t a, uint_t b)
2676{ return (0); }
2677
2678#else	/* __lint */
2679
2680#if defined(__amd64)
2681
2682	ENTRY(mul32)
2683	xorl	%edx, %edx	/* XX64 joe, paranoia? */
2684	movl	%edi, %eax
2685	mull	%esi		/* %edx:%eax = a * b */
2686	shlq	$32, %rdx
2687	orq	%rdx, %rax	/* %rax = (high << 32) | low */
2688	ret
2689	SET_SIZE(mul32)
2690
2691#elif defined(__i386)
2692
2693	ENTRY(mul32)
2694	movl	8(%esp), %eax	/* b */
2695	movl	4(%esp), %ecx	/* a */
2696	mull	%ecx		/* %edx:%eax = a * b */
2697	ret
2698	SET_SIZE(mul32)
2699
2700#endif	/* __i386 */
2701#endif	/* __lint */
2702
/*
 * void load_pte64(uint64_t *pte, uint64_t pte_value)
 * Compiled only when "notused" is defined.  Stores the 64-bit
 * pte_value through pte as two 32-bit writes: the upper half first,
 * then the lower half.
 */
2703#if defined(notused)
2704#if defined(__lint)
2705/* ARGSUSED */
2706void
2707load_pte64(uint64_t *pte, uint64_t pte_value)
2708{}
2709#else	/* __lint */
2710	.globl load_pte64
2711load_pte64:
2712	movl	4(%esp), %eax		/* pte */
2713	movl	8(%esp), %ecx		/* low 32 bits of pte_value */
2714	movl	12(%esp), %edx		/* high 32 bits of pte_value */
2715	movl	%edx, 4(%eax)		/* write the upper half first */
2716	movl	%ecx, (%eax)		/* then the lower half */
2717	ret
2718#endif	/* __lint */
2719#endif	/* notused */
2720
/*
 * void scan_memory(caddr_t addr, size_t size)
 * Read through the range starting at addr with a rep-prefixed string
 * load, discarding the data.  size is converted from bytes to 8-byte
 * units on amd64 and 4-byte units on i386 (any remainder is not read);
 * nothing is read when the converted count is zero.
 */
2721#if defined(__lint)
2722
2723/*ARGSUSED*/
2724void
2725scan_memory(caddr_t addr, size_t size)
2726{}
2727
2728#else	/* __lint */
2729
2730#if defined(__amd64)
2731
2732	ENTRY(scan_memory)
2733	shrq	$3, %rsi	/* convert %rsi from byte to quadword count */
2734	jz	.scanm_done
2735	movq	%rsi, %rcx	/* move count into rep control register */
2736	movq	%rdi, %rsi	/* move addr into lodsq control reg. */
2737	rep lodsq		/* scan the memory range */
2738.scanm_done:
2739	rep;	ret	/* use 2 byte return instruction when branch target */
2740			/* AMD Software Optimization Guide - Section 6.2 */
2741	SET_SIZE(scan_memory)
2742
2743#elif defined(__i386)
2744
	/*
	 * After the two pushes the arguments sit at 12(%esp) addr and
	 * 16(%esp) size.
	 */
2745	ENTRY(scan_memory)
2746	pushl	%ecx
2747	pushl	%esi
2748	movl	16(%esp), %ecx	/* move 2nd arg into rep control register */
2749	shrl	$2, %ecx	/* convert from byte count to 32-bit word count */
2750	jz	.scanm_done
2751	movl	12(%esp), %esi	/* move 1st arg into lodsl source register */
2752	.byte	0xf3		/* rep prefix.  lame assembler.  sigh. */
2753	lodsl
2754.scanm_done:
2755	popl	%esi
2756	popl	%ecx
2757	ret
2758	SET_SIZE(scan_memory)
2759
2760#endif	/* __i386 */
2761#endif	/* __lint */
2762
2763
/*
 * int lowbit(ulong_t i)
 * Return the 1-based position of the lowest set bit of i, or 0 when
 * i == 0.
 *
 * BSF sets ZF when its source is zero, and in that case the contents of
 * the destination register are architecturally undefined.  The result
 * for i == 0 is therefore selected from ZF instead of relying on the
 * destination being left untouched.
 */
2764#if defined(__lint)
2765
2766/*ARGSUSED */
2767int
2768lowbit(ulong_t i)
2769{ return (0); }
2770
2771#else	/* __lint */
2772
2773#if defined(__amd64)
2774
2775	ENTRY(lowbit)
2776	movl	$-1, %eax		/* default: -1 + 1 == 0 for i == 0 */
2777	bsfq	%rdi, %rdi		/* ZF = (i == 0); else %rdi = bit index */
	cmovnz	%edi, %eax		/* take the index only when a bit was found */
2778	incl	%eax			/* convert to 1-based position */
2779	ret
2780	SET_SIZE(lowbit)
2781
2782#elif defined(__i386)
2783
	/* A branch is used here instead of cmov, which older i386 CPUs lack. */
2784	ENTRY(lowbit)
2786	bsfl	4(%esp), %eax		/* ZF = (i == 0); else %eax = bit index */
	jz	0f
2787	incl	%eax			/* convert to 1-based position */
2788	ret
0:
	xorl	%eax, %eax		/* i == 0: return 0 */
	ret
2789	SET_SIZE(lowbit)
2790
2791#endif	/* __i386 */
2792#endif	/* __lint */
2793
/*
 * int highbit(ulong_t i)
 * Return the 1-based position of the highest set bit of i, or 0 when
 * i == 0.
 *
 * BSR sets ZF when its source is zero, and in that case the contents of
 * the destination register are architecturally undefined.  The result
 * for i == 0 is therefore selected from ZF instead of relying on the
 * destination being left untouched.
 */
2794#if defined(__lint)
2795
2796/*ARGSUSED*/
2797int
2798highbit(ulong_t i)
2799{ return (0); }
2800
2801#else	/* __lint */
2802
2803#if defined(__amd64)
2804
2805	ENTRY(highbit)
2806	movl	$-1, %eax		/* default: -1 + 1 == 0 for i == 0 */
2807	bsrq	%rdi, %rdi		/* ZF = (i == 0); else %rdi = bit index */
	cmovnz	%edi, %eax		/* take the index only when a bit was found */
2808	incl	%eax			/* convert to 1-based position */
2809	ret
2810	SET_SIZE(highbit)
2811
2812#elif defined(__i386)
2813
	/* A branch is used here instead of cmov, which older i386 CPUs lack. */
2814	ENTRY(highbit)
2816	bsrl	4(%esp), %eax		/* ZF = (i == 0); else %eax = bit index */
	jz	0f
2817	incl	%eax			/* convert to 1-based position */
2818	ret
0:
	xorl	%eax, %eax		/* i == 0: return 0 */
	ret
2819	SET_SIZE(highbit)
2820
2821#endif	/* __i386 */
2822#endif	/* __lint */
2823
/*
 * uint64_t rdmsr(uint_t r)
 *	Read model-specific register r.  The instruction returns the
 *	value in %edx:%eax; on amd64 the halves are merged into %rax.
 * void wrmsr(uint_t r, const uint64_t val)
 *	Write val to model-specific register r; the instruction takes
 *	the value in %edx:%eax and the register index in %ecx.
 * void invalidate_cache(void)
 *	Execute wbinvd.
 */
2824#if defined(__lint)
2825
2826/*ARGSUSED*/
2827uint64_t
2828rdmsr(uint_t r)
2829{ return (0); }
2830
2831/*ARGSUSED*/
2832void
2833wrmsr(uint_t r, const uint64_t val)
2834{}
2835
2836void
2837invalidate_cache(void)
2838{}
2839
2840#else  /* __lint */
2841
2842#if defined(__amd64)
2843
2844	ENTRY(rdmsr)
2845	movl	%edi, %ecx		/* MSR index */
2846	rdmsr
2847	shlq	$32, %rdx
2848	orq	%rdx, %rax		/* %rax = (high << 32) | low */
2849	ret
2850	SET_SIZE(rdmsr)
2851
2852	ENTRY(wrmsr)
2853	movq	%rsi, %rdx
2854	shrq	$32, %rdx		/* %edx = high 32 bits of val */
2855	movl	%esi, %eax		/* %eax = low 32 bits of val */
2856	movl	%edi, %ecx		/* MSR index */
2857	wrmsr
2858	ret
2859	SET_SIZE(wrmsr)
2860
2861#elif defined(__i386)
2862
2863	ENTRY(rdmsr)
2864	movl	4(%esp), %ecx		/* MSR index */
2865	rdmsr				/* result left in %edx:%eax */
2866	ret
2867	SET_SIZE(rdmsr)
2868
2869	ENTRY(wrmsr)
2870	movl	4(%esp), %ecx		/* MSR index */
2871	movl	8(%esp), %eax		/* low 32 bits of val */
2872	movl	12(%esp), %edx		/* high 32 bits of val */
2873	wrmsr
2874	ret
2875	SET_SIZE(wrmsr)
2876
2877#endif	/* __i386 */
2878
2879	ENTRY(invalidate_cache)
2880	wbinvd
2881	ret
2882	SET_SIZE(invalidate_cache)
2883
2884#endif	/* __lint */
2885
/*
 * void getcregs(struct cregs *crp)
 * Snapshot the descriptor-table registers, the task register and the
 * control registers into *crp.  The amd64 version also records %cr8 and
 * the MSR_AMD_KGSBASE and MSR_AMD_EFER model-specific registers.  The
 * i386 version reads %cr4 only when X86_LARGEPAGE is set in x86_feature
 * and stores 0 in the cr4 slot otherwise.
 */
2886#if defined(__lint)
2887
2888/*ARGSUSED*/
2889void getcregs(struct cregs *crp)
2890{}
2891
2892#else	/* __lint */
2893
2894#if defined(__amd64)
2895
/* Read MSR r and store its 64-bit value at off(d); clobbers %eax/%ecx/%edx. */
2896#define	GETMSR(r, off, d)	\
2897	movl	$r, %ecx;	\
2898	rdmsr;			\
2899	movl	%eax, off(d);	\
2900	movl	%edx, off+4(d)
2901
2902	ENTRY_NP(getcregs)
2903	xorl	%eax, %eax
	/* zero the bytes the shorter stores below do not cover */
2904	movq	%rax, CREG_GDT+8(%rdi)
2905	sgdt	CREG_GDT(%rdi)		/* 10 bytes */
2906	movq	%rax, CREG_IDT+8(%rdi)
2907	sidt	CREG_IDT(%rdi)		/* 10 bytes */
2908	movq	%rax, CREG_LDT(%rdi)
2909	sldt	CREG_LDT(%rdi)		/* 2 bytes */
2910	movq	%rax, CREG_TASKR(%rdi)
2911	str	CREG_TASKR(%rdi)	/* 2 bytes */
2912	movq	%cr0, %rax
2913	movq	%rax, CREG_CR0(%rdi)	/* cr0 */
2914	movq	%cr2, %rax
2915	movq	%rax, CREG_CR2(%rdi)	/* cr2 */
2916	movq	%cr3, %rax
2917	movq	%rax, CREG_CR3(%rdi)	/* cr3 */
2918	movq	%cr4, %rax
	/* cr4 goes in its own slot, not the cr8 slot written just below */
2919	movq	%rax, CREG_CR4(%rdi)	/* cr4 */
2920	movq	%cr8, %rax
2921	movq	%rax, CREG_CR8(%rdi)	/* cr8 */
2922	GETMSR(MSR_AMD_KGSBASE, CREG_KGSBASE, %rdi)
2923	GETMSR(MSR_AMD_EFER, CREG_EFER, %rdi)
	ret				/* do not fall off the end of the routine */
2924	SET_SIZE(getcregs)
2925
2926#undef GETMSR
2927
2928#elif defined(__i386)
2929
2930	ENTRY_NP(getcregs)
2931	movl	4(%esp), %edx		/* crp */
	/* clear the two bytes after each 6-byte sgdt/sidt store */
2932	movw	$0, CREG_GDT+6(%edx)
2933	movw	$0, CREG_IDT+6(%edx)
2934	sgdt	CREG_GDT(%edx)		/* gdt */
2935	sidt	CREG_IDT(%edx)		/* idt */
2936	sldt	CREG_LDT(%edx)		/* ldt */
2937	str	CREG_TASKR(%edx)	/* task */
2938	movl	%cr0, %eax
2939	movl	%eax, CREG_CR0(%edx)	/* cr0 */
2940	movl	%cr2, %eax
2941	movl	%eax, CREG_CR2(%edx)	/* cr2 */
2942	movl	%cr3, %eax
2943	movl	%eax, CREG_CR3(%edx)	/* cr3 */
2944	testl	$X86_LARGEPAGE, x86_feature
2945	jz	.nocr4
2946	movl	%cr4, %eax
2947	movl	%eax, CREG_CR4(%edx)	/* cr4 */
2948	jmp	.skip
2949.nocr4:
2950	movl	$0, CREG_CR4(%edx)
2951.skip:
2952	rep;	ret	/* use 2 byte return instruction when branch target */
2953			/* AMD Software Optimization Guide - Section 6.2 */
2954	SET_SIZE(getcregs)
2955
2956#endif	/* __i386 */
2957#endif	/* __lint */
2958
2959
2960/*
2961 * A panic trigger is a word which is updated atomically and can only be set
2962 * once.  We atomically store 0xDEFACEDD and load the old value.  If the
2963 * previous value was 0, we succeed and return 1; otherwise return 0.
2964 * This allows a partially corrupt trigger to still trigger correctly.  DTrace
2965 * has its own version of this function to allow it to panic correctly from
2966 * probe context.
2967 */
2968#if defined(__lint)
2969
/*
 * Lint-only stand-ins for the two panic trigger routines.
 */

/*ARGSUSED*/
int
panic_trigger(int *tp)
{
	return (0);
}

/*ARGSUSED*/
int
dtrace_panic_trigger(int *tp)
{
	return (0);
}
2979
2980#else	/* __lint */
2981
2982#if defined(__amd64)
2983
2984	ENTRY_NP(panic_trigger)
2985	xorl	%eax, %eax
2986	movl	$0xdefacedd, %edx
2987	lock
2988	  xchgl	%edx, (%rdi)
2989	cmpl	$0, %edx
2990	je	0f
2991	movl	$0, %eax
2992	ret
29930:	movl	$1, %eax
2994	ret
2995	SET_SIZE(panic_trigger)
2996
2997	ENTRY_NP(dtrace_panic_trigger)
2998	xorl	%eax, %eax
2999	movl	$0xdefacedd, %edx
3000	lock
3001	  xchgl	%edx, (%rdi)
3002	cmpl	$0, %edx
3003	je	0f
3004	movl	$0, %eax
3005	ret
30060:	movl	$1, %eax
3007	ret
3008	SET_SIZE(dtrace_panic_trigger)
3009
3010#elif defined(__i386)
3011
3012	ENTRY_NP(panic_trigger)
3013	movl	4(%esp), %edx		/ %edx = address of trigger
3014	movl	$0xdefacedd, %eax	/ %eax = 0xdefacedd
3015	lock				/ assert lock
3016	xchgl %eax, (%edx)		/ exchange %eax and the trigger
3017	cmpl	$0, %eax		/ if (%eax == 0x0)
3018	je	0f			/   return (1);
3019	movl	$0, %eax		/ else
3020	ret				/   return (0);
30210:	movl	$1, %eax
3022	ret
3023	SET_SIZE(panic_trigger)
3024
3025	ENTRY_NP(dtrace_panic_trigger)
3026	movl	4(%esp), %edx		/ %edx = address of trigger
3027	movl	$0xdefacedd, %eax	/ %eax = 0xdefacedd
3028	lock				/ assert lock
3029	xchgl %eax, (%edx)		/ exchange %eax and the trigger
3030	cmpl	$0, %eax		/ if (%eax == 0x0)
3031	je	0f			/   return (1);
3032	movl	$0, %eax		/ else
3033	ret				/   return (0);
30340:	movl	$1, %eax
3035	ret
3036	SET_SIZE(dtrace_panic_trigger)
3037
3038#endif	/* __i386 */
3039#endif	/* __lint */
3040
3041/*
3042 * The panic() and cmn_err() functions invoke vpanic() as a common entry point
3043 * into the panic code implemented in panicsys().  vpanic() is responsible
3044 * for passing through the format string and arguments, and constructing a
3045 * regs structure on the stack into which it saves the current register
3046 * values.  If we are not dying due to a fatal trap, these registers will
3047 * then be preserved in panicbuf as the current processor state.  Before
3048 * invoking panicsys(), vpanic() activates the first panic trigger (see
3049 * common/os/panic.c) and switches to the panic_stack if successful.  Note that
3050 * DTrace takes a slightly different panic path if it must panic from probe
3051 * context.  Instead of calling panic, it calls into dtrace_vpanic(), which
3052 * sets up the initial stack as vpanic does, calls dtrace_panic_trigger(), and
3053 * branches back into vpanic().
3054 */
3055#if defined(__lint)
3056
3057/*ARGSUSED*/
3058void
3059vpanic(const char *format, va_list alist)
3060{}
3061
3062/*ARGSUSED*/
3063void
3064dtrace_vpanic(const char *format, va_list alist)
3065{}
3066
3067#else	/* __lint */
3068
3069#if defined(__amd64)
3070
3071	ENTRY_NP(vpanic)			/* Initial stack layout: */
3072
3073	pushq	%rbp				/* | %rip | 	0x60	*/
3074	movq	%rsp, %rbp			/* | %rbp |	0x58	*/
3075	pushfq					/* | rfl  |	0x50	*/
3076	pushq	%r11				/* | %r11 |	0x48	*/
3077	pushq	%r10				/* | %r10 |	0x40	*/
3078	pushq	%rbx				/* | %rbx |	0x38	*/
3079	pushq	%rax				/* | %rax |	0x30	*/
3080	pushq	%r9				/* | %r9  |	0x28	*/
3081	pushq	%r8				/* | %r8  |	0x20	*/
3082	pushq	%rcx				/* | %rcx |	0x18	*/
3083	pushq	%rdx				/* | %rdx |	0x10	*/
3084	pushq	%rsi				/* | %rsi |	0x8 alist */
3085	pushq	%rdi				/* | %rdi |	0x0 format */
3086
3087	movq	%rsp, %rbx			/* %rbx = current %rsp */
3088
3089	leaq	panic_quiesce(%rip), %rdi	/* %rdi = &panic_quiesce */
3090	call	panic_trigger			/* %eax = panic_trigger() */
3091
3092vpanic_common:
3093	cmpl	$0, %eax
3094	je	0f
3095
3096	/*
3097	 * If panic_trigger() was successful, we are the first to initiate a
3098	 * panic: we now switch to the reserved panic_stack before continuing.
3099	 */
3100	leaq	panic_stack(%rip), %rsp
3101	addq	$PANICSTKSIZE, %rsp
31020:	subq	$REGSIZE, %rsp
3103	/*
3104	 * Now that we've got everything set up, store the register values as
3105	 * they were when we entered vpanic() to the designated location in
3106	 * the regs structure we allocated on the stack.
3107	 */
3108	movq	0x0(%rbx), %rcx
3109	movq	%rcx, REGOFF_RDI(%rsp)
3110	movq	0x8(%rbx), %rcx
3111	movq	%rcx, REGOFF_RSI(%rsp)
3112	movq	0x10(%rbx), %rcx
3113	movq	%rcx, REGOFF_RDX(%rsp)
3114	movq	0x18(%rbx), %rcx
3115	movq	%rcx, REGOFF_RCX(%rsp)
3116	movq	0x20(%rbx), %rcx
3117
3118	movq	%rcx, REGOFF_R8(%rsp)
3119	movq	0x28(%rbx), %rcx
3120	movq	%rcx, REGOFF_R9(%rsp)
3121	movq	0x30(%rbx), %rcx
3122	movq	%rcx, REGOFF_RAX(%rsp)
3123	movq	0x38(%rbx), %rcx
3124	movq	%rbx, REGOFF_RBX(%rsp)
3125	movq	0x58(%rbx), %rcx
3126
3127	movq	%rcx, REGOFF_RBP(%rsp)
3128	movq	0x40(%rbx), %rcx
3129	movq	%rcx, REGOFF_R10(%rsp)
3130	movq	0x48(%rbx), %rcx
3131	movq	%rcx, REGOFF_R11(%rsp)
3132	movq	%r12, REGOFF_R12(%rsp)
3133
3134	movq	%r13, REGOFF_R13(%rsp)
3135	movq	%r14, REGOFF_R14(%rsp)
3136	movq	%r15, REGOFF_R15(%rsp)
3137
3138	movl	$MSR_AMD_FSBASE, %ecx
3139	rdmsr
3140	movl	%eax, REGOFF_FSBASE(%rsp)
3141	movl	%edx, REGOFF_FSBASE+4(%rsp)
3142
3143	movl	$MSR_AMD_GSBASE, %ecx
3144	rdmsr
3145	movl	%eax, REGOFF_GSBASE(%rsp)
3146	movl	%edx, REGOFF_GSBASE+4(%rsp)
3147
3148	xorl	%ecx, %ecx
3149	movw	%ds, %cx
3150	movq	%rcx, REGOFF_DS(%rsp)
3151	movw	%es, %cx
3152	movq	%rcx, REGOFF_ES(%rsp)
3153	movw	%fs, %cx
3154	movq	%rcx, REGOFF_FS(%rsp)
3155	movw	%gs, %cx
3156	movq	%rcx, REGOFF_GS(%rsp)
3157
3158	movq	$0, REGOFF_TRAPNO(%rsp)
3159
3160	movq	$0, REGOFF_ERR(%rsp)
3161	leaq	vpanic(%rip), %rcx
3162	movq	%rcx, REGOFF_RIP(%rsp)
3163	movw	%cs, %cx
3164	movzwq	%cx, %rcx
3165	movq	%rcx, REGOFF_CS(%rsp)
3166	movq	0x50(%rbx), %rcx
3167	movq	%rcx, REGOFF_RFL(%rsp)
3168	movq	%rbx, %rcx
3169	addq	$0x60, %rcx
3170	movq	%rcx, REGOFF_RSP(%rsp)
3171	movw	%ss, %cx
3172	movzwq	%cx, %rcx
3173	movq	%rcx, REGOFF_SS(%rsp)
3174
3175	/*
3176	 * panicsys(format, alist, rp, on_panic_stack)
3177	 */
3178	movq	REGOFF_RDI(%rsp), %rdi		/* format */
3179	movq	REGOFF_RSI(%rsp), %rsi		/* alist */
3180	movq	%rsp, %rdx			/* struct regs */
3181	movl	%eax, %ecx			/* on_panic_stack */
3182	call	panicsys
3183	addq	$REGSIZE, %rsp
3184	popq	%rdi
3185	popq	%rsi
3186	popq	%rdx
3187	popq	%rcx
3188	popq	%r8
3189	popq	%r9
3190	popq	%rax
3191	popq	%rbx
3192	popq	%r10
3193	popq	%r11
3194	popfq
3195	leave
3196	ret
3197	SET_SIZE(vpanic)
3198
3199	ENTRY_NP(dtrace_vpanic)			/* Initial stack layout: */
3200
3201	pushq	%rbp				/* | %rip | 	0x60	*/
3202	movq	%rsp, %rbp			/* | %rbp |	0x58	*/
3203	pushfq					/* | rfl  |	0x50	*/
3204	pushq	%r11				/* | %r11 |	0x48	*/
3205	pushq	%r10				/* | %r10 |	0x40	*/
3206	pushq	%rbx				/* | %rbx |	0x38	*/
3207	pushq	%rax				/* | %rax |	0x30	*/
3208	pushq	%r9				/* | %r9  |	0x28	*/
3209	pushq	%r8				/* | %r8  |	0x20	*/
3210	pushq	%rcx				/* | %rcx |	0x18	*/
3211	pushq	%rdx				/* | %rdx |	0x10	*/
3212	pushq	%rsi				/* | %rsi |	0x8 alist */
3213	pushq	%rdi				/* | %rdi |	0x0 format */
3214
3215	movq	%rsp, %rbx			/* %rbx = current %rsp */
3216
3217	leaq	panic_quiesce(%rip), %rdi	/* %rdi = &panic_quiesce */
3218	call	dtrace_panic_trigger	/* %eax = dtrace_panic_trigger() */
3219	jmp	vpanic_common
3220
3221	SET_SIZE(dtrace_vpanic)
3222
3223#elif defined(__i386)
3224
3225	ENTRY_NP(vpanic)			/ Initial stack layout:
3226
3227	pushl	%ebp				/ | %eip | 20
3228	movl	%esp, %ebp			/ | %ebp | 16
3229	pushl	%eax				/ | %eax | 12
3230	pushl	%ebx				/ | %ebx |  8
3231	pushl	%ecx				/ | %ecx |  4
3232	pushl	%edx				/ | %edx |  0
3233
3234	movl	%esp, %ebx			/ %ebx = current stack pointer
3235
3236	lea	panic_quiesce, %eax		/ %eax = &panic_quiesce
3237	pushl	%eax				/ push &panic_quiesce
3238	call	panic_trigger			/ %eax = panic_trigger()
3239	addl	$4, %esp			/ reset stack pointer
3240
3241vpanic_common:
3242	cmpl	$0, %eax			/ if (%eax == 0)
3243	je	0f				/   goto 0f;
3244
3245	/*
3246	 * If panic_trigger() was successful, we are the first to initiate a
3247	 * panic: we now switch to the reserved panic_stack before continuing.
3248	 */
3249	lea	panic_stack, %esp		/ %esp  = panic_stack
3250	addl	$PANICSTKSIZE, %esp		/ %esp += PANICSTKSIZE
3251
32520:	subl	$REGSIZE, %esp			/ allocate struct regs
3253
3254	/*
3255	 * Now that we've got everything set up, store the register values as
3256	 * they were when we entered vpanic() to the designated location in
3257	 * the regs structure we allocated on the stack.
3258	 */
3259#if !defined(__GNUC_AS__)
3260	movw	%gs, %edx
3261	movl	%edx, REGOFF_GS(%esp)
3262	movw	%fs, %edx
3263	movl	%edx, REGOFF_FS(%esp)
3264	movw	%es, %edx
3265	movl	%edx, REGOFF_ES(%esp)
3266	movw	%ds, %edx
3267	movl	%edx, REGOFF_DS(%esp)
3268#else	/* __GNUC_AS__ */
3269	mov	%gs, %edx
3270	mov	%edx, REGOFF_GS(%esp)
3271	mov	%fs, %edx
3272	mov	%edx, REGOFF_FS(%esp)
3273	mov	%es, %edx
3274	mov	%edx, REGOFF_ES(%esp)
3275	mov	%ds, %edx
3276	mov	%edx, REGOFF_DS(%esp)
3277#endif	/* __GNUC_AS__ */
3278	movl	%edi, REGOFF_EDI(%esp)
3279	movl	%esi, REGOFF_ESI(%esp)
3280	movl	16(%ebx), %ecx
3281	movl	%ecx, REGOFF_EBP(%esp)
3282	movl	%ebx, %ecx
3283	addl	$20, %ecx
3284	movl	%ecx, REGOFF_ESP(%esp)
3285	movl	8(%ebx), %ecx
3286	movl	%ecx, REGOFF_EBX(%esp)
3287	movl	0(%ebx), %ecx
3288	movl	%ecx, REGOFF_EDX(%esp)
3289	movl	4(%ebx), %ecx
3290	movl	%ecx, REGOFF_ECX(%esp)
3291	movl	12(%ebx), %ecx
3292	movl	%ecx, REGOFF_EAX(%esp)
3293	movl	$0, REGOFF_TRAPNO(%esp)
3294	movl	$0, REGOFF_ERR(%esp)
3295	lea	vpanic, %ecx
3296	movl	%ecx, REGOFF_EIP(%esp)
3297#if !defined(__GNUC_AS__)
3298	movw	%cs, %edx
3299#else	/* __GNUC_AS__ */
3300	mov	%cs, %edx
3301#endif	/* __GNUC_AS__ */
3302	movl	%edx, REGOFF_CS(%esp)
3303	pushfl
3304	popl	%ecx
3305	movl	%ecx, REGOFF_EFL(%esp)
3306	movl	$0, REGOFF_UESP(%esp)
3307#if !defined(__GNUC_AS__)
3308	movw	%ss, %edx
3309#else	/* __GNUC_AS__ */
3310	mov	%ss, %edx
3311#endif	/* __GNUC_AS__ */
3312	movl	%edx, REGOFF_SS(%esp)
3313
3314	movl	%esp, %ecx			/ %ecx = &regs
3315	pushl	%eax				/ push on_panic_stack
3316	pushl	%ecx				/ push &regs
3317	movl	12(%ebp), %ecx			/ %ecx = alist
3318	pushl	%ecx				/ push alist
3319	movl	8(%ebp), %ecx			/ %ecx = format
3320	pushl	%ecx				/ push format
3321	call	panicsys			/ panicsys();
3322	addl	$16, %esp			/ pop arguments
3323
3324	addl	$REGSIZE, %esp
3325	popl	%edx
3326	popl	%ecx
3327	popl	%ebx
3328	popl	%eax
3329	leave
3330	ret
3331	SET_SIZE(vpanic)
3332
3333	ENTRY_NP(dtrace_vpanic)			/ Initial stack layout:
3334
3335	pushl	%ebp				/ | %eip | 20
3336	movl	%esp, %ebp			/ | %ebp | 16
3337	pushl	%eax				/ | %eax | 12
3338	pushl	%ebx				/ | %ebx |  8
3339	pushl	%ecx				/ | %ecx |  4
3340	pushl	%edx				/ | %edx |  0
3341
3342	movl	%esp, %ebx			/ %ebx = current stack pointer
3343
3344	lea	panic_quiesce, %eax		/ %eax = &panic_quiesce
3345	pushl	%eax				/ push &panic_quiesce
3346	call	dtrace_panic_trigger		/ %eax = dtrace_panic_trigger()
3347	addl	$4, %esp			/ reset stack pointer
3348	jmp	vpanic_common			/ jump back to common code
3349
3350	SET_SIZE(dtrace_vpanic)
3351
3352#endif	/* __i386 */
3353#endif	/* __lint */
3354
3355#if defined(__lint)
3356
3357void
3358hres_tick(void)
3359{}
3360
3361int64_t timedelta;
3362hrtime_t hres_last_tick;
3363timestruc_t hrestime;
3364int64_t hrestime_adj;
3365volatile int hres_lock;
3366uint_t nsec_scale;
3367hrtime_t hrtime_base;
3368
3369#else	/* __lint */
3370
3371	DGDEF3(hrestime, _MUL(2, CLONGSIZE), 8)
3372	.NWORD	0, 0
3373
3374	DGDEF3(hrestime_adj, 8, 8)
3375	.long	0, 0
3376
3377	DGDEF3(hres_last_tick, 8, 8)
3378	.long	0, 0
3379
3380	DGDEF3(timedelta, 8, 8)
3381	.long	0, 0
3382
3383	DGDEF3(hres_lock, 4, 8)
3384	.long	0
3385
3386	/*
3387	 * initialized to a non zero value to make pc_gethrtime()
3388	 * work correctly even before clock is initialized
3389	 */
3390	DGDEF3(hrtime_base, 8, 8)
3391	.long	_MUL(NSEC_PER_CLOCK_TICK, 6), 0
3392
3393	DGDEF3(adj_shift, 4, 4)
3394	.long	ADJ_SHIFT
3395
3396#if defined(__amd64)
3397
3398	ENTRY_NP(hres_tick)
3399	pushq	%rbp
3400	movq	%rsp, %rbp
3401
3402	/*
3403	 * We need to call *gethrtimef before picking up CLOCK_LOCK (obviously,
3404	 * hres_last_tick can only be modified while holding CLOCK_LOCK).
3405	 * At worst, performing this now instead of under CLOCK_LOCK may
3406	 * introduce some jitter in pc_gethrestime().
3407	 */
3408	call	*gethrtimef(%rip)
3409	movq	%rax, %r8
3410
3411	leaq	hres_lock(%rip), %rax
3412	movb	$-1, %dl
3413.CL1:
3414	xchgb	%dl, (%rax)
3415	testb	%dl, %dl
3416	jz	.CL3			/* got it */
3417.CL2:
3418	cmpb	$0, (%rax)		/* possible to get lock? */
3419	pause
3420	jne	.CL2
3421	jmp	.CL1			/* yes, try again */
3422.CL3:
3423	/*
3424	 * compute the interval since last time hres_tick was called
3425	 * and adjust hrtime_base and hrestime accordingly
3426	 * hrtime_base is an 8 byte value (in nsec), hrestime is
3427	 * a timestruc_t (sec, nsec)
3428	 */
3429	leaq	hres_last_tick(%rip), %rax
3430	movq	%r8, %r11
3431	subq	(%rax), %r8
3432	addq	%r8, hrtime_base(%rip)	/* add interval to hrtime_base */
3433	addq	%r8, hrestime+8(%rip)	/* add interval to hrestime.tv_nsec */
3434	/*
3435	 * Now that we have CLOCK_LOCK, we can update hres_last_tick
3436	 */
3437	movq	%r11, (%rax)
3438
3439	call	__adj_hrestime
3440
3441	/*
3442	 * release the hres_lock
3443	 */
3444	incl	hres_lock(%rip)
3445	leave
3446	ret
3447	SET_SIZE(hres_tick)
3448
3449#elif defined(__i386)
3450
3451	ENTRY_NP(hres_tick)
3452	pushl	%ebp
3453	movl	%esp, %ebp
3454	pushl	%esi
3455	pushl	%ebx
3456
3457	/*
3458	 * We need to call *gethrtimef before picking up CLOCK_LOCK (obviously,
3459	 * hres_last_tick can only be modified while holding CLOCK_LOCK).
3460	 * At worst, performing this now instead of under CLOCK_LOCK may
3461	 * introduce some jitter in pc_gethrestime().
3462	 */
3463	call	*gethrtimef
3464	movl	%eax, %ebx
3465	movl	%edx, %esi
3466
3467	movl	$hres_lock, %eax
3468	movl	$-1, %edx
3469.CL1:
3470	xchgb	%dl, (%eax)
3471	testb	%dl, %dl
3472	jz	.CL3			/ got it
3473.CL2:
3474	cmpb	$0, (%eax)		/ possible to get lock?
3475	pause
3476	jne	.CL2
3477	jmp	.CL1			/ yes, try again
3478.CL3:
3479	/*
3480	 * compute the interval since last time hres_tick was called
3481	 * and adjust hrtime_base and hrestime accordingly
3482	 * hrtime_base is an 8 byte value (in nsec), hrestime is
3483	 * timestruc_t (sec, nsec)
3484	 */
3485
3486	lea	hres_last_tick, %eax
3487
3488	movl	%ebx, %edx
3489	movl	%esi, %ecx
3490
3491	subl 	(%eax), %edx
3492	sbbl 	4(%eax), %ecx
3493
3494	addl	%edx, hrtime_base	/ add interval to hrtime_base
3495	adcl	%ecx, hrtime_base+4
3496
3497	addl 	%edx, hrestime+4	/ add interval to hrestime.tv_nsec
3498
3499	/
3500	/ Now that we have CLOCK_LOCK, we can update hres_last_tick.
3501	/
3502	movl	%ebx, (%eax)
3503	movl	%esi,  4(%eax)
3504
3505	/ get hrestime at this moment. used as base for pc_gethrestime
3506	/
3507	/ Apply adjustment, if any
3508	/
3509	/ #define HRES_ADJ	(NSEC_PER_CLOCK_TICK >> ADJ_SHIFT)
3510	/ (max_hres_adj)
3511	/
3512	/ void
3513	/ adj_hrestime()
3514	/ {
3515	/	long long adj;
3516	/
3517	/	if (hrestime_adj == 0)
3518	/		adj = 0;
3519	/	else if (hrestime_adj > 0) {
3520	/		if (hrestime_adj < HRES_ADJ)
3521	/			adj = hrestime_adj;
3522	/		else
3523	/			adj = HRES_ADJ;
3524	/	}
3525	/	else {
3526	/		if (hrestime_adj < -(HRES_ADJ))
3527	/			adj = -(HRES_ADJ);
3528	/		else
3529	/			adj = hrestime_adj;
3530	/	}
3531	/
3532	/	timedelta -= adj;
3533	/	hrestime_adj = timedelta;
3534	/	hrestime.tv_nsec += adj;
3535	/
3536	/	while (hrestime.tv_nsec >= NANOSEC) {
3537	/		one_sec++;
3538	/		hrestime.tv_sec++;
3539	/		hrestime.tv_nsec -= NANOSEC;
3540	/	}
3541	/ }
3542__adj_hrestime:
3543	movl	hrestime_adj, %esi	/ if (hrestime_adj == 0)
3544	movl	hrestime_adj+4, %edx
3545	andl	%esi, %esi
3546	jne	.CL4			/ no
3547	andl	%edx, %edx
3548	jne	.CL4			/ no
3549	subl	%ecx, %ecx		/ yes, adj = 0;
3550	subl	%edx, %edx
3551	jmp	.CL5
3552.CL4:
3553	subl	%ecx, %ecx
3554	subl	%eax, %eax
3555	subl	%esi, %ecx
3556	sbbl	%edx, %eax
3557	andl	%eax, %eax		/ if (hrestime_adj > 0)
3558	jge	.CL6
3559
3560	/ In the following comments, HRES_ADJ is used, while in the code
3561	/ max_hres_adj is used.
3562	/
3563	/ The test for "hrestime_adj < HRES_ADJ" is complicated because
3564	/ hrestime_adj is 64-bits, while HRES_ADJ is 32-bits.  We rely
3565	/ on the logical equivalence of:
3566	/
3567	/	!(hrestime_adj < HRES_ADJ)
3568	/
3569	/ and the two step sequence:
3570	/
3571	/	(HRES_ADJ - lsw(hrestime_adj)) generates a Borrow/Carry
3572	/
3573	/ which computes whether or not the least significant 32-bits
3574	/ of hrestime_adj is greater than HRES_ADJ, followed by:
3575	/
3576	/	Previous Borrow/Carry + -1 + msw(hrestime_adj) generates a Carry
3577	/
3578	/ which generates a carry whenever step 1 is true or the most
3579	/ significant long of the longlong hrestime_adj is non-zero.
3580
3581	movl	max_hres_adj, %ecx	/ hrestime_adj is positive
3582	subl	%esi, %ecx
3583	movl	%edx, %eax
3584	adcl	$-1, %eax
3585	jnc	.CL7
3586	movl	max_hres_adj, %ecx	/ adj = HRES_ADJ;
3587	subl	%edx, %edx
3588	jmp	.CL5
3589
3590	/ The following computation is similar to the one above.
3591	/
3592	/ The test for "hrestime_adj < -(HRES_ADJ)" is complicated because
3593	/ hrestime_adj is 64-bits, while HRES_ADJ is 32-bits.  We rely
3594	/ on the logical equivalence of:
3595	/
3596	/	(hrestime_adj > -HRES_ADJ)
3597	/
3598	/ and the two step sequence:
3599	/
3600	/	(HRES_ADJ + lsw(hrestime_adj)) generates a Carry
3601	/
3602	/ which means the least significant 32-bits of hrestime_adj is
3603	/ greater than -HRES_ADJ, followed by:
3604	/
3605	/	Previous Carry + 0 + msw(hrestime_adj) generates a Carry
3606	/
3607	/ which generates a carry only when step 1 is true and the most
3608	/ significant long of the longlong hrestime_adj is -1.
3609
3610.CL6:					/ hrestime_adj is negative
3611	movl	%esi, %ecx
3612	addl	max_hres_adj, %ecx
3613	movl	%edx, %eax
3614	adcl	$0, %eax
3615	jc	.CL7
3616	xor	%ecx, %ecx
3617	subl	max_hres_adj, %ecx	/ adj = -(HRES_ADJ);
3618	movl	$-1, %edx
3619	jmp	.CL5
3620.CL7:
3621	movl	%esi, %ecx		/ adj = hrestime_adj;
3622.CL5:
3623	movl	timedelta, %esi
3624	subl	%ecx, %esi
3625	movl	timedelta+4, %eax
3626	sbbl	%edx, %eax
3627	movl	%esi, timedelta
3628	movl	%eax, timedelta+4	/ timedelta -= adj;
3629	movl	%esi, hrestime_adj
3630	movl	%eax, hrestime_adj+4	/ hrestime_adj = timedelta;
3631	addl	hrestime+4, %ecx
3632
3633	movl	%ecx, %eax		/ eax = tv_nsec
36341:
3635	cmpl	$NANOSEC, %eax		/ if ((unsigned long)tv_nsec >= NANOSEC)
3636	jb	.CL8			/ no
3637	incl	one_sec			/ yes,  one_sec++;
3638	incl	hrestime		/ hrestime.tv_sec++;
3639	addl	$-NANOSEC, %eax		/ tv_nsec -= NANOSEC
3640	jmp	1b			/ check for more seconds
3641
3642.CL8:
3643	movl	%eax, hrestime+4	/ store final into hrestime.tv_nsec
3644	incl	hres_lock		/ release the hres_lock
3645
3646	popl	%ebx
3647	popl	%esi
3648	leave
3649	ret
3650	SET_SIZE(hres_tick)
3651
3652#endif	/* __i386 */
3653#endif	/* __lint */
3654
3655/*
3656 * void prefetch_smap_w(void *)
3657 *
3658 * Prefetch ahead within a linear list of smap structures.
3659 * Not implemented for ia32.  Stub for compatibility.
3660 */
3661
3662#if defined(__lint)
3663
/* Lint-only stand-in; the assembly version is an empty stub as well. */
/*ARGSUSED*/
void
prefetch_smap_w(void *smp)
{
}
3667
3668#else	/* __lint */
3669
3670	ENTRY(prefetch_smap_w)
3671	rep;	ret	/* use 2 byte return instruction when branch target */
3672			/* AMD Software Optimization Guide - Section 6.2 */
3673	SET_SIZE(prefetch_smap_w)
3674
3675#endif	/* __lint */
3676
3677/*
3678 * prefetch_page_r(page_t *)
3679 * issue prefetch instructions for a page_t
3680 */
3681#if defined(__lint)
3682
/* Lint-only stand-in for prefetch_page_r(). */
/*ARGSUSED*/
void
prefetch_page_r(void *pp)
{
}
3687
3688#else	/* __lint */
3689
3690	ENTRY(prefetch_page_r)
3691	rep;	ret	/* use 2 byte return instruction when branch target */
3692			/* AMD Software Optimization Guide - Section 6.2 */
3693	SET_SIZE(prefetch_page_r)
3694
3695#endif	/* __lint */
3696
3697#if defined(__lint)
3698
3699/*ARGSUSED*/
3700int
3701bcmp(const void *s1, const void *s2, size_t count)
3702{ return (0); }
3703
3704#else   /* __lint */
3705
3706#if defined(__amd64)
3707
3708	ENTRY(bcmp)
3709	pushq	%rbp
3710	movq	%rsp, %rbp
3711#ifdef DEBUG
3712	movq	kernelbase(%rip), %r11
3713	cmpq	%r11, %rdi
3714	jb	0f
3715	cmpq	%r11, %rsi
3716	jnb	1f
37170:	leaq	.bcmp_panic_msg(%rip), %rdi
3718	xorl	%eax, %eax
3719	call	panic
37201:
3721#endif	/* DEBUG */
3722	call	memcmp
3723	testl	%eax, %eax
3724	setne	%dl
3725	leave
3726	movzbl	%dl, %eax
3727	ret
3728	SET_SIZE(bcmp)
3729
3730#elif defined(__i386)
3731
/*
 * Offsets of the bcmp() arguments from the stack or frame pointer once one
 * register has been pushed on top of the return address.
 */
#define	ARG_S1		8
#define	ARG_S2		12
#define	ARG_LENGTH	16

	/*
	 * int bcmp(const void *s1, const void *s2, size_t count)
	 *
	 * Returns 0 when the two buffers hold the same bytes (or are the
	 * same buffer, or count is 0) and 1 otherwise.  Compares a 32-bit
	 * word at a time while at least four bytes remain, then finishes,
	 * or pins down a mismatching word, byte by byte.  DEBUG kernels
	 * panic unless both pointers are at or above kernelbase.
	 *
	 *	%eax = cursor in s1	%ecx = cursor in s2
	 *	%edi = bytes remaining	%edx = scratch
	 */
	ENTRY(bcmp)
#ifdef DEBUG
	pushl   %ebp
	movl    %esp, %ebp
	movl    kernelbase, %eax
	cmpl    %eax, ARG_S1(%ebp)
	jb	0f
	cmpl    %eax, ARG_S2(%ebp)
	jae	1f
0:	pushl   $.bcmp_panic_msg
	call    panic
1:	popl    %ebp
#endif	/* DEBUG */

	pushl	%edi			/ callee-saved, used as the counter
	movl	ARG_S1(%esp), %eax
	movl	ARG_S2(%esp), %ecx
	cmpl	%eax, %ecx		/ same buffer?
	je	.bcmp_same
	movl	ARG_LENGTH(%esp), %edi
	cmpl	$4, %edi
	jb	.bcmp_tail		/ fewer than four bytes in total
	.align	4
.bcmp_words:
	movl	(%ecx), %edx
	subl	$4, %edi		/ count this word as consumed
	cmpl	(%eax), %edx
	jne	.bcmp_word_differs
	addl	$4, %ecx
	addl	$4, %eax
	cmpl	$4, %edi
	jae	.bcmp_words		/ another full word remains
.bcmp_tail:
	testl	%edi, %edi
	jnz	.bcmp_bytes		/ one to three bytes left over
.bcmp_same:
	xorl	%eax, %eax		/ return (0)
	popl	%edi
	ret
.bcmp_word_differs:
	addl	$4, %edi		/ undo the count for this word
	.align	4
.bcmp_bytes:
	movb	(%ecx), %dl
	cmpb	%dl, (%eax)
	jne	.bcmp_differ
	incl	%ecx
	incl	%eax
	decl	%edi
	jnz	.bcmp_bytes
	jmp	.bcmp_same		/ ran out of bytes without a mismatch
	.align	4
.bcmp_differ:
	movl	$1, %eax		/ return (1)
	popl	%edi
	ret
	SET_SIZE(bcmp)
3793
3794#endif	/* __i386 */
3795
3796#ifdef DEBUG
3797	.text
3798.bcmp_panic_msg:
3799	.string "bcmp: arguments below kernelbase"
3800#endif	/* DEBUG */
3801
3802#endif	/* __lint */
3803