1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22/*
23 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27/*
28 *  Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.
29 *  Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T
30 *    All Rights Reserved
31 */
32
33#pragma ident	"%Z%%M%	%I%	%E% SMI"
34
35/*
36 * General assembly language routines.
37 * It is the intent of this file to contain routines that are
38 * independent of the specific kernel architecture, and those that are
39 * common across kernel architectures.
40 * As architectures diverge, and implementations of specific
41 * architecture-dependent routines change, the routines should be moved
42 * from this file into the respective ../`arch -k`/subr.s file.
43 */
44
45#include <sys/asm_linkage.h>
46#include <sys/asm_misc.h>
47#include <sys/panic.h>
48#include <sys/ontrap.h>
49#include <sys/regset.h>
50#include <sys/privregs.h>
51#include <sys/reboot.h>
52#include <sys/psw.h>
53#include <sys/x86_archext.h>
54
55#if defined(__lint)
56#include <sys/types.h>
57#include <sys/systm.h>
58#include <sys/thread.h>
59#include <sys/archsystm.h>
60#include <sys/byteorder.h>
61#include <sys/dtrace.h>
62#else	/* __lint */
63#include "assym.h"
64#endif	/* __lint */
65#include <sys/dditypes.h>
66
67/*
68 * on_fault()
69 * Catch lofault faults. Like setjmp, except it returns one
70 * if the code that follows causes an uncorrectable fault. Turned off
71 * by calling no_fault().
72 */
73
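/*
 * Rough usage sketch from C (illustrative only; the error code and the
 * operation that may fault are placeholders, not taken from this file):
 *
 *	label_t ljb;
 *
 *	if (on_fault(&ljb)) {
 *		... we arrive here, with on_fault() appearing to return 1,
 *		... if the code below takes an uncorrectable fault
 *		no_fault();
 *		return (EFAULT);
 *	}
 *	... code that may fault goes here
 *	no_fault();
 */
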
74#if defined(__lint)
75
76/* ARGSUSED */
77int
78on_fault(label_t *ljb)
79{ return (0); }
80
81void
82no_fault(void)
83{}
84
85#else	/* __lint */
86
87#if defined(__amd64)
88
89	ENTRY(on_fault)
90	movq	%gs:CPU_THREAD, %rsi
91	leaq	catch_fault(%rip), %rdx
92	movq	%rdi, T_ONFAULT(%rsi)		/* jumpbuf in t_onfault */
93	movq	%rdx, T_LOFAULT(%rsi)		/* catch_fault in t_lofault */
94	jmp	setjmp				/* let setjmp do the rest */
95
96catch_fault:
97	movq	%gs:CPU_THREAD, %rsi
98	movq	T_ONFAULT(%rsi), %rdi		/* address of save area */
99	xorl	%eax, %eax
100	movq	%rax, T_ONFAULT(%rsi)		/* turn off onfault */
101	movq	%rax, T_LOFAULT(%rsi)		/* turn off lofault */
102	jmp	longjmp				/* let longjmp do the rest */
103	SET_SIZE(on_fault)
104
105	ENTRY(no_fault)
106	movq	%gs:CPU_THREAD, %rsi
107	xorl	%eax, %eax
108	movq	%rax, T_ONFAULT(%rsi)		/* turn off onfault */
109	movq	%rax, T_LOFAULT(%rsi)		/* turn off lofault */
110	ret
111	SET_SIZE(no_fault)
112
113#elif defined(__i386)
114
115	ENTRY(on_fault)
116	movl	%gs:CPU_THREAD, %edx
117	movl	4(%esp), %eax			/* jumpbuf address */
118	leal	catch_fault, %ecx
119	movl	%eax, T_ONFAULT(%edx)		/* jumpbuf in t_onfault */
120	movl	%ecx, T_LOFAULT(%edx)		/* catch_fault in t_lofault */
121	jmp	setjmp				/* let setjmp do the rest */
122
123catch_fault:
124	movl	%gs:CPU_THREAD, %edx
125	xorl	%eax, %eax
126	movl	T_ONFAULT(%edx), %ecx		/* address of save area */
127	movl	%eax, T_ONFAULT(%edx)		/* turn off onfault */
128	movl	%eax, T_LOFAULT(%edx)		/* turn off lofault */
129	pushl	%ecx
130	call	longjmp				/* let longjmp do the rest */
131	SET_SIZE(on_fault)
132
133	ENTRY(no_fault)
134	movl	%gs:CPU_THREAD, %edx
135	xorl	%eax, %eax
136	movl	%eax, T_ONFAULT(%edx)		/* turn off onfault */
137	movl	%eax, T_LOFAULT(%edx)		/* turn off lofault */
138	ret
139	SET_SIZE(no_fault)
140
141#endif	/* __i386 */
142#endif	/* __lint */
143
144/*
145 * Default trampoline code for on_trap() (see <sys/ontrap.h>).  We just
146 * do a longjmp(&curthread->t_ontrap->ot_jmpbuf) if this is ever called.
147 */
148
149#if defined(lint)
150
151void
152on_trap_trampoline(void)
153{}
154
155#else	/* __lint */
156
157#if defined(__amd64)
158
159	ENTRY(on_trap_trampoline)
160	movq	%gs:CPU_THREAD, %rsi
161	movq	T_ONTRAP(%rsi), %rdi
162	addq	$OT_JMPBUF, %rdi
163	jmp	longjmp
164	SET_SIZE(on_trap_trampoline)
165
166#elif defined(__i386)
167
168	ENTRY(on_trap_trampoline)
169	movl	%gs:CPU_THREAD, %eax
170	movl	T_ONTRAP(%eax), %eax
171	addl	$OT_JMPBUF, %eax
172	pushl	%eax
173	call	longjmp
174	SET_SIZE(on_trap_trampoline)
175
176#endif	/* __i386 */
177#endif	/* __lint */
178
179/*
180 * Push a new element on to the t_ontrap stack.  Refer to <sys/ontrap.h> for
181 * more information about the on_trap() mechanism.  If the on_trap_data is the
182 * same as the topmost stack element, we just modify that element.
183 */
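/*
 * Expressed in rough C (a sketch assembled from the annotations in the
 * assembly below, not an authoritative definition):
 *
 *	otp->ot_prot = prot;
 *	otp->ot_trap = 0;
 *	otp->ot_trampoline = (uintptr_t)&on_trap_trampoline;
 *	otp->ot_handle = NULL;
 *	otp->ot_pad1 = NULL;
 *	if (curthread->t_ontrap != otp) {
 *		otp->ot_prev = curthread->t_ontrap;
 *		curthread->t_ontrap = otp;
 *	}
 *	return (setjmp(&otp->ot_jmpbuf));
 */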
184#if defined(lint)
185
186/*ARGSUSED*/
187int
188on_trap(on_trap_data_t *otp, uint_t prot)
189{ return (0); }
190
191#else	/* __lint */
192
193#if defined(__amd64)
194
195	ENTRY(on_trap)
196	movw	%si, OT_PROT(%rdi)		/* ot_prot = prot */
197	movw	$0, OT_TRAP(%rdi)		/* ot_trap = 0 */
198	leaq	on_trap_trampoline(%rip), %rdx	/* rdx = &on_trap_trampoline */
199	movq	%rdx, OT_TRAMPOLINE(%rdi)	/* ot_trampoline = rdx */
200	xorl	%ecx, %ecx
201	movq	%rcx, OT_HANDLE(%rdi)		/* ot_handle = NULL */
202	movq	%rcx, OT_PAD1(%rdi)		/* ot_pad1 = NULL */
203	movq	%gs:CPU_THREAD, %rdx		/* rdx = curthread */
204	movq	T_ONTRAP(%rdx), %rcx		/* rcx = curthread->t_ontrap */
205	cmpq	%rdi, %rcx			/* if (otp == %rcx)	*/
206	je	0f				/*	don't modify t_ontrap */
207
208	movq	%rcx, OT_PREV(%rdi)		/* ot_prev = t_ontrap */
209	movq	%rdi, T_ONTRAP(%rdx)		/* curthread->t_ontrap = otp */
210
2110:	addq	$OT_JMPBUF, %rdi		/* &ot_jmpbuf */
212	jmp	setjmp
213	SET_SIZE(on_trap)
214
215#elif defined(__i386)
216
217	ENTRY(on_trap)
218	movl	4(%esp), %eax			/* %eax = otp */
219	movl	8(%esp), %edx			/* %edx = prot */
220
221	movw	%dx, OT_PROT(%eax)		/* ot_prot = prot */
222	movw	$0, OT_TRAP(%eax)		/* ot_trap = 0 */
223	leal	on_trap_trampoline, %edx	/* %edx = &on_trap_trampoline */
224	movl	%edx, OT_TRAMPOLINE(%eax)	/* ot_trampoline = %edx */
225	movl	$0, OT_HANDLE(%eax)		/* ot_handle = NULL */
226	movl	$0, OT_PAD1(%eax)		/* ot_pad1 = NULL */
227	movl	%gs:CPU_THREAD, %edx		/* %edx = curthread */
228	movl	T_ONTRAP(%edx), %ecx		/* %ecx = curthread->t_ontrap */
229	cmpl	%eax, %ecx			/* if (otp == %ecx) */
230	je	0f				/*    don't modify t_ontrap */
231
232	movl	%ecx, OT_PREV(%eax)		/* ot_prev = t_ontrap */
233	movl	%eax, T_ONTRAP(%edx)		/* curthread->t_ontrap = otp */
234
2350:	addl	$OT_JMPBUF, %eax		/* %eax = &ot_jmpbuf */
236	movl	%eax, 4(%esp)			/* put %eax back on the stack */
237	jmp	setjmp				/* let setjmp do the rest */
238	SET_SIZE(on_trap)
239
240#endif	/* __i386 */
241#endif	/* __lint */
242
243/*
244 * Setjmp and longjmp implement non-local gotos using state vectors
245 * of type label_t.
246 */
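/*
 * Usage sketch (these are the kernel-internal versions; a label_t holds
 * the callee-saved registers, the stack pointer and the return pc):
 *
 *	label_t jb;
 *
 *	if (setjmp(&jb) == 0) {
 *		... first return: context saved in jb
 *	} else {
 *		... control arrives here via longjmp(&jb), which makes
 *		... setjmp appear to return 1
 *	}
 */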
247
248#if defined(__lint)
249
250/* ARGSUSED */
251int
252setjmp(label_t *lp)
253{ return (0); }
254
255/* ARGSUSED */
256void
257longjmp(label_t *lp)
258{}
259
260#else	/* __lint */
261
262#if LABEL_PC != 0
263#error LABEL_PC MUST be defined as 0 for setjmp/longjmp to work as coded
264#endif	/* LABEL_PC != 0 */
265
266#if defined(__amd64)
267
268	ENTRY(setjmp)
269	movq	%rsp, LABEL_SP(%rdi)
270	movq	%rbp, LABEL_RBP(%rdi)
271	movq	%rbx, LABEL_RBX(%rdi)
272	movq	%r12, LABEL_R12(%rdi)
273	movq	%r13, LABEL_R13(%rdi)
274	movq	%r14, LABEL_R14(%rdi)
275	movq	%r15, LABEL_R15(%rdi)
276	movq	(%rsp), %rdx		/* return address */
277	movq	%rdx, (%rdi)		/* LABEL_PC is 0 */
278	xorl	%eax, %eax		/* return 0 */
279	ret
280	SET_SIZE(setjmp)
281
282	ENTRY(longjmp)
283	movq	LABEL_SP(%rdi), %rsp
284	movq	LABEL_RBP(%rdi), %rbp
285	movq	LABEL_RBX(%rdi), %rbx
286	movq	LABEL_R12(%rdi), %r12
287	movq	LABEL_R13(%rdi), %r13
288	movq	LABEL_R14(%rdi), %r14
289	movq	LABEL_R15(%rdi), %r15
290	movq	(%rdi), %rdx		/* return address; LABEL_PC is 0 */
291	movq	%rdx, (%rsp)
292	xorl	%eax, %eax
293	incl	%eax			/* return 1 */
294	ret
295	SET_SIZE(longjmp)
296
297#elif defined(__i386)
298
299	ENTRY(setjmp)
300	movl	4(%esp), %edx		/* address of save area */
301	movl	%ebp, LABEL_EBP(%edx)
302	movl	%ebx, LABEL_EBX(%edx)
303	movl	%esi, LABEL_ESI(%edx)
304	movl	%edi, LABEL_EDI(%edx)
305	movl	%esp, 4(%edx)
306	movl	(%esp), %ecx		/* %eip (return address) */
307	movl	%ecx, (%edx)		/* LABEL_PC is 0 */
308	subl	%eax, %eax		/* return 0 */
309	ret
310	SET_SIZE(setjmp)
311
312	ENTRY(longjmp)
313	movl	4(%esp), %edx		/* address of save area */
314	movl	LABEL_EBP(%edx), %ebp
315	movl	LABEL_EBX(%edx), %ebx
316	movl	LABEL_ESI(%edx), %esi
317	movl	LABEL_EDI(%edx), %edi
318	movl	4(%edx), %esp
319	movl	(%edx), %ecx		/* %eip (return addr); LABEL_PC is 0 */
320	movl	$1, %eax
321	addl	$4, %esp		/* pop ret adr */
322	jmp	*%ecx			/* indirect */
323	SET_SIZE(longjmp)
324
325#endif	/* __i386 */
326#endif	/* __lint */
327
328/*
329 * If a() calls b(), which calls caller(),
330 * caller() returns the return address in a().
331 * (Note: We assume a() and b() are C routines which do the normal entry/exit
332 *  sequence.)
333 */
334
335#if defined(__lint)
336
337caddr_t
338caller(void)
339{ return (0); }
340
341#else	/* __lint */
342
343#if defined(__amd64)
344
345	ENTRY(caller)
346	movq	8(%rbp), %rax		/* b()'s return pc, in a() */
347	ret
348	SET_SIZE(caller)
349
350#elif defined(__i386)
351
352	ENTRY(caller)
353	movl	4(%ebp), %eax		/* b()'s return pc, in a() */
354	ret
355	SET_SIZE(caller)
356
357#endif	/* __i386 */
358#endif	/* __lint */
359
360/*
361 * If a() calls callee(), callee() returns the
362 * return address in a().
363 */
364
365#if defined(__lint)
366
367caddr_t
368callee(void)
369{ return (0); }
370
371#else	/* __lint */
372
373#if defined(__amd64)
374
375	ENTRY(callee)
376	movq	(%rsp), %rax		/* callee()'s return pc, in a() */
377	ret
378	SET_SIZE(callee)
379
380#elif defined(__i386)
381
382	ENTRY(callee)
383	movl	(%esp), %eax		/* callee()'s return pc, in a() */
384	ret
385	SET_SIZE(callee)
386
387#endif	/* __i386 */
388#endif	/* __lint */
389
390/*
391 * return the current frame pointer
392 */
393
394#if defined(__lint)
395
396greg_t
397getfp(void)
398{ return (0); }
399
400#else	/* __lint */
401
402#if defined(__amd64)
403
404	ENTRY(getfp)
405	movq	%rbp, %rax
406	ret
407	SET_SIZE(getfp)
408
409#elif defined(__i386)
410
411	ENTRY(getfp)
412	movl	%ebp, %eax
413	ret
414	SET_SIZE(getfp)
415
416#endif	/* __i386 */
417#endif	/* __lint */
418
419/*
420 * Invalidate a single page table entry in the TLB
421 */
422
423#if defined(__lint)
424
425/* ARGSUSED */
426void
427mmu_tlbflush_entry(caddr_t m)
428{}
429
430#else	/* __lint */
431
432#if defined(__amd64)
433
434	ENTRY(mmu_tlbflush_entry)
435	invlpg	(%rdi)
436	ret
437	SET_SIZE(mmu_tlbflush_entry)
438
439#elif defined(__i386)
440
441	ENTRY(mmu_tlbflush_entry)
442	movl	4(%esp), %eax
443	invlpg	(%eax)
444	ret
445	SET_SIZE(mmu_tlbflush_entry)
446
447#endif	/* __i386 */
448#endif	/* __lint */
449
450
451/*
452 * Get/Set the value of various control registers
453 */
454
455#if defined(__lint)
456
457ulong_t
458getcr0(void)
459{ return (0); }
460
461/* ARGSUSED */
462void
463setcr0(ulong_t value)
464{}
465
466ulong_t
467getcr2(void)
468{ return (0); }
469
470ulong_t
471getcr3(void)
472{ return (0); }
473
474/* ARGSUSED */
475void
476setcr3(ulong_t val)
477{}
478
479void
480reload_cr3(void)
481{}
482
483ulong_t
484getcr4(void)
485{ return (0); }
486
487/* ARGSUSED */
488void
489setcr4(ulong_t val)
490{}
491
492#if defined(__amd64)
493
494ulong_t
495getcr8(void)
496{ return (0); }
497
498/* ARGSUSED */
499void
500setcr8(ulong_t val)
501{}
502
503#endif	/* __amd64 */
504
505#else	/* __lint */
506
507#if defined(__amd64)
508
509	ENTRY(getcr0)
510	movq	%cr0, %rax
511	ret
512	SET_SIZE(getcr0)
513
514	ENTRY(setcr0)
515	movq	%rdi, %cr0
516	ret
517	SET_SIZE(setcr0)
518
519	ENTRY(getcr2)
520	movq	%cr2, %rax
521	ret
522	SET_SIZE(getcr2)
523
524	ENTRY(getcr3)
525	movq	%cr3, %rax
526	ret
527	SET_SIZE(getcr3)
528
529	ENTRY(setcr3)
530	movq	%rdi, %cr3
531	ret
532	SET_SIZE(setcr3)
533
534	ENTRY(reload_cr3)
535	movq	%cr3, %rdi
536	movq	%rdi, %cr3
537	ret
538	SET_SIZE(reload_cr3)
539
540	ENTRY(getcr4)
541	movq	%cr4, %rax
542	ret
543	SET_SIZE(getcr4)
544
545	ENTRY(setcr4)
546	movq	%rdi, %cr4
547	ret
548	SET_SIZE(setcr4)
549
550	ENTRY(getcr8)
551	movq	%cr8, %rax
552	ret
553	SET_SIZE(getcr8)
554
555	ENTRY(setcr8)
556	movq	%rdi, %cr8
557	ret
558	SET_SIZE(setcr8)
559
560#elif defined(__i386)
561
562	ENTRY(getcr0)
563	movl	%cr0, %eax
564	ret
565	SET_SIZE(getcr0)
566
567	ENTRY(setcr0)
568	movl	4(%esp), %eax
569	movl	%eax, %cr0
570	ret
571	SET_SIZE(setcr0)
572
573	ENTRY(getcr2)
574	movl	%cr2, %eax
575	ret
576	SET_SIZE(getcr2)
577
578	ENTRY(getcr3)
579	movl	%cr3, %eax
580	ret
581	SET_SIZE(getcr3)
582
583	ENTRY(setcr3)
584	movl	4(%esp), %eax
585	movl	%eax, %cr3
586	ret
587	SET_SIZE(setcr3)
588
589	ENTRY(reload_cr3)
590	movl	%cr3, %eax
591	movl	%eax, %cr3
592	ret
593	SET_SIZE(reload_cr3)
594
595	ENTRY(getcr4)
596	movl	%cr4, %eax
597	ret
598	SET_SIZE(getcr4)
599
600	ENTRY(setcr4)
601	movl	4(%esp), %eax
602	movl	%eax, %cr4
603	ret
604	SET_SIZE(setcr4)
605
606#endif	/* __i386 */
607#endif	/* __lint */
608
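/*
 * __cpuid_insn(eax, ebxp, ecxp, edxp) executes the cpuid instruction with
 * %eax set to the requested function, stores the resulting %ebx, %ecx and
 * %edx through the supplied pointers and returns the new %eax.  A usage
 * sketch (leaf 0 is the architectural vendor-string leaf; the variable
 * names are illustrative):
 *
 *	uint32_t regs[4];
 *	regs[0] = __cpuid_insn(0, &regs[1], &regs[2], &regs[3]);
 *	... regs[0] is now the maximum supported standard leaf, and
 *	... regs[1], regs[3], regs[2] spell the 12-byte vendor string
 */
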
609#if defined(__lint)
610
611/*ARGSUSED*/
612uint32_t
613__cpuid_insn(uint32_t eax, uint32_t *ebxp, uint32_t *ecxp, uint32_t *edxp)
614{ return (0); }
615
616#else	/* __lint */
617
618#if defined(__amd64)
619
620	ENTRY(__cpuid_insn)
621	movq	%rbx, %r11
622	movq	%rdx, %r8	/* r8 = ecxp */
623	movq	%rcx, %r9	/* r9 = edxp */
624	movl	%edi, %eax
625	cpuid
626	movl	%ebx, (%rsi)
627	movl	%ecx, (%r8)
628	movl	%edx, (%r9)
629	movq	%r11, %rbx
630	ret
631	SET_SIZE(__cpuid_insn)
632
633#elif defined(__i386)
634
635	ENTRY(__cpuid_insn)
636	pushl	%ebp
637	movl	%esp, %ebp
638	pushl	%ebx
639	movl	8(%ebp), %eax
640	cpuid
641	pushl	%eax
642	movl	0x0c(%ebp), %eax
643	movl	%ebx, (%eax)
644	movl	0x10(%ebp), %eax
645	movl	%ecx, (%eax)
646	movl	0x14(%ebp), %eax
647	movl	%edx, (%eax)
648	popl	%eax
649	popl	%ebx
650	popl	%ebp
651	ret
652	SET_SIZE(__cpuid_insn)
653
654#endif	/* __i386 */
655#endif	/* __lint */
656
657/*
658 * Insert entryp after predp in a doubly linked list.
659 */
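/*
 * Element layout assumed by _insque() and _remque() (a sketch; the real
 * offsets come from assym, and the member names here are illustrative):
 *
 *	struct qelem {
 *		struct qelem	*q_forw;	... at offset 0
 *		struct qelem	*q_back;	... at offset CPTRSIZE
 *	};
 */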
660
661#if defined(__lint)
662
663/*ARGSUSED*/
664void
665_insque(caddr_t entryp, caddr_t predp)
666{}
667
668#else	/* __lint */
669
670#if defined(__amd64)
671
672	ENTRY(_insque)
673	movq	(%rsi), %rax		/* predp->forw 			*/
674	movq	%rsi, CPTRSIZE(%rdi)	/* entryp->back = predp		*/
675	movq	%rax, (%rdi)		/* entryp->forw = predp->forw	*/
676	movq	%rdi, (%rsi)		/* predp->forw = entryp		*/
677	movq	%rdi, CPTRSIZE(%rax)	/* predp->forw->back = entryp	*/
678	ret
679	SET_SIZE(_insque)
680
681#elif defined(__i386)
682
683	ENTRY(_insque)
684	movl	8(%esp), %edx
685	movl	4(%esp), %ecx
686	movl	(%edx), %eax		/* predp->forw			*/
687	movl	%edx, CPTRSIZE(%ecx)	/* entryp->back = predp		*/
688	movl	%eax, (%ecx)		/* entryp->forw = predp->forw	*/
689	movl	%ecx, (%edx)		/* predp->forw = entryp		*/
690	movl	%ecx, CPTRSIZE(%eax)	/* predp->forw->back = entryp	*/
691	ret
692	SET_SIZE(_insque)
693
694#endif	/* __i386 */
695#endif	/* __lint */
696
697/*
698 * Remove entryp from a doubly linked list
699 */
700
701#if defined(__lint)
702
703/*ARGSUSED*/
704void
705_remque(caddr_t entryp)
706{}
707
708#else	/* __lint */
709
710#if defined(__amd64)
711
712	ENTRY(_remque)
713	movq	(%rdi), %rax		/* entry->forw */
714	movq	CPTRSIZE(%rdi), %rdx	/* entry->back */
715	movq	%rax, (%rdx)		/* entry->back->forw = entry->forw */
716	movq	%rdx, CPTRSIZE(%rax)	/* entry->forw->back = entry->back */
717	ret
718	SET_SIZE(_remque)
719
720#elif defined(__i386)
721
722	ENTRY(_remque)
723	movl	4(%esp), %ecx
724	movl	(%ecx), %eax		/* entry->forw */
725	movl	CPTRSIZE(%ecx), %edx	/* entry->back */
726	movl	%eax, (%edx)		/* entry->back->forw = entry->forw */
727	movl	%edx, CPTRSIZE(%eax)	/* entry->forw->back = entry->back */
728	ret
729	SET_SIZE(_remque)
730
731#endif	/* __i386 */
732#endif	/* __lint */
733
734/*
735 * Returns the number of
736 * non-null bytes in the string argument.
737 */
738
739#if defined(__lint)
740
741/* ARGSUSED */
742size_t
743strlen(const char *str)
744{ return (0); }
745
746#else	/* __lint */
747
748#if defined(__amd64)
749
750/*
751 * This is close to a simple transliteration of a C version of this
752 * routine.  We should either just -make- this be a C version, or
753 * justify having it in assembler by making it significantly faster.
754 *
755 * size_t
756 * strlen(const char *s)
757 * {
758 *	const char *s0;
759 * #if defined(DEBUG)
760 *	if ((uintptr_t)s < KERNELBASE)
761 *		panic(.str_panic_msg);
762 * #endif
763 *	for (s0 = s; *s; s++)
764 *		;
765 *	return (s - s0);
766 * }
767 */
768
769	ENTRY(strlen)
770#ifdef DEBUG
771	movq	kernelbase(%rip), %rax
772	cmpq	%rax, %rdi
773	jae	str_valid
774	pushq	%rbp
775	movq	%rsp, %rbp
776	leaq	.str_panic_msg(%rip), %rdi
777	xorl	%eax, %eax
778	call	panic
779#endif	/* DEBUG */
780str_valid:
781	cmpb	$0, (%rdi)
782	movq	%rdi, %rax
783	je	.null_found
784	.align	4
785.strlen_loop:
786	incq	%rdi
787	cmpb	$0, (%rdi)
788	jne	.strlen_loop
789.null_found:
790	subq	%rax, %rdi
791	movq	%rdi, %rax
792	ret
793	SET_SIZE(strlen)
794
795#elif defined(__i386)
796
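/*
 * The aligned loop below locates the terminating null a word at a time.
 * For each byte b of the word, the high bit of (((b & 0x7f) + 0x7f) | b)
 * is set unless b is zero, so after masking with 0x80808080 the result
 * equals 0x80808080 exactly when the word contains no null byte.
 */
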
797	ENTRY(strlen)
798#ifdef DEBUG
799	movl	kernelbase, %eax
800	cmpl	%eax, 4(%esp)
801	jae	str_valid
802	pushl	%ebp
803	movl	%esp, %ebp
804	pushl	$.str_panic_msg
805	call	panic
806#endif /* DEBUG */
807
808str_valid:
809	movl	4(%esp), %eax		/* %eax = string address */
810	testl	$3, %eax		/* if %eax not word aligned */
811	jnz	.not_word_aligned	/* goto .not_word_aligned */
812	.align	4
813.word_aligned:
814	movl	(%eax), %edx		/* move 1 word from (%eax) to %edx */
815	movl	$0x7f7f7f7f, %ecx
816	andl	%edx, %ecx		/* %ecx = %edx & 0x7f7f7f7f */
817	addl	$4, %eax		/* next word */
818	addl	$0x7f7f7f7f, %ecx	/* %ecx += 0x7f7f7f7f */
819	orl	%edx, %ecx		/* %ecx |= %edx */
820	andl	$0x80808080, %ecx	/* %ecx &= 0x80808080 */
821	cmpl	$0x80808080, %ecx	/* if no null byte in this word */
822	je	.word_aligned		/* goto .word_aligned */
823	subl	$4, %eax		/* post-incremented */
824.not_word_aligned:
825	cmpb	$0, (%eax)		/* if a byte in (%eax) is null */
826	je	.null_found		/* goto .null_found */
827	incl	%eax			/* next byte */
828	testl	$3, %eax		/* if %eax not word aligned */
829	jnz	.not_word_aligned	/* goto .not_word_aligned */
830	jmp	.word_aligned		/* goto .word_aligned */
831	.align	4
832.null_found:
833	subl	4(%esp), %eax		/* %eax -= string address */
834	ret
835	SET_SIZE(strlen)
836
837#endif	/* __i386 */
838
839#ifdef DEBUG
840	.text
841.str_panic_msg:
842	.string "strlen: argument below kernelbase"
843#endif /* DEBUG */
844
845#endif	/* __lint */
846
847	/*
848	 * Berkeley 4.3 introduced symbolically named interrupt levels
849	 * as a way to deal with priority in a machine independent fashion.
850	 * Numbered priorities are machine specific, and should be
851	 * discouraged where possible.
852	 *
853	 * Note, for the machine specific priorities there are
854	 * examples listed for devices that use a particular priority.
855	 * It should not be construed that all devices of that
856	 * type should be at that priority.  It is simply where
857	 * the current devices fit into the priority scheme based
858	 * upon time criticality.
859	 *
860	 * The underlying assumption of these assignments is that
861	 * IPL 10 is the highest level from which a device
862	 * routine can call wakeup.  Devices that interrupt from higher
863	 * levels are restricted in what they can do.  If they need
864	 * kernel services they should schedule a routine at a lower
865	 * level (via software interrupt) to do the required
866	 * processing.
867	 *
868	 * Examples of this higher usage:
869	 *	Level	Usage
870	 *	14	Profiling clock (and PROM uart polling clock)
871	 *	12	Serial ports
872	 *
873	 * The serial ports request lower level processing on level 6.
874	 *
875	 * Also, almost all splN routines (where N is a number or a
876	 * mnemonic) will do a RAISE(), on the assumption that they are
877	 * never used to lower our priority.
878	 * The exceptions are:
879	 *	spl8()		Because you can't be above 15 to begin with!
880	 *	splzs()		Because this is used at boot time to lower our
881	 *			priority, to allow the PROM to poll the uart.
882	 *	spl0()		Used to lower priority to 0.
883	 */
884
885#if defined(__lint)
886
887int spl0(void)		{ return (0); }
888int spl6(void)		{ return (0); }
889int spl7(void)		{ return (0); }
890int spl8(void)		{ return (0); }
891int splhigh(void)	{ return (0); }
892int splhi(void)		{ return (0); }
893int splzs(void)		{ return (0); }
894
895#else	/* __lint */
896
897/* reg = cpu->cpu_m.cpu_pri; */
898#define	GETIPL_NOGS(reg, cpup)	\
899	movl	CPU_PRI(cpup), reg;
900
901/* cpu->cpu_m.cpu_pri = val; */
902#define	SETIPL_NOGS(val, cpup)	\
903	movl	val, CPU_PRI(cpup);
904
905/* reg = cpu->cpu_m.cpu_pri; */
906#define	GETIPL(reg)	\
907	movl	%gs:CPU_PRI, reg;
908
909/* cpu->cpu_m.cpu_pri = val; */
910#define	SETIPL(val)	\
911	movl	val, %gs:CPU_PRI;
912
913/*
914 * Macro to raise processor priority level.
915 * Avoid dropping processor priority if already at high level.
916 * Also avoid going below CPU->cpu_base_spl, which may have just been set
917 * by a higher-level interrupt thread that blocked.
918 */
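/*
 * In rough C terms, RAISE(level) behaves like this sketch (spl, defined
 * below, clamps to cpu_base_spl and returns the previous ipl):
 *
 *	old = CPU->cpu_pri;
 *	if (level > old)
 *		old = spl(level);
 *	return (old);
 */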
919#if defined(__amd64)
920
921#define	RAISE(level) \
922	cli;			\
923	LOADCPU(%rcx);		\
924	movl	$/**/level, %edi;\
925	GETIPL_NOGS(%eax, %rcx);\
926	cmpl 	%eax, %edi;	\
927	jg	spl;		\
928	jmp	setsplhisti
929
930#elif defined(__i386)
931
932#define	RAISE(level) \
933	cli;			\
934	LOADCPU(%ecx);		\
935	movl	$/**/level, %edx;\
936	GETIPL_NOGS(%eax, %ecx);\
937	cmpl 	%eax, %edx;	\
938	jg	spl;		\
939	jmp	setsplhisti
940
941#endif	/* __i386 */
942
943/*
944 * Macro to set the priority to a specified level.
945 * Avoid dropping the priority below CPU->cpu_base_spl.
946 */
947#if defined(__amd64)
948
949#define	SETPRI(level) \
950	cli;				\
951	LOADCPU(%rcx);			\
952	movl	$/**/level, %edi;	\
953	jmp	spl
954
955#elif defined(__i386)
956
957#define SETPRI(level) \
958	cli;				\
959	LOADCPU(%ecx);			\
960	movl	$/**/level, %edx;	\
961	jmp	spl
962
963#endif	/* __i386 */
964
965	/* locks out all interrupts, including memory errors */
966	ENTRY(spl8)
967	SETPRI(15)
968	SET_SIZE(spl8)
969
970	/* just below the level that profiling runs */
971	ENTRY(spl7)
972	RAISE(13)
973	SET_SIZE(spl7)
974
975	/* sun specific - highest priority onboard serial i/o asy ports */
976	ENTRY(splzs)
977	SETPRI(12)	/* Can't be a RAISE, as it's used to lower us */
978	SET_SIZE(splzs)
979
980	/*
981	 * should lock out clocks and all interrupts,
982	 * as you can see, there are exceptions
983	 */
984
985#if defined(__amd64)
986
987	.align	16
988	ENTRY(splhi)
989	ALTENTRY(splhigh)
990	ALTENTRY(spl6)
991	ALTENTRY(i_ddi_splhigh)
992	cli
993	LOADCPU(%rcx)
994	movl	$DISP_LEVEL, %edi
995	movl	CPU_PRI(%rcx), %eax
996	cmpl	%eax, %edi
997	jle	setsplhisti
998	SETIPL_NOGS(%edi, %rcx)
999	/*
1000	 * If we aren't using cr8 to control ipl then we patch this
1001	 * with a jump to slow_setsplhi
1002	 */
1003	ALTENTRY(setsplhi_patch)
1004	movq	CPU_PRI_DATA(%rcx), %r11 /* get pri data ptr */
1005	movzb	(%r11, %rdi, 1), %rdx	/* get apic mask for this ipl */
1006	movq	%rdx, %cr8		/* set new apic priority */
1007	/*
1008	 * enable interrupts
1009	 */
1010setsplhisti:
1011	nop	/* patch this to a sti when a proper setspl routine appears */
1012	ret
1013
1014	ALTENTRY(slow_setsplhi)
1015	pushq	%rbp
1016	movq	%rsp, %rbp
1017	subq	$16, %rsp
1018	movl	%eax, -4(%rbp)		/* save old ipl */
1019	call	*setspl(%rip)
1020	movl	-4(%rbp), %eax		/* return old ipl */
1021	leave
1022	jmp	setsplhisti
1023
1024	SET_SIZE(i_ddi_splhigh)
1025	SET_SIZE(spl6)
1026	SET_SIZE(splhigh)
1027	SET_SIZE(splhi)
1028
1029#elif defined(__i386)
1030
1031	.align	16
1032	ENTRY(splhi)
1033	ALTENTRY(splhigh)
1034	ALTENTRY(spl6)
1035	ALTENTRY(i_ddi_splhigh)
1036	cli
1037	LOADCPU(%ecx)
1038	movl	$DISP_LEVEL, %edx
1039	movl	CPU_PRI(%ecx), %eax
1040	cmpl	%eax, %edx
1041	jle	setsplhisti
1042	SETIPL_NOGS(%edx, %ecx)		/* set new ipl */
1043
1044	pushl   %eax                    /* save old ipl */
1045	pushl	%edx			/* pass new ipl */
1046	call	*setspl
1047	popl	%ecx			/* dummy pop */
1048	popl    %eax                    /* return old ipl */
1049	/*
1050	 * enable interrupts
1051	 *
1052	 * (we patch this to an sti once a proper setspl routine
1053	 * is installed)
1054	 */
1055setsplhisti:
1056	nop	/* patch this to a sti when a proper setspl routine appears */
1057	ret
1058	SET_SIZE(i_ddi_splhigh)
1059	SET_SIZE(spl6)
1060	SET_SIZE(splhigh)
1061	SET_SIZE(splhi)
1062
1063#endif	/* __i386 */
1064
1065	/* allow all interrupts */
1066	ENTRY(spl0)
1067	SETPRI(0)
1068	SET_SIZE(spl0)
1069
1070#endif	/* __lint */
1071
1072/*
1073 * splr is like splx but will only raise the priority and never drop it
1074 */
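/*
 * Sketch: splr(n) is the RAISE() macro with a run-time argument; it
 * returns the previous ipl and never lowers it:
 *
 *	old = getpil();
 *	if (n > old)
 *		old = spl(n);
 *	return (old);
 */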
1075#if defined(__lint)
1076
1077/* ARGSUSED */
1078int
1079splr(int level)
1080{ return (0); }
1081
1082#else	/* __lint */
1083
1084#if defined(__amd64)
1085
1086	ENTRY(splr)
1087	cli
1088	LOADCPU(%rcx)
1089	GETIPL_NOGS(%eax, %rcx)
1090	cmpl	%eax, %edi		/* if new level > current level */
1091	jg	spl			/* then set ipl to new level */
1092splr_setsti:
1093	nop	/* patch this to a sti when a proper setspl routine appears */
1094	ret				/* else return the current level */
1095	SET_SIZE(splr)
1096
1097#elif defined(__i386)
1098
1099	ENTRY(splr)
1100	cli
1101	LOADCPU(%ecx)
1102	movl	4(%esp), %edx		/* get new spl level */
1103	GETIPL_NOGS(%eax, %ecx)
1104	cmpl 	%eax, %edx		/* if new level > current level */
1105	jg	spl			/* then set ipl to new level */
1106splr_setsti:
1107	nop	/* patch this to a sti when a proper setspl routine appears */
1108	ret				/* else return the current level */
1109	SET_SIZE(splr)
1110
1111#endif	/* __i386 */
1112#endif	/* __lint */
1113
1114
1115
1116/*
1117 * splx - set PIL back to that indicated by the level passed as an argument,
1118 * or to the CPU's base priority, whichever is higher.
1119 * Needs to fall through to spl to save cycles.
1120 * Algorithm for spl:
1121 *
1122 *      turn off interrupts
1123 *
1124 *	if (CPU->cpu_base_spl > newipl)
1125 *		newipl = CPU->cpu_base_spl;
1126 *      oldipl = CPU->cpu_pridata->c_ipl;
1127 *      CPU->cpu_pridata->c_ipl = newipl;
1128 *
1129 *	// indirectly call the function that sets the spl values (usually setpicmasks)
1130 *      setspl();  // load new masks into pics
1131 *
1132 * Be careful not to set the priority lower than CPU->cpu_base_spl;
1133 * even though it seems we're raising the priority, it could be set
1134 * higher at any time by an interrupt routine, so we must block interrupts
1135 * and look at CPU->cpu_base_spl.
1136 */
1137#if defined(__lint)
1138
1139/* ARGSUSED */
1140void
1141splx(int level)
1142{}
1143
1144#else	/* __lint */
1145
1146#if defined(__amd64)
1147
1148	ENTRY(splx)
1149	ALTENTRY(i_ddi_splx)
1150	cli				/* disable interrupts */
1151	LOADCPU(%rcx)
1152	/*FALLTHRU*/
1153	.align	4
1154spl:
1155	/*
1156	 * New priority level is in %edi, cpu struct pointer is in %rcx
1157	 */
1158	GETIPL_NOGS(%eax, %rcx)		/* get current ipl */
1159	cmpl   %edi, CPU_BASE_SPL(%rcx) /* if (base spl > new ipl) */
1160	ja     set_to_base_spl		/* then use base_spl */
1161
1162setprilev:
1163	SETIPL_NOGS(%edi, %rcx)		/* set new ipl */
1164	/*
1165	 * If we aren't using cr8 to control ipl then we patch this
1166	 * with a jump to slow_spl
1167	 */
1168	ALTENTRY(spl_patch)
1169	movq	CPU_PRI_DATA(%rcx), %r11 /* get pri data ptr */
1170	movzb	(%r11, %rdi, 1), %rdx	/* get apic mask for this ipl */
1171	movq	%rdx, %cr8		/* set new apic priority */
1172	xorl	%edx, %edx
1173	bsrl	CPU_SOFTINFO(%rcx), %edx /* fls(cpu->cpu_softinfo.st_pending) */
1174	cmpl	%edi, %edx		/* new ipl vs. st_pending */
1175	jle	setsplsti
1176
1177	pushq	%rbp
1178	movq	%rsp, %rbp
1179	/* stack now 16-byte aligned */
1180	pushq	%rax			/* save old spl */
1181	pushq	%rdi			/* save new ipl too */
1182	jmp	fakesoftint
1183
1184setsplsti:
1185	nop	/* patch this to a sti when a proper setspl routine appears */
1186	ret
1187
1188	ALTENTRY(slow_spl)
1189	pushq	%rbp
1190	movq	%rsp, %rbp
1191	/* stack now 16-byte aligned */
1192
1193	pushq	%rax			/* save old spl */
1194	pushq	%rdi			/* save new ipl too */
1195
1196	call	*setspl(%rip)
1197
1198	LOADCPU(%rcx)
1199	movl	CPU_SOFTINFO(%rcx), %eax
1200	orl	%eax, %eax
1201	jz	slow_setsplsti
1202
1203	bsrl	%eax, %edx		/* fls(cpu->cpu_softinfo.st_pending) */
1204	cmpl	0(%rsp), %edx		/* new ipl vs. st_pending */
1205	jg	fakesoftint
1206
1207	ALTENTRY(fakesoftint_return)
1208	/*
1209	 * enable interrupts
1210	 */
1211slow_setsplsti:
1212	nop	/* patch this to a sti when a proper setspl routine appears */
1213	popq	%rdi
1214	popq	%rax			/* return old ipl */
1215	leave
1216	ret
1217	SET_SIZE(fakesoftint_return)
1218
1219set_to_base_spl:
1220	movl	CPU_BASE_SPL(%rcx), %edi
1221	jmp	setprilev
1222	SET_SIZE(spl)
1223	SET_SIZE(i_ddi_splx)
1224	SET_SIZE(splx)
1225
1226#elif defined(__i386)
1227
1228	ENTRY(splx)
1229	ALTENTRY(i_ddi_splx)
1230	cli                             /* disable interrupts */
1231	LOADCPU(%ecx)
1232	movl	4(%esp), %edx		/* get new spl level */
1233	/*FALLTHRU*/
1234
1235	.align	4
1236	ALTENTRY(spl)
1237	/*
1238	 * New priority level is in %edx
1239	 * (doing this early to avoid an AGI in the next instruction)
1240	 */
1241	GETIPL_NOGS(%eax, %ecx)		/* get current ipl */
1242	cmpl	%edx, CPU_BASE_SPL(%ecx) /* if ( base spl > new ipl) */
1243	ja	set_to_base_spl		/* then use base_spl */
1244
1245setprilev:
1246	SETIPL_NOGS(%edx, %ecx)		/* set new ipl */
1247
1248	pushl   %eax                    /* save old ipl */
1249	pushl	%edx			/* pass new ipl */
1250	call	*setspl
1251
1252	LOADCPU(%ecx)
1253	movl	CPU_SOFTINFO(%ecx), %eax
1254	orl	%eax, %eax
1255	jz	setsplsti
1256
1257	/*
1258	 * Before dashing off, check that setsplsti has been patched.
1259	 */
1260	cmpl	$NOP_INSTR, setsplsti
1261	je	setsplsti
1262
1263	bsrl	%eax, %edx
1264	cmpl	0(%esp), %edx
1265	jg	fakesoftint
1266
1267	ALTENTRY(fakesoftint_return)
1268	/*
1269	 * enable interrupts
1270	 */
1271setsplsti:
1272	nop	/* patch this to a sti when a proper setspl routine appears */
1273	popl	%eax
1274	popl    %eax			/ return old ipl
1275	ret
1276	SET_SIZE(fakesoftint_return)
1277
1278set_to_base_spl:
1279	movl	CPU_BASE_SPL(%ecx), %edx
1280	jmp	setprilev
1281	SET_SIZE(spl)
1282	SET_SIZE(i_ddi_splx)
1283	SET_SIZE(splx)
1284
1285#endif	/* __i386 */
1286#endif	/* __lint */
1287
1288#if defined(__lint)
1289
1290void
1291install_spl(void)
1292{}
1293
1294#else	/* __lint */
1295
1296#if defined(__amd64)
1297
1298	ENTRY_NP(install_spl)
1299	movq	%cr0, %rax
1300	movq	%rax, %rdx
1301	movl	$_BITNOT(CR0_WP), %ecx
1302	movslq	%ecx, %rcx
1303	andq	%rcx, %rax		/* we don't want to take a fault */
1304	movq	%rax, %cr0
1305	jmp	1f
13061:	movb	$STI_INSTR, setsplsti(%rip)
1307	movb	$STI_INSTR, slow_setsplsti(%rip)
1308	movb	$STI_INSTR, setsplhisti(%rip)
1309	movb	$STI_INSTR, splr_setsti(%rip)
1310	testl	$1, intpri_use_cr8(%rip)	/* are we using %cr8 ? */
1311	jz	2f				/* no, go patch more */
1312	movq	%rdx, %cr0
1313	ret
13142:
1315	/*
1316	 * Patch spl functions to use slow spl method
1317	 */
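	/*
	 * Each 16-bit value stored below forms a 2-byte short jump: the
	 * low byte is the jump opcode (JMP_INSTR is assumed to be that
	 * opcode) and the high byte is the 8-bit displacement, computed
	 * as target - patch_point - 2.
	 */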
1318	leaq	setsplhi_patch(%rip), %rdi	/* get patch point addr */
1319	leaq	slow_setsplhi(%rip), %rax	/* jmp target */
1320	subq	%rdi, %rax			/* calculate jmp distance */
1321	subq	$2, %rax			/* minus size of jmp instr */
1322	shlq	$8, %rax			/* construct jmp instr */
1323	addq	$JMP_INSTR, %rax
1324	movw	%ax, setsplhi_patch(%rip)	/* patch in the jmp */
1325	leaq	spl_patch(%rip), %rdi		/* get patch point addr */
1326	leaq	slow_spl(%rip), %rax		/* jmp target */
1327	subq	%rdi, %rax			/* calculate jmp distance */
1328	subq	$2, %rax			/* minus size of jmp instr */
1329	shlq	$8, %rax			/* construct jmp instr */
1330	addq	$JMP_INSTR, %rax
1331	movw	%ax, spl_patch(%rip)		/* patch in the jmp */
1332	/*
1333	 * Ensure %cr8 is zero since we aren't using it
1334	 */
1335	xorl	%eax, %eax
1336	movq	%rax, %cr8
1337	movq	%rdx, %cr0
1338	ret
1339	SET_SIZE(install_spl)
1340
1341#elif defined(__i386)
1342
1343	ENTRY_NP(install_spl)
1344	movl	%cr0, %eax
1345	movl	%eax, %edx
1346	andl	$_BITNOT(CR0_WP), %eax	/* we don't want to take a fault */
1347	movl	%eax, %cr0
1348	jmp	1f
13491:	movb	$STI_INSTR, setsplsti
1350	movb	$STI_INSTR, setsplhisti
1351	movb	$STI_INSTR, splr_setsti
1352	movl	%edx, %cr0
1353	ret
1354	SET_SIZE(install_spl)
1355
1356#endif	/* __i386 */
1357#endif	/* __lint */
1358
1359
1360/*
1361 * Get current processor interrupt level
1362 */
1363
1364#if defined(__lint)
1365
1366int
1367getpil(void)
1368{ return (0); }
1369
1370#else	/* __lint */
1371
1372#if defined(__amd64)
1373
1374	ENTRY(getpil)
1375	GETIPL(%eax)			/* priority level into %eax */
1376	ret
1377	SET_SIZE(getpil)
1378
1379#elif defined(__i386)
1380
1381	ENTRY(getpil)
1382	GETIPL(%eax)			/* priority level into %eax */
1383	ret
1384	SET_SIZE(getpil)
1385
1386#endif	/* __i386 */
1387#endif	/* __lint */
1388
1389#if defined(__i386)
1390
1391/*
1392 * Read and write the %gs register
1393 */
1394
1395#if defined(__lint)
1396
1397/*ARGSUSED*/
1398uint16_t
1399getgs(void)
1400{ return (0); }
1401
1402/*ARGSUSED*/
1403void
1404setgs(uint16_t sel)
1405{}
1406
1407#else	/* __lint */
1408
1409	ENTRY(getgs)
1410	clr	%eax
1411	movw	%gs, %ax
1412	ret
1413	SET_SIZE(getgs)
1414
1415	ENTRY(setgs)
1416	movw	4(%esp), %gs
1417	ret
1418	SET_SIZE(setgs)
1419
1420#endif	/* __lint */
1421#endif	/* __i386 */
1422
1423#if defined(__lint)
1424
1425void
1426pc_reset(void)
1427{}
1428
1429#else	/* __lint */
1430
1431	ENTRY(wait_500ms)
1432	push	%ebx
1433	movl	$50000, %ebx
14341:
1435	call	tenmicrosec
1436	decl	%ebx
1437	jnz	1b
1438	pop	%ebx
1439	ret
1440	SET_SIZE(wait_500ms)
1441
1442#define	RESET_METHOD_KBC	1
1443#define	RESET_METHOD_PORT92	2
1444#define RESET_METHOD_PCI	4
1445
1446	DGDEF3(pc_reset_methods, 4, 8)
1447	.long RESET_METHOD_KBC|RESET_METHOD_PORT92|RESET_METHOD_PCI;
1448
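/*
 * pc_reset() tries each method enabled in pc_reset_methods in turn: the
 * keyboard controller pulse (command 0xfe to port 0x64), the port 0x92
 * "fast reset", the PCI 0xcf9 reset register, and finally a deliberate
 * triple-fault.  Since pc_reset_methods is an ordinary kernel global, it
 * can presumably be patched (e.g. via /etc/system) to skip a method that
 * misbehaves on a particular machine.
 */
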
1449	ENTRY(pc_reset)
1450
1451#if defined(__i386)
1452	testl	$RESET_METHOD_KBC, pc_reset_methods
1453#elif defined(__amd64)
1454	testl	$RESET_METHOD_KBC, pc_reset_methods(%rip)
1455#endif
1456	jz	1f
1457
1458	/
1459	/ Try the classic keyboard controller-triggered reset.
1460	/
1461	movw	$0x64, %dx
1462	movb	$0xfe, %al
1463	outb	(%dx)
1464
1465	/ Wait up to 500 milliseconds here for the keyboard controller
1466	/ to pull the reset line.  On systems where the keyboard controller
1467	/ is slow to pull the reset line, we would otherwise fall through
1468	/ to the next reset method, which is bad on systems that hang when
1469	/ that method is used, e.g. Ferrari 3400 (doesn't like port 92)
1470	/ and Ferrari 4000 (doesn't like the cf9 reset method).
1471
1472	call	wait_500ms
1473
14741:
1475#if defined(__i386)
1476	testl	$RESET_METHOD_PORT92, pc_reset_methods
1477#elif defined(__amd64)
1478	testl	$RESET_METHOD_PORT92, pc_reset_methods(%rip)
1479#endif
1480	jz	3f
1481
1482	/
1483	/ Try port 0x92 fast reset
1484	/
1485	movw	$0x92, %dx
1486	inb	(%dx)
1487	cmpb	$0xff, %al	/ If port's not there, we should get back 0xFF
1488	je	1f
1489	testb	$1, %al		/ If bit 0
1490	jz	2f		/ is clear, jump to perform the reset
1491	andb	$0xfe, %al	/ otherwise,
1492	outb	(%dx)		/ clear bit 0 first, then
14932:
1494	orb	$1, %al		/ Set bit 0
1495	outb	(%dx)		/ and reset the system
14961:
1497
1498	call	wait_500ms
1499
15003:
1501#if defined(__i386)
1502	testl	$RESET_METHOD_PCI, pc_reset_methods
1503#elif defined(__amd64)
1504	testl	$RESET_METHOD_PCI, pc_reset_methods(%rip)
1505#endif
1506	jz	4f
1507
1508	/ Try the PCI (soft) reset vector (should work on all modern systems,
1509	/ but has been shown to cause problems on 450NX systems, and some newer
1510	/ systems (e.g. ATI IXP400-equipped systems))
1511	/ When resetting via this method, 2 writes are required.  The first
1512	/ targets bit 1 (0=hard reset without power cycle, 1=hard reset with
1513	/ power cycle).
1514	/ The reset occurs on the second write, during bit 2's transition from
1515	/ 0->1.
1516	movw	$0xcf9, %dx
1517	movb	$0x2, %al	/ Reset mode = hard, no power cycle
1518	outb	(%dx)
1519	movb	$0x6, %al
1520	outb	(%dx)
1521
1522	call	wait_500ms
1523
15244:
1525	/
1526	/ port 0xcf9 failed also.  Last-ditch effort is to
1527	/ triple-fault the CPU.
1528	/
1529#if defined(__amd64)
1530	pushq	$0x0
1531	pushq	$0x0		/ IDT base of 0, limit of 0 + 2 unused bytes
1532	lidt	(%rsp)
1533#elif defined(__i386)
1534	pushl	$0x0
1535	pushl	$0x0		/ IDT base of 0, limit of 0 + 2 unused bytes
1536	lidt	(%esp)
1537#endif
1538	int	$0x0		/ Trigger interrupt, generate triple-fault
1539
1540	cli
1541	hlt			/ Wait forever
1542	/*NOTREACHED*/
1543	SET_SIZE(pc_reset)
1544
1545#endif	/* __lint */
1546
1547/*
1548 * C callable in and out routines
1549 */
1550
1551#if defined(__lint)
1552
1553/* ARGSUSED */
1554void
1555outl(int port_address, uint32_t val)
1556{}
1557
1558#else	/* __lint */
1559
1560#if defined(__amd64)
1561
1562	ENTRY(outl)
1563	movw	%di, %dx
1564	movl	%esi, %eax
1565	outl	(%dx)
1566	ret
1567	SET_SIZE(outl)
1568
1569#elif defined(__i386)
1570
1571	.set	PORT, 4
1572	.set	VAL, 8
1573
1574	ENTRY(outl)
1575	movw	PORT(%esp), %dx
1576	movl	VAL(%esp), %eax
1577	outl	(%dx)
1578	ret
1579	SET_SIZE(outl)
1580
1581#endif	/* __i386 */
1582#endif	/* __lint */
1583
1584#if defined(__lint)
1585
1586/* ARGSUSED */
1587void
1588outw(int port_address, uint16_t val)
1589{}
1590
1591#else	/* __lint */
1592
1593#if defined(__amd64)
1594
1595	ENTRY(outw)
1596	movw	%di, %dx
1597	movw	%si, %ax
1598	D16 outl (%dx)		/* XX64 why not outw? */
1599	ret
1600	SET_SIZE(outw)
1601
1602#elif defined(__i386)
1603
1604	ENTRY(outw)
1605	movw	PORT(%esp), %dx
1606	movw	VAL(%esp), %ax
1607	D16 outl (%dx)
1608	ret
1609	SET_SIZE(outw)
1610
1611#endif	/* __i386 */
1612#endif	/* __lint */
1613
1614#if defined(__lint)
1615
1616/* ARGSUSED */
1617void
1618outb(int port_address, uint8_t val)
1619{}
1620
1621#else	/* __lint */
1622
1623#if defined(__amd64)
1624
1625	ENTRY(outb)
1626	movw	%di, %dx
1627	movb	%sil, %al
1628	outb	(%dx)
1629	ret
1630	SET_SIZE(outb)
1631
1632#elif defined(__i386)
1633
1634	ENTRY(outb)
1635	movw	PORT(%esp), %dx
1636	movb	VAL(%esp), %al
1637	outb	(%dx)
1638	ret
1639	SET_SIZE(outb)
1640
1641#endif	/* __i386 */
1642#endif	/* __lint */
1643
1644#if defined(__lint)
1645
1646/* ARGSUSED */
1647uint32_t
1648inl(int port_address)
1649{ return (0); }
1650
1651#else	/* __lint */
1652
1653#if defined(__amd64)
1654
1655	ENTRY(inl)
1656	xorl	%eax, %eax
1657	movw	%di, %dx
1658	inl	(%dx)
1659	ret
1660	SET_SIZE(inl)
1661
1662#elif defined(__i386)
1663
1664	ENTRY(inl)
1665	movw	PORT(%esp), %dx
1666	inl	(%dx)
1667	ret
1668	SET_SIZE(inl)
1669
1670#endif	/* __i386 */
1671#endif	/* __lint */
1672
1673#if defined(__lint)
1674
1675/* ARGSUSED */
1676uint16_t
1677inw(int port_address)
1678{ return (0); }
1679
1680#else	/* __lint */
1681
1682#if defined(__amd64)
1683
1684	ENTRY(inw)
1685	xorl	%eax, %eax
1686	movw	%di, %dx
1687	D16 inl	(%dx)
1688	ret
1689	SET_SIZE(inw)
1690
1691#elif defined(__i386)
1692
1693	ENTRY(inw)
1694	subl	%eax, %eax
1695	movw	PORT(%esp), %dx
1696	D16 inl	(%dx)
1697	ret
1698	SET_SIZE(inw)
1699
1700#endif	/* __i386 */
1701#endif	/* __lint */
1702
1703
1704#if defined(__lint)
1705
1706/* ARGSUSED */
1707uint8_t
1708inb(int port_address)
1709{ return (0); }
1710
1711#else	/* __lint */
1712
1713#if defined(__amd64)
1714
1715	ENTRY(inb)
1716	xorl	%eax, %eax
1717	movw	%di, %dx
1718	inb	(%dx)
1719	ret
1720	SET_SIZE(inb)
1721
1722#elif defined(__i386)
1723
1724	ENTRY(inb)
1725	subl    %eax, %eax
1726	movw	PORT(%esp), %dx
1727	inb	(%dx)
1728	ret
1729	SET_SIZE(inb)
1730
1731#endif	/* __i386 */
1732#endif	/* __lint */
1733
1734
1735#if defined(__lint)
1736
1737/* ARGSUSED */
1738void
1739repoutsw(int port, uint16_t *addr, int cnt)
1740{}
1741
1742#else	/* __lint */
1743
1744#if defined(__amd64)
1745
1746	ENTRY(repoutsw)
1747	movl	%edx, %ecx
1748	movw	%di, %dx
1749	rep
1750	  D16 outsl
1751	ret
1752	SET_SIZE(repoutsw)
1753
1754#elif defined(__i386)
1755
1756	/*
1757	 * The arguments and saved registers are on the stack in the
1758	 *  following order:
1759	 *      |  cnt  |  +16
1760	 *      | *addr |  +12
1761	 *      | port  |  +8
1762	 *      |  eip  |  +4
1763	 *      |  esi  |  <-- %esp
1764	 * If additional values are pushed onto the stack, make sure
1765	 * to adjust the following constants accordingly.
1766	 */
1767	.set	PORT, 8
1768	.set	ADDR, 12
1769	.set	COUNT, 16
1770
1771	ENTRY(repoutsw)
1772	pushl	%esi
1773	movl	PORT(%esp), %edx
1774	movl	ADDR(%esp), %esi
1775	movl	COUNT(%esp), %ecx
1776	rep
1777	  D16 outsl
1778	popl	%esi
1779	ret
1780	SET_SIZE(repoutsw)
1781
1782#endif	/* __i386 */
1783#endif	/* __lint */
1784
1785
1786#if defined(__lint)
1787
1788/* ARGSUSED */
1789void
1790repinsw(int port_addr, uint16_t *addr, int cnt)
1791{}
1792
1793#else	/* __lint */
1794
1795#if defined(__amd64)
1796
1797	ENTRY(repinsw)
1798	movl	%edx, %ecx
1799	movw	%di, %dx
1800	rep
1801	  D16 insl
1802	ret
1803	SET_SIZE(repinsw)
1804
1805#elif defined(__i386)
1806
1807	ENTRY(repinsw)
1808	pushl	%edi
1809	movl	PORT(%esp), %edx
1810	movl	ADDR(%esp), %edi
1811	movl	COUNT(%esp), %ecx
1812	rep
1813	  D16 insl
1814	popl	%edi
1815	ret
1816	SET_SIZE(repinsw)
1817
1818#endif	/* __i386 */
1819#endif	/* __lint */
1820
1821
1822#if defined(__lint)
1823
1824/* ARGSUSED */
1825void
1826repinsb(int port, uint8_t *addr, int count)
1827{}
1828
1829#else	/* __lint */
1830
1831#if defined(__amd64)
1832
1833	ENTRY(repinsb)
1834	movl	%edx, %ecx
1835	movw	%di, %dx
1836	movq	%rsi, %rdi
1837	rep
1838	  insb
1839	ret
1840	SET_SIZE(repinsb)
1841
1842#elif defined(__i386)
1843
1844	/*
1845	 * The arguments and saved registers are on the stack in the
1846	 *  following order:
1847	 *      |  cnt  |  +16
1848	 *      | *addr |  +12
1849	 *      | port  |  +8
1850	 *      |  eip  |  +4
1851	 *      |  edi  |  <-- %esp
1852	 * If additional values are pushed onto the stack, make sure
1853	 * to adjust the following constants accordingly.
1854	 */
1855	.set	IO_PORT, 8
1856	.set	IO_ADDR, 12
1857	.set	IO_COUNT, 16
1858
1859	ENTRY(repinsb)
1860	pushl	%edi
1861	movl	IO_ADDR(%esp), %edi
1862	movl	IO_COUNT(%esp), %ecx
1863	movl	IO_PORT(%esp), %edx
1864	rep
1865	  insb
1866	popl	%edi
1867	ret
1868	SET_SIZE(repinsb)
1869
1870#endif	/* __i386 */
1871#endif	/* __lint */
1872
1873
1874/*
1875 * Input a stream of 32-bit words.
1876 * NOTE: count is a DWORD count.
1877 */
1878#if defined(__lint)
1879
1880/* ARGSUSED */
1881void
1882repinsd(int port, uint32_t *addr, int count)
1883{}
1884
1885#else	/* __lint */
1886
1887#if defined(__amd64)
1888
1889	ENTRY(repinsd)
1890	movl	%edx, %ecx
1891	movw	%di, %dx
1892	movq	%rsi, %rdi
1893	rep
1894	  insl
1895	ret
1896	SET_SIZE(repinsd)
1897
1898#elif defined(__i386)
1899
1900	ENTRY(repinsd)
1901	pushl	%edi
1902	movl	IO_ADDR(%esp), %edi
1903	movl	IO_COUNT(%esp), %ecx
1904	movl	IO_PORT(%esp), %edx
1905	rep
1906	  insl
1907	popl	%edi
1908	ret
1909	SET_SIZE(repinsd)
1910
1911#endif	/* __i386 */
1912#endif	/* __lint */
1913
1914/*
1915 * Output a stream of bytes
1916 * NOTE: count is a byte count
1917 */
1918#if defined(__lint)
1919
1920/* ARGSUSED */
1921void
1922repoutsb(int port, uint8_t *addr, int count)
1923{}
1924
1925#else	/* __lint */
1926
1927#if defined(__amd64)
1928
1929	ENTRY(repoutsb)
1930	movl	%edx, %ecx
1931	movw	%di, %dx
1932	rep
1933	  outsb
1934	ret
1935	SET_SIZE(repoutsb)
1936
1937#elif defined(__i386)
1938
1939	ENTRY(repoutsb)
1940	pushl	%esi
1941	movl	IO_ADDR(%esp), %esi
1942	movl	IO_COUNT(%esp), %ecx
1943	movl	IO_PORT(%esp), %edx
1944	rep
1945	  outsb
1946	popl	%esi
1947	ret
1948	SET_SIZE(repoutsb)
1949
1950#endif	/* __i386 */
1951#endif	/* __lint */
1952
1953/*
1954 * Output a stream of 32-bit words
1955 * NOTE: count is a DWORD count
1956 */
1957#if defined(__lint)
1958
1959/* ARGSUSED */
1960void
1961repoutsd(int port, uint32_t *addr, int count)
1962{}
1963
1964#else	/* __lint */
1965
1966#if defined(__amd64)
1967
1968	ENTRY(repoutsd)
1969	movl	%edx, %ecx
1970	movw	%di, %dx
1971	rep
1972	  outsl
1973	ret
1974	SET_SIZE(repoutsd)
1975
1976#elif defined(__i386)
1977
1978	ENTRY(repoutsd)
1979	pushl	%esi
1980	movl	IO_ADDR(%esp), %esi
1981	movl	IO_COUNT(%esp), %ecx
1982	movl	IO_PORT(%esp), %edx
1983	rep
1984	  outsl
1985	popl	%esi
1986	ret
1987	SET_SIZE(repoutsd)
1988
1989#endif	/* __i386 */
1990#endif	/* __lint */
1991
1992/*
1993 * void int20(void)
1994 */
1995
1996#if defined(__lint)
1997
1998void
1999int20(void)
2000{}
2001
2002#else	/* __lint */
2003
2004	ENTRY(int20)
2005	movl	boothowto, %eax
2006	andl	$RB_DEBUG, %eax
2007	jz	1f
2008
2009	int	$20
20101:
2011	rep;	ret	/* use 2 byte return instruction when branch target */
2012			/* AMD Software Optimization Guide - Section 6.2 */
2013	SET_SIZE(int20)
2014
2015#endif	/* __lint */
2016
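/*
 * scanc() - scan bytes until one whose table entry shares a bit with mask
 * is found.  C equivalent (a sketch built from the annotations in the
 * assembly below):
 *
 *	uchar_t *end = &cp[size];
 *	while (cp < end && (table[*cp] & mask) == 0)
 *		cp++;
 *	return (end - cp);
 */
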
2017#if defined(__lint)
2018
2019/* ARGSUSED */
2020int
2021scanc(size_t size, uchar_t *cp, uchar_t *table, uchar_t mask)
2022{ return (0); }
2023
2024#else	/* __lint */
2025
2026#if defined(__amd64)
2027
2028	ENTRY(scanc)
2029					/* rdi == size */
2030					/* rsi == cp */
2031					/* rdx == table */
2032					/* rcx == mask */
2033	addq	%rsi, %rdi		/* end = &cp[size] */
2034.scanloop:
2035	cmpq	%rdi, %rsi		/* while (cp < end */
2036	jnb	.scandone
2037	movzbq	(%rsi), %r8		/* %r8 = *cp */
2038	incq	%rsi			/* cp++ */
2039	testb	%cl, (%r8, %rdx)
2040	jz	.scanloop		/*  && (table[*cp] & mask) == 0) */
2041	decq	%rsi			/* (fix post-increment) */
2042.scandone:
2043	movl	%edi, %eax
2044	subl	%esi, %eax		/* return (end - cp) */
2045	ret
2046	SET_SIZE(scanc)
2047
2048#elif defined(__i386)
2049
2050	ENTRY(scanc)
2051	pushl	%edi
2052	pushl	%esi
2053	movb	24(%esp), %cl		/* mask = %cl */
2054	movl	16(%esp), %esi		/* cp = %esi */
2055	movl	20(%esp), %edx		/* table = %edx */
2056	movl	%esi, %edi
2057	addl	12(%esp), %edi		/* end = &cp[size]; */
2058.scanloop:
2059	cmpl	%edi, %esi		/* while (cp < end */
2060	jnb	.scandone
2061	movzbl	(%esi),  %eax		/* %al = *cp */
2062	incl	%esi			/* cp++ */
2063	movb	(%edx,  %eax), %al	/* %al = table[*cp] */
2064	testb	%al, %cl
2065	jz	.scanloop		/*   && (table[*cp] & mask) == 0) */
2066	dec	%esi			/* post-incremented */
2067.scandone:
2068	movl	%edi, %eax
2069	subl	%esi, %eax		/* return (end - cp) */
2070	popl	%esi
2071	popl	%edi
2072	ret
2073	SET_SIZE(scanc)
2074
2075#endif	/* __i386 */
2076#endif	/* __lint */
2077
2078/*
2079 * Replacement functions for ones that are normally inlined.
2080 * In addition to the copy in i86.il, they are defined here just in case.
2081 */
2082
2083#if defined(__lint)
2084
2085int
2086intr_clear(void)
2087{ return 0; }
2088
2089int
2090clear_int_flag(void)
2091{ return 0; }
2092
2093#else	/* __lint */
2094
2095#if defined(__amd64)
2096
2097	ENTRY(intr_clear)
2098	ENTRY(clear_int_flag)
2099	pushfq
2100	cli
2101	popq	%rax
2102	ret
2103	SET_SIZE(clear_int_flag)
2104	SET_SIZE(intr_clear)
2105
2106#elif defined(__i386)
2107
2108	ENTRY(intr_clear)
2109	ENTRY(clear_int_flag)
2110	pushfl
2111	cli
2112	popl	%eax
2113	ret
2114	SET_SIZE(clear_int_flag)
2115	SET_SIZE(intr_clear)
2116
2117#endif	/* __i386 */
2118#endif	/* __lint */
2119
2120#if defined(__lint)
2121
2122struct cpu *
2123curcpup(void)
2124{ return 0; }
2125
2126#else	/* __lint */
2127
2128#if defined(__amd64)
2129
2130	ENTRY(curcpup)
2131	movq	%gs:CPU_SELF, %rax
2132	ret
2133	SET_SIZE(curcpup)
2134
2135#elif defined(__i386)
2136
2137	ENTRY(curcpup)
2138	movl	%gs:CPU_SELF, %eax
2139	ret
2140	SET_SIZE(curcpup)
2141
2142#endif	/* __i386 */
2143#endif	/* __lint */
2144
2145#if defined(__lint)
2146
2147/* ARGSUSED */
2148uint32_t
2149htonl(uint32_t i)
2150{ return (0); }
2151
2152/* ARGSUSED */
2153uint32_t
2154ntohl(uint32_t i)
2155{ return (0); }
2156
2157#else	/* __lint */
2158
2159#if defined(__amd64)
2160
2161	/* XX64 there must be shorter sequences for this */
2162	ENTRY(htonl)
2163	ALTENTRY(ntohl)
2164	movl	%edi, %eax
2165	bswap	%eax
2166	ret
2167	SET_SIZE(ntohl)
2168	SET_SIZE(htonl)
2169
2170#elif defined(__i386)
2171
2172	ENTRY(htonl)
2173	ALTENTRY(ntohl)
2174	movl	4(%esp), %eax
2175	bswap	%eax
2176	ret
2177	SET_SIZE(ntohl)
2178	SET_SIZE(htonl)
2179
2180#endif	/* __i386 */
2181#endif	/* __lint */
2182
2183#if defined(__lint)
2184
2185/* ARGSUSED */
2186uint16_t
2187htons(uint16_t i)
2188{ return (0); }
2189
2190/* ARGSUSED */
2191uint16_t
2192ntohs(uint16_t i)
2193{ return (0); }
2194
2195
2196#else	/* __lint */
2197
2198#if defined(__amd64)
2199
2200	/* XX64 there must be better sequences for this */
2201	ENTRY(htons)
2202	ALTENTRY(ntohs)
2203	movl	%edi, %eax
2204	bswap	%eax
2205	shrl	$16, %eax
2206	ret
2207	SET_SIZE(ntohs)
2208	SET_SIZE(htons)
2209
2210#elif defined(__i386)
2211
2212	ENTRY(htons)
2213	ALTENTRY(ntohs)
2214	movl	4(%esp), %eax
2215	bswap	%eax
2216	shrl	$16, %eax
2217	ret
2218	SET_SIZE(ntohs)
2219	SET_SIZE(htons)
2220
2221#endif	/* __i386 */
2222#endif	/* __lint */
2223
2224
2225#if defined(__lint)
2226
2227/* ARGSUSED */
2228void
2229intr_restore(uint_t i)
2230{ return; }
2231
2232/* ARGSUSED */
2233void
2234restore_int_flag(int i)
2235{ return; }
2236
2237#else	/* __lint */
2238
2239#if defined(__amd64)
2240
2241	ENTRY(intr_restore)
2242	ENTRY(restore_int_flag)
2243	pushq	%rdi
2244	popfq
2245	ret
2246	SET_SIZE(restore_int_flag)
2247	SET_SIZE(intr_restore)
2248
2249#elif defined(__i386)
2250
2251	ENTRY(intr_restore)
2252	ENTRY(restore_int_flag)
2253	pushl	4(%esp)
2254	popfl
2255	ret
2256	SET_SIZE(restore_int_flag)
2257	SET_SIZE(intr_restore)
2258
2259#endif	/* __i386 */
2260#endif	/* __lint */
2261
2262#if defined(__lint)
2263
2264void
2265sti(void)
2266{}
2267
2268#else	/* __lint */
2269
2270	ENTRY(sti)
2271	sti
2272	ret
2273	SET_SIZE(sti)
2274
2275#endif	/* __lint */
2276
2277#if defined(__lint)
2278
2279dtrace_icookie_t
2280dtrace_interrupt_disable(void)
2281{ return (0); }
2282
2283#else   /* __lint */
2284
2285#if defined(__amd64)
2286
2287	ENTRY(dtrace_interrupt_disable)
2288	pushfq
2289	popq	%rax
2290	cli
2291	ret
2292	SET_SIZE(dtrace_interrupt_disable)
2293
2294#elif defined(__i386)
2295
2296	ENTRY(dtrace_interrupt_disable)
2297	pushfl
2298	popl	%eax
2299	cli
2300	ret
2301	SET_SIZE(dtrace_interrupt_disable)
2302
2303#endif	/* __i386 */
2304#endif	/* __lint */
2305
2306#if defined(__lint)
2307
2308/*ARGSUSED*/
2309void
2310dtrace_interrupt_enable(dtrace_icookie_t cookie)
2311{}
2312
2313#else	/* __lint */
2314
2315#if defined(__amd64)
2316
2317	ENTRY(dtrace_interrupt_enable)
2318	pushq	%rdi
2319	popfq
2320	ret
2321	SET_SIZE(dtrace_interrupt_enable)
2322
2323#elif defined(__i386)
2324
2325	ENTRY(dtrace_interrupt_enable)
2326	movl	4(%esp), %eax
2327	pushl	%eax
2328	popfl
2329	ret
2330	SET_SIZE(dtrace_interrupt_enable)
2331
2332#endif	/* __i386 */
2333#endif	/* __lint */
2334
2335
2336#if defined(lint)
2337
2338void
2339dtrace_membar_producer(void)
2340{}
2341
2342void
2343dtrace_membar_consumer(void)
2344{}
2345
2346#else	/* __lint */
2347
2348	ENTRY(dtrace_membar_producer)
2349	rep;	ret	/* use 2 byte return instruction when branch target */
2350			/* AMD Software Optimization Guide - Section 6.2 */
2351	SET_SIZE(dtrace_membar_producer)
2352
2353	ENTRY(dtrace_membar_consumer)
2354	rep;	ret	/* use 2 byte return instruction when branch target */
2355			/* AMD Software Optimization Guide - Section 6.2 */
2356	SET_SIZE(dtrace_membar_consumer)
2357
2358#endif	/* __lint */
2359
2360#if defined(__lint)
2361
2362kthread_id_t
2363threadp(void)
2364{ return ((kthread_id_t)0); }
2365
2366#else	/* __lint */
2367
2368#if defined(__amd64)
2369
2370	ENTRY(threadp)
2371	movq	%gs:CPU_THREAD, %rax
2372	ret
2373	SET_SIZE(threadp)
2374
2375#elif defined(__i386)
2376
2377	ENTRY(threadp)
2378	movl	%gs:CPU_THREAD, %eax
2379	ret
2380	SET_SIZE(threadp)
2381
2382#endif	/* __i386 */
2383#endif	/* __lint */
2384
2385/*
2386 *   Checksum routine for Internet Protocol Headers
2387 */
2388
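/*
 * The assembly folds 32 halfwords (64 bytes) per iteration into two
 * accumulators and, for any remaining tail, jumps into the middle of the
 * unrolled body through .ip_ocsum_jmptbl (a Duff's-device-style dispatch).
 * The C version in the __lint stanza below is the reference algorithm.
 */
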
2389#if defined(__lint)
2390
2391/* ARGSUSED */
2392unsigned int
2393ip_ocsum(
2394	ushort_t *address,	/* ptr to 1st message buffer */
2395	int halfword_count,	/* length of data */
2396	unsigned int sum)	/* partial checksum */
2397{
2398	int		i;
2399	unsigned int	psum = 0;	/* partial sum */
2400
2401	for (i = 0; i < halfword_count; i++, address++) {
2402		psum += *address;
2403	}
2404
2405	while ((psum >> 16) != 0) {
2406		psum = (psum & 0xffff) + (psum >> 16);
2407	}
2408
2409	psum += sum;
2410
2411	while ((psum >> 16) != 0) {
2412		psum = (psum & 0xffff) + (psum >> 16);
2413	}
2414
2415	return (psum);
2416}
2417
2418#else	/* __lint */
2419
2420#if defined(__amd64)
2421
2422	ENTRY(ip_ocsum)
2423	pushq	%rbp
2424	movq	%rsp, %rbp
2425#ifdef DEBUG
2426	movq	kernelbase(%rip), %rax
2427	cmpq	%rax, %rdi
2428	jnb	1f
2429	xorl	%eax, %eax
2430	movq	%rdi, %rsi
2431	leaq	.ip_ocsum_panic_msg(%rip), %rdi
2432	call	panic
2433	/*NOTREACHED*/
2434.ip_ocsum_panic_msg:
2435	.string	"ip_ocsum: address 0x%p below kernelbase\n"
24361:
2437#endif
2438	movl	%esi, %ecx	/* halfword_count */
2439	movq	%rdi, %rsi	/* address */
2440				/* partial sum in %edx */
2441	xorl	%eax, %eax
2442	testl	%ecx, %ecx
2443	jz	.ip_ocsum_done
2444	testq	$3, %rsi
2445	jnz	.ip_csum_notaligned
2446.ip_csum_aligned:	/* XX64 opportunities for 8-byte operations? */
2447.next_iter:
2448	/* XX64 opportunities for prefetch? */
2449	/* XX64 compute csum with 64 bit quantities? */
2450	subl	$32, %ecx
2451	jl	.less_than_32
2452
2453	addl	0(%rsi), %edx
2454.only60:
2455	adcl	4(%rsi), %eax
2456.only56:
2457	adcl	8(%rsi), %edx
2458.only52:
2459	adcl	12(%rsi), %eax
2460.only48:
2461	adcl	16(%rsi), %edx
2462.only44:
2463	adcl	20(%rsi), %eax
2464.only40:
2465	adcl	24(%rsi), %edx
2466.only36:
2467	adcl	28(%rsi), %eax
2468.only32:
2469	adcl	32(%rsi), %edx
2470.only28:
2471	adcl	36(%rsi), %eax
2472.only24:
2473	adcl	40(%rsi), %edx
2474.only20:
2475	adcl	44(%rsi), %eax
2476.only16:
2477	adcl	48(%rsi), %edx
2478.only12:
2479	adcl	52(%rsi), %eax
2480.only8:
2481	adcl	56(%rsi), %edx
2482.only4:
2483	adcl	60(%rsi), %eax	/* could be adding -1 and -1 with a carry */
2484.only0:
2485	adcl	$0, %eax	/* could be adding -1 in eax with a carry */
2486	adcl	$0, %eax
2487
2488	addq	$64, %rsi
2489	testl	%ecx, %ecx
2490	jnz	.next_iter
2491
2492.ip_ocsum_done:
2493	addl	%eax, %edx
2494	adcl	$0, %edx
2495	movl	%edx, %eax	/* form a 16 bit checksum by */
2496	shrl	$16, %eax	/* adding two halves of 32 bit checksum */
2497	addw	%dx, %ax
2498	adcw	$0, %ax
2499	andl	$0xffff, %eax
2500	leave
2501	ret
2502
2503.ip_csum_notaligned:
2504	xorl	%edi, %edi
2505	movw	(%rsi), %di
2506	addl	%edi, %edx
2507	adcl	$0, %edx
2508	addq	$2, %rsi
2509	decl	%ecx
2510	jmp	.ip_csum_aligned
2511
2512.less_than_32:
2513	addl	$32, %ecx
2514	testl	$1, %ecx
2515	jz	.size_aligned
2516	andl	$0xfe, %ecx
2517	movzwl	(%rsi, %rcx, 2), %edi
2518	addl	%edi, %edx
2519	adcl	$0, %edx
2520.size_aligned:
2521	movl	%ecx, %edi
2522	shrl	$1, %ecx
2523	shl	$1, %edi
2524	subq	$64, %rdi
2525	addq	%rdi, %rsi
2526	leaq    .ip_ocsum_jmptbl(%rip), %rdi
2527	leaq	(%rdi, %rcx, 8), %rdi
2528	xorl	%ecx, %ecx
2529	clc
2530	jmp 	*(%rdi)
2531
2532	.align	8
2533.ip_ocsum_jmptbl:
2534	.quad	.only0, .only4, .only8, .only12, .only16, .only20
2535	.quad	.only24, .only28, .only32, .only36, .only40, .only44
2536	.quad	.only48, .only52, .only56, .only60
2537	SET_SIZE(ip_ocsum)
2538
2539#elif defined(__i386)
2540
2541	ENTRY(ip_ocsum)
2542	pushl	%ebp
2543	movl	%esp, %ebp
2544	pushl	%ebx
2545	pushl	%esi
2546	pushl	%edi
2547	movl	12(%ebp), %ecx	/* count of half words */
2548	movl	16(%ebp), %edx	/* partial checksum */
2549	movl	8(%ebp), %esi
2550	xorl	%eax, %eax
2551	testl	%ecx, %ecx
2552	jz	.ip_ocsum_done
2553
2554	testl	$3, %esi
2555	jnz	.ip_csum_notaligned
2556.ip_csum_aligned:
2557.next_iter:
2558	subl	$32, %ecx
2559	jl	.less_than_32
2560
2561	addl	0(%esi), %edx
2562.only60:
2563	adcl	4(%esi), %eax
2564.only56:
2565	adcl	8(%esi), %edx
2566.only52:
2567	adcl	12(%esi), %eax
2568.only48:
2569	adcl	16(%esi), %edx
2570.only44:
2571	adcl	20(%esi), %eax
2572.only40:
2573	adcl	24(%esi), %edx
2574.only36:
2575	adcl	28(%esi), %eax
2576.only32:
2577	adcl	32(%esi), %edx
2578.only28:
2579	adcl	36(%esi), %eax
2580.only24:
2581	adcl	40(%esi), %edx
2582.only20:
2583	adcl	44(%esi), %eax
2584.only16:
2585	adcl	48(%esi), %edx
2586.only12:
2587	adcl	52(%esi), %eax
2588.only8:
2589	adcl	56(%esi), %edx
2590.only4:
2591	adcl	60(%esi), %eax	/* We could be adding -1 and -1 with a carry */
2592.only0:
2593	adcl	$0, %eax	/* we could be adding -1 in eax with a carry */
2594	adcl	$0, %eax
2595
2596	addl	$64, %esi
2597	andl	%ecx, %ecx
2598	jnz	.next_iter
2599
2600.ip_ocsum_done:
2601	addl	%eax, %edx
2602	adcl	$0, %edx
2603	movl	%edx, %eax	/* form a 16 bit checksum by */
2604	shrl	$16, %eax	/* adding two halves of 32 bit checksum */
2605	addw	%dx, %ax
2606	adcw	$0, %ax
2607	andl	$0xffff, %eax
2608	popl	%edi		/* restore registers */
2609	popl	%esi
2610	popl	%ebx
2611	leave
2612	ret
2613
2614.ip_csum_notaligned:
2615	xorl	%edi, %edi
2616	movw	(%esi), %di
2617	addl	%edi, %edx
2618	adcl	$0, %edx
2619	addl	$2, %esi
2620	decl	%ecx
2621	jmp	.ip_csum_aligned
2622
2623.less_than_32:
2624	addl	$32, %ecx
2625	testl	$1, %ecx
2626	jz	.size_aligned
2627	andl	$0xfe, %ecx
2628	movzwl	(%esi, %ecx, 2), %edi
2629	addl	%edi, %edx
2630	adcl	$0, %edx
2631.size_aligned:
2632	movl	%ecx, %edi
2633	shrl	$1, %ecx
2634	shl	$1, %edi
2635	subl	$64, %edi
2636	addl	%edi, %esi
2637	movl	$.ip_ocsum_jmptbl, %edi
2638	lea	(%edi, %ecx, 4), %edi
2639	xorl	%ecx, %ecx
2640	clc
2641	jmp 	*(%edi)
2642	SET_SIZE(ip_ocsum)
2643
2644	.data
2645	.align	4
2646
2647.ip_ocsum_jmptbl:
2648	.long	.only0, .only4, .only8, .only12, .only16, .only20
2649	.long	.only24, .only28, .only32, .only36, .only40, .only44
2650	.long	.only48, .only52, .only56, .only60
2651
2652
2653#endif	/* __i386 */
2654#endif	/* __lint */
2655
2656/*
2657 * Multiply two long numbers and yield a u_longlong_t result, callable from C.
2658 * Provided to manipulate hrtime_t values.
2659 */
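/*
 * Illustrative only: in C the same operation needs one operand widened
 * before the multiply, otherwise the product is truncated to 32 bits.
 *
 *	u_longlong_t
 *	mul32_sketch(uint_t a, uint_t b)
 *	{
 *		return ((u_longlong_t)a * b);
 *	}
 *
 * The assembly versions below get the full 64-bit product directly from
 * the mull instruction (%edx:%eax).
 */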
2660#if defined(__lint)
2661
2662/* result = a * b; */
2663
2664/* ARGSUSED */
2665unsigned long long
2666mul32(uint_t a, uint_t b)
2667{ return (0); }
2668
2669#else	/* __lint */
2670
2671#if defined(__amd64)
2672
2673	ENTRY(mul32)
2674	xorl	%edx, %edx	/* XX64 joe, paranoia? */
2675	movl	%edi, %eax
2676	mull	%esi
2677	shlq	$32, %rdx
2678	orq	%rdx, %rax
2679	ret
2680	SET_SIZE(mul32)
2681
2682#elif defined(__i386)
2683
2684	ENTRY(mul32)
2685	movl	8(%esp), %eax
2686	movl	4(%esp), %ecx
2687	mull	%ecx
2688	ret
2689	SET_SIZE(mul32)
2690
2691#endif	/* __i386 */
2692#endif	/* __lint */
2693
2694#if defined(notused)
2695#if defined(__lint)
2696/* ARGSUSED */
2697void
2698load_pte64(uint64_t *pte, uint64_t pte_value)
2699{}
2700#else	/* __lint */
2701	.globl load_pte64
2702load_pte64:
2703	movl	4(%esp), %eax
2704	movl	8(%esp), %ecx
2705	movl	12(%esp), %edx
2706	movl	%edx, 4(%eax)
2707	movl	%ecx, (%eax)
2708	ret
2709#endif	/* __lint */
2710#endif	/* notused */
2711
2712#if defined(__lint)
2713
2714/*ARGSUSED*/
2715void
2716scan_memory(caddr_t addr, size_t size)
2717{}
2718
2719#else	/* __lint */
2720
2721#if defined(__amd64)
2722
2723	ENTRY(scan_memory)
2724	shrq	$3, %rsi	/* convert %rsi from byte to quadword count */
2725	jz	.scanm_done
2726	movq	%rsi, %rcx	/* move count into rep control register */
2727	movq	%rdi, %rsi	/* move addr into lodsq control reg. */
2728	rep lodsq		/* scan the memory range */
2729.scanm_done:
2730	rep;	ret	/* use 2 byte return instruction when branch target */
2731			/* AMD Software Optimization Guide - Section 6.2 */
2732	SET_SIZE(scan_memory)
2733
2734#elif defined(__i386)
2735
2736	ENTRY(scan_memory)
2737	pushl	%ecx
2738	pushl	%esi
2739	movl	16(%esp), %ecx	/* move 2nd arg into rep control register */
2740	shrl	$2, %ecx	/* convert from byte count to word count */
2741	jz	.scanm_done
2742	movl	12(%esp), %esi	/* move 1st arg into lodsw control register */
2743	.byte	0xf3		/* rep prefix.  lame assembler.  sigh. */
2744	lodsl
2745.scanm_done:
2746	popl	%esi
2747	popl	%ecx
2748	ret
2749	SET_SIZE(scan_memory)
2750
2751#endif	/* __i386 */
2752#endif	/* __lint */
2753
2754
2755#if defined(__lint)
2756
2757/*ARGSUSED */
2758int
2759lowbit(ulong_t i)
2760{ return (0); }
2761
2762#else	/* __lint */
2763
2764#if defined(__amd64)
2765
2766	ENTRY(lowbit)
2767	movl	$-1, %eax	/* preload so a zero argument returns 0 */
2768	bsfq	%rdi, %rax	/* index (0..63) of the lowest set bit */
2769	incl	%eax		/* return bit number 1..64, or 0 if none */
2770	ret
2771	SET_SIZE(lowbit)
2772
2773#elif defined(__i386)
2774
2775	ENTRY(lowbit)
2776	movl	$-1, %eax
2777	bsfl	4(%esp), %eax
2778	incl	%eax
2779	ret
2780	SET_SIZE(lowbit)
2781
2782#endif	/* __i386 */
2783#endif	/* __lint */
2784
2785#if defined(__lint)
2786
2787/*ARGSUSED*/
2788int
2789highbit(ulong_t i)
2790{ return (0); }
2791
2792#else	/* __lint */
2793
2794#if defined(__amd64)
2795
2796	ENTRY(highbit)
2797	movl	$-1, %eax	/* preload so a zero argument returns 0 */
2798	bsrq	%rdi, %rax	/* index (0..63) of the highest set bit */
2799	incl	%eax		/* return bit number 1..64, or 0 if none */
2800	ret
2801	SET_SIZE(highbit)
2802
2803#elif defined(__i386)
2804
2805	ENTRY(highbit)
2806	movl	$-1, %eax
2807	bsrl	4(%esp), %eax
2808	incl	%eax
2809	ret
2810	SET_SIZE(highbit)
2811
2812#endif	/* __i386 */
2813#endif	/* __lint */
2814
2815#if defined(__lint)
2816
2817/*ARGSUSED*/
2818uint64_t
2819rdmsr(uint_t r)
2820{ return (0); }
2821
2822/*ARGSUSED*/
2823void
2824wrmsr(uint_t r, const uint64_t val)
2825{}
2826
2827void
2828invalidate_cache(void)
2829{}
2830
2831#else  /* __lint */
2832
2833#if defined(__amd64)
2834
2835	ENTRY(rdmsr)
2836	movl	%edi, %ecx	/* MSR number */
2837	rdmsr			/* value returned in %edx:%eax */
2838	shlq	$32, %rdx
2839	orq	%rdx, %rax	/* assemble the 64-bit result in %rax */
2840	ret
2841	SET_SIZE(rdmsr)
2842
2843	ENTRY(wrmsr)
2844	movq	%rsi, %rdx
2845	shrq	$32, %rdx
2846	movl	%esi, %eax
2847	movl	%edi, %ecx
2848	wrmsr
2849	ret
2850	SET_SIZE(wrmsr)
2851
2852#elif defined(__i386)
2853
2854	ENTRY(rdmsr)
2855	movl	4(%esp), %ecx
2856	rdmsr
2857	ret
2858	SET_SIZE(rdmsr)
2859
2860	ENTRY(wrmsr)
2861	movl	4(%esp), %ecx
2862	movl	8(%esp), %eax
2863	movl	12(%esp), %edx
2864	wrmsr
2865	ret
2866	SET_SIZE(wrmsr)
2867
2868#endif	/* __i386 */
2869
2870	ENTRY(invalidate_cache)
2871	wbinvd
2872	ret
2873	SET_SIZE(invalidate_cache)
2874
2875#endif	/* __lint */
2876
2877#if defined(__lint)
2878
2879/*ARGSUSED*/
2880void getcregs(struct cregs *crp)
2881{}
2882
2883#else	/* __lint */
2884
2885#if defined(__amd64)
2886
2887#define	GETMSR(r, off, d)	\
2888	movl	$r, %ecx;	\
2889	rdmsr;			\
2890	movl	%eax, off(d);	\
2891	movl	%edx, off+4(d)
2892
2893	ENTRY_NP(getcregs)
2894	xorl	%eax, %eax
2895	movq	%rax, CREG_GDT+8(%rdi)
2896	sgdt	CREG_GDT(%rdi)		/* 10 bytes */
2897	movq	%rax, CREG_IDT+8(%rdi)
2898	sidt	CREG_IDT(%rdi)		/* 10 bytes */
2899	movq	%rax, CREG_LDT(%rdi)
2900	sldt	CREG_LDT(%rdi)		/* 2 bytes */
2901	movq	%rax, CREG_TASKR(%rdi)
2902	str	CREG_TASKR(%rdi)	/* 2 bytes */
2903	movq	%cr0, %rax
2904	movq	%rax, CREG_CR0(%rdi)	/* cr0 */
2905	movq	%cr2, %rax
2906	movq	%rax, CREG_CR2(%rdi)	/* cr2 */
2907	movq	%cr3, %rax
2908	movq	%rax, CREG_CR3(%rdi)	/* cr3 */
2909	movq	%cr4, %rax
2910	movq	%rax, CREG_CR4(%rdi)	/* cr4 */
2911	movq	%cr8, %rax
2912	movq	%rax, CREG_CR8(%rdi)	/* cr8 */
2913	GETMSR(MSR_AMD_KGSBASE, CREG_KGSBASE, %rdi)
2914	GETMSR(MSR_AMD_EFER, CREG_EFER, %rdi)
	ret
2915	SET_SIZE(getcregs)
2916
2917#undef GETMSR
2918
2919#elif defined(__i386)
2920
2921	ENTRY_NP(getcregs)
2922	movl	4(%esp), %edx
2923	movw	$0, CREG_GDT+6(%edx)
2924	movw	$0, CREG_IDT+6(%edx)
2925	sgdt	CREG_GDT(%edx)		/* gdt */
2926	sidt	CREG_IDT(%edx)		/* idt */
2927	sldt	CREG_LDT(%edx)		/* ldt */
2928	str	CREG_TASKR(%edx)	/* task */
2929	movl	%cr0, %eax
2930	movl	%eax, CREG_CR0(%edx)	/* cr0 */
2931	movl	%cr2, %eax
2932	movl	%eax, CREG_CR2(%edx)	/* cr2 */
2933	movl	%cr3, %eax
2934	movl	%eax, CREG_CR3(%edx)	/* cr3 */
2935	testl	$X86_LARGEPAGE, x86_feature
2936	jz	.nocr4
2937	movl	%cr4, %eax
2938	movl	%eax, CREG_CR4(%edx)	/* cr4 */
2939	jmp	.skip
2940.nocr4:
2941	movl	$0, CREG_CR4(%edx)
2942.skip:
2943	rep;	ret	/* use 2 byte return instruction when branch target */
2944			/* AMD Software Optimization Guide - Section 6.2 */
2945	SET_SIZE(getcregs)
2946
2947#endif	/* __i386 */
2948#endif	/* __lint */
2949
2950
2951/*
2952 * A panic trigger is a word which is updated atomically and can only be set
2953 * once.  We atomically store 0xDEFACEDD and load the old value.  If the
2954 * previous value was 0, we succeed and return 1; otherwise return 0.
2955 * This allows a partially corrupt trigger to still trigger correctly.  DTrace
2956 * has its own version of this function to allow it to panic correctly from
2957 * probe context.
2958 */
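/*
 * Illustrative only (the authoritative versions are the assembly below):
 * the routines behave like an atomic swap followed by a test of the old
 * value, e.g.
 *
 *	int
 *	panic_trigger_sketch(int *tp)
 *	{
 *		int old;
 *
 *		old = atomic_swap(tp, 0xdefacedd);	-- locked xchgl below
 *		return (old == 0);
 *	}
 *
 * where atomic_swap() stands in for the locked xchgl and is named here
 * purely for illustration.
 */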
2959#if defined(__lint)
2960
2961/*ARGSUSED*/
2962int
2963panic_trigger(int *tp)
2964{ return (0); }
2965
2966/*ARGSUSED*/
2967int
2968dtrace_panic_trigger(int *tp)
2969{ return (0); }
2970
2971#else	/* __lint */
2972
2973#if defined(__amd64)
2974
2975	ENTRY_NP(panic_trigger)
2976	xorl	%eax, %eax
2977	movl	$0xdefacedd, %edx
2978	lock
2979	  xchgl	%edx, (%rdi)
2980	cmpl	$0, %edx
2981	je	0f
2982	movl	$0, %eax
2983	ret
29840:	movl	$1, %eax
2985	ret
2986	SET_SIZE(panic_trigger)
2987
2988	ENTRY_NP(dtrace_panic_trigger)
2989	xorl	%eax, %eax
2990	movl	$0xdefacedd, %edx
2991	lock
2992	  xchgl	%edx, (%rdi)
2993	cmpl	$0, %edx
2994	je	0f
2995	movl	$0, %eax
2996	ret
29970:	movl	$1, %eax
2998	ret
2999	SET_SIZE(dtrace_panic_trigger)
3000
3001#elif defined(__i386)
3002
3003	ENTRY_NP(panic_trigger)
3004	movl	4(%esp), %edx		/ %edx = address of trigger
3005	movl	$0xdefacedd, %eax	/ %eax = 0xdefacedd
3006	lock				/ assert lock
3007	xchgl %eax, (%edx)		/ exchange %eax and the trigger
3008	cmpl	$0, %eax		/ if (%eax == 0x0)
3009	je	0f			/   return (1);
3010	movl	$0, %eax		/ else
3011	ret				/   return (0);
30120:	movl	$1, %eax
3013	ret
3014	SET_SIZE(panic_trigger)
3015
3016	ENTRY_NP(dtrace_panic_trigger)
3017	movl	4(%esp), %edx		/ %edx = address of trigger
3018	movl	$0xdefacedd, %eax	/ %eax = 0xdefacedd
3019	lock				/ assert lock
3020	xchgl %eax, (%edx)		/ exchange %eax and the trigger
3021	cmpl	$0, %eax		/ if (%eax == 0x0)
3022	je	0f			/   return (1);
3023	movl	$0, %eax		/ else
3024	ret				/   return (0);
30250:	movl	$1, %eax
3026	ret
3027	SET_SIZE(dtrace_panic_trigger)
3028
3029#endif	/* __i386 */
3030#endif	/* __lint */
3031
3032/*
3033 * The panic() and cmn_err() functions invoke vpanic() as a common entry point
3034 * into the panic code implemented in panicsys().  vpanic() is responsible
3035 * for passing through the format string and arguments, and constructing a
3036 * regs structure on the stack into which it saves the current register
3037 * values.  If we are not dying due to a fatal trap, these registers will
3038 * then be preserved in panicbuf as the current processor state.  Before
3039 * invoking panicsys(), vpanic() activates the first panic trigger (see
3040 * common/os/panic.c) and switches to the panic_stack if successful.  Note that
3041 * DTrace takes a slightly different panic path if it must panic from probe
3042 * context.  Instead of calling panic, it calls into dtrace_vpanic(), which
3043 * sets up the initial stack as vpanic does, calls dtrace_panic_trigger(), and
3044 * branches back into vpanic().
3045 */
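/*
 * Informal outline (pseudocode only; the assembly below is authoritative):
 *
 *	vpanic(format, alist)
 *	{
 *		push the caller-saved registers onto the current stack;
 *		on_panic_stack = panic_trigger(&panic_quiesce);
 *		if (on_panic_stack)
 *			switch the stack pointer to panic_stack + PANICSTKSIZE;
 *		build a struct regs from the values saved at entry;
 *		panicsys(format, alist, &regs, on_panic_stack);
 *	}
 *
 * dtrace_vpanic() differs only in calling dtrace_panic_trigger() before
 * joining the common code at vpanic_common.
 */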
3046#if defined(__lint)
3047
3048/*ARGSUSED*/
3049void
3050vpanic(const char *format, va_list alist)
3051{}
3052
3053/*ARGSUSED*/
3054void
3055dtrace_vpanic(const char *format, va_list alist)
3056{}
3057
3058#else	/* __lint */
3059
3060#if defined(__amd64)
3061
3062	ENTRY_NP(vpanic)			/* Initial stack layout: */
3063
3064	pushq	%rbp				/* | %rip | 	0x60	*/
3065	movq	%rsp, %rbp			/* | %rbp |	0x58	*/
3066	pushfq					/* | rfl  |	0x50	*/
3067	pushq	%r11				/* | %r11 |	0x48	*/
3068	pushq	%r10				/* | %r10 |	0x40	*/
3069	pushq	%rbx				/* | %rbx |	0x38	*/
3070	pushq	%rax				/* | %rax |	0x30	*/
3071	pushq	%r9				/* | %r9  |	0x28	*/
3072	pushq	%r8				/* | %r8  |	0x20	*/
3073	pushq	%rcx				/* | %rcx |	0x18	*/
3074	pushq	%rdx				/* | %rdx |	0x10	*/
3075	pushq	%rsi				/* | %rsi |	0x8 alist */
3076	pushq	%rdi				/* | %rdi |	0x0 format */
3077
3078	movq	%rsp, %rbx			/* %rbx = current %rsp */
3079
3080	leaq	panic_quiesce(%rip), %rdi	/* %rdi = &panic_quiesce */
3081	call	panic_trigger			/* %eax = panic_trigger() */
3082
3083vpanic_common:
3084	cmpl	$0, %eax
3085	je	0f
3086
3087	/*
3088	 * If panic_trigger() was successful, we are the first to initiate a
3089	 * panic: we now switch to the reserved panic_stack before continuing.
3090	 */
3091	leaq	panic_stack(%rip), %rsp
3092	addq	$PANICSTKSIZE, %rsp
30930:	subq	$REGSIZE, %rsp
3094	/*
3095	 * Now that we've got everything set up, store the register values as
3096	 * they were when we entered vpanic() to the designated location in
3097	 * the regs structure we allocated on the stack.
3098	 */
3099	movq	0x0(%rbx), %rcx
3100	movq	%rcx, REGOFF_RDI(%rsp)
3101	movq	0x8(%rbx), %rcx
3102	movq	%rcx, REGOFF_RSI(%rsp)
3103	movq	0x10(%rbx), %rcx
3104	movq	%rcx, REGOFF_RDX(%rsp)
3105	movq	0x18(%rbx), %rcx
3106	movq	%rcx, REGOFF_RCX(%rsp)
3107	movq	0x20(%rbx), %rcx
3108
3109	movq	%rcx, REGOFF_R8(%rsp)
3110	movq	0x28(%rbx), %rcx
3111	movq	%rcx, REGOFF_R9(%rsp)
3112	movq	0x30(%rbx), %rcx
3113	movq	%rcx, REGOFF_RAX(%rsp)
3114	movq	0x38(%rbx), %rcx
3115	movq	%rcx, REGOFF_RBX(%rsp)
3116	movq	0x58(%rbx), %rcx
3117
3118	movq	%rcx, REGOFF_RBP(%rsp)
3119	movq	0x40(%rbx), %rcx
3120	movq	%rcx, REGOFF_R10(%rsp)
3121	movq	0x48(%rbx), %rcx
3122	movq	%rcx, REGOFF_R11(%rsp)
3123	movq	%r12, REGOFF_R12(%rsp)
3124
3125	movq	%r13, REGOFF_R13(%rsp)
3126	movq	%r14, REGOFF_R14(%rsp)
3127	movq	%r15, REGOFF_R15(%rsp)
3128
3129	movl	$MSR_AMD_FSBASE, %ecx
3130	rdmsr
3131	movl	%eax, REGOFF_FSBASE(%rsp)
3132	movl	%edx, REGOFF_FSBASE+4(%rsp)
3133
3134	movl	$MSR_AMD_GSBASE, %ecx
3135	rdmsr
3136	movl	%eax, REGOFF_GSBASE(%rsp)
3137	movl	%edx, REGOFF_GSBASE+4(%rsp)
3138
3139	xorl	%ecx, %ecx
3140	movw	%ds, %cx
3141	movq	%rcx, REGOFF_DS(%rsp)
3142	movw	%es, %cx
3143	movq	%rcx, REGOFF_ES(%rsp)
3144	movw	%fs, %cx
3145	movq	%rcx, REGOFF_FS(%rsp)
3146	movw	%gs, %cx
3147	movq	%rcx, REGOFF_GS(%rsp)
3148
3149	movq	$0, REGOFF_TRAPNO(%rsp)
3150
3151	movq	$0, REGOFF_ERR(%rsp)
3152	leaq	vpanic(%rip), %rcx
3153	movq	%rcx, REGOFF_RIP(%rsp)
3154	movw	%cs, %cx
3155	movzwq	%cx, %rcx
3156	movq	%rcx, REGOFF_CS(%rsp)
3157	movq	0x50(%rbx), %rcx
3158	movq	%rcx, REGOFF_RFL(%rsp)
3159	movq	%rbx, %rcx
3160	addq	$0x60, %rcx
3161	movq	%rcx, REGOFF_RSP(%rsp)
3162	movw	%ss, %cx
3163	movzwq	%cx, %rcx
3164	movq	%rcx, REGOFF_SS(%rsp)
3165
3166	/*
3167	 * panicsys(format, alist, rp, on_panic_stack)
3168	 */
3169	movq	REGOFF_RDI(%rsp), %rdi		/* format */
3170	movq	REGOFF_RSI(%rsp), %rsi		/* alist */
3171	movq	%rsp, %rdx			/* struct regs */
3172	movl	%eax, %ecx			/* on_panic_stack */
3173	call	panicsys
3174	addq	$REGSIZE, %rsp
3175	popq	%rdi
3176	popq	%rsi
3177	popq	%rdx
3178	popq	%rcx
3179	popq	%r8
3180	popq	%r9
3181	popq	%rax
3182	popq	%rbx
3183	popq	%r10
3184	popq	%r11
3185	popfq
3186	leave
3187	ret
3188	SET_SIZE(vpanic)
3189
3190	ENTRY_NP(dtrace_vpanic)			/* Initial stack layout: */
3191
3192	pushq	%rbp				/* | %rip | 	0x60	*/
3193	movq	%rsp, %rbp			/* | %rbp |	0x58	*/
3194	pushfq					/* | rfl  |	0x50	*/
3195	pushq	%r11				/* | %r11 |	0x48	*/
3196	pushq	%r10				/* | %r10 |	0x40	*/
3197	pushq	%rbx				/* | %rbx |	0x38	*/
3198	pushq	%rax				/* | %rax |	0x30	*/
3199	pushq	%r9				/* | %r9  |	0x28	*/
3200	pushq	%r8				/* | %r8  |	0x20	*/
3201	pushq	%rcx				/* | %rcx |	0x18	*/
3202	pushq	%rdx				/* | %rdx |	0x10	*/
3203	pushq	%rsi				/* | %rsi |	0x8 alist */
3204	pushq	%rdi				/* | %rdi |	0x0 format */
3205
3206	movq	%rsp, %rbx			/* %rbx = current %rsp */
3207
3208	leaq	panic_quiesce(%rip), %rdi	/* %rdi = &panic_quiesce */
3209	call	dtrace_panic_trigger	/* %eax = dtrace_panic_trigger() */
3210	jmp	vpanic_common
3211
3212	SET_SIZE(dtrace_vpanic)
3213
3214#elif defined(__i386)
3215
3216	ENTRY_NP(vpanic)			/ Initial stack layout:
3217
3218	pushl	%ebp				/ | %eip | 20
3219	movl	%esp, %ebp			/ | %ebp | 16
3220	pushl	%eax				/ | %eax | 12
3221	pushl	%ebx				/ | %ebx |  8
3222	pushl	%ecx				/ | %ecx |  4
3223	pushl	%edx				/ | %edx |  0
3224
3225	movl	%esp, %ebx			/ %ebx = current stack pointer
3226
3227	lea	panic_quiesce, %eax		/ %eax = &panic_quiesce
3228	pushl	%eax				/ push &panic_quiesce
3229	call	panic_trigger			/ %eax = panic_trigger()
3230	addl	$4, %esp			/ reset stack pointer
3231
3232vpanic_common:
3233	cmpl	$0, %eax			/ if (%eax == 0)
3234	je	0f				/   goto 0f;
3235
3236	/*
3237	 * If panic_trigger() was successful, we are the first to initiate a
3238	 * panic: we now switch to the reserved panic_stack before continuing.
3239	 */
3240	lea	panic_stack, %esp		/ %esp  = panic_stack
3241	addl	$PANICSTKSIZE, %esp		/ %esp += PANICSTKSIZE
3242
32430:	subl	$REGSIZE, %esp			/ allocate struct regs
3244
3245	/*
3246	 * Now that we've got everything set up, store the register values as
3247	 * they were when we entered vpanic() to the designated location in
3248	 * the regs structure we allocated on the stack.
3249	 */
3250#if !defined(__GNUC_AS__)
3251	movw	%gs, %edx
3252	movl	%edx, REGOFF_GS(%esp)
3253	movw	%fs, %edx
3254	movl	%edx, REGOFF_FS(%esp)
3255	movw	%es, %edx
3256	movl	%edx, REGOFF_ES(%esp)
3257	movw	%ds, %edx
3258	movl	%edx, REGOFF_DS(%esp)
3259#else	/* __GNUC_AS__ */
3260	mov	%gs, %edx
3261	mov	%edx, REGOFF_GS(%esp)
3262	mov	%fs, %edx
3263	mov	%edx, REGOFF_FS(%esp)
3264	mov	%es, %edx
3265	mov	%edx, REGOFF_ES(%esp)
3266	mov	%ds, %edx
3267	mov	%edx, REGOFF_DS(%esp)
3268#endif	/* __GNUC_AS__ */
3269	movl	%edi, REGOFF_EDI(%esp)
3270	movl	%esi, REGOFF_ESI(%esp)
3271	movl	16(%ebx), %ecx
3272	movl	%ecx, REGOFF_EBP(%esp)
3273	movl	%ebx, %ecx
3274	addl	$20, %ecx
3275	movl	%ecx, REGOFF_ESP(%esp)
3276	movl	8(%ebx), %ecx
3277	movl	%ecx, REGOFF_EBX(%esp)
3278	movl	0(%ebx), %ecx
3279	movl	%ecx, REGOFF_EDX(%esp)
3280	movl	4(%ebx), %ecx
3281	movl	%ecx, REGOFF_ECX(%esp)
3282	movl	12(%ebx), %ecx
3283	movl	%ecx, REGOFF_EAX(%esp)
3284	movl	$0, REGOFF_TRAPNO(%esp)
3285	movl	$0, REGOFF_ERR(%esp)
3286	lea	vpanic, %ecx
3287	movl	%ecx, REGOFF_EIP(%esp)
3288#if !defined(__GNUC_AS__)
3289	movw	%cs, %edx
3290#else	/* __GNUC_AS__ */
3291	mov	%cs, %edx
3292#endif	/* __GNUC_AS__ */
3293	movl	%edx, REGOFF_CS(%esp)
3294	pushfl
3295	popl	%ecx
3296	movl	%ecx, REGOFF_EFL(%esp)
3297	movl	$0, REGOFF_UESP(%esp)
3298#if !defined(__GNUC_AS__)
3299	movw	%ss, %edx
3300#else	/* __GNUC_AS__ */
3301	mov	%ss, %edx
3302#endif	/* __GNUC_AS__ */
3303	movl	%edx, REGOFF_SS(%esp)
3304
3305	movl	%esp, %ecx			/ %ecx = &regs
3306	pushl	%eax				/ push on_panic_stack
3307	pushl	%ecx				/ push &regs
3308	movl	12(%ebp), %ecx			/ %ecx = alist
3309	pushl	%ecx				/ push alist
3310	movl	8(%ebp), %ecx			/ %ecx = format
3311	pushl	%ecx				/ push format
3312	call	panicsys			/ panicsys();
3313	addl	$16, %esp			/ pop arguments
3314
3315	addl	$REGSIZE, %esp
3316	popl	%edx
3317	popl	%ecx
3318	popl	%ebx
3319	popl	%eax
3320	leave
3321	ret
3322	SET_SIZE(vpanic)
3323
3324	ENTRY_NP(dtrace_vpanic)			/ Initial stack layout:
3325
3326	pushl	%ebp				/ | %eip | 20
3327	movl	%esp, %ebp			/ | %ebp | 16
3328	pushl	%eax				/ | %eax | 12
3329	pushl	%ebx				/ | %ebx |  8
3330	pushl	%ecx				/ | %ecx |  4
3331	pushl	%edx				/ | %edx |  0
3332
3333	movl	%esp, %ebx			/ %ebx = current stack pointer
3334
3335	lea	panic_quiesce, %eax		/ %eax = &panic_quiesce
3336	pushl	%eax				/ push &panic_quiesce
3337	call	dtrace_panic_trigger		/ %eax = dtrace_panic_trigger()
3338	addl	$4, %esp			/ reset stack pointer
3339	jmp	vpanic_common			/ jump back to common code
3340
3341	SET_SIZE(dtrace_vpanic)
3342
3343#endif	/* __i386 */
3344#endif	/* __lint */
3345
3346#if defined(__lint)
3347
3348void
3349hres_tick(void)
3350{}
3351
3352int64_t timedelta;
3353hrtime_t hres_last_tick;
3354timestruc_t hrestime;
3355int64_t hrestime_adj;
3356volatile int hres_lock;
3357uint_t nsec_scale;
3358hrtime_t hrtime_base;
3359
3360#else	/* __lint */
3361
3362	DGDEF3(hrestime, _MUL(2, CLONGSIZE), 8)
3363	.NWORD	0, 0
3364
3365	DGDEF3(hrestime_adj, 8, 8)
3366	.long	0, 0
3367
3368	DGDEF3(hres_last_tick, 8, 8)
3369	.long	0, 0
3370
3371	DGDEF3(timedelta, 8, 8)
3372	.long	0, 0
3373
3374	DGDEF3(hres_lock, 4, 8)
3375	.long	0
3376
3377	/*
3378	 * initialized to a non-zero value so that pc_gethrtime()
3379	 * works correctly even before the clock is initialized
3380	 */
3381	DGDEF3(hrtime_base, 8, 8)
3382	.long	_MUL(NSEC_PER_CLOCK_TICK, 6), 0
3383
3384	DGDEF3(adj_shift, 4, 4)
3385	.long	ADJ_SHIFT
3386
3387#if defined(__amd64)
3388
3389	ENTRY_NP(hres_tick)
3390	pushq	%rbp
3391	movq	%rsp, %rbp
3392
3393	/*
3394	 * We need to call *gethrtimef before picking up CLOCK_LOCK (obviously,
3395	 * hres_last_tick can only be modified while holding CLOCK_LOCK).
3396	 * At worst, performing this now instead of under CLOCK_LOCK may
3397	 * introduce some jitter in pc_gethrestime().
3398	 */
3399	call	*gethrtimef(%rip)
3400	movq	%rax, %r8
3401
3402	leaq	hres_lock(%rip), %rax
3403	movb	$-1, %dl
3404.CL1:
3405	xchgb	%dl, (%rax)
3406	testb	%dl, %dl
3407	jz	.CL3			/* got it */
3408.CL2:
3409	cmpb	$0, (%rax)		/* possible to get lock? */
3410	pause
3411	jne	.CL2
3412	jmp	.CL1			/* yes, try again */
3413.CL3:
3414	/*
3415	 * compute the interval since last time hres_tick was called
3416	 * and adjust hrtime_base and hrestime accordingly
3417	 * hrtime_base is an 8 byte value (in nsec), hrestime is
3418	 * a timestruc_t (sec, nsec)
3419	 */
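	/*
	 * Informally (illustrative pseudocode, not from the original):
	 *
	 *	interval = now - hres_last_tick;	-- now is in %r8
	 *	hrtime_base += interval;
	 *	hrestime.tv_nsec += interval;
	 *	hres_last_tick = now;
	 */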
3420	leaq	hres_last_tick(%rip), %rax
3421	movq	%r8, %r11
3422	subq	(%rax), %r8
3423	addq	%r8, hrtime_base(%rip)	/* add interval to hrtime_base */
3424	addq	%r8, hrestime+8(%rip)	/* add interval to hrestime.tv_nsec */
3425	/*
3426	 * Now that we have CLOCK_LOCK, we can update hres_last_tick
3427	 */
3428	movq	%r11, (%rax)
3429
3430	call	__adj_hrestime
3431
3432	/*
3433	 * release the hres_lock
3434	 */
3435	incl	hres_lock(%rip)
3436	leave
3437	ret
3438	SET_SIZE(hres_tick)
3439
3440#elif defined(__i386)
3441
3442	ENTRY_NP(hres_tick)
3443	pushl	%ebp
3444	movl	%esp, %ebp
3445	pushl	%esi
3446	pushl	%ebx
3447
3448	/*
3449	 * We need to call *gethrtimef before picking up CLOCK_LOCK (obviously,
3450	 * hres_last_tick can only be modified while holding CLOCK_LOCK).
3451	 * At worst, performing this now instead of under CLOCK_LOCK may
3452	 * introduce some jitter in pc_gethrestime().
3453	 */
3454	call	*gethrtimef
3455	movl	%eax, %ebx
3456	movl	%edx, %esi
3457
3458	movl	$hres_lock, %eax
3459	movl	$-1, %edx
3460.CL1:
3461	xchgb	%dl, (%eax)
3462	testb	%dl, %dl
3463	jz	.CL3			/ got it
3464.CL2:
3465	cmpb	$0, (%eax)		/ possible to get lock?
3466	pause
3467	jne	.CL2
3468	jmp	.CL1			/ yes, try again
3469.CL3:
3470	/*
3471	 * compute the interval since last time hres_tick was called
3472	 * and adjust hrtime_base and hrestime accordingly
3473	 * hrtime_base is an 8 byte value (in nsec), hrestime is
3474	 * timestruc_t (sec, nsec)
3475	 */
3476
3477	lea	hres_last_tick, %eax
3478
3479	movl	%ebx, %edx
3480	movl	%esi, %ecx
3481
3482	subl 	(%eax), %edx
3483	sbbl 	4(%eax), %ecx
3484
3485	addl	%edx, hrtime_base	/ add interval to hrtime_base
3486	adcl	%ecx, hrtime_base+4
3487
3488	addl 	%edx, hrestime+4	/ add interval to hrestime.tv_nsec
3489
3490	/
3491	/ Now that we have CLOCK_LOCK, we can update hres_last_tick.
3492	/
3493	movl	%ebx, (%eax)
3494	movl	%esi,  4(%eax)
3495
3496	/ get hrestime at this moment. used as base for pc_gethrestime
3497	/
3498	/ Apply adjustment, if any
3499	/
3500	/ #define HRES_ADJ	(NSEC_PER_CLOCK_TICK >> ADJ_SHIFT)
3501	/ (max_hres_adj)
3502	/
3503	/ void
3504	/ adj_hrestime()
3505	/ {
3506	/	long long adj;
3507	/
3508	/	if (hrestime_adj == 0)
3509	/		adj = 0;
3510	/	else if (hrestime_adj > 0) {
3511	/		if (hrestime_adj < HRES_ADJ)
3512	/			adj = hrestime_adj;
3513	/		else
3514	/			adj = HRES_ADJ;
3515	/	}
3516	/	else {
3517	/		if (hrestime_adj < -(HRES_ADJ))
3518	/			adj = -(HRES_ADJ);
3519	/		else
3520	/			adj = hrestime_adj;
3521	/	}
3522	/
3523	/	timedelta -= adj;
3524	/	hrestime_adj = timedelta;
3525	/	hrestime.tv_nsec += adj;
3526	/
3527	/	while (hrestime.tv_nsec >= NANOSEC) {
3528	/		one_sec++;
3529	/		hrestime.tv_sec++;
3530	/		hrestime.tv_nsec -= NANOSEC;
3531	/	}
3532	/ }
3533__adj_hrestime:
3534	movl	hrestime_adj, %esi	/ if (hrestime_adj == 0)
3535	movl	hrestime_adj+4, %edx
3536	andl	%esi, %esi
3537	jne	.CL4			/ no
3538	andl	%edx, %edx
3539	jne	.CL4			/ no
3540	subl	%ecx, %ecx		/ yes, adj = 0;
3541	subl	%edx, %edx
3542	jmp	.CL5
3543.CL4:
3544	subl	%ecx, %ecx
3545	subl	%eax, %eax
3546	subl	%esi, %ecx
3547	sbbl	%edx, %eax
3548	andl	%eax, %eax		/ if (hrestime_adj > 0)
3549	jge	.CL6
3550
3551	/ In the following comments, HRES_ADJ is used, while in the code
3552	/ max_hres_adj is used.
3553	/
3554	/ The test for "hrestime_adj < HRES_ADJ" is complicated because
3555	/ hrestime_adj is 64-bits, while HRES_ADJ is 32-bits.  We rely
3556	/ on the logical equivalence of:
3557	/
3558	/	!(hrestime_adj < HRES_ADJ)
3559	/
3560	/ and the two step sequence:
3561	/
3562	/	(HRES_ADJ - lsw(hrestime_adj)) generates a Borrow/Carry
3563	/
3564	/ which computes whether or not the least significant 32-bits
3565	/ of hrestime_adj is greater than HRES_ADJ, followed by:
3566	/
3567	/	Previous Borrow/Carry + -1 + msw(hrestime_adj) generates a Carry
3568	/
3569	/ which generates a carry whenever step 1 is true or the most
3570	/ significant long of the longlong hrestime_adj is non-zero.
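	/
	/ Illustrative example (not from the original source): with
	/ HRES_ADJ = 100 and hrestime_adj = 0x100000005, step 1 computes
	/ 100 - 5 with no borrow, and step 2 computes 1 + (-1) + 0, which
	/ carries, so adj is clamped to HRES_ADJ.  With hrestime_adj = 5,
	/ step 2 computes 0 + (-1) + 0 with no carry, so adj = hrestime_adj.
	/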
3571
3572	movl	max_hres_adj, %ecx	/ hrestime_adj is positive
3573	subl	%esi, %ecx
3574	movl	%edx, %eax
3575	adcl	$-1, %eax
3576	jnc	.CL7
3577	movl	max_hres_adj, %ecx	/ adj = HRES_ADJ;
3578	subl	%edx, %edx
3579	jmp	.CL5
3580
3581	/ The following computation is similar to the one above.
3582	/
3583	/ The test for "hrestime_adj < -(HRES_ADJ)" is complicated because
3584	/ hrestime_adj is 64-bits, while HRES_ADJ is 32-bits.  We rely
3585	/ on the logical equivalence of:
3586	/
3587	/	(hrestime_adj > -HRES_ADJ)
3588	/
3589	/ and the two step sequence:
3590	/
3591	/	(HRES_ADJ + lsw(hrestime_adj)) generates a Carry
3592	/
3593	/ which means the least significant 32-bits of hrestime_adj is
3594	/ greater than -HRES_ADJ, followed by:
3595	/
3596	/	Previous Carry + 0 + msw(hrestime_adj) generates a Carry
3597	/
3598	/ which generates a carry only when step 1 is true and the most
3599	/ significant long of the longlong hrestime_adj is -1.
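	/
	/ Illustrative example (not from the original source): with
	/ HRES_ADJ = 100 and hrestime_adj = -5 (0xffffffff fffffffb),
	/ step 1 computes 0xfffffffb + 100 with a carry, and step 2
	/ computes 0xffffffff + 0 + 1, which also carries, so
	/ adj = hrestime_adj.  With hrestime_adj = -200, neither step
	/ carries, so adj is clamped to -(HRES_ADJ).
	/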
3600
3601.CL6:					/ hrestime_adj is negative
3602	movl	%esi, %ecx
3603	addl	max_hres_adj, %ecx
3604	movl	%edx, %eax
3605	adcl	$0, %eax
3606	jc	.CL7
3607	xor	%ecx, %ecx
3608	subl	max_hres_adj, %ecx	/ adj = -(HRES_ADJ);
3609	movl	$-1, %edx
3610	jmp	.CL5
3611.CL7:
3612	movl	%esi, %ecx		/ adj = hrestime_adj;
3613.CL5:
3614	movl	timedelta, %esi
3615	subl	%ecx, %esi
3616	movl	timedelta+4, %eax
3617	sbbl	%edx, %eax
3618	movl	%esi, timedelta
3619	movl	%eax, timedelta+4	/ timedelta -= adj;
3620	movl	%esi, hrestime_adj
3621	movl	%eax, hrestime_adj+4	/ hrestime_adj = timedelta;
3622	addl	hrestime+4, %ecx
3623
3624	movl	%ecx, %eax		/ eax = tv_nsec
36251:
3626	cmpl	$NANOSEC, %eax		/ if ((unsigned long)tv_nsec >= NANOSEC)
3627	jb	.CL8			/ no
3628	incl	one_sec			/ yes,  one_sec++;
3629	incl	hrestime		/ hrestime.tv_sec++;
3630	addl	$-NANOSEC, %eax		/ tv_nsec -= NANOSEC
3631	jmp	1b			/ check for more seconds
3632
3633.CL8:
3634	movl	%eax, hrestime+4	/ store final into hrestime.tv_nsec
3635	incl	hres_lock		/ release the hres_lock
3636
3637	popl	%ebx
3638	popl	%esi
3639	leave
3640	ret
3641	SET_SIZE(hres_tick)
3642
3643#endif	/* __i386 */
3644#endif	/* __lint */
3645
3646/*
3647 * void prefetch_smap_w(void *)
3648 *
3649 * Prefetch ahead within a linear list of smap structures.
3650 * Not implemented for ia32.  Stub for compatibility.
3651 */
3652
3653#if defined(__lint)
3654
3655/*ARGSUSED*/
3656void prefetch_smap_w(void *smp)
3657{}
3658
3659#else	/* __lint */
3660
3661	ENTRY(prefetch_smap_w)
3662	rep;	ret	/* use 2 byte return instruction when branch target */
3663			/* AMD Software Optimization Guide - Section 6.2 */
3664	SET_SIZE(prefetch_smap_w)
3665
3666#endif	/* __lint */
3667
3668/*
3669 * prefetch_page_r(page_t *)
3670 * issue prefetch instructions for a page_t
3671 */
3672#if defined(__lint)
3673
3674/*ARGSUSED*/
3675void
3676prefetch_page_r(void *pp)
3677{}
3678
3679#else	/* __lint */
3680
3681	ENTRY(prefetch_page_r)
3682	rep;	ret	/* use 2 byte return instruction when branch target */
3683			/* AMD Software Optimization Guide - Section 6.2 */
3684	SET_SIZE(prefetch_page_r)
3685
3686#endif	/* __lint */
3687
3688#if defined(__lint)
3689
3690/*ARGSUSED*/
3691int
3692bcmp(const void *s1, const void *s2, size_t count)
3693{ return (0); }
3694
3695#else   /* __lint */
3696
3697#if defined(__amd64)
3698
3699	ENTRY(bcmp)
3700	pushq	%rbp
3701	movq	%rsp, %rbp
3702#ifdef DEBUG
3703	movq	kernelbase(%rip), %r11
3704	cmpq	%r11, %rdi
3705	jb	0f
3706	cmpq	%r11, %rsi
3707	jnb	1f
37080:	leaq	.bcmp_panic_msg(%rip), %rdi
3709	xorl	%eax, %eax
3710	call	panic
37111:
3712#endif	/* DEBUG */
3713	call	memcmp
3714	testl	%eax, %eax
3715	setne	%dl
3716	leave
3717	movzbl	%dl, %eax
3718	ret
3719	SET_SIZE(bcmp)
3720
3721#elif defined(__i386)
3722
3723#define	ARG_S1		8
3724#define	ARG_S2		12
3725#define	ARG_LENGTH	16
3726
3727	ENTRY(bcmp)
3728#ifdef DEBUG
3729	pushl   %ebp
3730	movl    %esp, %ebp
3731	movl    kernelbase, %eax
3732	cmpl    %eax, ARG_S1(%ebp)
3733	jb	0f
3734	cmpl    %eax, ARG_S2(%ebp)
3735	jnb	1f
37360:	pushl   $.bcmp_panic_msg
3737	call    panic
37381:	popl    %ebp
3739#endif	/* DEBUG */
3740
3741	pushl	%edi		/ save register variable
3742	movl	ARG_S1(%esp), %eax	/ %eax = address of string 1
3743	movl	ARG_S2(%esp), %ecx	/ %ecx = address of string 2
3744	cmpl	%eax, %ecx	/ if the same string
3745	je	.equal		/ goto .equal
3746	movl	ARG_LENGTH(%esp), %edi	/ %edi = length in bytes
3747	cmpl	$4, %edi	/ if %edi < 4
3748	jb	.byte_check	/ goto .byte_check
3749	.align	4
3750.word_loop:
3751	movl	(%ecx), %edx	/ move 1 word from (%ecx) to %edx
3752	leal	-4(%edi), %edi	/ %edi -= 4
3753	cmpl	(%eax), %edx	/ compare 1 word from (%eax) with %edx
3754	jne	.word_not_equal	/ if not equal, goto .word_not_equal
3755	leal	4(%ecx), %ecx	/ %ecx += 4 (next word)
3756	leal	4(%eax), %eax	/ %eax += 4 (next word)
3757	cmpl	$4, %edi	/ if %edi >= 4
3758	jae	.word_loop	/ goto .word_loop
3759.byte_check:
3760	cmpl	$0, %edi	/ if %edi == 0
3761	je	.equal		/ goto .equal
3762	jmp	.byte_loop	/ goto .byte_loop (checks in bytes)
3763.word_not_equal:
3764	leal	4(%edi), %edi	/ %edi += 4 (post-decremented)
3765	.align	4
3766.byte_loop:
3767	movb	(%ecx),	%dl	/ move 1 byte from (%ecx) to %dl
3768	cmpb	%dl, (%eax)	/ compare %dl with 1 byte from (%eax)
3769	jne	.not_equal	/ if not equal, goto .not_equal
3770	incl	%ecx		/ %ecx++ (next byte)
3771	incl	%eax		/ %eax++ (next byte)
3772	decl	%edi		/ %edi--
3773	jnz	.byte_loop	/ if not zero, goto .byte_loop
3774.equal:
3775	xorl	%eax, %eax	/ %eax = 0
3776	popl	%edi		/ restore register variable
3777	ret			/ return (0)
3778	.align	4
3779.not_equal:
3780	movl	$1, %eax	/ return 1
3781	popl	%edi		/ restore register variable
3782	ret			/ return (1)
3783	SET_SIZE(bcmp)
3784
3785#endif	/* __i386 */
3786
3787#ifdef DEBUG
3788	.text
3789.bcmp_panic_msg:
3790	.string "bcmp: arguments below kernelbase"
3791#endif	/* DEBUG */
3792
3793#endif	/* __lint */
3794