xref: /titanic_51/usr/src/uts/intel/ia32/ml/i86_subr.s (revision 1a7c1b724419d3cb5fa6eea75123c6b2060ba31b)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22/*
23 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27/*
28 *  Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.
29 *  Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T
30 *    All Rights Reserved
31 */
32
33#pragma ident	"%Z%%M%	%I%	%E% SMI"
34
35/*
36 * General assembly language routines.
37 * It is the intent of this file to contain routines that are
38 * independent of the specific kernel architecture, and those that are
39 * common across kernel architectures.
40 * As architectures diverge, and implementations of specific
41 * architecture-dependent routines change, the routines should be moved
42 * from this file into the respective ../`arch -k`/subr.s file.
43 */
44
45#include <sys/asm_linkage.h>
46#include <sys/asm_misc.h>
47#include <sys/panic.h>
48#include <sys/ontrap.h>
49#include <sys/regset.h>
50#include <sys/privregs.h>
51#include <sys/reboot.h>
52#include <sys/psw.h>
53#include <sys/x86_archext.h>
54
55#if defined(__lint)
56#include <sys/types.h>
57#include <sys/systm.h>
58#include <sys/thread.h>
59#include <sys/archsystm.h>
60#include <sys/byteorder.h>
61#include <sys/dtrace.h>
62#else	/* __lint */
63#include "assym.h"
64#endif	/* __lint */
65#include <sys/dditypes.h>
66
67/*
68 * on_fault()
69 * Catch lofault faults. Like setjmp except it returns one
70 * if code following causes uncorrectable fault. Turned off
71 * by calling no_fault().
72 */
73
#if defined(__lint)

/* ARGSUSED */
int
on_fault(label_t *ljb)
{ return (0); }

void
no_fault(void)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * int on_fault(label_t *ljb)		%rdi = ljb
	 *
	 * Store ljb in curthread->t_onfault and the address of catch_fault
	 * in curthread->t_lofault, then tail-jump to setjmp.  %rdi and the
	 * stack are left untouched, so setjmp saves the context of
	 * on_fault()'s caller in ljb and returns 0 to that caller.
	 */
	ENTRY(on_fault)
	movq	%gs:CPU_THREAD, %rsi
	leaq	catch_fault(%rip), %rdx
	movq	%rdi, T_ONFAULT(%rsi)		/* jumpbuf in t_onfault */
	movq	%rdx, T_LOFAULT(%rsi)		/* catch_fault in t_lofault */
	jmp	setjmp				/* let setjmp do the rest */

	/*
	 * Address installed in t_lofault by on_fault() (presumably entered
	 * from the trap handling code, which is not part of this file).
	 * Fetch the saved jump buffer, clear t_onfault and t_lofault so the
	 * protection is one-shot, and longjmp; the original on_fault() call
	 * then appears to return 1.
	 */
catch_fault:
	movq	%gs:CPU_THREAD, %rsi
	movq	T_ONFAULT(%rsi), %rdi		/* address of save area */
	xorl	%eax, %eax
	movq	%rax, T_ONFAULT(%rsi)		/* turn off onfault */
	movq	%rax, T_LOFAULT(%rsi)		/* turn off lofault */
	jmp	longjmp				/* let longjmp do the rest */
	SET_SIZE(on_fault)

	/*
	 * void no_fault(void)
	 *
	 * Clear curthread->t_onfault and curthread->t_lofault.
	 */
	ENTRY(no_fault)
	movq	%gs:CPU_THREAD, %rsi
	xorl	%eax, %eax
	movq	%rax, T_ONFAULT(%rsi)		/* turn off onfault */
	movq	%rax, T_LOFAULT(%rsi)		/* turn off lofault */
	ret
	SET_SIZE(no_fault)

#elif defined(__i386)

	/*
	 * int on_fault(label_t *ljb)		4(%esp) = ljb
	 *
	 * Same as the amd64 version.  The stack is not modified before the
	 * jump, so setjmp finds ljb and the caller's return address exactly
	 * where on_fault() received them.
	 */
	ENTRY(on_fault)
	movl	%gs:CPU_THREAD, %edx
	movl	4(%esp), %eax			/* jumpbuf address */
	leal	catch_fault, %ecx
	movl	%eax, T_ONFAULT(%edx)		/* jumpbuf in t_onfault */
	movl	%ecx, T_LOFAULT(%edx)		/* catch_fault in t_lofault */
	jmp	setjmp				/* let setjmp do the rest */

	/*
	 * See the amd64 catch_fault.  longjmp takes its argument on the
	 * stack here, and it does not return to the instruction after the
	 * call.
	 */
catch_fault:
	movl	%gs:CPU_THREAD, %edx
	xorl	%eax, %eax
	movl	T_ONFAULT(%edx), %ecx		/* address of save area */
	movl	%eax, T_ONFAULT(%edx)		/* turn off onfault */
	movl	%eax, T_LOFAULT(%edx)		/* turn off lofault */
	pushl	%ecx
	call	longjmp				/* let longjmp do the rest */
	SET_SIZE(on_fault)

	/*
	 * void no_fault(void)
	 *
	 * Clear curthread->t_onfault and curthread->t_lofault.
	 */
	ENTRY(no_fault)
	movl	%gs:CPU_THREAD, %edx
	xorl	%eax, %eax
	movl	%eax, T_ONFAULT(%edx)		/* turn off onfault */
	movl	%eax, T_LOFAULT(%edx)		/* turn off lofault */
	ret
	SET_SIZE(no_fault)

#endif	/* __i386 */
#endif	/* __lint */
143
144/*
145 * Default trampoline code for on_trap() (see <sys/ontrap.h>).  We just
146 * do a longjmp(&curthread->t_ontrap->ot_jmpbuf) if this is ever called.
147 */
148
#if defined(__lint)

void
on_trap_trampoline(void)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void on_trap_trampoline(void)
	 *
	 * Compute &curthread->t_ontrap->ot_jmpbuf in %rdi and tail-jump to
	 * longjmp, so control resumes at the matching setjmp with a return
	 * value of 1.
	 */
	ENTRY(on_trap_trampoline)
	movq	%gs:CPU_THREAD, %rsi		/* %rsi = curthread */
	movq	T_ONTRAP(%rsi), %rdi		/* %rdi = t_ontrap */
	addq	$OT_JMPBUF, %rdi		/* %rdi = &ot_jmpbuf */
	jmp	longjmp
	SET_SIZE(on_trap_trampoline)

#elif defined(__i386)

	/*
	 * void on_trap_trampoline(void)
	 *
	 * Same as the amd64 version, with the jump buffer address passed to
	 * longjmp on the stack.  longjmp does not return here.
	 */
	ENTRY(on_trap_trampoline)
	movl	%gs:CPU_THREAD, %eax		/* %eax = curthread */
	movl	T_ONTRAP(%eax), %eax		/* %eax = t_ontrap */
	addl	$OT_JMPBUF, %eax		/* %eax = &ot_jmpbuf */
	pushl	%eax
	call	longjmp
	SET_SIZE(on_trap_trampoline)

#endif	/* __i386 */
#endif	/* __lint */
178
179/*
180 * Push a new element on to the t_ontrap stack.  Refer to <sys/ontrap.h> for
181 * more information about the on_trap() mechanism.  If the on_trap_data is the
182 * same as the topmost stack element, we just modify that element.
183 */
#if defined(__lint)

/*ARGSUSED*/
int
on_trap(on_trap_data_t *otp, uint_t prot)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * int on_trap(on_trap_data_t *otp, uint_t prot)
	 *	%rdi = otp, %esi = prot
	 *
	 * Initialise *otp (ot_prot, ot_trap, ot_trampoline, ot_handle,
	 * ot_pad1), link it in front of curthread->t_ontrap unless it is
	 * already the topmost element, and tail-jump to setjmp on
	 * &otp->ot_jmpbuf.  The caller therefore sees 0 now and 1 if the
	 * jump buffer is later longjmp'ed to.
	 */
	ENTRY(on_trap)
	movw	%si, OT_PROT(%rdi)		/* ot_prot = prot */
	movw	$0, OT_TRAP(%rdi)		/* ot_trap = 0 */
	leaq	on_trap_trampoline(%rip), %rdx	/* rdx = &on_trap_trampoline */
	movq	%rdx, OT_TRAMPOLINE(%rdi)	/* ot_trampoline = rdx */
	xorl	%ecx, %ecx
	movq	%rcx, OT_HANDLE(%rdi)		/* ot_handle = NULL */
	movq	%rcx, OT_PAD1(%rdi)		/* ot_pad1 = NULL */
	movq	%gs:CPU_THREAD, %rdx		/* rdx = curthread */
	movq	T_ONTRAP(%rdx), %rcx		/* rcx = curthread->t_ontrap */
	cmpq	%rdi, %rcx			/* if (otp == %rcx)	*/
	je	0f				/*	don't modify t_ontrap */

	movq	%rcx, OT_PREV(%rdi)		/* ot_prev = t_ontrap */
	movq	%rdi, T_ONTRAP(%rdx)		/* curthread->t_ontrap = otp */

0:	addq	$OT_JMPBUF, %rdi		/* &ot_jmpbuf */
	jmp	setjmp
	SET_SIZE(on_trap)

#elif defined(__i386)

	/*
	 * int on_trap(on_trap_data_t *otp, uint_t prot)
	 *	4(%esp) = otp, 8(%esp) = prot
	 *
	 * Same as the amd64 version.  Before jumping to setjmp the first
	 * argument slot on the stack is overwritten with &otp->ot_jmpbuf,
	 * because setjmp reads its argument from 4(%esp).
	 */
	ENTRY(on_trap)
	movl	4(%esp), %eax			/* %eax = otp */
	movl	8(%esp), %edx			/* %edx = prot */

	movw	%dx, OT_PROT(%eax)		/* ot_prot = prot */
	movw	$0, OT_TRAP(%eax)		/* ot_trap = 0 */
	leal	on_trap_trampoline, %edx	/* %edx = &on_trap_trampoline */
	movl	%edx, OT_TRAMPOLINE(%eax)	/* ot_trampoline = %edx */
	movl	$0, OT_HANDLE(%eax)		/* ot_handle = NULL */
	movl	$0, OT_PAD1(%eax)		/* ot_pad1 = NULL */
	movl	%gs:CPU_THREAD, %edx		/* %edx = curthread */
	movl	T_ONTRAP(%edx), %ecx		/* %ecx = curthread->t_ontrap */
	cmpl	%eax, %ecx			/* if (otp == %ecx) */
	je	0f				/*    don't modify t_ontrap */

	movl	%ecx, OT_PREV(%eax)		/* ot_prev = t_ontrap */
	movl	%eax, T_ONTRAP(%edx)		/* curthread->t_ontrap = otp */

0:	addl	$OT_JMPBUF, %eax		/* %eax = &ot_jmpbuf */
	movl	%eax, 4(%esp)			/* put %eax back on the stack */
	jmp	setjmp				/* let setjmp do the rest */
	SET_SIZE(on_trap)

#endif	/* __i386 */
#endif	/* __lint */
242
243/*
244 * Setjmp and longjmp implement non-local gotos using state vectors
245 * type label_t.
246 */
247
#if defined(__lint)

/* ARGSUSED */
int
setjmp(label_t *lp)
{ return (0); }

/* ARGSUSED */
void
longjmp(label_t *lp)
{}

#else	/* __lint */

#if LABEL_PC != 0
#error LABEL_PC MUST be defined as 0 for setjmp/longjmp to work as coded
#endif	/* LABEL_PC != 0 */

#if defined(__amd64)

	/*
	 * int setjmp(label_t *lp)		%rdi = lp
	 *
	 * Save %rsp, %rbp, %rbx, %r12-%r15 and the caller's return address
	 * in *lp and return 0.  The saved %rsp is the value on entry, i.e.
	 * it points at the return-address slot.
	 */
	ENTRY(setjmp)
	movq	%rsp, LABEL_SP(%rdi)
	movq	%rbp, LABEL_RBP(%rdi)
	movq	%rbx, LABEL_RBX(%rdi)
	movq	%r12, LABEL_R12(%rdi)
	movq	%r13, LABEL_R13(%rdi)
	movq	%r14, LABEL_R14(%rdi)
	movq	%r15, LABEL_R15(%rdi)
	movq	(%rsp), %rdx		/* return address */
	movq	%rdx, (%rdi)		/* LABEL_PC is 0 */
	xorl	%eax, %eax		/* return 0 */
	ret
	SET_SIZE(setjmp)

	/*
	 * void longjmp(label_t *lp)		%rdi = lp
	 *
	 * Restore the registers saved by setjmp, rewrite the return-address
	 * slot at the restored %rsp with the saved pc, and return 1 to the
	 * place setjmp was called from.
	 */
	ENTRY(longjmp)
	movq	LABEL_SP(%rdi), %rsp
	movq	LABEL_RBP(%rdi), %rbp
	movq	LABEL_RBX(%rdi), %rbx
	movq	LABEL_R12(%rdi), %r12
	movq	LABEL_R13(%rdi), %r13
	movq	LABEL_R14(%rdi), %r14
	movq	LABEL_R15(%rdi), %r15
	movq	(%rdi), %rdx		/* return address; LABEL_PC is 0 */
	movq	%rdx, (%rsp)
	xorl	%eax, %eax
	incl	%eax			/* return 1 */
	ret
	SET_SIZE(longjmp)

#elif defined(__i386)

	/*
	 * int setjmp(label_t *lp)		4(%esp) = lp
	 *
	 * Save %ebp, %ebx, %esi, %edi, %esp and the caller's return address
	 * in *lp and return 0.  The stack pointer is kept at byte offset 4
	 * of the label (hard-coded here and in longjmp).
	 */
	ENTRY(setjmp)
	movl	4(%esp), %edx		/* address of save area */
	movl	%ebp, LABEL_EBP(%edx)
	movl	%ebx, LABEL_EBX(%edx)
	movl	%esi, LABEL_ESI(%edx)
	movl	%edi, LABEL_EDI(%edx)
	movl	%esp, 4(%edx)
	movl	(%esp), %ecx		/* %eip (return address) */
	movl	%ecx, (%edx)		/* LABEL_PC is 0 */
	subl	%eax, %eax		/* return 0 */
	ret
	SET_SIZE(setjmp)

	/*
	 * void longjmp(label_t *lp)		4(%esp) = lp
	 *
	 * Restore the registers saved by setjmp, discard the return-address
	 * slot at the restored %esp, and jump to the saved pc with %eax = 1.
	 */
	ENTRY(longjmp)
	movl	4(%esp), %edx		/* address of save area */
	movl	LABEL_EBP(%edx), %ebp
	movl	LABEL_EBX(%edx), %ebx
	movl	LABEL_ESI(%edx), %esi
	movl	LABEL_EDI(%edx), %edi
	movl	4(%edx), %esp
	movl	(%edx), %ecx		/* %eip (return addr); LABEL_PC is 0 */
	movl	$1, %eax
	addl	$4, %esp		/* pop ret adr */
	jmp	*%ecx			/* indirect */
	SET_SIZE(longjmp)

#endif	/* __i386 */
#endif	/* __lint */
327
328/*
329 * if a() calls b() calls caller(),
330 * caller() returns return address in a().
331 * (Note: We assume a() and b() are C routines which do the normal entry/exit
332 *  sequence.)
333 */
334
#if defined(__lint)

caddr_t
caller(void)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * caddr_t caller(void)
	 *
	 * caller() builds no frame of its own, so %rbp is still b()'s frame
	 * pointer and 8(%rbp) is the return address b() will return to.
	 * This is only meaningful if b() set up a standard %rbp frame.
	 */
	ENTRY(caller)
	movq	8(%rbp), %rax		/* b()'s return pc, in a() */
	ret
	SET_SIZE(caller)

#elif defined(__i386)

	/*
	 * caddr_t caller(void)
	 *
	 * As for amd64: %ebp is still b()'s frame pointer, and the return
	 * address sits one word above it.
	 */
	ENTRY(caller)
	movl	4(%ebp), %eax		/* b()'s return pc, in a() */
	ret
	SET_SIZE(caller)

#endif	/* __i386 */
#endif	/* __lint */
359
360/*
361 * if a() calls callee(), callee() returns the
362 * return address in a();
363 */
364
#if defined(__lint)

caddr_t
callee(void)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * caddr_t callee(void)
	 *
	 * Return our own return address, read from the top of the stack.
	 */
	ENTRY(callee)
	movq	(%rsp), %rax		/* callee()'s return pc, in a() */
	ret
	SET_SIZE(callee)

#elif defined(__i386)

	/*
	 * caddr_t callee(void)
	 *
	 * Return our own return address, read from the top of the stack.
	 */
	ENTRY(callee)
	movl	(%esp), %eax		/* callee()'s return pc, in a() */
	ret
	SET_SIZE(callee)

#endif	/* __i386 */
#endif	/* __lint */
389
390/*
391 * return the current frame pointer
392 */
393
#if defined(__lint)

greg_t
getfp(void)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * greg_t getfp(void)
	 *
	 * Return %rbp.  No frame is built here, so this is the frame pointer
	 * of the function that called getfp().
	 */
	ENTRY(getfp)
	movq	%rbp, %rax
	ret
	SET_SIZE(getfp)

#elif defined(__i386)

	/*
	 * greg_t getfp(void)
	 *
	 * Return %ebp.  No frame is built here, so this is the frame pointer
	 * of the function that called getfp().
	 */
	ENTRY(getfp)
	movl	%ebp, %eax
	ret
	SET_SIZE(getfp)

#endif	/* __i386 */
#endif	/* __lint */
418
419/*
420 * Invalidate a single page table entry in the TLB
421 */
422
#if defined(__lint)

/* ARGSUSED */
void
mmu_tlbflush_entry(caddr_t m)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void mmu_tlbflush_entry(caddr_t m)	%rdi = m
	 *
	 * Issue invlpg for the page containing virtual address m.
	 */
	ENTRY(mmu_tlbflush_entry)
	invlpg	(%rdi)
	ret
	SET_SIZE(mmu_tlbflush_entry)

#elif defined(__i386)

	/*
	 * void mmu_tlbflush_entry(caddr_t m)	4(%esp) = m
	 *
	 * Issue invlpg for the page containing virtual address m.
	 */
	ENTRY(mmu_tlbflush_entry)
	movl	4(%esp), %eax
	invlpg	(%eax)
	ret
	SET_SIZE(mmu_tlbflush_entry)

#endif	/* __i386 */
#endif	/* __lint */
449
450
451/*
452 * Get/Set the value of various control registers
453 */
454
455#if defined(__lint)
456
457ulong_t
458getcr0(void)
459{ return (0); }
460
461/* ARGSUSED */
462void
463setcr0(ulong_t value)
464{}
465
466ulong_t
467getcr2(void)
468{ return (0); }
469
470ulong_t
471getcr3(void)
472{ return (0); }
473
474/* ARGSUSED */
475void
476setcr3(ulong_t val)
477{}
478
479void
480reload_cr3(void)
481{}
482
483ulong_t
484getcr4(void)
485{ return (0); }
486
487/* ARGSUSED */
488void
489setcr4(ulong_t val)
490{}
491
492#if defined(__amd64)
493
494ulong_t
495getcr8(void)
496{ return (0); }
497
498/* ARGSUSED */
499void
500setcr8(ulong_t val)
501{}
502
503#endif	/* __amd64 */
504
505#else	/* __lint */
506
#if defined(__amd64)

	/*
	 * Control register accessors.  getcrN() returns %crN in %rax;
	 * setcrN(val) loads %crN from %rdi.
	 */

	ENTRY(getcr0)
	movq	%cr0, %rax
	ret
	SET_SIZE(getcr0)

	ENTRY(setcr0)
	movq	%rdi, %cr0
	ret
	SET_SIZE(setcr0)

	ENTRY(getcr2)
	movq	%cr2, %rax
	ret
	SET_SIZE(getcr2)

	ENTRY(getcr3)
	movq	%cr3, %rax
	ret
	SET_SIZE(getcr3)

	ENTRY(setcr3)
	movq	%rdi, %cr3
	ret
	SET_SIZE(setcr3)

	/*
	 * void reload_cr3(void)
	 *
	 * Write %cr3 back with the value it already holds.  (On x86 a write
	 * to %cr3 invalidates the non-global TLB entries.)
	 */
	ENTRY(reload_cr3)
	movq	%cr3, %rdi
	movq	%rdi, %cr3
	ret
	SET_SIZE(reload_cr3)

	ENTRY(getcr4)
	movq	%cr4, %rax
	ret
	SET_SIZE(getcr4)

	ENTRY(setcr4)
	movq	%rdi, %cr4
	ret
	SET_SIZE(setcr4)

	ENTRY(getcr8)
	movq	%cr8, %rax
	ret
	SET_SIZE(getcr8)

	ENTRY(setcr8)
	movq	%rdi, %cr8
	ret
	SET_SIZE(setcr8)
559
#elif defined(__i386)

	/*
	 * Control register accessors.  getcrN() returns %crN in %eax;
	 * setcrN(val) loads %crN from the stack argument at 4(%esp).
	 */

	ENTRY(getcr0)
	movl	%cr0, %eax
	ret
	SET_SIZE(getcr0)

	ENTRY(setcr0)
	movl	4(%esp), %eax
	movl	%eax, %cr0
	ret
	SET_SIZE(setcr0)

	ENTRY(getcr2)
	movl	%cr2, %eax
	ret
	SET_SIZE(getcr2)

	ENTRY(getcr3)
	movl	%cr3, %eax
	ret
	SET_SIZE(getcr3)

	ENTRY(setcr3)
	movl	4(%esp), %eax
	movl	%eax, %cr3
	ret
	SET_SIZE(setcr3)

	/*
	 * void reload_cr3(void)
	 *
	 * Write %cr3 back with the value it already holds.  (On x86 a write
	 * to %cr3 invalidates the non-global TLB entries.)
	 */
	ENTRY(reload_cr3)
	movl	%cr3, %eax
	movl	%eax, %cr3
	ret
	SET_SIZE(reload_cr3)

	ENTRY(getcr4)
	movl	%cr4, %eax
	ret
	SET_SIZE(getcr4)

	ENTRY(setcr4)
	movl	4(%esp), %eax
	movl	%eax, %cr4
	ret
	SET_SIZE(setcr4)

#endif	/* __i386 */
#endif	/* __lint */
608
#if defined(__lint)

/*ARGSUSED*/
uint32_t
__cpuid_insn(uint32_t eax, uint32_t *ebxp, uint32_t *ecxp, uint32_t *edxp)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * uint32_t __cpuid_insn(uint32_t eax, uint32_t *ebxp,
	 *     uint32_t *ecxp, uint32_t *edxp)
	 *	%edi = eax, %rsi = ebxp, %rdx = ecxp, %rcx = edxp
	 *
	 * Execute cpuid with %eax = eax, store the resulting %ebx, %ecx and
	 * %edx through the three pointers, and return the resulting %eax.
	 *
	 * cpuid overwrites %eax, %ebx, %ecx and %edx, so the two pointers
	 * that arrive in %rdx and %rcx are moved to %r8/%r9 first, and the
	 * callee-saved %rbx is parked in %r11 around the instruction.
	 *
	 * NOTE: %ecx is not given a defined value before cpuid, so this
	 * interface cannot select a sub-leaf.
	 */
	ENTRY(__cpuid_insn)
	movq	%rbx, %r11
	movq	%rdx, %r8	/* r8 = ecxp */
	movq	%rcx, %r9	/* r9 = edxp */
	movl	%edi, %eax
	cpuid
	movl	%ebx, (%rsi)
	movl	%ecx, (%r8)
	movl	%edx, (%r9)
	movq	%r11, %rbx
	ret
	SET_SIZE(__cpuid_insn)

#elif defined(__i386)

	/*
	 * uint32_t __cpuid_insn(uint32_t eax, uint32_t *ebxp,
	 *     uint32_t *ecxp, uint32_t *edxp)
	 *	8(%ebp) = eax, 0xc(%ebp) = ebxp, 0x10(%ebp) = ecxp,
	 *	0x14(%ebp) = edxp
	 *
	 * Same contract as the amd64 version.  %ebx is callee-saved and is
	 * preserved on the stack; the cpuid %eax result is pushed while %eax
	 * is reused as a scratch pointer, then popped as the return value.
	 */
	ENTRY(__cpuid_insn)
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ebx
	movl	8(%ebp), %eax
	cpuid
	pushl	%eax			/* save cpuid's %eax result */
	movl	0x0c(%ebp), %eax
	movl	%ebx, (%eax)		/* *ebxp = %ebx */
	movl	0x10(%ebp), %eax
	movl	%ecx, (%eax)		/* *ecxp = %ecx */
	movl	0x14(%ebp), %eax
	movl	%edx, (%eax)		/* *edxp = %edx */
	popl	%eax			/* return cpuid's %eax result */
	popl	%ebx
	popl	%ebp
	ret
	SET_SIZE(__cpuid_insn)

#endif	/* __i386 */
#endif	/* __lint */
656
657/*
658 * Insert entryp after predp in a doubly linked list.
659 */
660
#if defined(__lint)

/*ARGSUSED*/
void
_insque(caddr_t entryp, caddr_t predp)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void _insque(caddr_t entryp, caddr_t predp)
	 *	%rdi = entryp, %rsi = predp
	 *
	 * Each element starts with a forward pointer at offset 0 followed by
	 * a back pointer at offset CPTRSIZE.  The old predp->forw is loaded
	 * once, before predp->forw is overwritten, so its back pointer can
	 * be fixed up last.  No NULL checks are made.
	 */
	ENTRY(_insque)
	movq	(%rsi), %rax		/* predp->forw 			*/
	movq	%rsi, CPTRSIZE(%rdi)	/* entryp->back = predp		*/
	movq	%rax, (%rdi)		/* entryp->forw = predp->forw	*/
	movq	%rdi, (%rsi)		/* predp->forw = entryp		*/
	movq	%rdi, CPTRSIZE(%rax)	/* predp->forw->back = entryp	*/
	ret
	SET_SIZE(_insque)

#elif defined(__i386)

	/*
	 * void _insque(caddr_t entryp, caddr_t predp)
	 *	4(%esp) = entryp, 8(%esp) = predp
	 *
	 * Same list layout and update order as the amd64 version.
	 */
	ENTRY(_insque)
	movl	8(%esp), %edx		/* %edx = predp			*/
	movl	4(%esp), %ecx		/* %ecx = entryp		*/
	movl	(%edx), %eax		/* predp->forw			*/
	movl	%edx, CPTRSIZE(%ecx)	/* entryp->back = predp		*/
	movl	%eax, (%ecx)		/* entryp->forw = predp->forw	*/
	movl	%ecx, (%edx)		/* predp->forw = entryp		*/
	movl	%ecx, CPTRSIZE(%eax)	/* predp->forw->back = entryp	*/
	ret
	SET_SIZE(_insque)

#endif	/* __i386 */
#endif	/* __lint */
696
697/*
698 * Remove entryp from a doubly linked list
699 */
700
#if defined(__lint)

/*ARGSUSED*/
void
_remque(caddr_t entryp)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void _remque(caddr_t entryp)		%rdi = entryp
	 *
	 * Unlink entryp by pointing its neighbours at each other.  The
	 * forward pointer is at offset 0 and the back pointer at offset
	 * CPTRSIZE.  entryp's own pointers are left unchanged, and no NULL
	 * checks are made.
	 */
	ENTRY(_remque)
	movq	(%rdi), %rax		/* entry->forw */
	movq	CPTRSIZE(%rdi), %rdx	/* entry->back */
	movq	%rax, (%rdx)		/* entry->back->forw = entry->forw */
	movq	%rdx, CPTRSIZE(%rax)	/* entry->forw->back = entry->back */
	ret
	SET_SIZE(_remque)

#elif defined(__i386)

	/*
	 * void _remque(caddr_t entryp)		4(%esp) = entryp
	 *
	 * Same as the amd64 version.
	 */
	ENTRY(_remque)
	movl	4(%esp), %ecx		/* %ecx = entryp */
	movl	(%ecx), %eax		/* entry->forw */
	movl	CPTRSIZE(%ecx), %edx	/* entry->back */
	movl	%eax, (%edx)		/* entry->back->forw = entry->forw */
	movl	%edx, CPTRSIZE(%eax)	/* entry->forw->back = entry->back */
	ret
	SET_SIZE(_remque)

#endif	/* __i386 */
#endif	/* __lint */
733
/*
 * Returns the number of bytes in the string argument that precede
 * its terminating NUL byte.
 */
738
#if defined(__lint)

/* ARGSUSED */
size_t
strlen(const char *str)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

/*
 * This is close to a simple transliteration of a C version of this
 * routine.  We should either just -make- this be a C version, or
 * justify having it in assembler by making it significantly faster.
 *
 * size_t
 * strlen(const char *s)
 * {
 *	const char *s0;
 * #if defined(DEBUG)
 *	if ((uintptr_t)s < KERNELBASE)
 *		panic(.str_panic_msg);
 * #endif
 *	for (s0 = s; *s; s++)
 *		;
 *	return (s - s0);
 * }
 */

	ENTRY(strlen)
#ifdef DEBUG
	movq	kernelbase(%rip), %rax
	cmpq	%rax, %rdi
	jae	str_valid		/* s >= kernelbase: argument is fine */
	pushq	%rbp
	movq	%rsp, %rbp
	leaq	.str_panic_msg(%rip), %rdi
	xorl	%eax, %eax		/* panic is variadic: no vector args */
	call	panic
#endif	/* DEBUG */
str_valid:
	cmpb	$0, (%rdi)
	movq	%rdi, %rax		/* %rax = s0 (movq leaves flags alone) */
	je	.null_found
	.align	4
.strlen_loop:
	incq	%rdi
	cmpb	$0, (%rdi)
	jne	.strlen_loop
.null_found:
	subq	%rax, %rdi		/* length = s - s0 */
	movq	%rdi, %rax
	ret
	SET_SIZE(strlen)

#elif defined(__i386)

	/*
	 * size_t strlen(const char *str)	4(%esp) = str
	 *
	 * Bytes are examined one at a time until the pointer is 4-byte
	 * aligned, then a word at a time.  For a 32-bit word w,
	 *
	 *	(((w & 0x7f7f7f7f) + 0x7f7f7f7f) | w) & 0x80808080
	 *
	 * has bit 7 of a byte set exactly when that byte of w is non-zero
	 * (the per-byte additions cannot carry into the next byte), so the
	 * result equals 0x80808080 only when the word contains no zero
	 * byte.  When a word does contain one, the pointer is backed up and
	 * the byte loop locates it.
	 */
	ENTRY(strlen)
#ifdef DEBUG
	movl	kernelbase, %eax
	cmpl	%eax, 4(%esp)
	jae	str_valid		/* str >= kernelbase: argument is fine */
	pushl	%ebp
	movl	%esp, %ebp
	pushl	$.str_panic_msg
	call	panic
#endif /* DEBUG */

str_valid:
	movl	4(%esp), %eax		/* %eax = string address */
	testl	$3, %eax		/* if %eax not word aligned */
	jnz	.not_word_aligned	/* goto .not_word_aligned */
	.align	4
.word_aligned:
	movl	(%eax), %edx		/* move 1 word from (%eax) to %edx */
	movl	$0x7f7f7f7f, %ecx
	andl	%edx, %ecx		/* %ecx = %edx & 0x7f7f7f7f */
	addl	$4, %eax		/* next word */
	addl	$0x7f7f7f7f, %ecx	/* %ecx += 0x7f7f7f7f */
	orl	%edx, %ecx		/* %ecx |= %edx */
	andl	$0x80808080, %ecx	/* %ecx &= 0x80808080 */
	cmpl	$0x80808080, %ecx	/* if no null byte in this word */
	je	.word_aligned		/* goto .word_aligned */
	subl	$4, %eax		/* post-incremented */
.not_word_aligned:
	cmpb	$0, (%eax)		/* if a byte in (%eax) is null */
	je	.null_found		/* goto .null_found */
	incl	%eax			/* next byte */
	testl	$3, %eax		/* if %eax not word aligned */
	jnz	.not_word_aligned	/* goto .not_word_aligned */
	jmp	.word_aligned		/* goto .word_aligned */
	.align	4
.null_found:
	subl	4(%esp), %eax		/* %eax -= string address */
	ret
	SET_SIZE(strlen)

#endif	/* __i386 */

#ifdef DEBUG
	.text
.str_panic_msg:
	.string "strlen: argument below kernelbase"
#endif /* DEBUG */

#endif	/* __lint */
846
	/*
	 * Berkeley 4.3 introduced symbolically named interrupt levels
	 * as a way to deal with priority in a machine independent fashion.
	 * Numbered priorities are machine specific, and should be
	 * discouraged where possible.
	 *
	 * Note, for the machine specific priorities there are
	 * examples listed for devices that use a particular priority.
	 * It should not be construed that all devices of that
	 * type should be at that priority.  It is currently where
	 * the current devices fit into the priority scheme based
	 * upon time criticalness.
	 *
	 * The underlying assumption of these assignments is that
	 * IPL 10 is the highest level from which a device
	 * routine can call wakeup.  Devices that interrupt from higher
	 * levels are restricted in what they can do.  If they need
	 * kernel services they should schedule a routine at a lower
	 * level (via software interrupt) to do the required
	 * processing.
	 *
	 * Examples of this higher usage:
	 *	Level	Usage
	 *	14	Profiling clock (and PROM uart polling clock)
	 *	12	Serial ports
	 *
	 * The serial ports request lower level processing on level 6.
	 *
	 * Also, almost all splN routines (where N is a number or a
	 * mnemonic) will do a RAISE(), on the assumption that they are
	 * never used to lower our priority.
	 * The exceptions are:
	 *	spl8()		Because you can't be above 15 to begin with!
	 *	splzs()		Because this is used at boot time to lower our
	 *			priority, to allow the PROM to poll the uart.
	 *	spl0()		Used to lower priority to 0.
	 */
884
#if defined(__lint)

int spl0(void)		{ return (0); }
int spl6(void)		{ return (0); }
int spl7(void)		{ return (0); }
int spl8(void)		{ return (0); }
int splhigh(void)	{ return (0); }
int splhi(void)		{ return (0); }
int splzs(void)		{ return (0); }

#else	/* __lint */

/*
 * Accessors for the current priority level.  The _NOGS forms take the cpu
 * structure pointer in a register; the plain forms address it through %gs.
 */

/* reg = cpu->cpu_m.cpu_pri; */
#define	GETIPL_NOGS(reg, cpup)	\
	movl	CPU_PRI(cpup), reg;

/* cpu->cpu_m.cpu_pri = val; */
#define	SETIPL_NOGS(val, cpup)	\
	movl	val, CPU_PRI(cpup);

/* reg = cpu->cpu_m.cpu_pri; */
#define	GETIPL(reg)	\
	movl	%gs:CPU_PRI, reg;

/* cpu->cpu_m.cpu_pri = val; */
#define	SETIPL(val)	\
	movl	val, %gs:CPU_PRI;
912
/*
 * Macro to raise processor priority level.
 * Avoid dropping processor priority if already at high level.
 * Also avoid going below CPU->cpu_base_spl, which could've just been set by
 * a higher-level interrupt thread that just blocked.
 *
 * With interrupts disabled, load the cpu pointer and the requested level
 * into the registers that spl expects and read the current level into %eax.
 * If the requested level is greater (signed compare) control goes to spl;
 * otherwise it goes to setsplhisti, which returns with the current level
 * still in %eax.  The macro ends in an unconditional jump, so nothing placed
 * after it in a routine is reached.
 */
#if defined(__amd64)

#define	RAISE(level) \
	cli;			\
	LOADCPU(%rcx);		\
	movl	$/**/level, %edi;\
	GETIPL_NOGS(%eax, %rcx);\
	cmpl 	%eax, %edi;	\
	jg	spl;		\
	jmp	setsplhisti

#elif defined(__i386)

#define	RAISE(level) \
	cli;			\
	LOADCPU(%ecx);		\
	movl	$/**/level, %edx;\
	GETIPL_NOGS(%eax, %ecx);\
	cmpl 	%eax, %edx;	\
	jg	spl;		\
	jmp	setsplhisti

#endif	/* __i386 */

/*
 * Macro to set the priority to a specified level.
 * Avoid dropping the priority below CPU->cpu_base_spl.
 *
 * Unlike RAISE() this always goes to spl, so it can lower the level; the
 * cpu_base_spl clamp is applied by spl itself.
 */
#if defined(__amd64)

#define	SETPRI(level) \
	cli;				\
	LOADCPU(%rcx);			\
	movl	$/**/level, %edi;	\
	jmp	spl

#elif defined(__i386)

#define SETPRI(level) \
	cli;				\
	LOADCPU(%ecx);			\
	movl	$/**/level, %edx;	\
	jmp	spl

#endif	/* __i386 */
964
965	/* locks out all interrupts, including memory errors */
966	ENTRY(spl8)
967	SETPRI(15)
968	SET_SIZE(spl8)
969
970	/* just below the level that profiling runs */
971	ENTRY(spl7)
972	RAISE(13)
973	SET_SIZE(spl7)
974
975	/* sun specific - highest priority onboard serial i/o asy ports */
976	ENTRY(splzs)
977	SETPRI(12)	/* Can't be a RAISE, as it's used to lower us */
978	SET_SIZE(splzs)
979
980	/*
981	 * should lock out clocks and all interrupts,
982	 * as you can see, there are exceptions
983	 */
984
985#if defined(__amd64)
986
987	.align	16
988	ENTRY(splhi)
989	ALTENTRY(splhigh)
990	ALTENTRY(spl6)
991	ALTENTRY(i_ddi_splhigh)
992	cli
993	LOADCPU(%rcx)
994	movl	$DISP_LEVEL, %edi
995	movl	CPU_PRI(%rcx), %eax
996	cmpl	%eax, %edi
997	jle	setsplhisti
998	SETIPL_NOGS(%edi, %rcx)
999	/*
1000	 * If we aren't using cr8 to control ipl then we patch this
1001	 * with a jump to slow_setsplhi
1002	 */
1003	ALTENTRY(setsplhi_patch)
1004	movq	CPU_PRI_DATA(%rcx), %r11 /* get pri data ptr */
1005	movzb	(%r11, %rdi, 1), %rdx	/* get apic mask for this ipl */
1006	movq	%rdx, %cr8		/* set new apic priority */
1007	/*
1008	 * enable interrupts
1009	 */
1010setsplhisti:
1011	nop	/* patch this to a sti when a proper setspl routine appears */
1012	ret
1013
1014	ALTENTRY(slow_setsplhi)
1015	pushq	%rbp
1016	movq	%rsp, %rbp
1017	subq	$16, %rsp
1018	movl	%eax, -4(%rbp)		/* save old ipl */
1019	call	*setspl(%rip)
1020	movl	-4(%rbp), %eax		/* return old ipl */
1021	leave
1022	jmp	setsplhisti
1023
1024	SET_SIZE(i_ddi_splhigh)
1025	SET_SIZE(spl6)
1026	SET_SIZE(splhigh)
1027	SET_SIZE(splhi)
1028
1029#elif defined(__i386)
1030
1031	.align	16
1032	ENTRY(splhi)
1033	ALTENTRY(splhigh)
1034	ALTENTRY(spl6)
1035	ALTENTRY(i_ddi_splhigh)
1036	cli
1037	LOADCPU(%ecx)
1038	movl	$DISP_LEVEL, %edx
1039	movl	CPU_PRI(%ecx), %eax
1040	cmpl	%eax, %edx
1041	jle	setsplhisti
1042	SETIPL_NOGS(%edx, %ecx)		/* set new ipl */
1043
1044	pushl   %eax                    /* save old ipl */
1045	pushl	%edx			/* pass new ipl */
1046	call	*setspl
1047	popl	%ecx			/* dummy pop */
1048	popl    %eax                    /* return old ipl */
1049	/*
1050	 * enable interrupts
1051	 *
1052	 * (we patch this to an sti once a proper setspl routine
1053	 * is installed)
1054	 */
1055setsplhisti:
1056	nop	/* patch this to a sti when a proper setspl routine appears */
1057	ret
1058	SET_SIZE(i_ddi_splhigh)
1059	SET_SIZE(spl6)
1060	SET_SIZE(splhigh)
1061	SET_SIZE(splhi)
1062
1063#endif	/* __i386 */
1064
1065	/* allow all interrupts */
1066	ENTRY(spl0)
1067	SETPRI(0)
1068	SET_SIZE(spl0)
1069
1070#endif	/* __lint */
1071
1072/*
1073 * splr is like splx but will only raise the priority and never drop it
1074 */
#if defined(__lint)

/* ARGSUSED */
int
splr(int level)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * int splr(int level)			%edi = level
	 *
	 * If level is greater than the current ipl (signed compare), jump
	 * to spl with %edi/%rcx set up as it expects; otherwise return the
	 * current ipl in %eax without changing anything.
	 */
	ENTRY(splr)
	cli
	LOADCPU(%rcx)
	GETIPL_NOGS(%eax, %rcx)
	cmpl	%eax, %edi		/* if new level > current level */
	jg	spl			/* then set ipl to new level */
splr_setsti:
	nop	/* patch this to a sti when a proper setspl routine appears */
	ret				/* else return the current level */
	SET_SIZE(splr)

#elif defined(__i386)

	/*
	 * int splr(int level)			4(%esp) = level
	 *
	 * Same as the amd64 version; spl expects the new level in %edx and
	 * the cpu pointer in %ecx.
	 */
	ENTRY(splr)
	cli
	LOADCPU(%ecx)
	movl	4(%esp), %edx		/* get new spl level */
	GETIPL_NOGS(%eax, %ecx)
	cmpl 	%eax, %edx		/* if new level > current level */
	jg	spl			/* then set ipl to new level */
splr_setsti:
	nop	/* patch this to a sti when a proper setspl routine appears */
	ret				/* else return the current level */
	SET_SIZE(splr)

#endif	/* __i386 */
#endif	/* __lint */
1113
1114
1115
/*
 * splx - set PIL back to that indicated by the level passed as an argument,
 * or to the CPU's base priority, whichever is higher.
 * Needs to fall through to spl to save cycles.
 * Algorithm for spl:
 *
 *      turn off interrupts
 *
 *	if (CPU->cpu_base_spl > newipl)
 *		newipl = CPU->cpu_base_spl;
 *      oldipl = CPU->cpu_pridata->c_ipl;
 *      CPU->cpu_pridata->c_ipl = newipl;
 *
 *	indirectly call function to set spl values (usually setpicmasks)
 *      setspl();  // load new masks into pics
 *
 * Be careful not to set priority lower than CPU->cpu_base_spl,
 * even though it seems we're raising the priority, it could be set
 * higher at any time by an interrupt routine, so we must block interrupts
 * and look at CPU->cpu_base_spl
 */
#if defined(__lint)

/* ARGSUSED */
void
splx(int level)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void splx(int level)			%edi = level
	 *
	 * splx()/i_ddi_splx() disable interrupts, load the cpu pointer and
	 * fall into spl.  spl is also the jump target of RAISE(), SETPRI()
	 * and splr(), which arrive with the same register setup.  The old
	 * ipl is left in %eax on every return path.
	 */
	ENTRY(splx)
	ALTENTRY(i_ddi_splx)
	cli				/* disable interrupts */
	LOADCPU(%rcx)
	/*FALLTHRU*/
	.align	4
spl:
	/*
	 * New priority level is in %edi, cpu struct pointer is in %rcx
	 */
	GETIPL_NOGS(%eax, %rcx)		/* get current ipl */
	cmpl   %edi, CPU_BASE_SPL(%rcx) /* if (base spl > new ipl) */
	ja     set_to_base_spl		/* then use base_spl */

setprilev:
	SETIPL_NOGS(%edi, %rcx)		/* set new ipl */
	/*
	 * If we aren't using cr8 to control ipl then we patch this
	 * with a jump to slow_spl
	 */
	ALTENTRY(spl_patch)
	movq	CPU_PRI_DATA(%rcx), %r11 /* get pri data ptr */
	movzb	(%r11, %rdi, 1), %rdx	/* get apic mask for this ipl */
	movq	%rdx, %cr8		/* set new apic priority */
	/*
	 * %edx is zeroed first so that it reads as 0 when st_pending is 0
	 * and bsrl leaves its destination alone.  NOTE(review): Intel
	 * documents the bsr destination as undefined for a zero source;
	 * confirm this is acceptable on all supported processors.
	 */
	xorl	%edx, %edx
	bsrl	CPU_SOFTINFO(%rcx), %edx /* fls(cpu->cpu_softinfo.st_pending) */
	cmpl	%edi, %edx		/* new ipl vs. st_pending */
	jle	setsplsti

	/*
	 * A soft interrupt above the new ipl is pending.  Build the same
	 * frame slow_spl builds (saved %rbp, old ipl, new ipl) before going
	 * to fakesoftint, which is defined elsewhere and presumably comes
	 * back at fakesoftint_return, where that frame is unwound.
	 */
	pushq	%rbp
	movq	%rsp, %rbp
	/* stack now 16-byte aligned */
	pushq	%rax			/* save old spl */
	pushq	%rdi			/* save new ipl too */
	jmp	fakesoftint

setsplsti:
	nop	/* patch this to a sti when a proper setspl routine appears */
	ret

	/*
	 * Reached only through the jump that install_spl() patches in at
	 * spl_patch.  %edi holds the new ipl (the argument for *setspl) and
	 * %eax the old ipl.
	 */
	ALTENTRY(slow_spl)
	pushq	%rbp
	movq	%rsp, %rbp
	/* stack now 16-byte aligned */

	pushq	%rax			/* save old spl */
	pushq	%rdi			/* save new ipl too */

	call	*setspl(%rip)

	LOADCPU(%rcx)
	movl	CPU_SOFTINFO(%rcx), %eax
	orl	%eax, %eax
	jz	slow_setsplsti		/* no soft interrupts pending */

	bsrl	%eax, %edx		/* fls(cpu->cpu_softinfo.st_pending) */
	cmpl	0(%rsp), %edx		/* new ipl vs. st_pending */
	jg	fakesoftint

	ALTENTRY(fakesoftint_return)
	/*
	 * enable interrupts
	 */
slow_setsplsti:
	nop	/* patch this to a sti when a proper setspl routine appears */
	popq	%rdi
	popq	%rax			/* return old ipl */
	leave
	ret
	SET_SIZE(fakesoftint_return)

set_to_base_spl:
	movl	CPU_BASE_SPL(%rcx), %edi
	jmp	setprilev
	SET_SIZE(spl)
	SET_SIZE(i_ddi_splx)
	SET_SIZE(splx)
1225
#elif defined(__i386)

	/*
	 * void splx(int level)			4(%esp) = level
	 *
	 * splx()/i_ddi_splx() disable interrupts, load the cpu pointer and
	 * the new level, and fall into spl.  spl is also the jump target of
	 * RAISE(), SETPRI() and splr(), which arrive with %edx = new ipl
	 * and %ecx = cpu pointer.  The old ipl is returned in %eax.
	 */
	ENTRY(splx)
	ALTENTRY(i_ddi_splx)
	cli				/* disable interrupts */
	LOADCPU(%ecx)
	movl	4(%esp), %edx		/* get new spl level */
	/*FALLTHRU*/

	.align	4
	ALTENTRY(spl)
	/*
	 * New priority level is in %edx
	 * (doing this early to avoid an AGI in the next instruction)
	 */
	GETIPL_NOGS(%eax, %ecx)		/* get current ipl */
	cmpl	%edx, CPU_BASE_SPL(%ecx) /* if ( base spl > new ipl) */
	ja	set_to_base_spl		/* then use base_spl */

setprilev:
	SETIPL_NOGS(%edx, %ecx)		/* set new ipl */

	pushl	%eax			/* save old ipl */
	pushl	%edx			/* pass new ipl */
	call	*setspl

	LOADCPU(%ecx)
	movl	CPU_SOFTINFO(%ecx), %eax
	orl	%eax, %eax
	jz	setsplsti		/* no soft interrupts pending */

	/*
	 * Before dashing off, check that setsplsti has been patched.
	 *
	 * The patch point is the single opcode byte that install_spl()
	 * rewrites with movb, so only that byte is compared.  A 32-bit
	 * compare would also take in the popl/popl/ret bytes that follow
	 * and could never match NOP_INSTR, leaving the check ineffective.
	 */
	cmpb	$NOP_INSTR, setsplsti
	je	setsplsti

	bsrl	%eax, %edx		/* fls(st_pending) */
	cmpl	0(%esp), %edx		/* st_pending vs. new ipl */
	jg	fakesoftint

	ALTENTRY(fakesoftint_return)
	/*
	 * enable interrupts
	 */
setsplsti:
	nop	/* patch this to a sti when a proper setspl routine appears */
	popl	%eax			/* discard new ipl */
	popl	%eax			/* return old ipl */
	ret
	SET_SIZE(fakesoftint_return)

set_to_base_spl:
	movl	CPU_BASE_SPL(%ecx), %edx
	jmp	setprilev
	SET_SIZE(spl)
	SET_SIZE(i_ddi_splx)
	SET_SIZE(splx)

#endif	/* __i386 */
#endif	/* __lint */
1287
#if defined(__lint)

void
install_spl(void)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void install_spl(void)
	 *
	 * Patch the spl routines in place.  CR0_WP is cleared while the
	 * kernel text is written and the original %cr0 (kept in %rdx) is
	 * restored before returning.
	 *
	 *  - the placeholder nop at setsplsti, slow_setsplsti, setsplhisti
	 *    and splr_setsti becomes STI_INSTR;
	 *  - if bit 0 of intpri_use_cr8 is clear, the first two bytes at
	 *    setsplhi_patch and spl_patch are overwritten with a two-byte
	 *    jump to slow_setsplhi and slow_spl respectively, and %cr8 is
	 *    zeroed.
	 *
	 * The jump is assembled as JMP_INSTR in the low byte and
	 * (target - patch point - 2) in the next byte, so each target must
	 * be within reach of a one-byte displacement; this is not checked.
	 */
	ENTRY_NP(install_spl)
	movq	%cr0, %rax
	movq	%rax, %rdx		/* %rdx = original %cr0 */
	movl	$_BITNOT(CR0_WP), %ecx
	movslq	%ecx, %rcx
	andq	%rcx, %rax		/* we don't want to take a fault */
	movq	%rax, %cr0
	jmp	1f
1:	movb	$STI_INSTR, setsplsti(%rip)
	movb	$STI_INSTR, slow_setsplsti(%rip)
	movb	$STI_INSTR, setsplhisti(%rip)
	movb	$STI_INSTR, splr_setsti(%rip)
	testl	$1, intpri_use_cr8(%rip)	/* are we using %cr8? */
	jz	2f				/* no, go patch more */
	movq	%rdx, %cr0
	ret
2:
	/*
	 * Patch spl functions to use slow spl method
	 */
	leaq	setsplhi_patch(%rip), %rdi	/* get patch point addr */
	leaq	slow_setsplhi(%rip), %rax	/* jmp target */
	subq	%rdi, %rax			/* calculate jmp distance */
	subq	$2, %rax			/* minus size of jmp instr */
	shlq	$8, %rax			/* construct jmp instr */
	addq	$JMP_INSTR, %rax
	movw	%ax, setsplhi_patch(%rip)	/* patch in the jmp */
	leaq	spl_patch(%rip), %rdi		/* get patch point addr */
	leaq	slow_spl(%rip), %rax		/* jmp target */
	subq	%rdi, %rax			/* calculate jmp distance */
	subq	$2, %rax			/* minus size of jmp instr */
	shlq	$8, %rax			/* construct jmp instr */
	addq	$JMP_INSTR, %rax
	movw	%ax, spl_patch(%rip)		/* patch in the jmp */
	/*
	 * Ensure %cr8 is zero since we aren't using it
	 */
	xorl	%eax, %eax
	movq	%rax, %cr8
	movq	%rdx, %cr0
	ret
	SET_SIZE(install_spl)

#elif defined(__i386)

	/*
	 * void install_spl(void)
	 *
	 * With CR0_WP cleared, replace the placeholder nop at setsplsti,
	 * setsplhisti and splr_setsti with STI_INSTR, then restore the
	 * original %cr0 (kept in %edx).
	 */
	ENTRY_NP(install_spl)
	movl	%cr0, %eax
	movl	%eax, %edx		/* %edx = original %cr0 */
	andl	$_BITNOT(CR0_WP), %eax	/* we don't want to take a fault */
	movl	%eax, %cr0
	jmp	1f
1:	movb	$STI_INSTR, setsplsti
	movb	$STI_INSTR, setsplhisti
	movb	$STI_INSTR, splr_setsti
	movl	%edx, %cr0
	ret
	SET_SIZE(install_spl)

#endif	/* __i386 */
#endif	/* __lint */
1358
1359
1360/*
1361 * Get current processor interrupt level
1362 */
1363
#if defined(__lint)

int
getpil(void)
{ return (0); }

#else	/* __lint */

#if defined(__amd64) || defined(__i386)

	/*
	 * int getpil(void)
	 *
	 * Returns, in %eax, the value produced by the GETIPL macro.
	 * The instruction sequence is the same for both architectures.
	 */
	ENTRY(getpil)
	GETIPL(%eax)			/* %eax = current priority level */
	ret
	SET_SIZE(getpil)

#endif	/* __amd64 || __i386 */
#endif	/* __lint */
1388
#if defined(__i386)

/*
 * Read and write the %gs register
 */

#if defined(__lint)

/*ARGSUSED*/
uint16_t
getgs(void)
{ return (0); }

/*ARGSUSED*/
void
setgs(uint16_t sel)
{}

#else	/* __lint */

	/*
	 * uint16_t getgs(void)
	 * Returns the current %gs selector in the low 16 bits of %eax.
	 */
	ENTRY(getgs)
	clr	%eax			/* clear %eax before the 16-bit move */
	movw	%gs, %ax
	ret
	SET_SIZE(getgs)

	/*
	 * void setgs(uint16_t sel)
	 * Loads %gs from the first stack argument.
	 */
	ENTRY(setgs)
	movw	4(%esp), %gs
	ret
	SET_SIZE(setgs)

#endif	/* __lint */
#endif	/* __i386 */
1422
#if defined(__lint)

void
pc_reset(void)
{}

#else	/* __lint */

	/*
	 * void pc_reset(void)
	 *
	 * Writes 0xfe to I/O port 0x64 (the keyboard controller command
	 * port; this command pulses the processor reset line) and then
	 * halts.  Does not return.
	 *
	 * hlt can be resumed by an interrupt, NMI or SMI before the reset
	 * takes effect, so the halt is wrapped in a loop; otherwise
	 * execution would run off the end of this function into whatever
	 * code follows it.
	 */
	ENTRY(pc_reset)
	movw	$0x64, %dx
	movb	$0xfe, %al
	outb	(%dx)
1:
	hlt
	jmp	1b			/* woken up: halt again */
	/*NOTREACHED*/
	SET_SIZE(pc_reset)

#endif	/* __lint */
1440
1441/*
1442 * C callable in and out routines
1443 */
1444
#if defined(__lint)

/* ARGSUSED */
void
outl(int port_address, uint32_t val)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void outl(int port_address, uint32_t val)
	 *
	 * In:	%edi = port_address, %esi = val
	 * Writes the 32-bit value to the I/O port addressed by %dx.
	 */
	ENTRY(outl)
	movl	%esi, %eax		/* %eax = val */
	movw	%di, %dx		/* %dx = port */
	outl	(%dx)
	ret
	SET_SIZE(outl)

#elif defined(__i386)

	/*
	 * Offsets of the two arguments from %esp on entry.  The other
	 * port I/O routines that follow in this file use them as well.
	 */
	.set	PORT, 4
	.set	VAL, 8

	/*
	 * void outl(int port_address, uint32_t val)
	 * Both arguments are fetched from the stack.
	 */
	ENTRY(outl)
	movl	VAL(%esp), %eax		/* %eax = val */
	movw	PORT(%esp), %dx		/* %dx = port */
	outl	(%dx)
	ret
	SET_SIZE(outl)

#endif	/* __i386 */
#endif	/* __lint */
1477
#if defined(__lint)

/* ARGSUSED */
void
outw(int port_address, uint16_t val)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void outw(int port_address, uint16_t val)
	 *
	 * In:	%di = port_address, %si = val
	 * The output instruction is written as outl with the D16 prefix
	 * macro in front of it.
	 */
	ENTRY(outw)
	movw	%si, %ax		/* %ax = val */
	movw	%di, %dx		/* %dx = port */
	D16 outl (%dx)		/* XX64 why not outw? */
	ret
	SET_SIZE(outw)

#elif defined(__i386)

	/*
	 * void outw(int port_address, uint16_t val)
	 * Arguments are read from the stack at PORT and VAL.
	 */
	ENTRY(outw)
	movw	VAL(%esp), %ax		/* %ax = val */
	movw	PORT(%esp), %dx		/* %dx = port */
	D16 outl (%dx)
	ret
	SET_SIZE(outw)

#endif	/* __i386 */
#endif	/* __lint */
1507
#if defined(__lint)

/* ARGSUSED */
void
outb(int port_address, uint8_t val)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void outb(int port_address, uint8_t val)
	 *
	 * In:	%di = port_address, %sil = val
	 * Writes one byte to the I/O port addressed by %dx.
	 */
	ENTRY(outb)
	movb	%sil, %al		/* %al = val */
	movw	%di, %dx		/* %dx = port */
	outb	(%dx)
	ret
	SET_SIZE(outb)

#elif defined(__i386)

	/*
	 * void outb(int port_address, uint8_t val)
	 * Arguments are read from the stack at PORT and VAL.
	 */
	ENTRY(outb)
	movb	VAL(%esp), %al		/* %al = val */
	movw	PORT(%esp), %dx		/* %dx = port */
	outb	(%dx)
	ret
	SET_SIZE(outb)

#endif	/* __i386 */
#endif	/* __lint */
1537
#if defined(__lint)

/* ARGSUSED */
uint32_t
inl(int port_address)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * uint32_t inl(int port_address)
	 *
	 * In:	%di = port_address
	 * Out:	%eax = 32-bit value read from the port
	 */
	ENTRY(inl)
	movw	%di, %dx		/* %dx = port */
	xorl	%eax, %eax
	inl	(%dx)
	ret
	SET_SIZE(inl)

#elif defined(__i386)

	/*
	 * uint32_t inl(int port_address)
	 * The port number is read from the stack at PORT; the value read
	 * is returned in %eax.
	 */
	ENTRY(inl)
	movw	PORT(%esp), %dx		/* %dx = port */
	inl	(%dx)
	ret
	SET_SIZE(inl)

#endif	/* __i386 */
#endif	/* __lint */
1566
#if defined(__lint)

/* ARGSUSED */
uint16_t
inw(int port_address)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * uint16_t inw(int port_address)
	 *
	 * In:	%di = port_address
	 * Out:	%eax, cleared first and then filled by the D16-prefixed
	 *	input instruction
	 */
	ENTRY(inw)
	movw	%di, %dx		/* %dx = port */
	xorl	%eax, %eax		/* start from a zeroed result */
	D16 inl	(%dx)
	ret
	SET_SIZE(inw)

#elif defined(__i386)

	/*
	 * uint16_t inw(int port_address)
	 * The port number is read from the stack at PORT.
	 */
	ENTRY(inw)
	movw	PORT(%esp), %dx		/* %dx = port */
	subl	%eax, %eax		/* start from a zeroed result */
	D16 inl	(%dx)
	ret
	SET_SIZE(inw)

#endif	/* __i386 */
#endif	/* __lint */
1596
1597
#if defined(__lint)

/* ARGSUSED */
uint8_t
inb(int port_address)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * uint8_t inb(int port_address)
	 *
	 * In:	%di = port_address
	 * Out:	%al = byte read; the rest of %eax is zero
	 */
	ENTRY(inb)
	movw	%di, %dx		/* %dx = port */
	xorl	%eax, %eax		/* upper bits of the result are 0 */
	inb	(%dx)
	ret
	SET_SIZE(inb)

#elif defined(__i386)

	/*
	 * uint8_t inb(int port_address)
	 * The port number is read from the stack at PORT.
	 */
	ENTRY(inb)
	movw	PORT(%esp), %dx		/* %dx = port */
	subl    %eax, %eax		/* upper bits of the result are 0 */
	inb	(%dx)
	ret
	SET_SIZE(inb)

#endif	/* __i386 */
#endif	/* __lint */
1627
1628
#if defined(__lint)

/* ARGSUSED */
void
repoutsw(int port, uint16_t *addr, int cnt)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void repoutsw(int port, uint16_t *addr, int cnt)
	 *
	 * In:	%di = port, %rsi = addr, %edx = cnt
	 * The source pointer is already in %rsi, which is the register
	 * the string output instruction reads from.  cnt must be copied
	 * out of %edx before %dx is loaded with the port.
	 */
	ENTRY(repoutsw)
	movl	%edx, %ecx		/* %ecx = cnt */
	movw	%di, %dx		/* %dx = port */
	rep
	  D16 outsl
	ret
	SET_SIZE(repoutsw)

#elif defined(__i386)

	/*
	 * Stack picture once the callee-saved register has been pushed:
	 *
	 *	+16	cnt
	 *	+12	addr
	 *	 +8	port
	 *	 +4	return address
	 *	 +0	saved register	<-- %esp
	 *
	 * Pushing anything else requires adjusting the offsets below.
	 */
	.set	PORT, 8
	.set	ADDR, 12
	.set	COUNT, 16

	/*
	 * void repoutsw(int port, uint16_t *addr, int cnt)
	 * %esi is saved and restored around its use as source pointer.
	 */
	ENTRY(repoutsw)
	pushl	%esi
	movl	COUNT(%esp), %ecx	/* %ecx = cnt */
	movl	ADDR(%esp), %esi	/* %esi = addr */
	movl	PORT(%esp), %edx	/* %edx = port */
	rep
	  D16 outsl
	popl	%esi
	ret
	SET_SIZE(repoutsw)

#endif	/* __i386 */
#endif	/* __lint */
1678
1679
#if defined(__lint)

/* ARGSUSED */
void
repinsw(int port_addr, uint16_t *addr, int cnt)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void repinsw(int port_addr, uint16_t *addr, int cnt)
	 *
	 * In:	%di = port_addr, %rsi = addr, %edx = cnt
	 *
	 * The string input instruction stores through %rdi, so the
	 * destination pointer has to be moved there from %rsi (as
	 * repinsb and repinsd do).  Order matters: cnt leaves %edx
	 * before %dx receives the port, and the port leaves %di before
	 * %rdi receives the destination pointer.
	 */
	ENTRY(repinsw)
	movl	%edx, %ecx		/* %ecx = cnt */
	movw	%di, %dx		/* %dx = port */
	movq	%rsi, %rdi		/* %rdi = destination buffer */
	rep
	  D16 insl
	ret
	SET_SIZE(repinsw)

#elif defined(__i386)

	/*
	 * void repinsw(int port_addr, uint16_t *addr, int cnt)
	 *
	 * Arguments are read from the stack at PORT, ADDR and COUNT,
	 * which account for the one register pushed here.  %edi is
	 * saved and restored around its use as destination pointer.
	 */
	ENTRY(repinsw)
	pushl	%edi
	movl	PORT(%esp), %edx	/* %edx = port */
	movl	ADDR(%esp), %edi	/* %edi = destination buffer */
	movl	COUNT(%esp), %ecx	/* %ecx = cnt */
	rep
	  D16 insl
	popl	%edi
	ret
	SET_SIZE(repinsw)

#endif	/* __i386 */
#endif	/* __lint */
1714
1715
#if defined(__lint)

/* ARGSUSED */
void
repinsb(int port, uint8_t *addr, int count)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void repinsb(int port, uint8_t *addr, int count)
	 *
	 * In:	%di = port, %rsi = addr, %edx = count
	 * Each argument is moved out of its register before that
	 * register is overwritten by the next move.
	 */
	ENTRY(repinsb)
	movl	%edx, %ecx		/* %ecx = count */
	movw	%di, %dx		/* %dx = port */
	movq	%rsi, %rdi		/* %rdi = destination buffer */
	rep
	  insb
	ret
	SET_SIZE(repinsb)

#elif defined(__i386)

	/*
	 * Stack picture once the callee-saved register has been pushed:
	 *
	 *	+16	count
	 *	+12	addr
	 *	 +8	port
	 *	 +4	return address
	 *	 +0	saved register	<-- %esp
	 *
	 * Pushing anything else requires adjusting the offsets below.
	 */
	.set	IO_PORT, 8
	.set	IO_ADDR, 12
	.set	IO_COUNT, 16

	/*
	 * void repinsb(int port, uint8_t *addr, int count)
	 * %edi is saved and restored around its use as destination.
	 */
	ENTRY(repinsb)
	pushl	%edi
	movl	IO_PORT(%esp), %edx	/* %edx = port */
	movl	IO_COUNT(%esp), %ecx	/* %ecx = count */
	movl	IO_ADDR(%esp), %edi	/* %edi = destination buffer */
	rep
	  insb
	popl	%edi
	ret
	SET_SIZE(repinsb)

#endif	/* __i386 */
#endif	/* __lint */
1766
1767
1768/*
1769 * Input a stream of 32-bit words.
1770 * NOTE: count is a DWORD count.
1771 */
#if defined(__lint)

/* ARGSUSED */
void
repinsd(int port, uint32_t *addr, int count)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void repinsd(int port, uint32_t *addr, int count)
	 *
	 * In:	%di = port, %rsi = addr, %edx = count
	 * Each argument is moved out of its register before that
	 * register is overwritten by the next move.
	 */
	ENTRY(repinsd)
	movl	%edx, %ecx		/* %ecx = count */
	movw	%di, %dx		/* %dx = port */
	movq	%rsi, %rdi		/* %rdi = destination buffer */
	rep
	  insl
	ret
	SET_SIZE(repinsd)

#elif defined(__i386)

	/*
	 * void repinsd(int port, uint32_t *addr, int count)
	 *
	 * Arguments are read from the stack at IO_PORT, IO_ADDR and
	 * IO_COUNT, which account for the one register pushed here.
	 */
	ENTRY(repinsd)
	pushl	%edi
	movl	IO_PORT(%esp), %edx	/* %edx = port */
	movl	IO_COUNT(%esp), %ecx	/* %ecx = count */
	movl	IO_ADDR(%esp), %edi	/* %edi = destination buffer */
	rep
	  insl
	popl	%edi
	ret
	SET_SIZE(repinsd)

#endif	/* __i386 */
#endif	/* __lint */
1807
1808/*
1809 * Output a stream of bytes
1810 * NOTE: count is a byte count
1811 */
#if defined(__lint)

/* ARGSUSED */
void
repoutsb(int port, uint8_t *addr, int count)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void repoutsb(int port, uint8_t *addr, int count)
	 *
	 * In:	%di = port, %rsi = addr, %edx = count
	 * The source pointer is already in %rsi.  count must be copied
	 * out of %edx before %dx is loaded with the port.
	 */
	ENTRY(repoutsb)
	movl	%edx, %ecx		/* %ecx = count */
	movw	%di, %dx		/* %dx = port */
	rep
	  outsb
	ret
	SET_SIZE(repoutsb)

#elif defined(__i386)

	/*
	 * void repoutsb(int port, uint8_t *addr, int count)
	 *
	 * Arguments are read from the stack at IO_PORT, IO_ADDR and
	 * IO_COUNT, which account for the one register pushed here.
	 */
	ENTRY(repoutsb)
	pushl	%esi
	movl	IO_PORT(%esp), %edx	/* %edx = port */
	movl	IO_COUNT(%esp), %ecx	/* %ecx = count */
	movl	IO_ADDR(%esp), %esi	/* %esi = source buffer */
	rep
	  outsb
	popl	%esi
	ret
	SET_SIZE(repoutsb)

#endif	/* __i386 */
#endif	/* __lint */
1846
1847/*
1848 * Output a stream of 32-bit words
1849 * NOTE: count is a DWORD count
1850 */
#if defined(__lint)

/* ARGSUSED */
void
repoutsd(int port, uint32_t *addr, int count)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void repoutsd(int port, uint32_t *addr, int count)
	 *
	 * In:	%di = port, %rsi = addr, %edx = count
	 * The source pointer is already in %rsi.  count must be copied
	 * out of %edx before %dx is loaded with the port.
	 */
	ENTRY(repoutsd)
	movl	%edx, %ecx		/* %ecx = count */
	movw	%di, %dx		/* %dx = port */
	rep
	  outsl
	ret
	SET_SIZE(repoutsd)

#elif defined(__i386)

	/*
	 * void repoutsd(int port, uint32_t *addr, int count)
	 *
	 * Arguments are read from the stack at IO_PORT, IO_ADDR and
	 * IO_COUNT, which account for the one register pushed here.
	 */
	ENTRY(repoutsd)
	pushl	%esi
	movl	IO_PORT(%esp), %edx	/* %edx = port */
	movl	IO_COUNT(%esp), %ecx	/* %ecx = count */
	movl	IO_ADDR(%esp), %esi	/* %esi = source buffer */
	rep
	  outsl
	popl	%esi
	ret
	SET_SIZE(repoutsd)

#endif	/* __i386 */
#endif	/* __lint */
1885
1886/*
1887 * void int20(void)
1888 */
1889
#if defined(__lint)

void
int20(void)
{}

#else	/* __lint */

	/*
	 * void int20(void)
	 *
	 * Raises software interrupt 20, but only when the RB_DEBUG bit
	 * is set in boothowto; otherwise returns immediately.
	 */
	ENTRY(int20)
	movl	boothowto, %eax
	andl	$RB_DEBUG, %eax		/* %eax = boothowto & RB_DEBUG */
	jnz	2f
	ret				/* RB_DEBUG clear: nothing to do */
2:
	int	$20
	ret
	SET_SIZE(int20)

#endif	/* __lint */
1909
#if defined(__lint)

/* ARGSUSED */
int
scanc(size_t size, uchar_t *cp, uchar_t *table, uchar_t mask)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * int scanc(size_t size, uchar_t *cp, uchar_t *table, uchar_t mask)
	 *
	 * In:	%rdi = size, %rsi = cp, %rdx = table, %cl = mask
	 * Out:	%eax = number of bytes from the first byte b with
	 *	(table[b] & mask) != 0 to the end of the buffer; 0 when
	 *	no byte matches.
	 */
	ENTRY(scanc)
	addq	%rsi, %rdi		/* %rdi = end = cp + size */
.scanloop:
	cmpq	%rdi, %rsi
	jnb	.scandone		/* cp reached end: stop */
	movzbq	(%rsi), %r8		/* %r8 = byte at cp */
	incq	%rsi			/* advance cp */
	testb	%cl, (%r8, %rdx)	/* table[byte] & mask */
	jz	.scanloop		/* no mask bit set: next byte */
	decq	%rsi			/* back up onto the matching byte */
.scandone:
	movl	%edi, %eax
	subl	%esi, %eax		/* result = end - cp */
	ret
	SET_SIZE(scanc)

#elif defined(__i386)

	/*
	 * int scanc(size_t size, uchar_t *cp, uchar_t *table, uchar_t mask)
	 *
	 * With two registers pushed the arguments sit at 12(%esp) size,
	 * 16(%esp) cp, 20(%esp) table and 24(%esp) mask.
	 * Out:	%eax = number of bytes from the first byte b with
	 *	(table[b] & mask) != 0 to the end of the buffer; 0 when
	 *	no byte matches.
	 */
	ENTRY(scanc)
	pushl	%edi
	pushl	%esi
	movl	16(%esp), %esi		/* %esi = cp */
	movl	%esi, %edi
	addl	12(%esp), %edi		/* %edi = end = cp + size */
	movl	20(%esp), %edx		/* %edx = table */
	movb	24(%esp), %cl		/* %cl = mask */
.scanloop:
	cmpl	%edi, %esi
	jnb	.scandone		/* cp reached end: stop */
	movzbl	(%esi),  %eax		/* %eax = byte at cp */
	incl	%esi			/* advance cp */
	movb	(%edx,  %eax), %al	/* %al = table[byte] */
	testb	%al, %cl
	jz	.scanloop		/* no mask bit set: next byte */
	dec	%esi			/* back up onto the matching byte */
.scandone:
	movl	%edi, %eax
	subl	%esi, %eax		/* result = end - cp */
	popl	%esi
	popl	%edi
	ret
	SET_SIZE(scanc)

#endif	/* __i386 */
#endif	/* __lint */
1970
1971/*
1972 * Replacement functions for ones that are normally inlined.
1973 * In addition to the copy in i86.il, they are defined here just in case.
1974 */
1975
#if defined(__lint)

int
intr_clear(void)
{ return 0; }

int
clear_int_flag(void)
{ return 0; }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * int intr_clear(void) / int clear_int_flag(void)
	 *
	 * Two names for the same code.  Saves the flags register,
	 * executes cli, and returns the saved flags value in %rax.
	 */
	ENTRY(intr_clear)
	ENTRY(clear_int_flag)
	pushfq				/* capture flags before cli */
	cli
	popq	%rax			/* return the captured flags */
	ret
	SET_SIZE(clear_int_flag)
	SET_SIZE(intr_clear)

#elif defined(__i386)

	/*
	 * int intr_clear(void) / int clear_int_flag(void)
	 *
	 * Two names for the same code.  Saves the flags register,
	 * executes cli, and returns the saved flags value in %eax.
	 */
	ENTRY(intr_clear)
	ENTRY(clear_int_flag)
	pushfl				/* capture flags before cli */
	cli
	popl	%eax			/* return the captured flags */
	ret
	SET_SIZE(clear_int_flag)
	SET_SIZE(intr_clear)

#endif	/* __i386 */
#endif	/* __lint */
2012
#if defined(__lint)

struct cpu *
curcpup(void)
{ return 0; }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * struct cpu *curcpup(void)
	 * Returns the pointer stored at %gs:CPU_SELF.
	 */
	ENTRY(curcpup)
	movq	%gs:CPU_SELF, %rax
	ret
	SET_SIZE(curcpup)

#elif defined(__i386)

	/*
	 * struct cpu *curcpup(void)
	 * Returns the pointer stored at %gs:CPU_SELF.
	 */
	ENTRY(curcpup)
	movl	%gs:CPU_SELF, %eax
	ret
	SET_SIZE(curcpup)

#endif	/* __i386 */
#endif	/* __lint */
2037
#if defined(__lint)

/* ARGSUSED */
uint32_t
htonl(uint32_t i)
{ return (0); }

/* ARGSUSED */
uint32_t
ntohl(uint32_t i)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * uint32_t htonl(uint32_t i) / uint32_t ntohl(uint32_t i)
	 *
	 * Both names share one body: return the argument (%edi) with
	 * its four bytes reversed.
	 */
	/* XX64 there must be shorter sequences for this */
	ENTRY(htonl)
	ALTENTRY(ntohl)
	movl	%edi, %eax
	bswap	%eax			/* reverse the byte order */
	ret
	SET_SIZE(ntohl)
	SET_SIZE(htonl)

#elif defined(__i386)

	/*
	 * uint32_t htonl(uint32_t i) / uint32_t ntohl(uint32_t i)
	 *
	 * Both names share one body: return the stack argument with
	 * its four bytes reversed.
	 */
	ENTRY(htonl)
	ALTENTRY(ntohl)
	movl	4(%esp), %eax
	bswap	%eax			/* reverse the byte order */
	ret
	SET_SIZE(ntohl)
	SET_SIZE(htonl)

#endif	/* __i386 */
#endif	/* __lint */
2075
#if defined(__lint)

/* ARGSUSED */
uint16_t
htons(uint16_t i)
{ return (0); }

/* ARGSUSED */
uint16_t
ntohs(uint16_t i)
{ return (0); }


#else	/* __lint */

#if defined(__amd64)

	/*
	 * uint16_t htons(uint16_t i) / uint16_t ntohs(uint16_t i)
	 *
	 * Both names share one body.  The low 16 bits of the argument
	 * are zero-extended into %eax and their two bytes exchanged;
	 * the upper half of %eax is zero on return.
	 */
	ENTRY(htons)
	ALTENTRY(ntohs)
	movzwl	%di, %eax		/* %eax = i, zero-extended */
	rolw	$8, %ax			/* exchange the two bytes */
	ret
	SET_SIZE(ntohs)
	SET_SIZE(htons)

#elif defined(__i386)

	/*
	 * uint16_t htons(uint16_t i) / uint16_t ntohs(uint16_t i)
	 *
	 * Both names share one body.  The low 16 bits of the stack
	 * argument are zero-extended into %eax and their two bytes
	 * exchanged; the upper half of %eax is zero on return.
	 */
	ENTRY(htons)
	ALTENTRY(ntohs)
	movzwl	4(%esp), %eax		/* %eax = i, zero-extended */
	rolw	$8, %ax			/* exchange the two bytes */
	ret
	SET_SIZE(ntohs)
	SET_SIZE(htons)

#endif	/* __i386 */
#endif	/* __lint */
2116
2117
#if defined(__lint)

/* ARGSUSED */
void
intr_restore(uint_t i)
{ return; }

/* ARGSUSED */
void
restore_int_flag(int i)
{ return; }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void intr_restore(uint_t i) / void restore_int_flag(int i)
	 *
	 * Two names for the same code.  Loads the flags register from
	 * the argument in %rdi by way of the stack.
	 */
	ENTRY(intr_restore)
	ENTRY(restore_int_flag)
	pushq	%rdi
	popfq				/* flags = argument */
	ret
	SET_SIZE(restore_int_flag)
	SET_SIZE(intr_restore)

#elif defined(__i386)

	/*
	 * void intr_restore(uint_t i) / void restore_int_flag(int i)
	 *
	 * Two names for the same code.  Pushes a copy of the stack
	 * argument and pops it into the flags register.
	 */
	ENTRY(intr_restore)
	ENTRY(restore_int_flag)
	pushl	4(%esp)
	popfl				/* flags = argument */
	ret
	SET_SIZE(restore_int_flag)
	SET_SIZE(intr_restore)

#endif	/* __i386 */
#endif	/* __lint */
2154
#if defined(__lint)

void
sti(void)
{}

#else	/* __lint */

	/*
	 * void sti(void)
	 * C-callable wrapper around the sti instruction.
	 */
	ENTRY(sti)
	sti
	ret
	SET_SIZE(sti)

#endif	/* __lint */
2169
#if defined(__lint)

dtrace_icookie_t
dtrace_interrupt_disable(void)
{ return (0); }

#else   /* __lint */

#if defined(__amd64)

	/*
	 * dtrace_icookie_t dtrace_interrupt_disable(void)
	 *
	 * Returns the flags register value in %rax and then executes
	 * cli.  The returned value is in the form that
	 * dtrace_interrupt_enable() loads back into the flags register.
	 */
	ENTRY(dtrace_interrupt_disable)
	pushfq
	popq	%rax			/* cookie = flags before cli */
	cli
	ret
	SET_SIZE(dtrace_interrupt_disable)

#elif defined(__i386)

	/*
	 * dtrace_icookie_t dtrace_interrupt_disable(void)
	 *
	 * Returns the flags register value in %eax and then executes
	 * cli.  The returned value is in the form that
	 * dtrace_interrupt_enable() loads back into the flags register.
	 */
	ENTRY(dtrace_interrupt_disable)
	pushfl
	popl	%eax			/* cookie = flags before cli */
	cli
	ret
	SET_SIZE(dtrace_interrupt_disable)

#endif	/* __i386 */
#endif	/* __lint */
2198
#if defined(__lint)

/*ARGSUSED*/
void
dtrace_interrupt_enable(dtrace_icookie_t cookie)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void dtrace_interrupt_enable(dtrace_icookie_t cookie)
	 * Loads the flags register from cookie (%rdi).
	 */
	ENTRY(dtrace_interrupt_enable)
	pushq	%rdi
	popfq				/* flags = cookie */
	ret
	SET_SIZE(dtrace_interrupt_enable)

#elif defined(__i386)

	/*
	 * void dtrace_interrupt_enable(dtrace_icookie_t cookie)
	 * Loads the flags register from the stack argument.
	 */
	ENTRY(dtrace_interrupt_enable)
	movl	4(%esp), %eax		/* %eax = cookie */
	pushl	%eax
	popfl				/* flags = cookie */
	ret
	SET_SIZE(dtrace_interrupt_enable)

#endif	/* __i386 */
#endif	/* __lint */
2227
2228
#if defined(__lint)

void
dtrace_membar_producer(void)
{}

void
dtrace_membar_consumer(void)
{}

#else	/* __lint */

	/*
	 * void dtrace_membar_producer(void)
	 * void dtrace_membar_consumer(void)
	 *
	 * Both routines consist of a bare return; no barrier instruction
	 * is issued.
	 * NOTE(review): presumably sufficient because of the ordering
	 * guarantees of the processors this file targets -- confirm.
	 */
	ENTRY(dtrace_membar_producer)
	ret
	SET_SIZE(dtrace_membar_producer)

	ENTRY(dtrace_membar_consumer)
	ret
	SET_SIZE(dtrace_membar_consumer)

#endif	/* __lint */
2250
#if defined(__lint)

kthread_id_t
threadp(void)
{ return ((kthread_id_t)0); }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * kthread_id_t threadp(void)
	 * Returns the value stored at %gs:CPU_THREAD.
	 */
	ENTRY(threadp)
	movq	%gs:CPU_THREAD, %rax
	ret
	SET_SIZE(threadp)

#elif defined(__i386)

	/*
	 * kthread_id_t threadp(void)
	 * Returns the value stored at %gs:CPU_THREAD.
	 */
	ENTRY(threadp)
	movl	%gs:CPU_THREAD, %eax
	ret
	SET_SIZE(threadp)

#endif	/* __i386 */
#endif	/* __lint */
2275
2276/*
2277 *   Checksum routine for Internet Protocol Headers
2278 */
2279
2280#if defined(__lint)
2281
2282/* ARGSUSED */
2283unsigned int
2284ip_ocsum(
2285	ushort_t *address,	/* ptr to 1st message buffer */
2286	int halfword_count,	/* length of data */
2287	unsigned int sum)	/* partial checksum */
2288{
2289	int		i;
2290	unsigned int	psum = 0;	/* partial sum */
2291
2292	for (i = 0; i < halfword_count; i++, address++) {
2293		psum += *address;
2294	}
2295
2296	while ((psum >> 16) != 0) {
2297		psum = (psum & 0xffff) + (psum >> 16);
2298	}
2299
2300	psum += sum;
2301
2302	while ((psum >> 16) != 0) {
2303		psum = (psum & 0xffff) + (psum >> 16);
2304	}
2305
2306	return (psum);
2307}
2308
2309#else	/* __lint */
2310
2311#if defined(__amd64)
2312
	/*
	 * unsigned int ip_ocsum(ushort_t *address, int halfword_count,
	 *     unsigned int sum)
	 *
	 * In:	%rdi = address, %esi = halfword_count, %edx = sum
	 * Out:	%eax = 16-bit folded checksum (upper 16 bits zero)
	 *
	 * Register roles in the body:
	 *	%rsi	data pointer
	 *	%ecx	halfwords still to be summed
	 *	%edx	accumulator (starts as the caller's partial sum)
	 *	%eax	second accumulator (starts at 0)
	 *
	 * The main loop consumes 64 bytes (32 halfwords) per pass with a
	 * chain of add-with-carry instructions alternating between the
	 * two accumulators.  A final partial pass enters that chain part
	 * way through by jumping through .ip_ocsum_jmptbl.
	 */
	ENTRY(ip_ocsum)
	pushq	%rbp
	movq	%rsp, %rbp
#ifdef DEBUG
	/* DEBUG only: panic if address is below kernelbase */
	movq	kernelbase(%rip), %rax
	cmpq	%rax, %rdi
	jnb	1f
	xorl	%eax, %eax
	movq	%rdi, %rsi
	leaq	.ip_ocsum_panic_msg(%rip), %rdi
	call	panic
	/*NOTREACHED*/
.ip_ocsum_panic_msg:
	.string	"ip_ocsum: address 0x%p below kernelbase\n"
1:
#endif
	movl	%esi, %ecx	/* halfword_count */
	movq	%rdi, %rsi	/* address */
				/* partial sum in %edx */
	xorl	%eax, %eax
	testl	%ecx, %ecx
	jz	.ip_ocsum_done
	testq	$3, %rsi
	jnz	.ip_csum_notaligned
.ip_csum_aligned:	/* XX64 opportunities for 8-byte operations? */
.next_iter:
	/* XX64 opportunities for prefetch? */
	/* XX64 compute csum with 64 bit quantities? */
	subl	$32, %ecx
	jl	.less_than_32

	addl	0(%rsi), %edx
.only60:
	adcl	4(%rsi), %eax
.only56:
	adcl	8(%rsi), %edx
.only52:
	adcl	12(%rsi), %eax
.only48:
	adcl	16(%rsi), %edx
.only44:
	adcl	20(%rsi), %eax
.only40:
	adcl	24(%rsi), %edx
.only36:
	adcl	28(%rsi), %eax
.only32:
	adcl	32(%rsi), %edx
.only28:
	adcl	36(%rsi), %eax
.only24:
	adcl	40(%rsi), %edx
.only20:
	adcl	44(%rsi), %eax
.only16:
	adcl	48(%rsi), %edx
.only12:
	adcl	52(%rsi), %eax
.only8:
	adcl	56(%rsi), %edx
.only4:
	adcl	60(%rsi), %eax	/* could be adding -1 and -1 with a carry */
.only0:
	adcl	$0, %eax	/* could be adding -1 in eax with a carry */
	adcl	$0, %eax

	addq	$64, %rsi
	testl	%ecx, %ecx
	jnz	.next_iter

.ip_ocsum_done:
	/* combine the two accumulators, then fold 32 bits down to 16 */
	addl	%eax, %edx
	adcl	$0, %edx
	movl	%edx, %eax	/* form a 16 bit checksum by */
	shrl	$16, %eax	/* adding two halves of 32 bit checksum */
	addw	%dx, %ax
	adcw	$0, %ax
	andl	$0xffff, %eax
	leave
	ret

.ip_csum_notaligned:
	/*
	 * Pointer is not a multiple of 4: add one halfword on its own,
	 * advance by 2 bytes and rejoin the main path.
	 */
	xorl	%edi, %edi
	movw	(%rsi), %di
	addl	%edi, %edx
	adcl	$0, %edx
	addq	$2, %rsi
	decl	%ecx
	jmp	.ip_csum_aligned

.less_than_32:
	/*
	 * Fewer than 32 halfwords remain.  Undo the subtraction; if the
	 * count is odd, add the last halfword separately so that an
	 * even number is left.
	 */
	addl	$32, %ecx
	testl	$1, %ecx
	jz	.size_aligned
	andl	$0xfe, %ecx
	movzwl	(%rsi, %rcx, 2), %edi
	addl	%edi, %edx
	adcl	$0, %edx
.size_aligned:
	/*
	 * Move %rsi back by (64 - 2 * count) bytes so that the remaining
	 * data ends at offset 64, then jump into the add-with-carry
	 * chain at table entry count / 2 with the carry flag clear.
	 * %ecx is zeroed so the loop test after the chain falls out.
	 */
	movl	%ecx, %edi
	shrl	$1, %ecx
	shl	$1, %edi
	subq	$64, %rdi
	addq	%rdi, %rsi
	leaq    .ip_ocsum_jmptbl(%rip), %rdi
	leaq	(%rdi, %rcx, 8), %rdi
	xorl	%ecx, %ecx
	clc
	jmp 	*(%rdi)

	.align	8
.ip_ocsum_jmptbl:
	.quad	.only0, .only4, .only8, .only12, .only16, .only20
	.quad	.only24, .only28, .only32, .only36, .only40, .only44
	.quad	.only48, .only52, .only56, .only60
	SET_SIZE(ip_ocsum)
2429
2430#elif defined(__i386)
2431
	/*
	 * unsigned int ip_ocsum(ushort_t *address, int halfword_count,
	 *     unsigned int sum)
	 *
	 * Arguments relative to %ebp: 8 = address, 12 = halfword_count,
	 * 16 = sum.
	 * Out:	%eax = 16-bit folded checksum (upper 16 bits zero)
	 *
	 * Register roles in the body:
	 *	%esi	data pointer
	 *	%ecx	halfwords still to be summed
	 *	%edx	accumulator (starts as the caller's partial sum)
	 *	%eax	second accumulator (starts at 0)
	 *
	 * The main loop consumes 64 bytes (32 halfwords) per pass with a
	 * chain of add-with-carry instructions alternating between the
	 * two accumulators.  A final partial pass enters that chain part
	 * way through by jumping through .ip_ocsum_jmptbl.
	 */
	ENTRY(ip_ocsum)
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	12(%ebp), %ecx	/* count of half words */
	movl	16(%ebp), %edx	/* partial checksum */
	movl	8(%ebp), %esi
	xorl	%eax, %eax
	testl	%ecx, %ecx
	jz	.ip_ocsum_done

	testl	$3, %esi
	jnz	.ip_csum_notaligned
.ip_csum_aligned:
.next_iter:
	subl	$32, %ecx
	jl	.less_than_32

	addl	0(%esi), %edx
.only60:
	adcl	4(%esi), %eax
.only56:
	adcl	8(%esi), %edx
.only52:
	adcl	12(%esi), %eax
.only48:
	adcl	16(%esi), %edx
.only44:
	adcl	20(%esi), %eax
.only40:
	adcl	24(%esi), %edx
.only36:
	adcl	28(%esi), %eax
.only32:
	adcl	32(%esi), %edx
.only28:
	adcl	36(%esi), %eax
.only24:
	adcl	40(%esi), %edx
.only20:
	adcl	44(%esi), %eax
.only16:
	adcl	48(%esi), %edx
.only12:
	adcl	52(%esi), %eax
.only8:
	adcl	56(%esi), %edx
.only4:
	adcl	60(%esi), %eax	/* We could be adding -1 and -1 with a carry */
.only0:
	adcl	$0, %eax	/* we could be adding -1 in eax with a carry */
	adcl	$0, %eax

	addl	$64, %esi
	andl	%ecx, %ecx
	jnz	.next_iter

.ip_ocsum_done:
	/* combine the two accumulators, then fold 32 bits down to 16 */
	addl	%eax, %edx
	adcl	$0, %edx
	movl	%edx, %eax	/* form a 16 bit checksum by */
	shrl	$16, %eax	/* adding two halves of 32 bit checksum */
	addw	%dx, %ax
	adcw	$0, %ax
	andl	$0xffff, %eax
	popl	%edi		/* restore registers */
	popl	%esi
	popl	%ebx
	leave
	ret

.ip_csum_notaligned:
	/*
	 * Pointer is not a multiple of 4: add one halfword on its own,
	 * advance by 2 bytes and rejoin the main path.
	 */
	xorl	%edi, %edi
	movw	(%esi), %di
	addl	%edi, %edx
	adcl	$0, %edx
	addl	$2, %esi
	decl	%ecx
	jmp	.ip_csum_aligned

.less_than_32:
	/*
	 * Fewer than 32 halfwords remain.  Undo the subtraction; if the
	 * count is odd, add the last halfword separately so that an
	 * even number is left.
	 */
	addl	$32, %ecx
	testl	$1, %ecx
	jz	.size_aligned
	andl	$0xfe, %ecx
	movzwl	(%esi, %ecx, 2), %edi
	addl	%edi, %edx
	adcl	$0, %edx
.size_aligned:
	/*
	 * Move %esi back by (64 - 2 * count) bytes so that the remaining
	 * data ends at offset 64, then jump into the add-with-carry
	 * chain at table entry count / 2 with the carry flag clear.
	 * %ecx is zeroed so the loop test after the chain falls out.
	 */
	movl	%ecx, %edi
	shrl	$1, %ecx
	shl	$1, %edi
	subl	$64, %edi
	addl	%edi, %esi
	movl	$.ip_ocsum_jmptbl, %edi
	lea	(%edi, %ecx, 4), %edi
	xorl	%ecx, %ecx
	clc
	jmp 	*(%edi)
	SET_SIZE(ip_ocsum)

	/* jump table, placed in the data section; entry n is .only(4n) */
	.data
	.align	4

.ip_ocsum_jmptbl:
	.long	.only0, .only4, .only8, .only12, .only16, .only20
	.long	.only24, .only28, .only32, .only36, .only40, .only44
	.long	.only48, .only52, .only56, .only60
2542
2543
2544#endif	/* __i386 */
2545#endif	/* __lint */
2546
2547/*
2548 * multiply two long numbers and yield a u_longlong_t result, callable from C.
2549 * Provided to manipulate hrtime_t values.
2550 */
#if defined(__lint)

/* result = a * b; */

/* ARGSUSED */
unsigned long long
mul32(uint_t a, uint_t b)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * unsigned long long mul32(uint_t a, uint_t b)
	 *
	 * In:	%edi = a, %esi = b
	 * Out:	%rax = full 64-bit unsigned product
	 *
	 * mull leaves the product in %edx:%eax; the two halves are then
	 * merged into %rax.
	 */
	ENTRY(mul32)
	movl	%edi, %eax
	mull	%esi			/* %edx:%eax = a * b */
	shlq	$32, %rdx		/* high half into bits 63..32 */
	orq	%rdx, %rax
	ret
	SET_SIZE(mul32)

#elif defined(__i386)

	/*
	 * unsigned long long mul32(uint_t a, uint_t b)
	 *
	 * a is at 4(%esp) and b at 8(%esp).  The 64-bit product is
	 * returned in %edx:%eax, exactly as mull leaves it.
	 */
	ENTRY(mul32)
	movl	4(%esp), %eax		/* %eax = a */
	mull	8(%esp)			/* %edx:%eax = a * b */
	ret
	SET_SIZE(mul32)

#endif	/* __i386 */
#endif	/* __lint */
2584
2585#if defined(__i386) && !defined(__amd64)
2586
2587#if defined(__lint)
2588
2589/* ARGSUSED */
2590long long
2591__mul64(long long a, long long b)
2592{ return (0); }
2593
2594#else   /* __lint */
2595
2596/*
2597 *   function __mul64(A, B:Longint):Longint;
2598 *	{Overflow is not checked}
2599 *
2600 * We essentially do multiply by longhand, using base 2**32 digits.
2601 *               a       b	parameter A
2602 *	     x 	c       d	parameter B
2603 *		---------
2604 *               ad      bd
2605 *       ac	bc
2606 *       -----------------
2607 *       ac	ad+bc	bd
2608 *
2609 *       We can ignore ac and top 32 bits of ad+bc: if <> 0, overflow happened.
2610 */
	/*
	 * Stack arguments relative to %ebp:
	 *	 8 = A.lo	12 = A.hi	16 = B.lo	20 = B.hi
	 * Returns the low 64 bits of A * B in %edx:%eax and removes the
	 * 16 bytes of arguments from the stack on return (ret $16).
	 * %ecx is used as scratch; %esi is saved and restored.
	 */
	ENTRY(__mul64)
	push	%ebp
	movl   	%esp, %ebp
	pushl	%esi
	movl	12(%ebp), %eax	/* %eax = A.hi */
	mull	16(%ebp)	/* %edx:%eax = A.hi * B.lo */
	xchg	%ecx, %eax	/* %ecx = low 32 bits of A.hi * B.lo */
	movl    8(%ebp), %eax	/* %eax = A.lo */
	movl	%eax, %esi	/* keep a copy of A.lo in %esi */
	mull	16(%ebp)	/* %edx:%eax = A.lo * B.lo */
	addl	%edx, %ecx	/* %ecx += high 32 bits of A.lo * B.lo */
	xchg	%eax, %esi	/* %esi = low 32 bits of A.lo * B.lo, %eax = A.lo */
	mull	20(%ebp)	/* %edx:%eax = A.lo * B.hi */
	addl	%ecx, %eax	/* %eax = high 32 bits of the result */
	movl	%esi, %edx
	xchg	%eax, %edx	/* %eax = result low half, %edx = high half */
	popl	%esi
	movl	%ebp, %esp
	popl	%ebp
	ret	$16
	SET_SIZE(__mul64)
2632
2633#endif	/* __lint */
2634
2635#if defined(__lint)
2636
2637/*
2638 * C support for 64-bit modulo and division.
2639 * GNU routines callable from C (though generated by the compiler).
2640 * Hand-customized compiler output - see comments for details.
2641 */
/*
 * The definitions below are stubs: each one only declares the prototype
 * of the corresponding entry point and returns 0.
 */

/* unsigned 64-bit divide */
/*ARGSUSED*/
unsigned long long
__udivdi3(unsigned long long a, unsigned long long b)
{ return (0); }

/* unsigned 64-bit modulo */
/*ARGSUSED*/
unsigned long long
__umoddi3(unsigned long long a, unsigned long long b)
{ return (0); }

/* signed 64-bit divide */
/*ARGSUSED*/
long long
__divdi3(long long a, long long b)
{ return (0); }

/* signed 64-bit modulo */
/*ARGSUSED*/
long long
__moddi3(long long a, long long b)
{ return (0); }

/* signed 64-bit operand entry points */
/* ARGSUSED */
int64_t __div64(int64_t a, int64_t b)
{ return (0); }

/* ARGSUSED */
int64_t __divrem64(int64_t a, int64_t b)
{ return (0); }

/* ARGSUSED */
int64_t __rem64(int64_t a, int64_t b)
{ return (0); }

/* unsigned 64-bit operand entry points */
/* ARGSUSED */
uint64_t __udiv64(uint64_t a, uint64_t b)
{ return (0); }

/* ARGSUSED */
uint64_t __udivrem64(uint64_t a, uint64_t b)
{ return (0); }

/* ARGSUSED */
uint64_t __urem64(uint64_t a, uint64_t b)
{ return (0); }
2685
2686#else	/* __lint */
2687
2688/*
2689 * int32_t/int64_t division/manipulation
2690 *
2691 * Hand-customized compiler output: the non-GCC entry points depart from
2692 * the SYS V ABI by requiring their arguments to be popped, and in the
2693 * [u]divrem64 cases returning the remainder in %ecx:%esi. Note the
2694 * compiler-generated use of %edx:%eax for the first argument of
2695 * internal entry points.
2696 *
2697 * Inlines for speed:
2698 * - counting the number of leading zeros in a word
2699 * - multiplying two 32-bit numbers giving a 64-bit result
2700 * - dividing a 64-bit number by a 32-bit number, giving both quotient
2701 *	and remainder
2702 * - subtracting two 64-bit results
2703 */
2704/ #define	LO(X)		((uint32_t)(X) & 0xffffffff)
2705/ #define	HI(X)		((uint32_t)((X) >> 32) & 0xffffffff)
2706/ #define	HILO(H, L)	(((uint64_t)(H) << 32) + (L))
2707/
2708/ /* give index of highest bit */
2709/ #define	HIBIT(a, r) \
2710/     asm("bsrl %1,%0": "=r"((uint32_t)(r)) : "g" (a))
2711/
2712/ /* multiply two uint32_ts resulting in a uint64_t */
2713/ #define	A_MUL32(a, b, lo, hi) \
2714/     asm("mull %2" \
2715/ 	: "=a"((uint32_t)(lo)), "=d"((uint32_t)(hi)) : "g" (b), "0"(a))
2716/
2717/ /* divide a uint64_t by a uint32_t */
2718/ #define	A_DIV32(lo, hi, b, q, r) \
2719/     asm("divl %2" \
2720/ 	: "=a"((uint32_t)(q)), "=d"((uint32_t)(r)) \
2721/ 	: "g" (b), "0"((uint32_t)(lo)), "1"((uint32_t)hi))
2722/
2723/ /* subtract two uint64_ts (with borrow) */
2724/ #define	A_SUB2(bl, bh, al, ah) \
2725/     asm("subl %4,%0\n\tsbbl %5,%1" \
2726/ 	: "=&r"((uint32_t)(al)), "=r"((uint32_t)(ah)) \
2727/ 	: "0"((uint32_t)(al)), "1"((uint32_t)(ah)), "g"((uint32_t)(bl)), \
2728/ 	"g"((uint32_t)(bh)))
2729/
2730/ /*
2731/  * Unsigned division with remainder.
2732/  * Divide two uint64_ts, and calculate remainder.
2733/  */
2734/ uint64_t
2735/ UDivRem(uint64_t x, uint64_t y, uint64_t * pmod)
2736/ {
2737/ 	/* simple cases: y is a single uint32_t */
2738/ 	if (HI(y) == 0) {
2739/ 		uint32_t	div_hi, div_rem;
2740/ 		uint32_t 	q0, q1;
2741/
2742/ 		/* calculate q1 */
2743/ 		if (HI(x) < LO(y)) {
2744/ 			/* result is a single uint32_t, use one division */
2745/ 			q1 = 0;
2746/ 			div_hi = HI(x);
2747/ 		} else {
2748/ 			/* result is a double uint32_t, use two divisions */
2749/ 			A_DIV32(HI(x), 0, LO(y), q1, div_hi);
2750/ 		}
2751/
2752/ 		/* calculate q0 and remainder */
2753/ 		A_DIV32(LO(x), div_hi, LO(y), q0, div_rem);
2754/
2755/ 		/* return remainder */
2756/ 		*pmod = div_rem;
2757/
2758/ 		/* return result */
2759/ 		return (HILO(q1, q0));
2760/
2761/ 	} else if (HI(x) < HI(y)) {
2762/ 		/* HI(x) < HI(y) => x < y => result is 0 */
2763/
2764/ 		/* return remainder */
2765/ 		*pmod = x;
2766/
2767/ 		/* return result */
2768/ 		return (0);
2769/
2770/ 	} else {
2771/ 		/*
2772/ 		 * uint64_t by uint64_t division, resulting in a one-uint32_t
2773/ 		 * result
2774/ 		 */
2775/ 		uint32_t		y0, y1;
2776/ 		uint32_t		x1, x0;
2777/ 		uint32_t		q0;
2778/ 		uint32_t		normshift;
2779/
2780/ 		/* normalize by shifting x and y so MSB(y) == 1 */
2781/ 		HIBIT(HI(y), normshift);	/* index of highest 1 bit */
2782/ 		normshift = 31 - normshift;
2783/
2784/ 		if (normshift == 0) {
2785/ 			/* no shifting needed, and x < 2*y so q <= 1 */
2786/ 			y1 = HI(y);
2787/ 			y0 = LO(y);
2788/ 			x1 = HI(x);
2789/ 			x0 = LO(x);
2790/
2791/ 			/* if x >= y then q = 1 (note x1 >= y1) */
2792/ 			if (x1 > y1 || x0 >= y0) {
2793/ 				q0 = 1;
2794/ 				/* subtract y from x to get remainder */
2795/ 				A_SUB2(y0, y1, x0, x1);
2796/ 			} else {
2797/ 				q0 = 0;
2798/ 			}
2799/
2800/ 			/* return remainder */
2801/ 			*pmod = HILO(x1, x0);
2802/
2803/ 			/* return result */
2804/ 			return (q0);
2805/
2806/ 		} else {
2807/ 			/*
2808/ 			 * the last case: result is one uint32_t, but we need to
2809/ 			 * normalize
2810/ 			 */
2811/ 			uint64_t	dt;
2812/ 			uint32_t		t0, t1, x2;
2813/
2814/ 			/* normalize y */
2815/ 			dt = (y << normshift);
2816/ 			y1 = HI(dt);
2817/ 			y0 = LO(dt);
2818/
2819/ 			/* normalize x (we need 3 uint32_ts!!!) */
2820/ 			x2 = (HI(x) >> (32 - normshift));
2821/ 			dt = (x << normshift);
2822/ 			x1 = HI(dt);
2823/ 			x0 = LO(dt);
2824/
2825/ 			/* estimate q0, and reduce x to a two uint32_t value */
2826/ 			A_DIV32(x1, x2, y1, q0, x1);
2827/
2828/ 			/* adjust q0 down if too high */
2829/ 			/*
2830/ 			 * because of the limited range of x2 we can only be
2831/ 			 * one off
2832/ 			 */
2833/ 			A_MUL32(y0, q0, t0, t1);
2834/ 			if (t1 > x1 || (t1 == x1 && t0 > x0)) {
2835/ 				q0--;
2836/ 				A_SUB2(y0, y1, t0, t1);
2837/ 			}
2838/ 			/* return remainder */
2839/ 			/* subtract product from x to get remainder */
2840/ 			A_SUB2(t0, t1, x0, x1);
2841/ 			*pmod = (HILO(x1, x0) >> normshift);
2842/
2843/ 			/* return result */
2844/ 			return (q0);
2845/ 		}
2846/ 	}
2847/ }
2848	ENTRY(UDivRem)
2849	pushl	%ebp
2850	pushl	%edi
2851	pushl	%esi
2852	subl	$48, %esp
2853	movl	68(%esp), %edi	/ y,
2854	testl	%edi, %edi	/ tmp63
2855	movl	%eax, 40(%esp)	/ x, x
2856	movl	%edx, 44(%esp)	/ x, x
2857	movl	%edi, %esi	/, tmp62
2858	movl	%edi, %ecx	/ tmp62, tmp63
2859	jne	.LL2
2860	movl	%edx, %eax	/, tmp68
2861	cmpl	64(%esp), %eax	/ y, tmp68
2862	jae	.LL21
2863.LL4:
2864	movl	72(%esp), %ebp	/ pmod,
2865	xorl	%esi, %esi	/ <result>
2866	movl	40(%esp), %eax	/ x, q0
2867	movl	%ecx, %edi	/ <result>, <result>
2868	divl	64(%esp)	/ y
2869	movl	%edx, (%ebp)	/ div_rem,
2870	xorl	%edx, %edx	/ q0
2871	addl	%eax, %esi	/ q0, <result>
2872	movl	$0, 4(%ebp)
2873	adcl	%edx, %edi	/ q0, <result>
2874	addl	$48, %esp
2875	movl	%esi, %eax	/ <result>, <result>
2876	popl	%esi
2877	movl	%edi, %edx	/ <result>, <result>
2878	popl	%edi
2879	popl	%ebp
2880	ret
2881	.align	16
2882.LL2:
2883	movl	44(%esp), %eax	/ x,
2884	xorl	%edx, %edx
2885	cmpl	%esi, %eax	/ tmp62, tmp5
2886	movl	%eax, 32(%esp)	/ tmp5,
2887	movl	%edx, 36(%esp)
2888	jae	.LL6
2889	movl	72(%esp), %esi	/ pmod,
2890	movl	40(%esp), %ebp	/ x,
2891	movl	44(%esp), %ecx	/ x,
2892	movl	%ebp, (%esi)
2893	movl	%ecx, 4(%esi)
2894	xorl	%edi, %edi	/ <result>
2895	xorl	%esi, %esi	/ <result>
2896.LL22:
2897	addl	$48, %esp
2898	movl	%esi, %eax	/ <result>, <result>
2899	popl	%esi
2900	movl	%edi, %edx	/ <result>, <result>
2901	popl	%edi
2902	popl	%ebp
2903	ret
2904	.align	16
2905.LL21:
2906	movl	%edi, %edx	/ tmp63, div_hi
2907	divl	64(%esp)	/ y
2908	movl	%eax, %ecx	/, q1
2909	jmp	.LL4
2910	.align	16
2911.LL6:
2912	movl	$31, %edi	/, tmp87
2913	bsrl	%esi,%edx	/ tmp62, normshift
2914	subl	%edx, %edi	/ normshift, tmp87
2915	movl	%edi, 28(%esp)	/ tmp87,
2916	jne	.LL8
2917	movl	32(%esp), %edx	/, x1
2918	cmpl	%ecx, %edx	/ y1, x1
2919	movl	64(%esp), %edi	/ y, y0
2920	movl	40(%esp), %esi	/ x, x0
2921	ja	.LL10
2922	xorl	%ebp, %ebp	/ q0
2923	cmpl	%edi, %esi	/ y0, x0
2924	jb	.LL11
2925.LL10:
2926	movl	$1, %ebp	/, q0
2927	subl	%edi,%esi	/ y0, x0
2928	sbbl	%ecx,%edx	/ tmp63, x1
2929.LL11:
2930	movl	%edx, %ecx	/ x1, x1
2931	xorl	%edx, %edx	/ x1
2932	xorl	%edi, %edi	/ x0
2933	addl	%esi, %edx	/ x0, x1
2934	adcl	%edi, %ecx	/ x0, x1
2935	movl	72(%esp), %esi	/ pmod,
2936	movl	%edx, (%esi)	/ x1,
2937	movl	%ecx, 4(%esi)	/ x1,
2938	xorl	%edi, %edi	/ <result>
2939	movl	%ebp, %esi	/ q0, <result>
2940	jmp	.LL22
2941	.align	16
2942.LL8:
2943	movb	28(%esp), %cl
2944	movl	64(%esp), %esi	/ y, dt
2945	movl	68(%esp), %edi	/ y, dt
2946	shldl	%esi, %edi	/, dt, dt
2947	sall	%cl, %esi	/, dt
2948	andl	$32, %ecx
2949	jne	.LL23
2950.LL17:
2951	movl	$32, %ecx	/, tmp102
2952	subl	28(%esp), %ecx	/, tmp102
2953	movl	%esi, %ebp	/ dt, y0
2954	movl	32(%esp), %esi
2955	shrl	%cl, %esi	/ tmp102,
2956	movl	%edi, 24(%esp)	/ tmp99,
2957	movb	28(%esp), %cl
2958	movl	%esi, 12(%esp)	/, x2
2959	movl	44(%esp), %edi	/ x, dt
2960	movl	40(%esp), %esi	/ x, dt
2961	shldl	%esi, %edi	/, dt, dt
2962	sall	%cl, %esi	/, dt
2963	andl	$32, %ecx
2964	je	.LL18
2965	movl	%esi, %edi	/ dt, dt
2966	xorl	%esi, %esi	/ dt
2967.LL18:
2968	movl	%edi, %ecx	/ dt,
2969	movl	%edi, %eax	/ tmp2,
2970	movl	%ecx, (%esp)
2971	movl	12(%esp), %edx	/ x2,
2972	divl	24(%esp)
2973	movl	%edx, %ecx	/, x1
2974	xorl	%edi, %edi
2975	movl	%eax, 20(%esp)
2976	movl	%ebp, %eax	/ y0, t0
2977	mull	20(%esp)
2978	cmpl	%ecx, %edx	/ x1, t1
2979	movl	%edi, 4(%esp)
2980	ja	.LL14
2981	je	.LL24
2982.LL15:
2983	movl	%ecx, %edi	/ x1,
2984	subl	%eax,%esi	/ t0, x0
2985	sbbl	%edx,%edi	/ t1,
2986	movl	%edi, %eax	/, x1
2987	movl	%eax, %edx	/ x1, x1
2988	xorl	%eax, %eax	/ x1
2989	xorl	%ebp, %ebp	/ x0
2990	addl	%esi, %eax	/ x0, x1
2991	adcl	%ebp, %edx	/ x0, x1
2992	movb	28(%esp), %cl
2993	shrdl	%edx, %eax	/, x1, x1
2994	shrl	%cl, %edx	/, x1
2995	andl	$32, %ecx
2996	je	.LL16
2997	movl	%edx, %eax	/ x1, x1
2998	xorl	%edx, %edx	/ x1
2999.LL16:
3000	movl	72(%esp), %ecx	/ pmod,
3001	movl	20(%esp), %esi	/, <result>
3002	xorl	%edi, %edi	/ <result>
3003	movl	%eax, (%ecx)	/ x1,
3004	movl	%edx, 4(%ecx)	/ x1,
3005	jmp	.LL22
3006	.align	16
3007.LL24:
3008	cmpl	%esi, %eax	/ x0, t0
3009	jbe	.LL15
3010.LL14:
3011	decl	20(%esp)
3012	subl	%ebp,%eax	/ y0, t0
3013	sbbl	24(%esp),%edx	/, t1
3014	jmp	.LL15
3015.LL23:
3016	movl	%esi, %edi	/ dt, dt
3017	xorl	%esi, %esi	/ dt
3018	jmp	.LL17
3019	SET_SIZE(UDivRem)
3020
3021/*
3022 * Unsigned division without remainder.
3023 */
3024/ uint64_t
3025/ UDiv(uint64_t x, uint64_t y)
3026/ {
3027/ 	if (HI(y) == 0) {
3028/ 		/* simple cases: y is a single uint32_t */
3029/ 		uint32_t	div_hi, div_rem;
3030/ 		uint32_t	q0, q1;
3031/
3032/ 		/* calculate q1 */
3033/ 		if (HI(x) < LO(y)) {
3034/ 			/* result is a single uint32_t, use one division */
3035/ 			q1 = 0;
3036/ 			div_hi = HI(x);
3037/ 		} else {
3038/ 			/* result is a double uint32_t, use two divisions */
3039/ 			A_DIV32(HI(x), 0, LO(y), q1, div_hi);
3040/ 		}
3041/
3042/ 		/* calculate q0 and remainder */
3043/ 		A_DIV32(LO(x), div_hi, LO(y), q0, div_rem);
3044/
3045/ 		/* return result */
3046/ 		return (HILO(q1, q0));
3047/
3048/ 	} else if (HI(x) < HI(y)) {
3049/ 		/* HI(x) < HI(y) => x < y => result is 0 */
3050/
3051/ 		/* return result */
3052/ 		return (0);
3053/
3054/ 	} else {
3055/ 		/*
3056/ 		 * uint64_t by uint64_t division, resulting in a one-uint32_t
3057/ 		 * result
3058/ 		 */
3059/ 		uint32_t		y0, y1;
3060/ 		uint32_t		x1, x0;
3061/ 		uint32_t		q0;
3062/ 		unsigned		normshift;
3063/
3064/ 		/* normalize by shifting x and y so MSB(y) == 1 */
3065/ 		HIBIT(HI(y), normshift);	/* index of highest 1 bit */
3066/ 		normshift = 31 - normshift;
3067/
3068/ 		if (normshift == 0) {
3069/ 			/* no shifting needed, and x < 2*y so q <= 1 */
3070/ 			y1 = HI(y);
3071/ 			y0 = LO(y);
3072/ 			x1 = HI(x);
3073/ 			x0 = LO(x);
3074/
3075/ 			/* if x >= y then q = 1 (note x1 >= y1) */
3076/ 			if (x1 > y1 || x0 >= y0) {
3077/ 				q0 = 1;
3078/ 				/* subtract y from x to get remainder */
3079/ 				/* A_SUB2(y0, y1, x0, x1); */
3080/ 			} else {
3081/ 				q0 = 0;
3082/ 			}
3083/
3084/ 			/* return result */
3085/ 			return (q0);
3086/
3087/ 		} else {
3088/ 			/*
3089/ 			 * the last case: result is one uint32_t, but we need to
3090/ 			 * normalize
3091/ 			 */
3092/ 			uint64_t	dt;
3093/ 			uint32_t		t0, t1, x2;
3094/
3095/ 			/* normalize y */
3096/ 			dt = (y << normshift);
3097/ 			y1 = HI(dt);
3098/ 			y0 = LO(dt);
3099/
3100/ 			/* normalize x (we need 3 uint32_ts!!!) */
3101/ 			x2 = (HI(x) >> (32 - normshift));
3102/ 			dt = (x << normshift);
3103/ 			x1 = HI(dt);
3104/ 			x0 = LO(dt);
3105/
3106/ 			/* estimate q0, and reduce x to a two uint32_t value */
3107/ 			A_DIV32(x1, x2, y1, q0, x1);
3108/
3109/ 			/* adjust q0 down if too high */
3110/ 			/*
3111/ 			 * because of the limited range of x2 we can only be
3112/ 			 * one off
3113/ 			 */
3114/ 			A_MUL32(y0, q0, t0, t1);
3115/ 			if (t1 > x1 || (t1 == x1 && t0 > x0)) {
3116/ 				q0--;
3117/ 			}
3118/ 			/* return result */
3119/ 			return (q0);
3120/ 		}
3121/ 	}
3122/ }
3123	ENTRY(UDiv)
3124	pushl	%ebp
3125	pushl	%edi
3126	pushl	%esi
3127	subl	$40, %esp
3128	movl	%edx, 36(%esp)	/ x, x
3129	movl	60(%esp), %edx	/ y,
3130	testl	%edx, %edx	/ tmp62
3131	movl	%eax, 32(%esp)	/ x, x
3132	movl	%edx, %ecx	/ tmp61, tmp62
3133	movl	%edx, %eax	/, tmp61
3134	jne	.LL26
3135	movl	36(%esp), %esi	/ x,
3136	cmpl	56(%esp), %esi	/ y, tmp67
3137	movl	%esi, %eax	/, tmp67
3138	movl	%esi, %edx	/ tmp67, div_hi
3139	jb	.LL28
3140	movl	%ecx, %edx	/ tmp62, div_hi
3141	divl	56(%esp)	/ y
3142	movl	%eax, %ecx	/, q1
3143.LL28:
3144	xorl	%esi, %esi	/ <result>
3145	movl	%ecx, %edi	/ <result>, <result>
3146	movl	32(%esp), %eax	/ x, q0
3147	xorl	%ecx, %ecx	/ q0
3148	divl	56(%esp)	/ y
3149	addl	%eax, %esi	/ q0, <result>
3150	adcl	%ecx, %edi	/ q0, <result>
3151.LL25:
3152	addl	$40, %esp
3153	movl	%esi, %eax	/ <result>, <result>
3154	popl	%esi
3155	movl	%edi, %edx	/ <result>, <result>
3156	popl	%edi
3157	popl	%ebp
3158	ret
3159	.align	16
3160.LL26:
3161	movl	36(%esp), %esi	/ x,
3162	xorl	%edi, %edi
3163	movl	%esi, 24(%esp)	/ tmp1,
3164	movl	%edi, 28(%esp)
3165	xorl	%esi, %esi	/ <result>
3166	xorl	%edi, %edi	/ <result>
3167	cmpl	%eax, 24(%esp)	/ tmp61,
3168	jb	.LL25
3169	bsrl	%eax,%ebp	/ tmp61, normshift
3170	movl	$31, %eax	/, tmp85
3171	subl	%ebp, %eax	/ normshift, normshift
3172	jne	.LL32
3173	movl	24(%esp), %eax	/, x1
3174	cmpl	%ecx, %eax	/ tmp62, x1
3175	movl	56(%esp), %esi	/ y, y0
3176	movl	32(%esp), %edx	/ x, x0
3177	ja	.LL34
3178	xorl	%eax, %eax	/ q0
3179	cmpl	%esi, %edx	/ y0, x0
3180	jb	.LL35
3181.LL34:
3182	movl	$1, %eax	/, q0
3183.LL35:
3184	movl	%eax, %esi	/ q0, <result>
3185	xorl	%edi, %edi	/ <result>
3186.LL45:
3187	addl	$40, %esp
3188	movl	%esi, %eax	/ <result>, <result>
3189	popl	%esi
3190	movl	%edi, %edx	/ <result>, <result>
3191	popl	%edi
3192	popl	%ebp
3193	ret
3194	.align	16
3195.LL32:
3196	movb	%al, %cl
3197	movl	56(%esp), %esi	/ y,
3198	movl	60(%esp), %edi	/ y,
3199	shldl	%esi, %edi
3200	sall	%cl, %esi
3201	andl	$32, %ecx
3202	jne	.LL43
3203.LL40:
3204	movl	$32, %ecx	/, tmp96
3205	subl	%eax, %ecx	/ normshift, tmp96
3206	movl	%edi, %edx
3207	movl	%edi, 20(%esp)	/, dt
3208	movl	24(%esp), %ebp	/, x2
3209	xorl	%edi, %edi
3210	shrl	%cl, %ebp	/ tmp96, x2
3211	movl	%esi, 16(%esp)	/, dt
3212	movb	%al, %cl
3213	movl	32(%esp), %esi	/ x, dt
3214	movl	%edi, 12(%esp)
3215	movl	36(%esp), %edi	/ x, dt
3216	shldl	%esi, %edi	/, dt, dt
3217	sall	%cl, %esi	/, dt
3218	andl	$32, %ecx
3219	movl	%edx, 8(%esp)
3220	je	.LL41
3221	movl	%esi, %edi	/ dt, dt
3222	xorl	%esi, %esi	/ dt
3223.LL41:
3224	xorl	%ecx, %ecx
3225	movl	%edi, %eax	/ tmp1,
3226	movl	%ebp, %edx	/ x2,
3227	divl	8(%esp)
3228	movl	%edx, %ebp	/, x1
3229	movl	%ecx, 4(%esp)
3230	movl	%eax, %ecx	/, q0
3231	movl	16(%esp), %eax	/ dt,
3232	mull	%ecx	/ q0
3233	cmpl	%ebp, %edx	/ x1, t1
3234	movl	%edi, (%esp)
3235	movl	%esi, %edi	/ dt, x0
3236	ja	.LL38
3237	je	.LL44
3238.LL39:
3239	movl	%ecx, %esi	/ q0, <result>
3240.LL46:
3241	xorl	%edi, %edi	/ <result>
3242	jmp	.LL45
3243.LL44:
3244	cmpl	%edi, %eax	/ x0, t0
3245	jbe	.LL39
3246.LL38:
3247	decl	%ecx		/ q0
3248	movl	%ecx, %esi	/ q0, <result>
3249	jmp	.LL46
3250.LL43:
3251	movl	%esi, %edi
3252	xorl	%esi, %esi
3253	jmp	.LL40
3254	SET_SIZE(UDiv)
3255
3256/*
3257 * __udivdi3
3258 *
3259 * Perform division of two unsigned 64-bit quantities, returning the
3260 * quotient in %edx:%eax.
3261 */
3262	ENTRY(__udivdi3)
3263	movl	4(%esp), %eax	/ x, x
3264	movl	8(%esp), %edx	/ x, x
3265	pushl	16(%esp)	/ y
3266	pushl	16(%esp)
3267	call	UDiv
3268	addl	$8, %esp
3269	ret
3270	SET_SIZE(__udivdi3)
3271
3272/*
3273 * __umoddi3
3274 *
3275 * Perform division of two unsigned 64-bit quantities, returning the
3276 * remainder in %edx:%eax.
3277 */
3278	ENTRY(__umoddi3)
3279	subl	$12, %esp
3280	movl	%esp, %ecx	/, tmp65
3281	movl	16(%esp), %eax	/ x, x
3282	movl	20(%esp), %edx	/ x, x
3283	pushl	%ecx		/ tmp65
3284	pushl	32(%esp)	/ y
3285	pushl	32(%esp)
3286	call	UDivRem
3287	movl	12(%esp), %eax	/ rem, rem
3288	movl	16(%esp), %edx	/ rem, rem
3289	addl	$24, %esp
3290	ret
3291	SET_SIZE(__umoddi3)
3292
3293/*
3294 * __divdi3
3295 *
3296 * Perform division of two signed 64-bit quantities, returning the
3297 * quotient in %edx:%eax.
3298 */
3299/ int64_t
3300/ __divdi3(int64_t x, int64_t y)
3301/ {
3302/ 	int		negative;
3303/ 	uint64_t	xt, yt, r;
3304/
3305/ 	if (x < 0) {
3306/ 		xt = -(uint64_t) x;
3307/ 		negative = 1;
3308/ 	} else {
3309/ 		xt = x;
3310/ 		negative = 0;
3311/ 	}
3312/ 	if (y < 0) {
3313/ 		yt = -(uint64_t) y;
3314/ 		negative ^= 1;
3315/ 	} else {
3316/ 		yt = y;
3317/ 	}
3318/ 	r = UDiv(xt, yt);
3319/ 	return (negative ? (int64_t) - r : r);
3320/ }
3321	ENTRY(__divdi3)
3322	pushl	%ebp
3323	pushl	%edi
3324	pushl	%esi
3325	subl	$8, %esp
3326	movl	28(%esp), %edx	/ x, x
3327	testl	%edx, %edx	/ x
3328	movl	24(%esp), %eax	/ x, x
3329	movl	32(%esp), %esi	/ y, y
3330	movl	36(%esp), %edi	/ y, y
3331	js	.LL55
3332	xorl	%ebp, %ebp	/ negative
3333	testl	%edi, %edi	/ y
3334	movl	%eax, (%esp)	/ x, xt
3335	movl	%edx, 4(%esp)	/ x, xt
3336	movl	%esi, %eax	/ y, yt
3337	movl	%edi, %edx	/ y, yt
3338	js	.LL56
3339.LL53:
3340	pushl	%edx		/ yt
3341	pushl	%eax		/ yt
3342	movl	8(%esp), %eax	/ xt, xt
3343	movl	12(%esp), %edx	/ xt, xt
3344	call	UDiv
3345	popl	%ecx
3346	testl	%ebp, %ebp	/ negative
3347	popl	%esi
3348	je	.LL54
3349	negl	%eax		/ r
3350	adcl	$0, %edx	/, r
3351	negl	%edx		/ r
3352.LL54:
3353	addl	$8, %esp
3354	popl	%esi
3355	popl	%edi
3356	popl	%ebp
3357	ret
3358	.align	16
3359.LL55:
3360	negl	%eax		/ x
3361	adcl	$0, %edx	/, x
3362	negl	%edx		/ x
3363	testl	%edi, %edi	/ y
3364	movl	%eax, (%esp)	/ x, xt
3365	movl	%edx, 4(%esp)	/ x, xt
3366	movl	$1, %ebp	/, negative
3367	movl	%esi, %eax	/ y, yt
3368	movl	%edi, %edx	/ y, yt
3369	jns	.LL53
3370	.align	16
3371.LL56:
3372	negl	%eax		/ yt
3373	adcl	$0, %edx	/, yt
3374	negl	%edx		/ yt
3375	xorl	$1, %ebp	/, negative
3376	jmp	.LL53
3377	SET_SIZE(__divdi3)
3378
3379/*
3380 * __moddi3
3381 *
 * Perform division of two signed 64-bit quantities, returning the
 * remainder in %edx:%eax.
3384 */
3385/ int64_t
3386/ __moddi3(int64_t x, int64_t y)
3387/ {
3388/ 	uint64_t	xt, yt, rem;
3389/
3390/ 	if (x < 0) {
3391/ 		xt = -(uint64_t) x;
3392/ 	} else {
3393/ 		xt = x;
3394/ 	}
3395/ 	if (y < 0) {
3396/ 		yt = -(uint64_t) y;
3397/ 	} else {
3398/ 		yt = y;
3399/ 	}
3400/ 	(void) UDivRem(xt, yt, &rem);
3401/ 	return (x < 0 ? (int64_t) - rem : rem);
3402/ }
3403	ENTRY(__moddi3)
3404	pushl	%edi
3405	pushl	%esi
3406	subl	$20, %esp
3407	movl	36(%esp), %ecx	/ x,
3408	movl	32(%esp), %esi	/ x,
3409	movl	36(%esp), %edi	/ x,
3410	testl	%ecx, %ecx
3411	movl	40(%esp), %eax	/ y, y
3412	movl	44(%esp), %edx	/ y, y
3413	movl	%esi, (%esp)	/, xt
3414	movl	%edi, 4(%esp)	/, xt
3415	js	.LL63
3416	testl	%edx, %edx	/ y
3417	movl	%eax, %esi	/ y, yt
3418	movl	%edx, %edi	/ y, yt
3419	js	.LL64
3420.LL61:
3421	leal	8(%esp), %eax	/, tmp66
3422	pushl	%eax		/ tmp66
3423	pushl	%edi		/ yt
3424	pushl	%esi		/ yt
3425	movl	12(%esp), %eax	/ xt, xt
3426	movl	16(%esp), %edx	/ xt, xt
3427	call	UDivRem
3428	addl	$12, %esp
3429	movl	36(%esp), %edi	/ x,
3430	testl	%edi, %edi
3431	movl	8(%esp), %eax	/ rem, rem
3432	movl	12(%esp), %edx	/ rem, rem
3433	js	.LL65
3434	addl	$20, %esp
3435	popl	%esi
3436	popl	%edi
3437	ret
3438	.align	16
3439.LL63:
3440	negl	%esi
3441	adcl	$0, %edi
3442	negl	%edi
3443	testl	%edx, %edx	/ y
3444	movl	%esi, (%esp)	/, xt
3445	movl	%edi, 4(%esp)	/, xt
3446	movl	%eax, %esi	/ y, yt
3447	movl	%edx, %edi	/ y, yt
3448	jns	.LL61
3449	.align	16
3450.LL64:
3451	negl	%esi		/ yt
3452	adcl	$0, %edi	/, yt
3453	negl	%edi		/ yt
3454	jmp	.LL61
3455	.align	16
3456.LL65:
3457	negl	%eax		/ rem
3458	adcl	$0, %edx	/, rem
3459	addl	$20, %esp
3460	popl	%esi
3461	negl	%edx		/ rem
3462	popl	%edi
3463	ret
3464	SET_SIZE(__moddi3)
3465
3466/*
3467 * __udiv64
3468 *
3469 * Perform division of two unsigned 64-bit quantities, returning the
 * quotient in %edx:%eax.  __udiv64 pops the arguments on return.
3471 */
3472	ENTRY(__udiv64)
3473	movl	4(%esp), %eax	/ x, x
3474	movl	8(%esp), %edx	/ x, x
3475	pushl	16(%esp)	/ y
3476	pushl	16(%esp)
3477	call	UDiv
3478	addl	$8, %esp
3479	ret     $16
3480	SET_SIZE(__udiv64)
3481
3482/*
3483 * __urem64
3484 *
3485 * Perform division of two unsigned 64-bit quantities, returning the
3486 * remainder in %edx:%eax.  __urem64 pops the arguments on return
3487 */
3488	ENTRY(__urem64)
3489	subl	$12, %esp
3490	movl	%esp, %ecx	/, tmp65
3491	movl	16(%esp), %eax	/ x, x
3492	movl	20(%esp), %edx	/ x, x
3493	pushl	%ecx		/ tmp65
3494	pushl	32(%esp)	/ y
3495	pushl	32(%esp)
3496	call	UDivRem
3497	movl	12(%esp), %eax	/ rem, rem
3498	movl	16(%esp), %edx	/ rem, rem
3499	addl	$24, %esp
3500	ret	$16
3501	SET_SIZE(__urem64)
3502
3503/*
3504 * __div64
3505 *
3506 * Perform division of two signed 64-bit quantities, returning the
3507 * quotient in %edx:%eax.  __div64 pops the arguments on return.
3508 */
3509/ int64_t
3510/ __div64(int64_t x, int64_t y)
3511/ {
3512/ 	int		negative;
3513/ 	uint64_t	xt, yt, r;
3514/
3515/ 	if (x < 0) {
3516/ 		xt = -(uint64_t) x;
3517/ 		negative = 1;
3518/ 	} else {
3519/ 		xt = x;
3520/ 		negative = 0;
3521/ 	}
3522/ 	if (y < 0) {
3523/ 		yt = -(uint64_t) y;
3524/ 		negative ^= 1;
3525/ 	} else {
3526/ 		yt = y;
3527/ 	}
3528/ 	r = UDiv(xt, yt);
3529/ 	return (negative ? (int64_t) - r : r);
3530/ }
3531	ENTRY(__div64)
3532	pushl	%ebp
3533	pushl	%edi
3534	pushl	%esi
3535	subl	$8, %esp
3536	movl	28(%esp), %edx	/ x, x
3537	testl	%edx, %edx	/ x
3538	movl	24(%esp), %eax	/ x, x
3539	movl	32(%esp), %esi	/ y, y
3540	movl	36(%esp), %edi	/ y, y
3541	js	.LL84
3542	xorl	%ebp, %ebp	/ negative
3543	testl	%edi, %edi	/ y
3544	movl	%eax, (%esp)	/ x, xt
3545	movl	%edx, 4(%esp)	/ x, xt
3546	movl	%esi, %eax	/ y, yt
3547	movl	%edi, %edx	/ y, yt
3548	js	.LL85
3549.LL82:
3550	pushl	%edx		/ yt
3551	pushl	%eax		/ yt
3552	movl	8(%esp), %eax	/ xt, xt
3553	movl	12(%esp), %edx	/ xt, xt
3554	call	UDiv
3555	popl	%ecx
3556	testl	%ebp, %ebp	/ negative
3557	popl	%esi
3558	je	.LL83
3559	negl	%eax		/ r
3560	adcl	$0, %edx	/, r
3561	negl	%edx		/ r
3562.LL83:
3563	addl	$8, %esp
3564	popl	%esi
3565	popl	%edi
3566	popl	%ebp
3567	ret	$16
3568	.align	16
3569.LL84:
3570	negl	%eax		/ x
3571	adcl	$0, %edx	/, x
3572	negl	%edx		/ x
3573	testl	%edi, %edi	/ y
3574	movl	%eax, (%esp)	/ x, xt
3575	movl	%edx, 4(%esp)	/ x, xt
3576	movl	$1, %ebp	/, negative
3577	movl	%esi, %eax	/ y, yt
3578	movl	%edi, %edx	/ y, yt
3579	jns	.LL82
3580	.align	16
3581.LL85:
3582	negl	%eax		/ yt
3583	adcl	$0, %edx	/, yt
3584	negl	%edx		/ yt
3585	xorl	$1, %ebp	/, negative
3586	jmp	.LL82
3587	SET_SIZE(__div64)
3588
3589/*
3590 * __rem64
3591 *
3592 * Perform division of two signed 64-bit quantities, returning the
3593 * remainder in %edx:%eax.  __rem64 pops the arguments on return.
3594 */
3595/ int64_t
3596/ __rem64(int64_t x, int64_t y)
3597/ {
3598/ 	uint64_t	xt, yt, rem;
3599/
3600/ 	if (x < 0) {
3601/ 		xt = -(uint64_t) x;
3602/ 	} else {
3603/ 		xt = x;
3604/ 	}
3605/ 	if (y < 0) {
3606/ 		yt = -(uint64_t) y;
3607/ 	} else {
3608/ 		yt = y;
3609/ 	}
3610/ 	(void) UDivRem(xt, yt, &rem);
3611/ 	return (x < 0 ? (int64_t) - rem : rem);
3612/ }
3613	ENTRY(__rem64)
3614	pushl	%edi
3615	pushl	%esi
3616	subl	$20, %esp
3617	movl	36(%esp), %ecx	/ x,
3618	movl	32(%esp), %esi	/ x,
3619	movl	36(%esp), %edi	/ x,
3620	testl	%ecx, %ecx
3621	movl	40(%esp), %eax	/ y, y
3622	movl	44(%esp), %edx	/ y, y
3623	movl	%esi, (%esp)	/, xt
3624	movl	%edi, 4(%esp)	/, xt
3625	js	.LL92
3626	testl	%edx, %edx	/ y
3627	movl	%eax, %esi	/ y, yt
3628	movl	%edx, %edi	/ y, yt
3629	js	.LL93
3630.LL90:
3631	leal	8(%esp), %eax	/, tmp66
3632	pushl	%eax		/ tmp66
3633	pushl	%edi		/ yt
3634	pushl	%esi		/ yt
3635	movl	12(%esp), %eax	/ xt, xt
3636	movl	16(%esp), %edx	/ xt, xt
3637	call	UDivRem
3638	addl	$12, %esp
3639	movl	36(%esp), %edi	/ x,
3640	testl	%edi, %edi
3641	movl	8(%esp), %eax	/ rem, rem
3642	movl	12(%esp), %edx	/ rem, rem
3643	js	.LL94
3644	addl	$20, %esp
3645	popl	%esi
3646	popl	%edi
3647	ret	$16
3648	.align	16
3649.LL92:
3650	negl	%esi
3651	adcl	$0, %edi
3652	negl	%edi
3653	testl	%edx, %edx	/ y
3654	movl	%esi, (%esp)	/, xt
3655	movl	%edi, 4(%esp)	/, xt
3656	movl	%eax, %esi	/ y, yt
3657	movl	%edx, %edi	/ y, yt
3658	jns	.LL90
3659	.align	16
3660.LL93:
3661	negl	%esi		/ yt
3662	adcl	$0, %edi	/, yt
3663	negl	%edi		/ yt
3664	jmp	.LL90
3665	.align	16
3666.LL94:
3667	negl	%eax		/ rem
3668	adcl	$0, %edx	/, rem
3669	addl	$20, %esp
3670	popl	%esi
3671	negl	%edx		/ rem
3672	popl	%edi
3673	ret	$16
3674	SET_SIZE(__rem64)
3675
3676/*
3677 * __udivrem64
3678 *
3679 * Perform division of two unsigned 64-bit quantities, returning the
3680 * quotient in %edx:%eax, and the remainder in %ecx:%esi.  __udivrem64
3681 * pops the arguments on return.
3682 */
3683	ENTRY(__udivrem64)
3684	subl	$12, %esp
3685	movl	%esp, %ecx	/, tmp64
3686	movl	16(%esp), %eax	/ x, x
3687	movl	20(%esp), %edx	/ x, x
3688	pushl	%ecx		/ tmp64
3689	pushl	32(%esp)	/ y
3690	pushl	32(%esp)
3691	call	UDivRem
3692	movl	16(%esp), %ecx	/ rem, tmp63
3693	movl	12(%esp), %esi	/ rem
3694	addl	$24, %esp
3695	ret	$16
3696	SET_SIZE(__udivrem64)
3697
3698/*
3699 * Signed division with remainder.
3700 */
3701/ int64_t
3702/ SDivRem(int64_t x, int64_t y, int64_t * pmod)
3703/ {
3704/ 	int		negative;
3705/ 	uint64_t	xt, yt, r, rem;
3706/
3707/ 	if (x < 0) {
3708/ 		xt = -(uint64_t) x;
3709/ 		negative = 1;
3710/ 	} else {
3711/ 		xt = x;
3712/ 		negative = 0;
3713/ 	}
3714/ 	if (y < 0) {
3715/ 		yt = -(uint64_t) y;
3716/ 		negative ^= 1;
3717/ 	} else {
3718/ 		yt = y;
3719/ 	}
3720/ 	r = UDivRem(xt, yt, &rem);
3721/ 	*pmod = (x < 0 ? (int64_t) - rem : rem);
3722/ 	return (negative ? (int64_t) - r : r);
3723/ }
3724	ENTRY(SDivRem)
3725	pushl	%ebp
3726	pushl	%edi
3727	pushl	%esi
3728	subl	$24, %esp
3729	testl	%edx, %edx	/ x
3730	movl	%edx, %edi	/ x, x
3731	js	.LL73
3732	movl	44(%esp), %esi	/ y,
3733	xorl	%ebp, %ebp	/ negative
3734	testl	%esi, %esi
3735	movl	%edx, 12(%esp)	/ x, xt
3736	movl	%eax, 8(%esp)	/ x, xt
3737	movl	40(%esp), %edx	/ y, yt
3738	movl	44(%esp), %ecx	/ y, yt
3739	js	.LL74
3740.LL70:
3741	leal	16(%esp), %eax	/, tmp70
3742	pushl	%eax		/ tmp70
3743	pushl	%ecx		/ yt
3744	pushl	%edx		/ yt
3745	movl	20(%esp), %eax	/ xt, xt
3746	movl	24(%esp), %edx	/ xt, xt
3747	call	UDivRem
3748	movl	%edx, 16(%esp)	/, r
3749	movl	%eax, 12(%esp)	/, r
3750	addl	$12, %esp
3751	testl	%edi, %edi	/ x
3752	movl	16(%esp), %edx	/ rem, rem
3753	movl	20(%esp), %ecx	/ rem, rem
3754	js	.LL75
3755.LL71:
3756	movl	48(%esp), %edi	/ pmod, pmod
3757	testl	%ebp, %ebp	/ negative
3758	movl	%edx, (%edi)	/ rem,* pmod
3759	movl	%ecx, 4(%edi)	/ rem,
3760	movl	(%esp), %eax	/ r, r
3761	movl	4(%esp), %edx	/ r, r
3762	je	.LL72
3763	negl	%eax		/ r
3764	adcl	$0, %edx	/, r
3765	negl	%edx		/ r
3766.LL72:
3767	addl	$24, %esp
3768	popl	%esi
3769	popl	%edi
3770	popl	%ebp
3771	ret
3772	.align	16
3773.LL73:
3774	negl	%eax
3775	adcl	$0, %edx
3776	movl	44(%esp), %esi	/ y,
3777	negl	%edx
3778	testl	%esi, %esi
3779	movl	%edx, 12(%esp)	/, xt
3780	movl	%eax, 8(%esp)	/, xt
3781	movl	$1, %ebp	/, negative
3782	movl	40(%esp), %edx	/ y, yt
3783	movl	44(%esp), %ecx	/ y, yt
3784	jns	.LL70
3785	.align	16
3786.LL74:
3787	negl	%edx		/ yt
3788	adcl	$0, %ecx	/, yt
3789	negl	%ecx		/ yt
3790	xorl	$1, %ebp	/, negative
3791	jmp	.LL70
3792	.align	16
3793.LL75:
3794	negl	%edx		/ rem
3795	adcl	$0, %ecx	/, rem
3796	negl	%ecx		/ rem
3797	jmp	.LL71
3798	SET_SIZE(SDivRem)
3799
3800/*
3801 * __divrem64
3802 *
3803 * Perform division of two signed 64-bit quantities, returning the
3804 * quotient in %edx:%eax, and the remainder in %ecx:%esi.  __divrem64
3805 * pops the arguments on return.
3806 */
3807	ENTRY(__divrem64)
3808	subl	$20, %esp
3809	movl	%esp, %ecx	/, tmp64
3810	movl	24(%esp), %eax	/ x, x
3811	movl	28(%esp), %edx	/ x, x
3812	pushl	%ecx		/ tmp64
3813	pushl	40(%esp)	/ y
3814	pushl	40(%esp)
3815	call	SDivRem
3816	movl	16(%esp), %ecx
3817	movl	12(%esp),%esi	/ rem
3818	addl	$32, %esp
3819	ret	$16
3820	SET_SIZE(__divrem64)
3821
3822#endif	/* __lint */
3823#endif	/* __i386 */
3824
#if defined(notused)
#if defined(__lint)
/* ARGSUSED */
void
load_pte64(uint64_t *pte, uint64_t pte_value)
{
}
#else	/* __lint */
	/*
	 * void load_pte64(uint64_t *pte, uint64_t pte_value)
	 *
	 * Store a 64-bit value through pte as two 32-bit writes, upper
	 * half first and lower half last.
	 * NOTE(review): the store order is presumably deliberate (lower
	 * word written last); it is preserved here -- confirm before
	 * changing it.
	 */
	.globl load_pte64
load_pte64:
	movl	4(%esp), %eax		/* %eax = pte */
	movl	12(%esp), %edx		/* %edx = upper 32 bits of pte_value */
	movl	8(%esp), %ecx		/* %ecx = lower 32 bits of pte_value */
	movl	%edx, 4(%eax)		/* upper half first ... */
	movl	%ecx, (%eax)		/* ... then the lower half */
	ret
#endif	/* __lint */
#endif	/* notused */
3842
#if defined(__lint)

/*ARGSUSED*/
void
scan_memory(caddr_t addr, size_t size)
{
}

#else	/* __lint */

/*
 * void scan_memory(caddr_t addr, size_t size)
 *
 * Read through [addr, addr + size) one native word at a time with a
 * rep-prefixed lods; the loaded values are discarded.  A size smaller
 * than one word reads nothing.
 */

#if defined(__amd64)

	ENTRY(scan_memory)
	shrq	$3, %rsi		/* bytes -> quadwords */
	jz	1f			/* nothing to read */
	movq	%rsi, %rcx		/* %rcx = rep count */
	movq	%rdi, %rsi		/* %rsi = addr, the lodsq source */
	rep lodsq			/* walk the range */
1:
	ret
	SET_SIZE(scan_memory)

#elif defined(__i386)

	ENTRY(scan_memory)
	pushl	%ecx
	pushl	%esi
	movl	16(%esp), %ecx		/* %ecx = size (2nd arg) */
	shrl	$2, %ecx		/* bytes -> 32-bit words */
	jz	1f			/* nothing to read */
	movl	12(%esp), %esi		/* %esi = addr, the lodsl source */
	.byte	0xf3			/* rep prefix, emitted by hand */
	lodsl
1:
	popl	%esi
	popl	%ecx
	ret
	SET_SIZE(scan_memory)

#endif	/* __i386 */
#endif	/* __lint */
3883
3884
#if defined(__lint)

/*ARGSUSED */
int
lowbit(ulong_t i)
{
	return (0);
}

#else	/* __lint */

/*
 * int lowbit(ulong_t i)
 *
 * Returns one more than the bit index that bsf reports for i.  %eax is
 * preloaded with -1 so that the final increment yields 0 when bsf does
 * not write its destination.
 * NOTE(review): the i == 0 result therefore relies on bsf leaving the
 * destination untouched for a zero source -- confirm against the CPU
 * manuals for the processors this must run on.
 */

#if defined(__amd64)

	ENTRY(lowbit)
	movl	$-1, %eax		/* default if bsf writes nothing */
	bsfq	%rdi, %rax		/* %rax = index of lowest set bit */
	incl	%eax			/* make the result 1-based */
	ret
	SET_SIZE(lowbit)

#elif defined(__i386)

	ENTRY(lowbit)
	movl	$-1, %eax		/* default if bsf writes nothing */
	bsfl	4(%esp), %eax		/* %eax = index of lowest set bit */
	incl	%eax			/* make the result 1-based */
	ret
	SET_SIZE(lowbit)

#endif	/* __i386 */
#endif	/* __lint */
3914
#if defined(__lint)

/*ARGSUSED*/
int
highbit(ulong_t i)
{
	return (0);
}

#else	/* __lint */

/*
 * int highbit(ulong_t i)
 *
 * Returns one more than the bit index that bsr reports for i.  %eax is
 * preloaded with -1 so that the final increment yields 0 when bsr does
 * not write its destination.
 * NOTE(review): the i == 0 result therefore relies on bsr leaving the
 * destination untouched for a zero source -- confirm against the CPU
 * manuals for the processors this must run on.
 */

#if defined(__amd64)

	ENTRY(highbit)
	movl	$-1, %eax		/* default if bsr writes nothing */
	bsrq	%rdi, %rax		/* %rax = index of highest set bit */
	incl	%eax			/* make the result 1-based */
	ret
	SET_SIZE(highbit)

#elif defined(__i386)

	ENTRY(highbit)
	movl	$-1, %eax		/* default if bsr writes nothing */
	bsrl	4(%esp), %eax		/* %eax = index of highest set bit */
	incl	%eax			/* make the result 1-based */
	ret
	SET_SIZE(highbit)

#endif	/* __i386 */
#endif	/* __lint */
3944
#if defined(__lint)

/*ARGSUSED*/
uint64_t
rdmsr(uint_t r, uint64_t *mtr)
{
	return (0);
}

/*ARGSUSED*/
void
wrmsr(uint_t r, const uint64_t *mtr)
{
}

void
invalidate_cache(void)
{
}

#else  /* __lint */

/*
 * uint64_t rdmsr(uint_t r, uint64_t *mtr)
 *	Read model-specific register r and store its 64-bit value in *mtr.
 *	The value is also left in the return register(s): %rax on amd64,
 *	%edx:%eax on i386.
 *
 * void wrmsr(uint_t r, const uint64_t *mtr)
 *	Write the 64-bit value at *mtr to model-specific register r.
 *
 * void invalidate_cache(void)
 *	Execute wbinvd.
 */

#if defined(__amd64)

	ENTRY(rdmsr)
	movl	%edi, %ecx		/* %ecx = MSR number */
	rdmsr				/* %edx:%eax = MSR value */
	movl	%edx, 4(%rsi)		/* upper half of *mtr */
	movl	%eax, (%rsi)		/* lower half of *mtr */
	shlq	$32, %rdx		/* combine the halves ... */
	orq	%rdx, %rax		/* ... into the %rax return value */
	ret
	SET_SIZE(rdmsr)

	ENTRY(wrmsr)
	movl	%edi, %ecx		/* %ecx = MSR number */
	movl	4(%rsi), %edx		/* %edx = upper half of *mtr */
	movl	(%rsi), %eax		/* %eax = lower half of *mtr */
	wrmsr
	ret
	SET_SIZE(wrmsr)

#elif defined(__i386)

	ENTRY(rdmsr)
	movl	4(%esp), %ecx		/* %ecx = MSR number */
	rdmsr				/* %edx:%eax = MSR value */
	movl	8(%esp), %ecx		/* %ecx = mtr */
	movl	%edx, 4(%ecx)		/* upper half of *mtr */
	movl	%eax, (%ecx)		/* lower half of *mtr */
	ret
	SET_SIZE(rdmsr)

	ENTRY(wrmsr)
	movl	8(%esp), %ecx		/* %ecx = mtr */
	movl	4(%ecx), %edx		/* %edx = upper half of *mtr */
	movl	(%ecx), %eax		/* %eax = lower half of *mtr */
	movl	4(%esp), %ecx		/* %ecx = MSR number */
	wrmsr
	ret
	SET_SIZE(wrmsr)

#endif	/* __i386 */

	ENTRY(invalidate_cache)
	wbinvd
	ret
	SET_SIZE(invalidate_cache)

#endif	/* __lint */
4011
#if defined(__lint)

/*ARGSUSED*/
void getcregs(struct cregs *crp)
{}

#else	/* __lint */

/*
 * void getcregs(struct cregs *crp)
 *
 * Snapshot the descriptor-table registers, the task register and the
 * control registers of the current CPU into *crp.  The amd64 version
 * additionally records the KGSBASE and EFER MSRs.
 */

#if defined(__amd64)

/*
 * Read MSR r and store its 64-bit value at off(d).
 * Clobbers %eax, %ecx and %edx.
 */
#define	GETMSR(r, off, d)	\
	movl	$r, %ecx;	\
	rdmsr;			\
	movl	%eax, off(d);	\
	movl	%edx, off+4(d)

	ENTRY_NP(getcregs)
	xorl	%eax, %eax
	/*
	 * Each slot is pre-zeroed (or its tail is) because the store
	 * instruction that follows writes fewer bytes than the slot holds.
	 */
	movq	%rax, CREG_GDT+8(%rdi)
	sgdt	CREG_GDT(%rdi)		/* 10 bytes */
	movq	%rax, CREG_IDT+8(%rdi)
	sidt	CREG_IDT(%rdi)		/* 10 bytes */
	movq	%rax, CREG_LDT(%rdi)
	sldt	CREG_LDT(%rdi)		/* 2 bytes */
	movq	%rax, CREG_TASKR(%rdi)
	str	CREG_TASKR(%rdi)	/* 2 bytes */
	movq	%cr0, %rax
	movq	%rax, CREG_CR0(%rdi)	/* cr0 */
	movq	%cr2, %rax
	movq	%rax, CREG_CR2(%rdi)	/* cr2 */
	movq	%cr3, %rax
	movq	%rax, CREG_CR3(%rdi)	/* cr3 */
	movq	%cr4, %rax
	/* cr4 goes in its own slot; it used to be written over CREG_CR8 */
	movq	%rax, CREG_CR4(%rdi)	/* cr4 */
	movq	%cr8, %rax
	movq	%rax, CREG_CR8(%rdi)	/* cr8 */
	GETMSR(MSR_AMD_KGSBASE, CREG_KGSBASE, %rdi)
	GETMSR(MSR_AMD_EFER, CREG_EFER, %rdi)
	/*
	 * Return to the caller; without this the routine ran off its end
	 * into whatever code follows it in the text section.
	 */
	ret
	SET_SIZE(getcregs)

#undef GETMSR

#elif defined(__i386)

	ENTRY_NP(getcregs)
	movl	4(%esp), %edx		/* %edx = crp */
	movw	$0, CREG_GDT+6(%edx)	/* zero the bytes past the 6 stored */
	movw	$0, CREG_IDT+6(%edx)
	sgdt	CREG_GDT(%edx)		/* gdt */
	sidt	CREG_IDT(%edx)		/* idt */
	sldt	CREG_LDT(%edx)		/* ldt */
	str	CREG_TASKR(%edx)	/* task */
	movl	%cr0, %eax
	movl	%eax, CREG_CR0(%edx)	/* cr0 */
	movl	%cr2, %eax
	movl	%eax, CREG_CR2(%edx)	/* cr2 */
	movl	%cr3, %eax
	movl	%eax, CREG_CR3(%edx)	/* cr3 */
	/* %cr4 is only read when x86_feature has X86_LARGEPAGE set */
	testl	$X86_LARGEPAGE, x86_feature
	jz	.nocr4
	movl	%cr4, %eax
	movl	%eax, CREG_CR4(%edx)	/* cr4 */
	jmp	.skip
.nocr4:
	movl	$0, CREG_CR4(%edx)	/* otherwise record cr4 as 0 */
.skip:
	ret
	SET_SIZE(getcregs)

#endif	/* __i386 */
#endif	/* __lint */
4083
4084
4085/*
4086 * A panic trigger is a word which is updated atomically and can only be set
4087 * once.  We atomically store 0xDEFACEDD and load the old value.  If the
4088 * previous value was 0, we succeed and return 1; otherwise return 0.
4089 * This allows a partially corrupt trigger to still trigger correctly.  DTrace
4090 * has its own version of this function to allow it to panic correctly from
4091 * probe context.
4092 */
#if defined(__lint)

/*ARGSUSED*/
int
panic_trigger(int *tp)
{
	return (0);
}

/*ARGSUSED*/
int
dtrace_panic_trigger(int *tp)
{
	return (0);
}

#else	/* __lint */

#if defined(__amd64)

	ENTRY_NP(panic_trigger)
	xorl	%eax, %eax
	movl	$0xdefacedd, %edx	/* value to plant in the trigger */
	lock
	  xchgl	%edx, (%rdi)		/* %edx = previous *tp */
	cmpl	$0, %edx
	je	1f			/* it was 0: we set it first */
	movl	$0, %eax		/* already set: return (0) */
	ret
1:	movl	$1, %eax		/* return (1) */
	ret
	SET_SIZE(panic_trigger)

	ENTRY_NP(dtrace_panic_trigger)
	xorl	%eax, %eax
	movl	$0xdefacedd, %edx	/* value to plant in the trigger */
	lock
	  xchgl	%edx, (%rdi)		/* %edx = previous *tp */
	cmpl	$0, %edx
	je	1f			/* it was 0: we set it first */
	movl	$0, %eax		/* already set: return (0) */
	ret
1:	movl	$1, %eax		/* return (1) */
	ret
	SET_SIZE(dtrace_panic_trigger)

#elif defined(__i386)

	ENTRY_NP(panic_trigger)
	movl	4(%esp), %edx		/* %edx = tp */
	movl	$0xdefacedd, %eax	/* value to plant in the trigger */
	lock
	xchgl %eax, (%edx)		/* %eax = previous *tp */
	cmpl	$0, %eax
	je	1f			/* it was 0: we set it first */
	movl	$0, %eax		/* already set: return (0) */
	ret
1:	movl	$1, %eax		/* return (1) */
	ret
	SET_SIZE(panic_trigger)

	ENTRY_NP(dtrace_panic_trigger)
	movl	4(%esp), %edx		/* %edx = tp */
	movl	$0xdefacedd, %eax	/* value to plant in the trigger */
	lock
	xchgl %eax, (%edx)		/* %eax = previous *tp */
	cmpl	$0, %eax
	je	1f			/* it was 0: we set it first */
	movl	$0, %eax		/* already set: return (0) */
	ret
1:	movl	$1, %eax		/* return (1) */
	ret
	SET_SIZE(dtrace_panic_trigger)

#endif	/* __i386 */
#endif	/* __lint */
4165
4166/*
4167 * The panic() and cmn_err() functions invoke vpanic() as a common entry point
4168 * into the panic code implemented in panicsys().  vpanic() is responsible
4169 * for passing through the format string and arguments, and constructing a
4170 * regs structure on the stack into which it saves the current register
4171 * values.  If we are not dying due to a fatal trap, these registers will
4172 * then be preserved in panicbuf as the current processor state.  Before
4173 * invoking panicsys(), vpanic() activates the first panic trigger (see
4174 * common/os/panic.c) and switches to the panic_stack if successful.  Note that
4175 * DTrace takes a slightly different panic path if it must panic from probe
4176 * context.  Instead of calling panic, it calls into dtrace_vpanic(), which
4177 * sets up the initial stack as vpanic does, calls dtrace_panic_trigger(), and
4178 * branches back into vpanic().
4179 */
4180#if defined(__lint)
4181
4182/*ARGSUSED*/
4183void
4184vpanic(const char *format, va_list alist)
4185{}
4186
4187/*ARGSUSED*/
4188void
4189dtrace_vpanic(const char *format, va_list alist)
4190{}
4191
4192#else	/* __lint */
4193
4194#if defined(__amd64)
4195
4196	ENTRY_NP(vpanic)			/* Initial stack layout: */
4197
4198	pushq	%rbp				/* | %rip | 	0x60	*/
4199	movq	%rsp, %rbp			/* | %rbp |	0x58	*/
4200	pushfq					/* | rfl  |	0x50	*/
4201	pushq	%r11				/* | %r11 |	0x48	*/
4202	pushq	%r10				/* | %r10 |	0x40	*/
4203	pushq	%rbx				/* | %rbx |	0x38	*/
4204	pushq	%rax				/* | %rax |	0x30	*/
4205	pushq	%r9				/* | %r9  |	0x28	*/
4206	pushq	%r8				/* | %r8  |	0x20	*/
4207	pushq	%rcx				/* | %rcx |	0x18	*/
4208	pushq	%rdx				/* | %rdx |	0x10	*/
4209	pushq	%rsi				/* | %rsi |	0x8 alist */
4210	pushq	%rdi				/* | %rdi |	0x0 format */
4211
4212	movq	%rsp, %rbx			/* %rbx = current %rsp */
4213
4214	leaq	panic_quiesce(%rip), %rdi	/* %rdi = &panic_quiesce */
4215	call	panic_trigger			/* %eax = panic_trigger() */
4216
4217vpanic_common:
4218	cmpl	$0, %eax
4219	je	0f
4220
4221	/*
4222	 * If panic_trigger() was successful, we are the first to initiate a
4223	 * panic: we now switch to the reserved panic_stack before continuing.
4224	 */
4225	leaq	panic_stack(%rip), %rsp
4226	addq	$PANICSTKSIZE, %rsp
42270:	subq	$REGSIZE, %rsp
4228	/*
4229	 * Now that we've got everything set up, store the register values as
4230	 * they were when we entered vpanic() to the designated location in
4231	 * the regs structure we allocated on the stack.
4232	 */
4233	movq	0x0(%rbx), %rcx
4234	movq	%rcx, REGOFF_RDI(%rsp)
4235	movq	0x8(%rbx), %rcx
4236	movq	%rcx, REGOFF_RSI(%rsp)
4237	movq	0x10(%rbx), %rcx
4238	movq	%rcx, REGOFF_RDX(%rsp)
4239	movq	0x18(%rbx), %rcx
4240	movq	%rcx, REGOFF_RCX(%rsp)
4241	movq	0x20(%rbx), %rcx
4242
4243	movq	%rcx, REGOFF_R8(%rsp)
4244	movq	0x28(%rbx), %rcx
4245	movq	%rcx, REGOFF_R9(%rsp)
4246	movq	0x30(%rbx), %rcx
4247	movq	%rcx, REGOFF_RAX(%rsp)
4248	movq	0x38(%rbx), %rcx
4249	movq	%rbx, REGOFF_RBX(%rsp)
4250	movq	0x58(%rbx), %rcx
4251
4252	movq	%rcx, REGOFF_RBP(%rsp)
4253	movq	0x40(%rbx), %rcx
4254	movq	%rcx, REGOFF_R10(%rsp)
4255	movq	0x48(%rbx), %rcx
4256	movq	%rcx, REGOFF_R11(%rsp)
4257	movq	%r12, REGOFF_R12(%rsp)
4258
4259	movq	%r13, REGOFF_R13(%rsp)
4260	movq	%r14, REGOFF_R14(%rsp)
4261	movq	%r15, REGOFF_R15(%rsp)
4262
4263	movl	$MSR_AMD_FSBASE, %ecx
4264	rdmsr
4265	movl	%eax, REGOFF_FSBASE(%rsp)
4266	movl	%edx, REGOFF_FSBASE+4(%rsp)
4267
4268	movl	$MSR_AMD_GSBASE, %ecx
4269	rdmsr
4270	movl	%eax, REGOFF_GSBASE(%rsp)
4271	movl	%edx, REGOFF_GSBASE+4(%rsp)
4272
4273	xorl	%ecx, %ecx
4274	movw	%ds, %cx
4275	movq	%rcx, REGOFF_DS(%rsp)
4276	movw	%es, %cx
4277	movq	%rcx, REGOFF_ES(%rsp)
4278	movw	%fs, %cx
4279	movq	%rcx, REGOFF_FS(%rsp)
4280	movw	%gs, %cx
4281	movq	%rcx, REGOFF_GS(%rsp)
4282
4283	movq	$0, REGOFF_TRAPNO(%rsp)
4284
4285	movq	$0, REGOFF_ERR(%rsp)
4286	leaq	vpanic(%rip), %rcx
4287	movq	%rcx, REGOFF_RIP(%rsp)
4288	movw	%cs, %cx
4289	movzwq	%cx, %rcx
4290	movq	%rcx, REGOFF_CS(%rsp)
4291	movq	0x50(%rbx), %rcx
4292	movq	%rcx, REGOFF_RFL(%rsp)
4293	movq	%rbx, %rcx
4294	addq	$0x60, %rcx
4295	movq	%rcx, REGOFF_RSP(%rsp)
4296	movw	%ss, %cx
4297	movzwq	%cx, %rcx
4298	movq	%rcx, REGOFF_SS(%rsp)
4299
4300	/*
4301	 * panicsys(format, alist, rp, on_panic_stack)
4302	 */
4303	movq	REGOFF_RDI(%rsp), %rdi		/* format */
4304	movq	REGOFF_RSI(%rsp), %rsi		/* alist */
4305	movq	%rsp, %rdx			/* struct regs */
4306	movl	%eax, %ecx			/* on_panic_stack */
4307	call	panicsys
4308	addq	$REGSIZE, %rsp
4309	popq	%rdi
4310	popq	%rsi
4311	popq	%rdx
4312	popq	%rcx
4313	popq	%r8
4314	popq	%r9
4315	popq	%rax
4316	popq	%rbx
4317	popq	%r10
4318	popq	%r11
4319	popfq
4320	leave
4321	ret
4322	SET_SIZE(vpanic)
4323
	/*
	 * dtrace_vpanic(format, alist)	[amd64]
	 *
	 * In:	%rdi = format, %rsi = alist
	 *
	 * Pushes the same register save area as vpanic (the layout is given
	 * in the comments on the pushes), points %rbx at it, calls
	 * dtrace_panic_trigger(&panic_quiesce) and then jumps to
	 * vpanic_common with the result in %eax.
	 */
	ENTRY_NP(dtrace_vpanic)			/* Initial stack layout: */

	pushq	%rbp				/* | %rip | 	0x60	*/
	movq	%rsp, %rbp			/* | %rbp |	0x58	*/
	pushfq					/* | rfl  |	0x50	*/
	pushq	%r11				/* | %r11 |	0x48	*/
	pushq	%r10				/* | %r10 |	0x40	*/
	pushq	%rbx				/* | %rbx |	0x38	*/
	pushq	%rax				/* | %rax |	0x30	*/
	pushq	%r9				/* | %r9  |	0x28	*/
	pushq	%r8				/* | %r8  |	0x20	*/
	pushq	%rcx				/* | %rcx |	0x18	*/
	pushq	%rdx				/* | %rdx |	0x10	*/
	pushq	%rsi				/* | %rsi |	0x8 alist */
	pushq	%rdi				/* | %rdi |	0x0 format */

	movq	%rsp, %rbx			/* %rbx = current %rsp */

	leaq	panic_quiesce(%rip), %rdi	/* %rdi = &panic_quiesce */
	call	dtrace_panic_trigger	/* %eax = dtrace_panic_trigger() */
	jmp	vpanic_common		/* continue in the shared vpanic code */

	SET_SIZE(dtrace_vpanic)
4347
4348#elif defined(__i386)
4349
	/*
	 * void vpanic(const char *format, va_list alist)	[i386]
	 *
	 * Arguments are on the stack: format at 8(%ebp), alist at 12(%ebp)
	 * once the frame is set up.
	 *
	 * Saves %eax, %ebx, %ecx and %edx on the current stack, calls
	 * panic_trigger(&panic_quiesce), optionally switches to panic_stack,
	 * fills in a struct regs and calls
	 * panicsys(format, alist, &regs, on_panic_stack).
	 *
	 * %ebx points at the save area (offsets 0 .. 20 below) and %eax holds
	 * the trigger result from the call until it is pushed for panicsys().
	 * vpanic_common is also entered by a jump from dtrace_vpanic.
	 */
	ENTRY_NP(vpanic)			/ Initial stack layout:

	pushl	%ebp				/ | %eip | 20
	movl	%esp, %ebp			/ | %ebp | 16
	pushl	%eax				/ | %eax | 12
	pushl	%ebx				/ | %ebx |  8
	pushl	%ecx				/ | %ecx |  4
	pushl	%edx				/ | %edx |  0

	movl	%esp, %ebx			/ %ebx = current stack pointer

	lea	panic_quiesce, %eax		/ %eax = &panic_quiesce
	pushl	%eax				/ push &panic_quiesce
	call	panic_trigger			/ %eax = panic_trigger()
	addl	$4, %esp			/ reset stack pointer

vpanic_common:
	cmpl	$0, %eax			/ if (%eax == 0)
	je	0f				/   goto 0f;

	/*
	 * If panic_trigger() was successful, we are the first to initiate a
	 * panic: we now switch to the reserved panic_stack before continuing.
	 */
	lea	panic_stack, %esp		/ %esp  = panic_stack
	addl	$PANICSTKSIZE, %esp		/ %esp += PANICSTKSIZE

0:	subl	$REGSIZE, %esp			/ allocate struct regs

	/*
	 * Now that we've got everything set up, store the register values as
	 * they were when we entered vpanic() to the designated location in
	 * the regs structure we allocated on the stack.
	 */
	/*
	 * The segment register moves are spelled differently for the two
	 * assemblers (movw with a 32-bit destination vs. plain mov); both
	 * branches store the same four selectors.
	 */
#if !defined(__GNUC_AS__)
	movw	%gs, %edx
	movl	%edx, REGOFF_GS(%esp)
	movw	%fs, %edx
	movl	%edx, REGOFF_FS(%esp)
	movw	%es, %edx
	movl	%edx, REGOFF_ES(%esp)
	movw	%ds, %edx
	movl	%edx, REGOFF_DS(%esp)
#else	/* __GNUC_AS__ */
	mov	%gs, %edx
	mov	%edx, REGOFF_GS(%esp)
	mov	%fs, %edx
	mov	%edx, REGOFF_FS(%esp)
	mov	%es, %edx
	mov	%edx, REGOFF_ES(%esp)
	mov	%ds, %edx
	mov	%edx, REGOFF_DS(%esp)
#endif	/* __GNUC_AS__ */
	movl	%edi, REGOFF_EDI(%esp)		/ %edi and %esi were not pushed
	movl	%esi, REGOFF_ESI(%esp)		/ at entry: store live values
	movl	16(%ebx), %ecx			/ %ebp saved at entry
	movl	%ecx, REGOFF_EBP(%esp)
	movl	%ebx, %ecx
	addl	$20, %ecx			/ %esp as it was at entry
	movl	%ecx, REGOFF_ESP(%esp)
	movl	8(%ebx), %ecx			/ %ebx saved at entry
	movl	%ecx, REGOFF_EBX(%esp)
	movl	0(%ebx), %ecx			/ %edx saved at entry
	movl	%ecx, REGOFF_EDX(%esp)
	movl	4(%ebx), %ecx			/ %ecx saved at entry
	movl	%ecx, REGOFF_ECX(%esp)
	movl	12(%ebx), %ecx			/ %eax saved at entry
	movl	%ecx, REGOFF_EAX(%esp)
	movl	$0, REGOFF_TRAPNO(%esp)
	movl	$0, REGOFF_ERR(%esp)
	lea	vpanic, %ecx			/ reported %eip = &vpanic
	movl	%ecx, REGOFF_EIP(%esp)
#if !defined(__GNUC_AS__)
	movw	%cs, %edx
#else	/* __GNUC_AS__ */
	mov	%cs, %edx
#endif	/* __GNUC_AS__ */
	movl	%edx, REGOFF_CS(%esp)
	pushfl					/ flags are sampled here, not
	popl	%ecx				/ at entry (no pushfl there)
	movl	%ecx, REGOFF_EFL(%esp)
	movl	$0, REGOFF_UESP(%esp)
#if !defined(__GNUC_AS__)
	movw	%ss, %edx
#else	/* __GNUC_AS__ */
	mov	%ss, %edx
#endif	/* __GNUC_AS__ */
	movl	%edx, REGOFF_SS(%esp)

	movl	%esp, %ecx			/ %ecx = &regs
	pushl	%eax				/ push on_panic_stack
	pushl	%ecx				/ push &regs
	movl	12(%ebp), %ecx			/ %ecx = alist
	pushl	%ecx				/ push alist
	movl	8(%ebp), %ecx			/ %ecx = format
	pushl	%ecx				/ push format
	call	panicsys			/ panicsys();
	addl	$16, %esp			/ pop arguments

	addl	$REGSIZE, %esp			/ free struct regs
	popl	%edx				/ restore registers pushed at entry
	popl	%ecx
	popl	%ebx
	popl	%eax
	leave
	ret
	SET_SIZE(vpanic)
4457
	/*
	 * dtrace_vpanic(format, alist)	[i386]
	 *
	 * Pushes the same register save area as vpanic (layout in the
	 * comments on the pushes), points %ebx at it, calls
	 * dtrace_panic_trigger(&panic_quiesce) and then jumps to
	 * vpanic_common with the result in %eax.
	 */
	ENTRY_NP(dtrace_vpanic)			/ Initial stack layout:

	pushl	%ebp				/ | %eip | 20
	movl	%esp, %ebp			/ | %ebp | 16
	pushl	%eax				/ | %eax | 12
	pushl	%ebx				/ | %ebx |  8
	pushl	%ecx				/ | %ecx |  4
	pushl	%edx				/ | %edx |  0

	movl	%esp, %ebx			/ %ebx = current stack pointer

	lea	panic_quiesce, %eax		/ %eax = &panic_quiesce
	pushl	%eax				/ push &panic_quiesce
	call	dtrace_panic_trigger		/ %eax = dtrace_panic_trigger()
	addl	$4, %esp			/ reset stack pointer
	jmp	vpanic_common			/ jump back to common code

	SET_SIZE(dtrace_vpanic)
4476
4477#endif	/* __i386 */
4478#endif	/* __lint */
4479
4480#if defined(__lint)
4481
/*
 * Lint-only C stub for hres_tick(), followed by C declarations of the
 * variables that the assembly implementation reads and updates.  The
 * non-lint branch defines the storage for these variables in assembly.
 */
void
hres_tick(void)
{}

int64_t timedelta;		/* reduced by adj on each hres_tick() */
hrtime_t hres_last_tick;	/* *gethrtimef value at last hres_tick() */
timestruc_t hrestime;		/* (sec, nsec) advanced by hres_tick() */
int64_t hrestime_adj;		/* set to timedelta on each hres_tick() */
volatile int hres_lock;		/* lock taken by hres_tick() */
uint_t nsec_scale;
hrtime_t hrtime_base;		/* nsec, advanced by hres_tick() */
4493
4494#else	/* __lint */
4495
	/*
	 * Storage for the time-keeping variables used by hres_tick().
	 * NOTE(review): the DGDEF3 arguments look like (name, size,
	 * alignment) -- confirm against the macro definition.
	 */

	/* hrestime: two native words; hres_tick treats it as (sec, nsec) */
	DGDEF3(hrestime, _MUL(2, CLONGSIZE), 8)
	.NWORD	0, 0

	/* hrestime_adj: 64-bit value, stored as two 32-bit words */
	DGDEF3(hrestime_adj, 8, 8)
	.long	0, 0

	/* hres_last_tick: 64-bit *gethrtimef value saved by hres_tick */
	DGDEF3(hres_last_tick, 8, 8)
	.long	0, 0

	/* timedelta: 64-bit value, stored as two 32-bit words */
	DGDEF3(timedelta, 8, 8)
	.long	0, 0

	/* hres_lock: 32-bit word; hres_tick takes it via its low byte */
	DGDEF3(hres_lock, 4, 8)
	.long	0

	/*
	 * initialized to a non zero value to make pc_gethrtime()
	 * work correctly even before clock is initialized
	 */
	DGDEF3(hrtime_base, 8, 8)
	.long	_MUL(NSEC_PER_CLOCK_TICK, 6), 0

	/* adj_shift: 32-bit word initialized to ADJ_SHIFT */
	DGDEF3(adj_shift, 4, 4)
	.long	ADJ_SHIFT
4520
4521#if defined(__amd64)
4522
	/*
	 * void hres_tick(void)	[amd64]
	 *
	 * Samples the time through *gethrtimef, takes hres_lock, adds the
	 * interval since hres_last_tick to hrtime_base and to
	 * hrestime.tv_nsec, records the new hres_last_tick, calls
	 * __adj_hrestime and releases hres_lock.
	 *
	 * Register roles:
	 *	%r8  = sampled time, then the interval since the last tick
	 *	%r11 = copy of the sampled time (new hres_last_tick)
	 *	%rax = &hres_lock, then &hres_last_tick
	 *	%dl  = byte exchanged with the lock
	 */
	ENTRY_NP(hres_tick)
	pushq	%rbp
	movq	%rsp, %rbp

	/*
	 * We need to call *gethrtimef before picking up CLOCK_LOCK (obviously,
	 * hres_last_tick can only be modified while holding CLOCK_LOCK).
	 * At worst, performing this now instead of under CLOCK_LOCK may
	 * introduce some jitter in pc_gethrestime().
	 */
	call	*gethrtimef(%rip)
	movq	%rax, %r8		/* %r8 = value returned above */

	leaq	hres_lock(%rip), %rax
	movb	$-1, %dl
.CL1:
	xchgb	%dl, (%rax)		/* store 0xff, fetch old low byte */
	testb	%dl, %dl
	jz	.CL3			/* got it */
.CL2:
	cmpb	$0, (%rax)		/* possible to get lock? */
	pause				/* does not modify the flags */
	jne	.CL2
	jmp	.CL1			/* yes, try again */
.CL3:
	/*
	 * compute the interval since last time hres_tick was called
	 * and adjust hrtime_base and hrestime accordingly
	 * hrtime_base is an 8 byte value (in nsec), hrestime is
	 * a timestruc_t (sec, nsec)
	 */
	leaq	hres_last_tick(%rip), %rax
	movq	%r8, %r11		/* keep the sampled time */
	subq	(%rax), %r8		/* %r8 = now - hres_last_tick */
	addq	%r8, hrtime_base(%rip)	/* add interval to hrtime_base */
	addq	%r8, hrestime+8(%rip)	/* add interval to hrestime.tv_nsec */
	/*
	 * Now that we have CLOCK_LOCK, we can update hres_last_tick
	 */
	movq	%r11, (%rax)

	call	__adj_hrestime

	/*
	 * release the hres_lock
	 * (the low byte holds 0xff from the xchgb above, so the 32-bit
	 * increment clears it and carries into the upper bytes)
	 */
	incl	hres_lock(%rip)
	leave
	ret
	SET_SIZE(hres_tick)
4573
4574#elif defined(__i386)
4575
	/*
	 * void hres_tick(void)	[i386]
	 *
	 * Samples the time through *gethrtimef, takes hres_lock, adds the
	 * interval since hres_last_tick to hrtime_base and to
	 * hrestime.tv_nsec, records the new hres_last_tick, then applies
	 * the hrestime adjustment inline (the code at __adj_hrestime) and
	 * releases hres_lock.
	 *
	 * Register roles up to __adj_hrestime:
	 *	%esi:%ebx = sampled 64-bit time (saved and restored here)
	 *	%ecx:%edx = 64-bit interval since the last tick
	 *	%eax      = &hres_lock, then &hres_last_tick
	 */
	ENTRY_NP(hres_tick)
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%esi
	pushl	%ebx

	/*
	 * We need to call *gethrtimef before picking up CLOCK_LOCK (obviously,
	 * hres_last_tick can only be modified while holding CLOCK_LOCK).
	 * At worst, performing this now instead of under CLOCK_LOCK may
	 * introduce some jitter in pc_gethrestime().
	 */
	call	*gethrtimef
	movl	%eax, %ebx		/ low word of the sampled time
	movl	%edx, %esi		/ high word of the sampled time

	movl	$hres_lock, %eax
	movl	$-1, %edx
.CL1:
	xchgb	%dl, (%eax)		/ store 0xff, fetch old low byte
	testb	%dl, %dl
	jz	.CL3			/ got it
.CL2:
	cmpb	$0, (%eax)		/ possible to get lock?
	pause				/ does not modify the flags
	jne	.CL2
	jmp	.CL1			/ yes, try again
.CL3:
	/*
	 * compute the interval since last time hres_tick was called
	 * and adjust hrtime_base and hrestime accordingly
	 * hrtime_base is an 8 byte value (in nsec), hrestime is
	 * timestruc_t (sec, nsec)
	 */

	lea	hres_last_tick, %eax

	movl	%ebx, %edx
	movl	%esi, %ecx

	subl 	(%eax), %edx		/ %ecx:%edx = now - hres_last_tick
	sbbl 	4(%eax), %ecx

	addl	%edx, hrtime_base	/ add interval to hrtime_base
	adcl	%ecx, hrtime_base+4

	addl 	%edx, hrestime+4	/ add interval to hrestime.tv_nsec

	/
	/ Now that we have CLOCK_LOCK, we can update hres_last_tick.
	/
	movl	%ebx, (%eax)
	movl	%esi,  4(%eax)

	/ get hrestime at this moment. used as base for pc_gethrestime
	/
	/ Apply adjustment, if any
	/
	/ #define HRES_ADJ	(NSEC_PER_CLOCK_TICK >> ADJ_SHIFT)
	/ (max_hres_adj)
	/
	/ void
	/ adj_hrestime()
	/ {
	/	long long adj;
	/
	/	if (hrestime_adj == 0)
	/		adj = 0;
	/	else if (hrestime_adj > 0) {
	/		if (hrestime_adj < HRES_ADJ)
	/			adj = hrestime_adj;
	/		else
	/			adj = HRES_ADJ;
	/	}
	/	else {
	/		if (hrestime_adj < -(HRES_ADJ))
	/			adj = -(HRES_ADJ);
	/		else
	/			adj = hrestime_adj;
	/	}
	/
	/	timedelta -= adj;
	/	hrestime_adj = timedelta;
	/	hrestime.tv_nsec += adj;
	/
	/	while (hrestime.tv_nsec >= NANOSEC) {
	/		one_sec++;
	/		hrestime.tv_sec++;
	/		hrestime.tv_nsec -= NANOSEC;
	/	}
	/ }
	/
	/ In the code below %edx:%esi holds hrestime_adj and, from .CL5 on,
	/ %edx:%ecx holds adj.
__adj_hrestime:
	movl	hrestime_adj, %esi	/ if (hrestime_adj == 0)
	movl	hrestime_adj+4, %edx
	andl	%esi, %esi
	jne	.CL4			/ no
	andl	%edx, %edx
	jne	.CL4			/ no
	subl	%ecx, %ecx		/ yes, adj = 0;
	subl	%edx, %edx
	jmp	.CL5
.CL4:
	subl	%ecx, %ecx		/ %eax:%ecx = 0 - hrestime_adj
	subl	%eax, %eax
	subl	%esi, %ecx
	sbbl	%edx, %eax
	andl	%eax, %eax		/ if (hrestime_adj > 0)
	jge	.CL6			/ negated value not negative: go to
					/ the negative hrestime_adj case

	/ In the following comments, HRES_ADJ is used, while in the code
	/ max_hres_adj is used.
	/
	/ The test for "hrestime_adj < HRES_ADJ" is complicated because
	/ hrestime_adj is 64-bits, while HRES_ADJ is 32-bits.  We rely
	/ on the logical equivalence of:
	/
	/	!(hrestime_adj < HRES_ADJ)
	/
	/ and the two step sequence:
	/
	/	(HRES_ADJ - lsw(hrestime_adj)) generates a Borrow/Carry
	/
	/ which computes whether or not the least significant 32-bits
	/ of hrestime_adj is greater than HRES_ADJ, followed by:
	/
	/	Previous Borrow/Carry + -1 + msw(hrestime_adj) generates a Carry
	/
	/ which generates a carry whenever step 1 is true or the most
	/ significant long of the longlong hrestime_adj is non-zero.

	movl	max_hres_adj, %ecx	/ hrestime_adj is positive
	subl	%esi, %ecx
	movl	%edx, %eax
	adcl	$-1, %eax
	jnc	.CL7
	movl	max_hres_adj, %ecx	/ adj = HRES_ADJ;
	subl	%edx, %edx
	jmp	.CL5

	/ The following computation is similar to the one above.
	/
	/ The test for "hrestime_adj < -(HRES_ADJ)" is complicated because
	/ hrestime_adj is 64-bits, while HRES_ADJ is 32-bits.  We rely
	/ on the logical equivalence of:
	/
	/	(hrestime_adj > -HRES_ADJ)
	/
	/ and the two step sequence:
	/
	/	(HRES_ADJ + lsw(hrestime_adj)) generates a Carry
	/
	/ which means the least significant 32-bits of hrestime_adj is
	/ greater than -HRES_ADJ, followed by:
	/
	/	Previous Carry + 0 + msw(hrestime_adj) generates a Carry
	/
	/ which generates a carry only when step 1 is true and the most
	/ significant long of the longlong hrestime_adj is -1.

.CL6:					/ hrestime_adj is negative
	movl	%esi, %ecx
	addl	max_hres_adj, %ecx
	movl	%edx, %eax
	adcl	$0, %eax
	jc	.CL7
	xor	%ecx, %ecx
	subl	max_hres_adj, %ecx	/ adj = -(HRES_ADJ);
	movl	$-1, %edx
	jmp	.CL5
.CL7:
	movl	%esi, %ecx		/ adj = hrestime_adj;
					/ (%edx still holds its high word)
.CL5:
	movl	timedelta, %esi
	subl	%ecx, %esi
	movl	timedelta+4, %eax
	sbbl	%edx, %eax
	movl	%esi, timedelta
	movl	%eax, timedelta+4	/ timedelta -= adj;
	movl	%esi, hrestime_adj
	movl	%eax, hrestime_adj+4	/ hrestime_adj = timedelta;
	addl	hrestime+4, %ecx	/ %ecx = hrestime.tv_nsec + adj

	movl	%ecx, %eax		/ eax = tv_nsec
1:
	cmpl	$NANOSEC, %eax		/ if ((unsigned long)tv_nsec >= NANOSEC)
	jb	.CL8			/ no
	incl	one_sec			/ yes,  one_sec++;
	incl	hrestime		/ hrestime.tv_sec++;
	addl	$-NANOSEC, %eax		/ tv_nsec -= NANOSEC
	jmp	1b			/ check for more seconds

.CL8:
	movl	%eax, hrestime+4	/ store final into hrestime.tv_nsec
	incl	hres_lock		/ release the hres_lock

	popl	%ebx
	popl	%esi
	leave
	ret
	SET_SIZE(hres_tick)
4776
4777#endif	/* __i386 */
4778#endif	/* __lint */
4779
4780/*
4781 * void prefetch_smap_w(void *)
4782 *
4783 * Prefetch ahead within a linear list of smap structures.
4784 * Not implemented for ia32.  Stub for compatibility.
4785 */
4786
4787#if defined(__lint)
4788
/* Lint-only C stub giving prefetch_smap_w() a prototype; empty body. */
/*ARGSUSED*/
void prefetch_smap_w(void *smp)
{}
4792
4793#else	/* __lint */
4794
	/* No-op stub: returns immediately without touching its argument. */
	ENTRY(prefetch_smap_w)
	ret
	SET_SIZE(prefetch_smap_w)
4798
4799#endif	/* __lint */
4800
/*
 * prefetch_page_r(page_t *)
 *
 * Intended to issue prefetch instructions for a page_t.  The implementation
 * in this file is a stub that returns immediately without prefetching.
 */
4805#if defined(__lint)
4806
/* Lint-only C stub giving prefetch_page_r() a prototype; empty body. */
/*ARGSUSED*/
void
prefetch_page_r(void *pp)
{}
4811
4812#else	/* __lint */
4813
	/* No-op stub: returns immediately without touching its argument. */
	ENTRY(prefetch_page_r)
	ret
	SET_SIZE(prefetch_page_r)
4817
4818#endif	/* __lint */
4819
4820#if defined(__lint)
4821
/*
 * Lint-only C stub giving bcmp() a prototype; it always returns 0.  The
 * real routine is the assembly in the non-lint branch of this conditional.
 */
/*ARGSUSED*/
int
bcmp(const void *s1, const void *s2, size_t count)
{ return (0); }
4826
4827#else   /* __lint */
4828
4829#if defined(__amd64)
4830
4831	ENTRY(bcmp)
4832	pushq	%rbp
4833	movq	%rsp, %rbp
4834#ifdef DEBUG
4835	movq	kernelbase(%rip), %r11
4836	cmpq	%r11, %rdi
4837	jb	0f
4838	cmpq	%r11, %rsi
4839	jnb	1f
48400:	leaq	.bcmp_panic_msg(%rip), %rdi
4841	xorl	%eax, %eax
4842	call	panic
48431:
4844#endif	/* DEBUG */
4845	call	memcmp
4846	testl	%eax, %eax
4847	setne	%dl
4848	leave
4849	movzbl	%dl, %eax
4850	ret
4851	SET_SIZE(bcmp)
4852
4853#elif defined(__i386)
4854
#define	ARG_S1		8
#define	ARG_S2		12
#define	ARG_LENGTH	16

	/*
	 * int bcmp(const void *s1, const void *s2, size_t count)	[i386]
	 *
	 * Returns 0 in %eax when the two buffers are the same pointer or
	 * hold the same bytes, 1 otherwise.  Compares four bytes at a time
	 * while at least four remain, then byte by byte.  The ARG_* offsets
	 * are relative to %esp after one register has been pushed (or to
	 * %ebp in the DEBUG check).
	 *
	 *	%eax = cursor in s1	%ecx = cursor in s2
	 *	%edi = bytes remaining	%edx = scratch
	 */
	ENTRY(bcmp)
#ifdef DEBUG
	pushl   %ebp
	movl    %esp, %ebp
	movl    kernelbase, %eax
	cmpl    %eax, ARG_S1(%ebp)	/ s1 below kernelbase?
	jb	8f			/ yes, panic
	cmpl    %eax, ARG_S2(%ebp)	/ s2 at or above kernelbase?
	jae	9f			/ yes, arguments accepted
8:	pushl   $.bcmp_panic_msg
	call    panic
9:	popl    %ebp
#endif	/* DEBUG */

	pushl	%edi			/ %edi is used as the byte counter
	movl	ARG_S1(%esp), %eax	/ %eax = s1
	movl	ARG_S2(%esp), %ecx	/ %ecx = s2
	cmpl	%eax, %ecx		/ same buffer?
	je	.equal			/ then trivially equal
	movl	ARG_LENGTH(%esp), %edi	/ %edi = count
	cmpl	$4, %edi		/ fewer than four bytes?
	jb	.byte_check		/ skip the word loop
	.align	4
.word_loop:
	movl	(%ecx), %edx		/ %edx = next word of s2
	subl	$4, %edi		/ count these four bytes up front
	cmpl	(%eax), %edx		/ same as next word of s1?
	jne	.word_not_equal		/ no, finish in the byte loop
	addl	$4, %ecx		/ advance s2 cursor
	addl	$4, %eax		/ advance s1 cursor
	cmpl	$4, %edi		/ at least four bytes left?
	jae	.word_loop		/ yes, compare another word
.byte_check:
	testl	%edi, %edi		/ nothing left to compare?
	jz	.equal			/ then the buffers match
	jmp	.byte_loop		/ compare the tail byte by byte
.word_not_equal:
	addl	$4, %edi		/ undo the early decrement
	.align	4
.byte_loop:
	movb	(%ecx),	%dl		/ %dl = next byte of s2
	cmpb	%dl, (%eax)		/ same as next byte of s1?
	jne	.not_equal		/ no, buffers differ
	incl	%ecx			/ advance s2 cursor
	incl	%eax			/ advance s1 cursor
	decl	%edi			/ one byte fewer to go
	jnz	.byte_loop		/ loop until none remain
.equal:
	xorl	%eax, %eax		/ result = 0
	popl	%edi			/ restore caller %edi
	ret				/ return (0)
	.align	4
.not_equal:
	movl	$1, %eax		/ result = 1
	popl	%edi			/ restore caller %edi
	ret				/ return (1)
	SET_SIZE(bcmp)
4916
4917#endif	/* __i386 */
4918
#ifdef DEBUG
	/*
	 * Message passed to panic() by the DEBUG-only argument check in
	 * bcmp.  It is placed in .text.
	 */
	.text
.bcmp_panic_msg:
	.string "bcmp: arguments below kernelbase"
#endif	/* DEBUG */
4924
4925#endif	/* __lint */
4926