xref: /titanic_51/usr/src/uts/intel/ia32/ml/i86_subr.s (revision 1c9de0c9325f9f5d3540e19a4ad3691e6d50c0f8)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27/*
28 *  Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.
29 *  Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T
30 *    All Rights Reserved
31 */
32
33#pragma ident	"%Z%%M%	%I%	%E% SMI"
34
35/*
36 * General assembly language routines.
37 * It is the intent of this file to contain routines that are
38 * independent of the specific kernel architecture, and those that are
39 * common across kernel architectures.
40 * As architectures diverge, and implementations of specific
41 * architecture-dependent routines change, the routines should be moved
42 * from this file into the respective ../`arch -k`/subr.s file.
43 */
44
45#include <sys/asm_linkage.h>
46#include <sys/asm_misc.h>
47#include <sys/panic.h>
48#include <sys/ontrap.h>
49#include <sys/regset.h>
50#include <sys/privregs.h>
51#include <sys/reboot.h>
52#include <sys/psw.h>
53#include <sys/x86_archext.h>
54
55#if defined(__lint)
56#include <sys/types.h>
57#include <sys/systm.h>
58#include <sys/thread.h>
59#include <sys/archsystm.h>
60#include <sys/byteorder.h>
61#include <sys/dtrace.h>
62#include <sys/ftrace.h>
63#else	/* __lint */
64#include "assym.h"
65#endif	/* __lint */
66#include <sys/dditypes.h>
67
68/*
69 * on_fault()
70 * Catch lofault faults. Like setjmp except it returns one
71 * if code following causes uncorrectable fault. Turned off
72 * by calling no_fault().
73 */
74
#if defined(__lint)

/* Lint-only stub: gives lint the C prototype of the assembly routine. */
/* ARGSUSED */
int
on_fault(label_t *ljb)
{ return (0); }

/* Lint-only stub for the assembly routine of the same name. */
void
no_fault(void)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * int on_fault(label_t *ljb)		%rdi = ljb
	 *
	 * Remember ljb in curthread->t_onfault, point t_lofault at
	 * catch_fault, and tail-jump to setjmp(ljb), which saves the
	 * caller's context and returns 0 on our behalf.
	 */
	ENTRY(on_fault)
	leaq	catch_fault(%rip), %rdx		/* %rdx = &catch_fault */
	movq	%gs:CPU_THREAD, %rsi		/* %rsi = curthread */
	movq	%rdi, T_ONFAULT(%rsi)		/* t_onfault = ljb */
	movq	%rdx, T_LOFAULT(%rsi)		/* t_lofault = catch_fault */
	jmp	setjmp				/* tail call: setjmp(ljb) */

	/*
	 * Handler address stored in t_lofault above.  Disarm both
	 * fields, then longjmp() through the saved buffer so that the
	 * original on_fault() call appears to return 1.
	 */
catch_fault:
	xorl	%eax, %eax			/* %rax = 0 */
	movq	%gs:CPU_THREAD, %rsi		/* %rsi = curthread */
	movq	T_ONFAULT(%rsi), %rdi		/* %rdi = saved jumpbuf */
	movq	%rax, T_ONFAULT(%rsi)		/* t_onfault = NULL */
	movq	%rax, T_LOFAULT(%rsi)		/* t_lofault = NULL */
	jmp	longjmp				/* tail call: longjmp(jumpbuf) */
	SET_SIZE(on_fault)

	/*
	 * void no_fault(void)
	 *
	 * Clear curthread's t_onfault and t_lofault.
	 */
	ENTRY(no_fault)
	xorl	%eax, %eax			/* %rax = 0 */
	movq	%gs:CPU_THREAD, %rsi		/* %rsi = curthread */
	movq	%rax, T_ONFAULT(%rsi)		/* t_onfault = NULL */
	movq	%rax, T_LOFAULT(%rsi)		/* t_lofault = NULL */
	ret
	SET_SIZE(no_fault)

#elif defined(__i386)

	/*
	 * int on_fault(label_t *ljb)		4(%esp) = ljb
	 *
	 * Same contract as the amd64 version.  The argument is left in
	 * place on the stack, so setjmp() can be entered with a plain jmp.
	 */
	ENTRY(on_fault)
	movl	4(%esp), %eax			/* %eax = ljb */
	movl	%gs:CPU_THREAD, %edx		/* %edx = curthread */
	leal	catch_fault, %ecx		/* %ecx = &catch_fault */
	movl	%eax, T_ONFAULT(%edx)		/* t_onfault = ljb */
	movl	%ecx, T_LOFAULT(%edx)		/* t_lofault = catch_fault */
	jmp	setjmp				/* tail call: setjmp(ljb) */

	/*
	 * Handler address stored in t_lofault above.  longjmp() reads
	 * its argument from 4(%esp), so it is entered with a call (which
	 * supplies the return-address slot) rather than a jmp.
	 */
catch_fault:
	xorl	%eax, %eax			/* %eax = 0 */
	movl	%gs:CPU_THREAD, %edx		/* %edx = curthread */
	movl	T_ONFAULT(%edx), %ecx		/* %ecx = saved jumpbuf */
	movl	%eax, T_ONFAULT(%edx)		/* t_onfault = NULL */
	movl	%eax, T_LOFAULT(%edx)		/* t_lofault = NULL */
	pushl	%ecx				/* argument for longjmp */
	call	longjmp				/* does not return here */
	SET_SIZE(on_fault)

	/*
	 * void no_fault(void)
	 *
	 * Clear curthread's t_onfault and t_lofault.
	 */
	ENTRY(no_fault)
	xorl	%eax, %eax			/* %eax = 0 */
	movl	%gs:CPU_THREAD, %edx		/* %edx = curthread */
	movl	%eax, T_ONFAULT(%edx)		/* t_onfault = NULL */
	movl	%eax, T_LOFAULT(%edx)		/* t_lofault = NULL */
	ret
	SET_SIZE(no_fault)

#endif	/* __i386 */
#endif	/* __lint */
144
145/*
146 * Default trampoline code for on_trap() (see <sys/ontrap.h>).  We just
147 * do a longjmp(&curthread->t_ontrap->ot_jmpbuf) if this is ever called.
148 */
149
#if defined(__lint)

/* Lint-only stub for the assembly routine of the same name. */
void
on_trap_trampoline(void)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void on_trap_trampoline(void)
	 *
	 * longjmp(&curthread->t_ontrap->ot_jmpbuf).  Control resumes at
	 * the setjmp() site recorded by on_trap(); it never returns here.
	 */
	ENTRY(on_trap_trampoline)
	movq	%gs:CPU_THREAD, %rsi		/* %rsi = curthread */
	movq	T_ONTRAP(%rsi), %rdi		/* %rdi = curthread->t_ontrap */
	addq	$OT_JMPBUF, %rdi		/* %rdi = &t_ontrap->ot_jmpbuf */
	jmp	longjmp				/* tail call */
	SET_SIZE(on_trap_trampoline)

#elif defined(__i386)

	/*
	 * void on_trap_trampoline(void)
	 *
	 * Same as the amd64 version.  longjmp() reads its argument from
	 * 4(%esp), so it is entered with a call rather than a jmp.
	 */
	ENTRY(on_trap_trampoline)
	movl	%gs:CPU_THREAD, %eax		/* %eax = curthread */
	movl	T_ONTRAP(%eax), %eax		/* %eax = curthread->t_ontrap */
	addl	$OT_JMPBUF, %eax		/* %eax = &t_ontrap->ot_jmpbuf */
	pushl	%eax				/* argument for longjmp */
	call	longjmp				/* does not return here */
	SET_SIZE(on_trap_trampoline)

#endif	/* __i386 */
#endif	/* __lint */
179
180/*
181 * Push a new element on to the t_ontrap stack.  Refer to <sys/ontrap.h> for
182 * more information about the on_trap() mechanism.  If the on_trap_data is the
183 * same as the topmost stack element, we just modify that element.
184 */
#if defined(__lint)

/* Lint-only stub for the assembly routine of the same name. */
/*ARGSUSED*/
int
on_trap(on_trap_data_t *otp, uint_t prot)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * int on_trap(on_trap_data_t *otp, uint_t prot)
	 *	%rdi = otp, %esi = prot
	 *
	 * Initialise *otp, link it onto curthread->t_ontrap unless it is
	 * already the top element, and tail-jump to setjmp(&otp->ot_jmpbuf),
	 * which returns 0 to our caller.
	 */
	ENTRY(on_trap)
	movw	%si, OT_PROT(%rdi)		/* ot_prot = prot */
	movw	$0, OT_TRAP(%rdi)		/* ot_trap = 0 */
	leaq	on_trap_trampoline(%rip), %rdx	/* rdx = &on_trap_trampoline */
	movq	%rdx, OT_TRAMPOLINE(%rdi)	/* ot_trampoline = rdx */
	xorl	%ecx, %ecx			/* rcx = 0 */
	movq	%rcx, OT_HANDLE(%rdi)		/* ot_handle = NULL */
	movq	%rcx, OT_PAD1(%rdi)		/* ot_pad1 = NULL */
	movq	%gs:CPU_THREAD, %rdx		/* rdx = curthread */
	movq	T_ONTRAP(%rdx), %rcx		/* rcx = curthread->t_ontrap */
	cmpq	%rdi, %rcx			/* if (otp == %rcx)	*/
	je	0f				/*	don't modify t_ontrap */

	movq	%rcx, OT_PREV(%rdi)		/* ot_prev = t_ontrap */
	movq	%rdi, T_ONTRAP(%rdx)		/* curthread->t_ontrap = otp */

0:	addq	$OT_JMPBUF, %rdi		/* &ot_jmpbuf */
	jmp	setjmp				/* let setjmp do the rest */
	SET_SIZE(on_trap)

#elif defined(__i386)

	/*
	 * int on_trap(on_trap_data_t *otp, uint_t prot)
	 *	4(%esp) = otp, 8(%esp) = prot
	 *
	 * Same contract as the amd64 version.  The first stack argument
	 * is overwritten with &otp->ot_jmpbuf so that setjmp() can be
	 * entered with a plain jmp and find its argument at 4(%esp).
	 */
	ENTRY(on_trap)
	movl	4(%esp), %eax			/* %eax = otp */
	movl	8(%esp), %edx			/* %edx = prot */

	movw	%dx, OT_PROT(%eax)		/* ot_prot = prot */
	movw	$0, OT_TRAP(%eax)		/* ot_trap = 0 */
	leal	on_trap_trampoline, %edx	/* %edx = &on_trap_trampoline */
	movl	%edx, OT_TRAMPOLINE(%eax)	/* ot_trampoline = %edx */
	movl	$0, OT_HANDLE(%eax)		/* ot_handle = NULL */
	movl	$0, OT_PAD1(%eax)		/* ot_pad1 = NULL */
	movl	%gs:CPU_THREAD, %edx		/* %edx = curthread */
	movl	T_ONTRAP(%edx), %ecx		/* %ecx = curthread->t_ontrap */
	cmpl	%eax, %ecx			/* if (otp == %ecx) */
	je	0f				/*    don't modify t_ontrap */

	movl	%ecx, OT_PREV(%eax)		/* ot_prev = t_ontrap */
	movl	%eax, T_ONTRAP(%edx)		/* curthread->t_ontrap = otp */

0:	addl	$OT_JMPBUF, %eax		/* %eax = &ot_jmpbuf */
	movl	%eax, 4(%esp)			/* put %eax back on the stack */
	jmp	setjmp				/* let setjmp do the rest */
	SET_SIZE(on_trap)

#endif	/* __i386 */
#endif	/* __lint */
243
244/*
245 * Setjmp and longjmp implement non-local gotos using state vectors
246 * type label_t.
247 */
248
#if defined(__lint)

/* Lint-only stub for the assembly routine of the same name. */
/* ARGSUSED */
int
setjmp(label_t *lp)
{ return (0); }

/* Lint-only stub for the assembly routine of the same name. */
/* ARGSUSED */
void
longjmp(label_t *lp)
{}

#else	/* __lint */

/*
 * The return address is stored at offset 0 of the label_t with no
 * symbolic displacement, so the code below only works if LABEL_PC is 0.
 */
#if LABEL_PC != 0
#error LABEL_PC MUST be defined as 0 for setjmp/longjmp to work as coded
#endif	/* LABEL_PC != 0 */

#if defined(__amd64)

	/*
	 * int setjmp(label_t *lp)		%rdi = lp
	 *
	 * Save the return address, %rsp and the callee-saved registers
	 * (%rbp, %rbx, %r12-%r15) in *lp and return 0.
	 */
	ENTRY(setjmp)
	movq	(%rsp), %rdx		/* %rdx = our return address */
	movq	%rdx, (%rdi)		/* lp->pc (LABEL_PC is 0) */
	movq	%rsp, LABEL_SP(%rdi)
	movq	%rbp, LABEL_RBP(%rdi)
	movq	%rbx, LABEL_RBX(%rdi)
	movq	%r12, LABEL_R12(%rdi)
	movq	%r13, LABEL_R13(%rdi)
	movq	%r14, LABEL_R14(%rdi)
	movq	%r15, LABEL_R15(%rdi)
	xorl	%eax, %eax		/* return (0) */
	ret
	SET_SIZE(setjmp)

	/*
	 * void longjmp(label_t *lp)		%rdi = lp
	 *
	 * Reload the state saved by setjmp(lp), plant the saved return
	 * address on the restored stack and return 1 to that address.
	 */
	ENTRY(longjmp)
	movq	(%rdi), %rdx		/* %rdx = saved pc (LABEL_PC is 0) */
	movq	LABEL_R15(%rdi), %r15
	movq	LABEL_R14(%rdi), %r14
	movq	LABEL_R13(%rdi), %r13
	movq	LABEL_R12(%rdi), %r12
	movq	LABEL_RBX(%rdi), %rbx
	movq	LABEL_RBP(%rdi), %rbp
	movq	LABEL_SP(%rdi), %rsp	/* switch to the saved stack */
	movq	%rdx, (%rsp)		/* return address slot = saved pc */
	movl	$1, %eax		/* return (1) */
	ret
	SET_SIZE(longjmp)

#elif defined(__i386)

	/*
	 * int setjmp(label_t *lp)		4(%esp) = lp
	 *
	 * Save the return address, %esp, %ebp, %ebx, %esi and %edi in
	 * *lp and return 0.
	 */
	ENTRY(setjmp)
	movl	4(%esp), %edx		/* %edx = lp */
	movl	(%esp), %ecx		/* %ecx = our return address */
	movl	%ecx, (%edx)		/* lp->pc (LABEL_PC is 0) */
	movl	%esp, 4(%edx)		/* stack pointer, second slot */
	movl	%ebp, LABEL_EBP(%edx)
	movl	%ebx, LABEL_EBX(%edx)
	movl	%esi, LABEL_ESI(%edx)
	movl	%edi, LABEL_EDI(%edx)
	xorl	%eax, %eax		/* return (0) */
	ret
	SET_SIZE(setjmp)

	/*
	 * void longjmp(label_t *lp)		4(%esp) = lp
	 *
	 * Reload the state saved by setjmp(lp), discard the return
	 * address slot on the restored stack and jump to the saved pc
	 * with %eax = 1.
	 */
	ENTRY(longjmp)
	movl	4(%esp), %edx		/* %edx = lp */
	movl	(%edx), %ecx		/* %ecx = saved pc (LABEL_PC is 0) */
	movl	LABEL_EDI(%edx), %edi
	movl	LABEL_ESI(%edx), %esi
	movl	LABEL_EBX(%edx), %ebx
	movl	LABEL_EBP(%edx), %ebp
	movl	4(%edx), %esp		/* switch to the saved stack */
	addl	$4, %esp		/* pop the return address slot */
	movl	$1, %eax		/* return (1) */
	jmp	*%ecx			/* resume at the saved pc */
	SET_SIZE(longjmp)

#endif	/* __i386 */
#endif	/* __lint */
328
329/*
330 * if a() calls b() calls caller(),
331 * caller() returns return address in a().
332 * (Note: We assume a() and b() are C routines which do the normal entry/exit
333 *  sequence.)
334 */
335
#if defined(__lint)

/* Lint-only stub for the assembly routine of the same name. */
caddr_t
caller(void)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * caddr_t caller(void)
	 *
	 * No frame is built here, so %rbp still holds the frame pointer
	 * of the routine that called us (b).  The word just above b()'s
	 * saved frame pointer is b()'s return address.
	 */
	ENTRY(caller)
	movq	8(%rbp), %rax		/* b()'s return pc, in a() */
	ret
	SET_SIZE(caller)

#elif defined(__i386)

	/*
	 * caddr_t caller(void)
	 *
	 * As above, with 4-byte stack slots.
	 */
	ENTRY(caller)
	movl	4(%ebp), %eax		/* b()'s return pc, in a() */
	ret
	SET_SIZE(caller)

#endif	/* __i386 */
#endif	/* __lint */
360
361/*
362 * if a() calls callee(), callee() returns the
363 * return address in a();
364 */
365
#if defined(__lint)

/* Lint-only stub for the assembly routine of the same name. */
caddr_t
callee(void)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * caddr_t callee(void)
	 *
	 * Nothing has been pushed since the call, so the top of the
	 * stack is our own return address.
	 */
	ENTRY(callee)
	movq	(%rsp), %rax		/* callee()'s return pc, in a() */
	ret
	SET_SIZE(callee)

#elif defined(__i386)

	/* caddr_t callee(void) -- see the amd64 version above */
	ENTRY(callee)
	movl	(%esp), %eax		/* callee()'s return pc, in a() */
	ret
	SET_SIZE(callee)

#endif	/* __i386 */
#endif	/* __lint */
390
391/*
392 * return the current frame pointer
393 */
394
#if defined(__lint)

/* Lint-only stub for the assembly routine of the same name. */
greg_t
getfp(void)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * greg_t getfp(void)
	 *
	 * No frame is built here, so the value returned is the frame
	 * pointer register exactly as the caller left it.
	 */
	ENTRY(getfp)
	movq	%rbp, %rax
	ret
	SET_SIZE(getfp)

#elif defined(__i386)

	/* greg_t getfp(void) -- see the amd64 version above */
	ENTRY(getfp)
	movl	%ebp, %eax
	ret
	SET_SIZE(getfp)

#endif	/* __i386 */
#endif	/* __lint */
419
420/*
421 * Invalidate a single page table entry in the TLB
422 */
423
#if defined(__lint)

/* Lint-only stub for the assembly routine of the same name. */
/* ARGSUSED */
void
mmu_tlbflush_entry(caddr_t m)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void mmu_tlbflush_entry(caddr_t m)		%rdi = m
	 *
	 * Issue invlpg for the virtual address m.
	 */
	ENTRY(mmu_tlbflush_entry)
	invlpg	(%rdi)
	ret
	SET_SIZE(mmu_tlbflush_entry)

#elif defined(__i386)

	/*
	 * void mmu_tlbflush_entry(caddr_t m)		4(%esp) = m
	 */
	ENTRY(mmu_tlbflush_entry)
	movl	4(%esp), %eax		/* %eax = m */
	invlpg	(%eax)
	ret
	SET_SIZE(mmu_tlbflush_entry)

#endif	/* __i386 */
#endif	/* __lint */
450
451
452/*
453 * Get/Set the value of various control registers
454 */
455
#if defined(__lint)

/* Lint-only stubs; the real accessors are the assembly routines below. */

ulong_t
getcr0(void)
{ return (0); }

/* ARGSUSED */
void
setcr0(ulong_t value)
{}

ulong_t
getcr2(void)
{ return (0); }

ulong_t
getcr3(void)
{ return (0); }

#if !defined(__xpv)
/* ARGSUSED */
void
setcr3(ulong_t val)
{}

void
reload_cr3(void)
{}
#endif

ulong_t
getcr4(void)
{ return (0); }

/* ARGSUSED */
void
setcr4(ulong_t val)
{}

#if defined(__amd64)

ulong_t
getcr8(void)
{ return (0); }

/* ARGSUSED */
void
setcr8(ulong_t val)
{}

#endif	/* __amd64 */

#else	/* __lint */

#if defined(__amd64)

	/*
	 * amd64 accessors: getcrN() returns the register in %rax,
	 * setcrN(val) loads it from %rdi.
	 */

	ENTRY(getcr0)
	movq	%cr0, %rax
	ret
	SET_SIZE(getcr0)

	ENTRY(setcr0)
	movq	%rdi, %cr0
	ret
	SET_SIZE(setcr0)

	/*
	 * Under __xpv the value is read from the arch cr2 field of the
	 * structure pointed to by this CPU's CPU_VCPU_INFO rather than
	 * from %cr2 itself.
	 */
	ENTRY(getcr2)
#if defined(__xpv)
	movq	%gs:CPU_VCPU_INFO, %rax
	movq	VCPU_INFO_ARCH_CR2(%rax), %rax
#else
	movq	%cr2, %rax
#endif
	ret
	SET_SIZE(getcr2)

	ENTRY(getcr3)
	movq	%cr3, %rax
	ret
	SET_SIZE(getcr3)

#if !defined(__xpv)

	ENTRY(setcr3)
	movq	%rdi, %cr3
	ret
	SET_SIZE(setcr3)

	/* Write %cr3 back with the value it already holds. */
	ENTRY(reload_cr3)
	movq	%cr3, %rdi
	movq	%rdi, %cr3
	ret
	SET_SIZE(reload_cr3)

#endif	/* __xpv */

	ENTRY(getcr4)
	movq	%cr4, %rax
	ret
	SET_SIZE(getcr4)

	ENTRY(setcr4)
	movq	%rdi, %cr4
	ret
	SET_SIZE(setcr4)

	ENTRY(getcr8)
	movq	%cr8, %rax
	ret
	SET_SIZE(getcr8)

	ENTRY(setcr8)
	movq	%rdi, %cr8
	ret
	SET_SIZE(setcr8)

#elif defined(__i386)

	/*
	 * i386 accessors: getcrN() returns the register in %eax,
	 * setcrN(val) loads it from the stack argument at 4(%esp).
	 */

	ENTRY(getcr0)
	movl	%cr0, %eax
	ret
	SET_SIZE(getcr0)

	ENTRY(setcr0)
	movl	4(%esp), %eax
	movl	%eax, %cr0
	ret
	SET_SIZE(setcr0)

	/* See the amd64 getcr2 for the __xpv variant. */
	ENTRY(getcr2)
#if defined(__xpv)
	movl	%gs:CPU_VCPU_INFO, %eax
	movl	VCPU_INFO_ARCH_CR2(%eax), %eax
#else
	movl	%cr2, %eax
#endif
	ret
	SET_SIZE(getcr2)

	ENTRY(getcr3)
	movl	%cr3, %eax
	ret
	SET_SIZE(getcr3)

#if !defined(__xpv)

	ENTRY(setcr3)
	movl	4(%esp), %eax
	movl	%eax, %cr3
	ret
	SET_SIZE(setcr3)

	/* Write %cr3 back with the value it already holds. */
	ENTRY(reload_cr3)
	movl	%cr3, %eax
	movl	%eax, %cr3
	ret
	SET_SIZE(reload_cr3)

#endif	/* __xpv */

	ENTRY(getcr4)
	movl	%cr4, %eax
	ret
	SET_SIZE(getcr4)

	ENTRY(setcr4)
	movl	4(%esp), %eax
	movl	%eax, %cr4
	ret
	SET_SIZE(setcr4)

#endif	/* __i386 */
#endif	/* __lint */
629
#if defined(__lint)

/* Lint-only stub for the assembly routine of the same name. */
/*ARGSUSED*/
uint32_t
__cpuid_insn(struct cpuid_regs *regs)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * uint32_t __cpuid_insn(struct cpuid_regs *regs)	%rdi = regs
	 *
	 * Load %eax/%ebx/%ecx/%edx from the four 32-bit words at regs,
	 * execute cpuid, and store the four results back.  %eax still
	 * holds the cpuid %eax output on return.  %rbx, %rcx and %rdx
	 * are parked in %r8, %r9 and %r11 and restored before returning.
	 */
	ENTRY(__cpuid_insn)
	movq	%rdx, %r11		/* park %rdx */
	movq	%rcx, %r9		/* park %rcx */
	movq	%rbx, %r8		/* park %rbx */
	movl	0xc(%rdi), %edx		/* %edx = regs->cp_edx */
	movl	0x8(%rdi), %ecx		/* %ecx = regs->cp_ecx */
	movl	0x4(%rdi), %ebx		/* %ebx = regs->cp_ebx */
	movl	(%rdi), %eax		/* %eax = regs->cp_eax */
	cpuid
	movl	%edx, 0xc(%rdi)		/* regs->cp_edx = %edx */
	movl	%ecx, 0x8(%rdi)		/* regs->cp_ecx = %ecx */
	movl	%ebx, 0x4(%rdi)		/* regs->cp_ebx = %ebx */
	movl	%eax, (%rdi)		/* regs->cp_eax = %eax */
	movq	%r11, %rdx		/* restore %rdx */
	movq	%r9, %rcx		/* restore %rcx */
	movq	%r8, %rbx		/* restore %rbx */
	ret
	SET_SIZE(__cpuid_insn)

#elif defined(__i386)

	/*
	 * uint32_t __cpuid_insn(struct cpuid_regs *regs)
	 *
	 * Same contract as the amd64 version.  %ebp is borrowed as the
	 * regs pointer; it and %ebx/%ecx/%edx are saved on the stack.
	 */
	ENTRY(__cpuid_insn)
	pushl	%ebp
	movl	0x8(%esp), %ebp		/* %ebp = regs (arg is one slot up) */
	pushl	%ebx
	pushl	%ecx
	pushl	%edx
	movl	0xc(%ebp), %edx		/* %edx = regs->cp_edx */
	movl	0x8(%ebp), %ecx		/* %ecx = regs->cp_ecx */
	movl	0x4(%ebp), %ebx		/* %ebx = regs->cp_ebx */
	movl	(%ebp), %eax		/* %eax = regs->cp_eax */
	cpuid
	movl	%edx, 0xc(%ebp)		/* regs->cp_edx = %edx */
	movl	%ecx, 0x8(%ebp)		/* regs->cp_ecx = %ecx */
	movl	%ebx, 0x4(%ebp)		/* regs->cp_ebx = %ebx */
	movl	%eax, (%ebp)		/* regs->cp_eax = %eax */
	popl	%edx
	popl	%ecx
	popl	%ebx
	popl	%ebp
	ret
	SET_SIZE(__cpuid_insn)

#endif	/* __i386 */
#endif	/* __lint */
686
687#if defined(__xpv)
688	/*
689	 * Defined in C
690	 */
691#else
692
#if defined(__lint)

/* Lint-only stub for the assembly routine of the same name. */
/*ARGSUSED*/
void
i86_monitor(volatile uint32_t *addr, uint32_t extensions, uint32_t hints)
{ return; }

#else   /* __lint */

#if defined(__amd64)

	/*
	 * void i86_monitor(volatile uint32_t *addr, uint32_t extensions,
	 *     uint32_t hints)
	 *	%rdi = addr, %esi = extensions, %edx = hints
	 *
	 * Place addr in %rax and extensions in %rcx (hints is already
	 * in %rdx) and execute monitor, emitted as raw opcode bytes.
	 */
	ENTRY_NP(i86_monitor)
	pushq	%rbp
	movq	%rsp, %rbp
	movq	%rsi, %rcx		/* %rcx = extensions */
	movq	%rdi, %rax		/* %rax = addr */
	/* %rdx already holds the third argument, hints */
	.byte	0x0f, 0x01, 0xc8	/* monitor */
	leave
	ret
	SET_SIZE(i86_monitor)

#elif defined(__i386)

	/*
	 * Same as the amd64 version, with all three arguments fetched
	 * from the stack frame.
	 */
	ENTRY_NP(i86_monitor)
	pushl	%ebp
	movl	%esp, %ebp
	movl	0x10(%ebp), %edx	/* %edx = hints */
	movl	0xc(%ebp), %ecx		/* %ecx = extensions */
	movl	0x8(%ebp), %eax		/* %eax = addr */
	.byte	0x0f, 0x01, 0xc8	/* monitor */
	leave
	ret
	SET_SIZE(i86_monitor)

#endif	/* __i386 */
#endif	/* __lint */
730
#if defined(__lint)

/* Lint-only stub for the assembly routine of the same name. */
/*ARGSUSED*/
void
i86_mwait(uint32_t data, uint32_t extensions)
{ return; }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void i86_mwait(uint32_t data, uint32_t extensions)
	 *	%edi = data, %esi = extensions
	 *
	 * Place data in %rax and extensions in %rcx and execute mwait,
	 * emitted as raw opcode bytes.
	 */
	ENTRY_NP(i86_mwait)
	pushq	%rbp
	movq	%rsp, %rbp
	movq	%rsi, %rcx		/* %rcx = extensions */
	movq	%rdi, %rax		/* %rax = data */
	.byte	0x0f, 0x01, 0xc9	/* mwait */
	leave
	ret
	SET_SIZE(i86_mwait)

#elif defined(__i386)

	/* Same as the amd64 version, arguments fetched from the frame. */
	ENTRY_NP(i86_mwait)
	pushl	%ebp
	movl	%esp, %ebp
	movl	0xc(%ebp), %ecx		/* %ecx = extensions */
	movl	0x8(%ebp), %eax		/* %eax = data */
	.byte	0x0f, 0x01, 0xc9	/* mwait */
	leave
	ret
	SET_SIZE(i86_mwait)

#endif	/* __i386 */
#endif	/* __lint */
766
#if defined(__lint)

/* Lint-only stub for the assembly routine of the same name. */
hrtime_t
tsc_read(void)
{
	return (0);
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * hrtime_t tsc_read(void)
	 *
	 * Default body: cpuid (leaf 0) followed by rdtsc, with the
	 * %edx:%eax result merged into %rax.  %rbx, which cpuid
	 * overwrites, is kept in %r11 across the sequence.
	 *
	 * The labelled _xxx_start/_xxx_end pairs that follow bracket
	 * alternative complete bodies (mfence+rdtsc, rdtscp, a stub that
	 * returns 0, lfence+rdtsc).  They are exported but never reached
	 * by fall-through.
	 * NOTE(review): presumably one of them is copied over tsc_read
	 * at startup depending on CPU features -- confirm in the TSC
	 * setup code before changing any instruction sizes here.
	 */
	ENTRY_NP(tsc_read)
	movq	%rbx, %r11
	movl	$0, %eax
	cpuid
	rdtsc
	movq	%r11, %rbx
	shlq	$32, %rdx
	orq	%rdx, %rax
	ret
	.globl _tsc_mfence_start
_tsc_mfence_start:
	mfence
	rdtsc
	shlq	$32, %rdx
	orq	%rdx, %rax
	ret
	.globl _tsc_mfence_end
_tsc_mfence_end:
	.globl _tscp_start
_tscp_start:
	.byte	0x0f, 0x01, 0xf9	/* rdtscp instruction */
	shlq	$32, %rdx
	orq	%rdx, %rax
	ret
	.globl _tscp_end
_tscp_end:
	.globl _no_rdtsc_start
_no_rdtsc_start:
	xorl	%edx, %edx
	xorl	%eax, %eax
	ret
	.globl _no_rdtsc_end
_no_rdtsc_end:
	.globl _tsc_lfence_start
_tsc_lfence_start:
	lfence
	rdtsc
	shlq	$32, %rdx
	orq	%rdx, %rax
	ret
	.globl _tsc_lfence_end
_tsc_lfence_end:
	SET_SIZE(tsc_read)

#else /* __i386 */

	/*
	 * hrtime_t tsc_read(void)
	 *
	 * i386 layout of the same routine.  The 64-bit result is left in
	 * %edx:%eax as rdtsc produces it, so no shift/or is needed.
	 * %ebx, which cpuid overwrites, is saved on the stack.
	 */
	ENTRY_NP(tsc_read)
	pushl	%ebx
	movl	$0, %eax
	cpuid
	rdtsc
	popl	%ebx
	ret
	.globl _tsc_mfence_start
_tsc_mfence_start:
	mfence
	rdtsc
	ret
	.globl _tsc_mfence_end
_tsc_mfence_end:
	.globl	_tscp_start
_tscp_start:
	.byte	0x0f, 0x01, 0xf9	/* rdtscp instruction */
	ret
	.globl _tscp_end
_tscp_end:
	.globl _no_rdtsc_start
_no_rdtsc_start:
	xorl	%edx, %edx
	xorl	%eax, %eax
	ret
	.globl _no_rdtsc_end
_no_rdtsc_end:
	.globl _tsc_lfence_start
_tsc_lfence_start:
	lfence
	rdtsc
	ret
	.globl _tsc_lfence_end
_tsc_lfence_end:
	SET_SIZE(tsc_read)

#endif	/* __i386 */

#endif	/* __lint */
864
865
866#endif	/* __xpv */
867
#ifdef __lint
/*
 * Do not use this function for obtaining clock tick.  This
 * is called by callers who do not need to have a guaranteed
 * correct tick value.  The proper routine to use is tsc_read().
 */
hrtime_t
randtick(void)
{
	return (0);
}
#else
#if defined(__amd64)
	/*
	 * hrtime_t randtick(void)
	 *
	 * Bare rdtsc with no serializing instruction in front of it;
	 * the %edx:%eax halves are merged into %rax.
	 */
	ENTRY_NP(randtick)
	rdtsc
	shlq    $32, %rdx
	orq     %rdx, %rax
	ret
	SET_SIZE(randtick)
#else
	/* i386: the 64-bit result is returned in %edx:%eax as rdtsc left it */
	ENTRY_NP(randtick)
	rdtsc
	ret
	SET_SIZE(randtick)
#endif /* __i386 */
#endif /* __lint */
894/*
895 * Insert entryp after predp in a doubly linked list.
896 */
897
#if defined(__lint)

/* Lint-only stub for the assembly routine of the same name. */
/*ARGSUSED*/
void
_insque(caddr_t entryp, caddr_t predp)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void _insque(caddr_t entryp, caddr_t predp)
	 *	%rdi = entryp, %rsi = predp
	 *
	 * Each element starts with a forward pointer at offset 0 and a
	 * back pointer at offset CPTRSIZE.  Link entryp in between predp
	 * and predp's old successor.
	 */
	ENTRY(_insque)
	movq	(%rsi), %rax		/* %rax = succ = predp->forw	*/
	movq	%rax, (%rdi)		/* entryp->forw = succ		*/
	movq	%rsi, CPTRSIZE(%rdi)	/* entryp->back = predp		*/
	movq	%rdi, (%rsi)		/* predp->forw = entryp		*/
	movq	%rdi, CPTRSIZE(%rax)	/* succ->back = entryp		*/
	ret
	SET_SIZE(_insque)

#elif defined(__i386)

	/*
	 * void _insque(caddr_t entryp, caddr_t predp)
	 *	4(%esp) = entryp, 8(%esp) = predp
	 */
	ENTRY(_insque)
	movl	4(%esp), %ecx		/* %ecx = entryp		*/
	movl	8(%esp), %edx		/* %edx = predp			*/
	movl	(%edx), %eax		/* %eax = succ = predp->forw	*/
	movl	%eax, (%ecx)		/* entryp->forw = succ		*/
	movl	%edx, CPTRSIZE(%ecx)	/* entryp->back = predp		*/
	movl	%ecx, (%edx)		/* predp->forw = entryp		*/
	movl	%ecx, CPTRSIZE(%eax)	/* succ->back = entryp		*/
	ret
	SET_SIZE(_insque)

#endif	/* __i386 */
#endif	/* __lint */
933
934/*
935 * Remove entryp from a doubly linked list
936 */
937
#if defined(__lint)

/* Lint-only stub for the assembly routine of the same name. */
/*ARGSUSED*/
void
_remque(caddr_t entryp)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void _remque(caddr_t entryp)		%rdi = entryp
	 *
	 * Unlink entryp by pointing its neighbours at each other.  The
	 * pointers inside entryp itself are left untouched.
	 */
	ENTRY(_remque)
	movq	CPTRSIZE(%rdi), %rdx	/* %rdx = pred = entry->back */
	movq	(%rdi), %rax		/* %rax = succ = entry->forw */
	movq	%rax, (%rdx)		/* pred->forw = succ */
	movq	%rdx, CPTRSIZE(%rax)	/* succ->back = pred */
	ret
	SET_SIZE(_remque)

#elif defined(__i386)

	/*
	 * void _remque(caddr_t entryp)		4(%esp) = entryp
	 */
	ENTRY(_remque)
	movl	4(%esp), %ecx		/* %ecx = entry */
	movl	CPTRSIZE(%ecx), %edx	/* %edx = pred = entry->back */
	movl	(%ecx), %eax		/* %eax = succ = entry->forw */
	movl	%eax, (%edx)		/* pred->forw = succ */
	movl	%edx, CPTRSIZE(%eax)	/* succ->back = pred */
	ret
	SET_SIZE(_remque)

#endif	/* __i386 */
#endif	/* __lint */
970
971/*
972 * Returns the number of
973 * non-NULL bytes in string argument.
974 */
975
#if defined(__lint)

/* Lint-only stub for the assembly routine of the same name. */
/* ARGSUSED */
size_t
strlen(const char *str)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

/*
 * Byte-at-a-time scan, essentially a hand translation of:
 *
 * size_t
 * strlen(const char *s)
 * {
 *	const char *s0;
 * #if defined(DEBUG)
 *	if ((uintptr_t)s < postbootkernelbase)
 *		panic(.str_panic_msg);
 * #endif
 *	for (s0 = s; *s; s++)
 *		;
 *	return (s - s0);
 * }
 *
 * It should either become that C function or be made fast enough to
 * justify staying in assembler.
 */

	ENTRY(strlen)
#ifdef DEBUG
	movq	postbootkernelbase(%rip), %rax
	cmpq	%rax, %rdi		/* s >= postbootkernelbase ? */
	jae	str_valid
	pushq	%rbp			/* build a frame for panic() */
	movq	%rsp, %rbp
	leaq	.str_panic_msg(%rip), %rdi
	xorl	%eax, %eax		/* no vector args to varargs panic */
	call	panic
#endif	/* DEBUG */
str_valid:
	movq	%rdi, %rax		/* %rax = s0 (mov leaves flags alone) */
	cmpb	$0, (%rdi)		/* empty string? */
	je	.null_found
	.align	4
.strlen_loop:
	incq	%rdi			/* s++ */
	cmpb	$0, (%rdi)
	jne	.strlen_loop		/* until *s == 0 */
.null_found:
	subq	%rax, %rdi		/* %rdi = s - s0 */
	movq	%rdi, %rax		/* return (s - s0) */
	ret
	SET_SIZE(strlen)

#elif defined(__i386)

	/*
	 * size_t strlen(const char *str)	4(%esp) = str
	 *
	 * Bytes are examined one at a time until the pointer is 4-byte
	 * aligned, then a word at a time.  For a word w,
	 *	((w & 0x7f7f7f7f) + 0x7f7f7f7f) | w
	 * has bit 7 set in every byte that is non-zero, so the result
	 * masked with 0x80808080 equals 0x80808080 only when none of the
	 * four bytes is NUL.  Otherwise the word is rescanned bytewise.
	 */
	ENTRY(strlen)
#ifdef DEBUG
	movl	postbootkernelbase, %eax
	cmpl	%eax, 4(%esp)		/* str >= postbootkernelbase ? */
	jae	str_valid
	pushl	%ebp			/* build a frame for panic() */
	movl	%esp, %ebp
	pushl	$.str_panic_msg
	call	panic
#endif /* DEBUG */

str_valid:
	movl	4(%esp), %eax		/* %eax = current position */
	testl	$3, %eax
	jnz	.not_word_aligned	/* start bytewise if unaligned */
	.align	4
.word_aligned:
	movl	(%eax), %edx		/* %edx = w, next four bytes */
	addl	$4, %eax		/* step past them */
	movl	$0x7f7f7f7f, %ecx
	andl	%edx, %ecx		/* %ecx = w & 0x7f7f7f7f */
	addl	$0x7f7f7f7f, %ecx	/* carry into bit 7 if low bits set */
	orl	%edx, %ecx		/* fold in each byte's own bit 7 */
	andl	$0x80808080, %ecx	/* keep the per-byte flags only */
	cmpl	$0x80808080, %ecx	/* all four bytes non-zero? */
	je	.word_aligned		/* yes: try the next word */
	subl	$4, %eax		/* no: back up and find the NUL */
.not_word_aligned:
	cmpb	$0, (%eax)
	je	.null_found		/* found the terminator */
	incl	%eax			/* next byte */
	testl	$3, %eax
	jnz	.not_word_aligned	/* still unaligned: keep going */
	jmp	.word_aligned		/* aligned: switch to word scan */
	.align	4
.null_found:
	subl	4(%esp), %eax		/* return (position - str) */
	ret
	SET_SIZE(strlen)

#endif	/* __i386 */

#ifdef DEBUG
	/* Message handed to panic() by the DEBUG range check above. */
	.text
.str_panic_msg:
	.string "strlen: argument below kernelbase"
#endif /* DEBUG */

#endif	/* __lint */
1083
1084	/*
	 * Berkeley 4.3 introduced symbolically named interrupt levels
	 * as a way to deal with priority in a machine independent fashion.
1087	 * Numbered priorities are machine specific, and should be
1088	 * discouraged where possible.
1089	 *
1090	 * Note, for the machine specific priorities there are
1091	 * examples listed for devices that use a particular priority.
1092	 * It should not be construed that all devices of that
	 * type should be at that priority.  It is currently where
1094	 * the current devices fit into the priority scheme based
1095	 * upon time criticalness.
1096	 *
1097	 * The underlying assumption of these assignments is that
1098	 * IPL 10 is the highest level from which a device
1099	 * routine can call wakeup.  Devices that interrupt from higher
1100	 * levels are restricted in what they can do.  If they need
	 * kernel services they should schedule a routine at a lower
1102	 * level (via software interrupt) to do the required
1103	 * processing.
1104	 *
1105	 * Examples of this higher usage:
1106	 *	Level	Usage
1107	 *	14	Profiling clock (and PROM uart polling clock)
1108	 *	12	Serial ports
1109	 *
1110	 * The serial ports request lower level processing on level 6.
1111	 *
1112	 * Also, almost all splN routines (where N is a number or a
1113	 * mnemonic) will do a RAISE(), on the assumption that they are
1114	 * never used to lower our priority.
1115	 * The exceptions are:
1116	 *	spl8()		Because you can't be above 15 to begin with!
1117	 *	splzs()		Because this is used at boot time to lower our
1118	 *			priority, to allow the PROM to poll the uart.
1119	 *	spl0()		Used to lower priority to 0.
1120	 */
1121
#if defined(__lint)

/* Lint-only stubs for the assembly entry points defined below. */
int spl0(void)		{ return (0); }
int spl6(void)		{ return (0); }
int spl7(void)		{ return (0); }
int spl8(void)		{ return (0); }
int splhigh(void)	{ return (0); }
int splhi(void)		{ return (0); }
int splzs(void)		{ return (0); }

/* ARGSUSED */
void
splx(int level)
{}

#else	/* __lint */

#if defined(__amd64)

/*
 * SETPRI(level) hands the new level to do_splx and RAISE(level) hands
 * it to splr.  On amd64 the level goes in %edi and the helper is
 * entered with a jmp, so the helper's ret returns directly to the
 * caller of the splN routine.
 */
#define	SETPRI(level) \
	movl	$/**/level, %edi;	/* new priority */		\
	jmp	do_splx			/* redirect to do_splx */

#define	RAISE(level) \
	movl	$/**/level, %edi;	/* new priority */		\
	jmp	splr			/* redirect to splr */

#elif defined(__i386)

/*
 * i386 forms of the same macros: the level is pushed as a stack
 * argument, the helper is called, and the argument is popped before
 * returning.
 */
#define	SETPRI(level) \
	pushl	$/**/level;	/* new priority */			\
	call	do_splx;	/* invoke common splx code */		\
	addl	$4, %esp;	/* unstack arg */			\
	ret

#define	RAISE(level) \
	pushl	$/**/level;	/* new priority */			\
	call	splr;		/* invoke common splr code */		\
	addl	$4, %esp;	/* unstack args */			\
	ret

#endif	/* __i386 */

	/* locks out all interrupts, including memory errors */
	ENTRY(spl8)
	SETPRI(15)
	SET_SIZE(spl8)

	/* just below the level that profiling runs */
	ENTRY(spl7)
	RAISE(13)
	SET_SIZE(spl7)

	/* sun specific - highest priority onboard serial i/o asy ports */
	ENTRY(splzs)
	SETPRI(12)	/* Can't be a RAISE, as it's used to lower us */
	SET_SIZE(splzs)

	/*
	 * splhi, splhigh, spl6 and i_ddi_splhigh are four names for one
	 * body: RAISE to DISP_LEVEL.
	 */
	ENTRY(splhi)
	ALTENTRY(splhigh)
	ALTENTRY(spl6)
	ALTENTRY(i_ddi_splhigh)

	RAISE(DISP_LEVEL)

	SET_SIZE(i_ddi_splhigh)
	SET_SIZE(spl6)
	SET_SIZE(splhigh)
	SET_SIZE(splhi)

	/* allow all interrupts */
	ENTRY(spl0)
	SETPRI(0)
	SET_SIZE(spl0)


	/*
	 * splx implementation: the caller's argument is already where
	 * do_splx expects it, so just jump there.
	 */
	ENTRY(splx)
	jmp	do_splx		/* redirect to common splx code */
	SET_SIZE(splx)

#endif	/* __lint */
1204
#if defined(__i386)

/*
 * Read and write the %gs register
 */

#if defined(__lint)

/* Lint-only stub for the assembly routine of the same name. */
/*ARGSUSED*/
uint16_t
getgs(void)
{ return (0); }

/* Lint-only stub for the assembly routine of the same name. */
/*ARGSUSED*/
void
setgs(uint16_t sel)
{}

#else	/* __lint */

	/*
	 * uint16_t getgs(void)
	 *
	 * %eax is cleared first so that only the low 16 bits of the
	 * return value carry the selector.
	 */
	ENTRY(getgs)
	clr	%eax
	movw	%gs, %ax
	ret
	SET_SIZE(getgs)

	/*
	 * void setgs(uint16_t sel)		4(%esp) = sel
	 *
	 * Load %gs directly from the stack argument.
	 */
	ENTRY(setgs)
	movw	4(%esp), %gs
	ret
	SET_SIZE(setgs)

#endif	/* __lint */
#endif	/* __i386 */
1238
1239#if defined(__lint)
1240
/* Lint-only stub for the assembly routine of the same name. */
void
pc_reset(void)
{}

/* Lint-only stub for the assembly entry point of the same name. */
void
efi_reset(void)
{}
1248
1249#else	/* __lint */
1250
1251	ENTRY(wait_500ms)
1252	push	%ebx
1253	movl	$50000, %ebx
12541:
1255	call	tenmicrosec
1256	decl	%ebx
1257	jnz	1b
1258	pop	%ebx
1259	ret
1260	SET_SIZE(wait_500ms)
1261
/*
 * Bits of pc_reset_methods.  Each bit enables one of the reset
 * attempts made, in this order, by pc_reset().
 */
#define	RESET_METHOD_KBC	1
#define	RESET_METHOD_PORT92	2
#define RESET_METHOD_PCI	4

	/* All three methods are enabled in the initial value. */
	DGDEF3(pc_reset_methods, 4, 8)
	.long RESET_METHOD_KBC|RESET_METHOD_PORT92|RESET_METHOD_PCI;

	/*
	 * void pc_reset(void)
	 *
	 * Try each reset method enabled in pc_reset_methods, waiting
	 * 500ms after each attempt for it to take effect, then fall
	 * through into efi_reset, which triple-faults the CPU.
	 */
	ENTRY(pc_reset)

#if defined(__i386)
	testl	$RESET_METHOD_KBC, pc_reset_methods
#elif defined(__amd64)
	testl	$RESET_METHOD_KBC, pc_reset_methods(%rip)
#endif
	jz	1f

	/*
	 * Try the classic keyboard controller-triggered reset.
	 */
	movb	$0xfe, %al
	movw	$0x64, %dx
	outb	(%dx)

	/*
	 * Wait up to 500 milliseconds here for the keyboard controller
	 * to pull the reset line.  On some systems where the keyboard
	 * controller is slow to pull the reset line, the next reset method
	 * may be executed (which may be bad if those systems hang when the
	 * next reset method is used, e.g. Ferrari 3400 (doesn't like port 92),
	 * and Ferrari 4000 (doesn't like the cf9 reset method))
	 */
	call	wait_500ms

1:
#if defined(__i386)
	testl	$RESET_METHOD_PORT92, pc_reset_methods
#elif defined(__amd64)
	testl	$RESET_METHOD_PORT92, pc_reset_methods(%rip)
#endif
	jz	3f

	/*
	 * Try port 0x92 fast reset
	 */
	movw	$0x92, %dx
	inb	(%dx)
	cmpb	$0xff, %al	/* If port's not there, we should get back 0xFF */
	je	1f
	testb	$1, %al		/* If bit 0 */
	jz	2f		/* is clear, jump to perform the reset */
	andb	$0xfe, %al	/* otherwise, */
	outb	(%dx)		/* clear bit 0 first, then */
2:
	orb	$1, %al		/* Set bit 0 */
	outb	(%dx)		/* and reset the system */
1:

	call	wait_500ms

3:
#if defined(__i386)
	testl	$RESET_METHOD_PCI, pc_reset_methods
#elif defined(__amd64)
	testl	$RESET_METHOD_PCI, pc_reset_methods(%rip)
#endif
	jz	4f

	/*
	 * Try the PCI (soft) reset vector (should work on all modern systems,
	 * but has been shown to cause problems on 450NX systems, and some newer
	 * systems (e.g. ATI IXP400-equipped systems))
	 * When resetting via this method, 2 writes are required.  The first
	 * targets bit 1 (0=hard reset without power cycle, 1=hard reset with
	 * power cycle).
	 * The reset occurs on the second write, during bit 2's transition from
	 * 0->1.
	 */
	movw	$0xcf9, %dx
	movb	$0x2, %al	/* Reset mode = hard, no power cycle */
	outb	(%dx)
	movb	$0x6, %al
	outb	(%dx)

	call	wait_500ms

4:
	/*
	 * port 0xcf9 failed also.  Last-ditch effort is to
	 * triple-fault the CPU.
	 * Also, use triple fault for EFI firmware
	 */
	ENTRY(efi_reset)
#if defined(__amd64)
	pushq	$0x0
	pushq	$0x0		/* IDT base of 0, limit of 0 + unused bytes */
	lidt	(%rsp)
#elif defined(__i386)
	pushl	$0x0
	pushl	$0x0		/* IDT base of 0, limit of 0 + 2 unused bytes */
	lidt	(%esp)
#endif
	int	$0x0		/* Trigger interrupt, generate triple-fault */

	cli
	hlt			/* Wait forever */
	/*NOTREACHED*/
	SET_SIZE(efi_reset)
	SET_SIZE(pc_reset)
1367
1368#endif	/* __lint */
1369
1370/*
1371 * C callable in and out routines
1372 */
1373
#if defined(__lint)

/* Lint-only stub for the assembly routine of the same name. */
/* ARGSUSED */
void
outl(int port_address, uint32_t val)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void outl(int port_address, uint32_t val)
	 *	%edi = port_address, %esi = val
	 *
	 * Write the 32-bit val to the I/O port numbered by the low 16
	 * bits of port_address.
	 */
	ENTRY(outl)
	movl	%esi, %eax		/* %eax = val */
	movw	%di, %dx		/* %dx = port */
	outl	(%dx)
	ret
	SET_SIZE(outl)

#elif defined(__i386)

	/*
	 * Stack offsets of the two arguments.  These symbols are also
	 * used by the other i386 port I/O routines in this file.
	 */
	.set	PORT, 4
	.set	VAL, 8

	/* void outl(int port_address, uint32_t val) */
	ENTRY(outl)
	movl	VAL(%esp), %eax		/* %eax = val */
	movw	PORT(%esp), %dx		/* %dx = port */
	outl	(%dx)
	ret
	SET_SIZE(outl)

#endif	/* __i386 */
#endif	/* __lint */
1406
#if defined(__lint)

/* ARGSUSED */
void
outw(int port_address, uint16_t val)
{}

#else	/* __lint */

/*
 * void outw(int port_address, uint16_t val)
 *
 * Write the 16-bit quantity val to I/O port port_address.  The
 * instruction is spelled as outl behind the D16 macro.
 */
#if defined(__amd64)

	ENTRY(outw)
	movw	%si, %ax		/* %ax = val (2nd argument) */
	movw	%di, %dx		/* %dx = port (1st argument) */
	D16 outl (%dx)			/* XX64 why not outw? */
	ret
	SET_SIZE(outw)

#elif defined(__i386)

	ENTRY(outw)
	movw	VAL(%esp), %ax		/* %ax = val */
	movw	PORT(%esp), %dx		/* %dx = port */
	D16 outl (%dx)
	ret
	SET_SIZE(outw)

#endif	/* __i386 */
#endif	/* __lint */
1436
#if defined(__lint)

/* ARGSUSED */
void
outb(int port_address, uint8_t val)
{}

#else	/* __lint */

/*
 * void outb(int port_address, uint8_t val)
 *
 * Write the byte val to I/O port port_address.
 */
#if defined(__amd64)

	ENTRY(outb)
	movb	%sil, %al		/* %al = val (2nd argument) */
	movw	%di, %dx		/* %dx = port (1st argument) */
	outb	(%dx)
	ret
	SET_SIZE(outb)

#elif defined(__i386)

	ENTRY(outb)
	movb	VAL(%esp), %al		/* %al = val */
	movw	PORT(%esp), %dx		/* %dx = port */
	outb	(%dx)
	ret
	SET_SIZE(outb)

#endif	/* __i386 */
#endif	/* __lint */
1466
#if defined(__lint)

/* ARGSUSED */
uint32_t
inl(int port_address)
{ return (0); }

#else	/* __lint */

/*
 * uint32_t inl(int port_address)
 *
 * Read a 32-bit quantity from I/O port port_address and return it.
 */
#if defined(__amd64)

	ENTRY(inl)
	movw	%di, %dx		/* %dx = port (1st argument) */
	xorl	%eax, %eax
	inl	(%dx)			/* result arrives in %eax */
	ret
	SET_SIZE(inl)

#elif defined(__i386)

	ENTRY(inl)
	movw	PORT(%esp), %dx		/* %dx = port */
	inl	(%dx)			/* result arrives in %eax */
	ret
	SET_SIZE(inl)

#endif	/* __i386 */
#endif	/* __lint */
1495
#if defined(__lint)

/* ARGSUSED */
uint16_t
inw(int port_address)
{ return (0); }

#else	/* __lint */

/*
 * uint16_t inw(int port_address)
 *
 * Read a 16-bit quantity from I/O port port_address.  %eax is cleared
 * first so the value comes back zero-extended.
 */
#if defined(__amd64)

	ENTRY(inw)
	movw	%di, %dx		/* %dx = port (1st argument) */
	xorl	%eax, %eax		/* upper bits of the result are 0 */
	D16 inl	(%dx)			/* 16-bit read into %ax */
	ret
	SET_SIZE(inw)

#elif defined(__i386)

	ENTRY(inw)
	movw	PORT(%esp), %dx		/* %dx = port */
	xorl	%eax, %eax		/* upper bits of the result are 0 */
	D16 inl	(%dx)			/* 16-bit read into %ax */
	ret
	SET_SIZE(inw)

#endif	/* __i386 */
#endif	/* __lint */
1525
1526
#if defined(__lint)

/* ARGSUSED */
uint8_t
inb(int port_address)
{ return (0); }

#else	/* __lint */

/*
 * uint8_t inb(int port_address)
 *
 * Read one byte from I/O port port_address.  %eax is cleared first so
 * the value comes back zero-extended.
 */
#if defined(__amd64)

	ENTRY(inb)
	movw	%di, %dx		/* %dx = port (1st argument) */
	xorl	%eax, %eax		/* upper bits of the result are 0 */
	inb	(%dx)			/* byte read into %al */
	ret
	SET_SIZE(inb)

#elif defined(__i386)

	ENTRY(inb)
	movw	PORT(%esp), %dx		/* %dx = port */
	xorl	%eax, %eax		/* upper bits of the result are 0 */
	inb	(%dx)			/* byte read into %al */
	ret
	SET_SIZE(inb)

#endif	/* __i386 */
#endif	/* __lint */
1556
1557
#if defined(__lint)

/* ARGSUSED */
void
repoutsw(int port, uint16_t *addr, int cnt)
{}

#else	/* __lint */

/*
 * void repoutsw(int port, uint16_t *addr, int cnt)
 *
 * Write cnt 16-bit words, read from successive locations starting at
 * addr, to I/O port port.
 */
#if defined(__amd64)

	ENTRY(repoutsw)
	movl	%edx, %ecx		/* cnt first: %dx is reused below */
	movw	%di, %dx		/* %dx = port */
					/* %rsi already holds addr */
	rep
	  D16 outsl
	ret
	SET_SIZE(repoutsw)

#elif defined(__i386)

	/*
	 * One register is saved on entry, so at the time the arguments
	 * are fetched the stack looks like this:
	 *
	 *	+16	cnt
	 *	+12	addr
	 *	+8	port
	 *	+4	return address
	 *	+0	saved register	<-- %esp
	 *
	 * Pushing anything more requires adjusting these offsets.
	 */
	.set	PORT, 8
	.set	ADDR, 12
	.set	COUNT, 16

	ENTRY(repoutsw)
	pushl	%esi			/* outs reads through %esi */
	movl	ADDR(%esp), %esi
	movl	COUNT(%esp), %ecx
	movl	PORT(%esp), %edx
	rep
	  D16 outsl
	popl	%esi
	ret
	SET_SIZE(repoutsw)

#endif	/* __i386 */
#endif	/* __lint */
1607
1608
#if defined(__lint)

/* ARGSUSED */
void
repinsw(int port_addr, uint16_t *addr, int cnt)
{}

#else	/* __lint */

/*
 * void repinsw(int port_addr, uint16_t *addr, int cnt)
 *
 * Read cnt 16-bit words from I/O port port_addr and store them at
 * successive locations starting at addr.
 */
#if defined(__amd64)

	ENTRY(repinsw)
	movl	%edx, %ecx		/* cnt first: %dx is reused below */
	movw	%di, %dx		/* %dx = port */
	/*
	 * The string input instruction stores through %rdi, but the
	 * buffer pointer arrives in %rsi (2nd argument).  Without this
	 * move the data would be written to the address formed by the
	 * port number.
	 */
	movq	%rsi, %rdi
	rep
	  D16 insl
	ret
	SET_SIZE(repinsw)

#elif defined(__i386)

	/* PORT, ADDR and COUNT are the offsets set up for repoutsw. */
	ENTRY(repinsw)
	pushl	%edi			/* ins stores through %edi */
	movl	PORT(%esp), %edx
	movl	ADDR(%esp), %edi
	movl	COUNT(%esp), %ecx
	rep
	  D16 insl
	popl	%edi
	ret
	SET_SIZE(repinsw)

#endif	/* __i386 */
#endif	/* __lint */
1643
1644
#if defined(__lint)

/* ARGSUSED */
void
repinsb(int port, uint8_t *addr, int count)
{}

#else	/* __lint */

/*
 * void repinsb(int port, uint8_t *addr, int count)
 *
 * Read count bytes from I/O port port and store them at successive
 * locations starting at addr.
 */
#if defined(__amd64)

	ENTRY(repinsb)
	movl	%edx, %ecx		/* count first: %dx is reused below */
	movw	%di, %dx		/* %dx = port */
	movq	%rsi, %rdi		/* ins stores through %rdi */
	rep
	  insb
	ret
	SET_SIZE(repinsb)

#elif defined(__i386)

	/*
	 * One register is saved on entry, so at the time the arguments
	 * are fetched the stack looks like this:
	 *
	 *	+16	count
	 *	+12	addr
	 *	+8	port
	 *	+4	return address
	 *	+0	saved register	<-- %esp
	 *
	 * Pushing anything more requires adjusting these offsets.
	 */
	.set	IO_PORT, 8
	.set	IO_ADDR, 12
	.set	IO_COUNT, 16

	ENTRY(repinsb)
	pushl	%edi			/* ins stores through %edi */
	movl	IO_PORT(%esp), %edx
	movl	IO_COUNT(%esp), %ecx
	movl	IO_ADDR(%esp), %edi
	rep
	  insb
	popl	%edi
	ret
	SET_SIZE(repinsb)

#endif	/* __i386 */
#endif	/* __lint */
1695
1696
1697/*
1698 * Input a stream of 32-bit words.
1699 * NOTE: count is a DWORD count.
1700 */
#if defined(__lint)

/* ARGSUSED */
void
repinsd(int port, uint32_t *addr, int count)
{}

#else	/* __lint */

/*
 * void repinsd(int port, uint32_t *addr, int count)
 *
 * Read count 32-bit words from I/O port port into memory at addr.
 */
#if defined(__amd64)

	ENTRY(repinsd)
	movl	%edx, %ecx		/* count first: %dx is reused below */
	movw	%di, %dx		/* %dx = port */
	movq	%rsi, %rdi		/* ins stores through %rdi */
	rep
	  insl
	ret
	SET_SIZE(repinsd)

#elif defined(__i386)

	ENTRY(repinsd)
	pushl	%edi			/* ins stores through %edi */
	movl	IO_PORT(%esp), %edx
	movl	IO_COUNT(%esp), %ecx
	movl	IO_ADDR(%esp), %edi
	rep
	  insl
	popl	%edi
	ret
	SET_SIZE(repinsd)

#endif	/* __i386 */
#endif	/* __lint */
1736
1737/*
1738 * Output a stream of bytes
1739 * NOTE: count is a byte count
1740 */
#if defined(__lint)

/* ARGSUSED */
void
repoutsb(int port, uint8_t *addr, int count)
{}

#else	/* __lint */

/*
 * void repoutsb(int port, uint8_t *addr, int count)
 *
 * Write count bytes, read from memory starting at addr, to I/O port
 * port.
 */
#if defined(__amd64)

	ENTRY(repoutsb)
	movl	%edx, %ecx		/* count first: %dx is reused below */
	movw	%di, %dx		/* %dx = port */
					/* %rsi already holds addr */
	rep
	  outsb
	ret
	SET_SIZE(repoutsb)

#elif defined(__i386)

	ENTRY(repoutsb)
	pushl	%esi			/* outs reads through %esi */
	movl	IO_PORT(%esp), %edx
	movl	IO_COUNT(%esp), %ecx
	movl	IO_ADDR(%esp), %esi
	rep
	  outsb
	popl	%esi
	ret
	SET_SIZE(repoutsb)

#endif	/* __i386 */
#endif	/* __lint */
1775
1776/*
1777 * Output a stream of 32-bit words
1778 * NOTE: count is a DWORD count
1779 */
#if defined(__lint)

/* ARGSUSED */
void
repoutsd(int port, uint32_t *addr, int count)
{}

#else	/* __lint */

/*
 * void repoutsd(int port, uint32_t *addr, int count)
 *
 * Write count 32-bit words, read from memory starting at addr, to
 * I/O port port.
 */
#if defined(__amd64)

	ENTRY(repoutsd)
	movl	%edx, %ecx		/* count first: %dx is reused below */
	movw	%di, %dx		/* %dx = port */
					/* %rsi already holds addr */
	rep
	  outsl
	ret
	SET_SIZE(repoutsd)

#elif defined(__i386)

	ENTRY(repoutsd)
	pushl	%esi			/* outs reads through %esi */
	movl	IO_PORT(%esp), %edx
	movl	IO_COUNT(%esp), %ecx
	movl	IO_ADDR(%esp), %esi
	rep
	  outsl
	popl	%esi
	ret
	SET_SIZE(repoutsd)

#endif	/* __i386 */
#endif	/* __lint */
1814
1815/*
1816 * void int3(void)
1817 * void int18(void)
1818 * void int20(void)
1819 */
1820
#if defined(__lint)

void
int3(void)
{}

void
int18(void)
{}

void
int20(void)
{}

#else	/* __lint */

	/* Raise the T_BPTFLT software interrupt. */
	ENTRY(int3)
	int	$T_BPTFLT
	ret
	SET_SIZE(int3)

	/* Raise the T_MCE software interrupt. */
	ENTRY(int18)
	int	$T_MCE
	ret
	SET_SIZE(int18)

	/*
	 * Raise the T_DBGENTR software interrupt, but only when the
	 * RB_DEBUG bit is set in boothowto; otherwise just return.
	 */
	ENTRY(int20)
	movl	boothowto, %eax
	andl	$RB_DEBUG, %eax		/* isolate the RB_DEBUG bit */
	je	1f			/* clear: nothing to do */

	int	$T_DBGENTR
1:
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(int20)

#endif	/* __lint */
1859
#if defined(__lint)

/* ARGSUSED */
int
scanc(size_t size, uchar_t *cp, uchar_t *table, uchar_t mask)
{ return (0); }

#else	/* __lint */

/*
 * int scanc(size_t size, uchar_t *cp, uchar_t *table, uchar_t mask)
 *
 * Walk the size bytes at cp and stop at the first byte c for which
 * (table[c] & mask) is non-zero.  Returns the number of bytes from the
 * stopping point to the end of the buffer (0 if no byte matched).
 */
#if defined(__amd64)

	ENTRY(scanc)
	/*
	 * Arguments: %rdi = size, %rsi = cp, %rdx = table, %cl = mask.
	 * %rdi is turned into the end pointer.
	 */
	addq	%rsi, %rdi		/* end = cp + size */
.scanc_next:
	cmpq	%rdi, %rsi
	jae	.scanc_exit		/* cp reached end: no match */
	movzbq	(%rsi), %r8		/* %r8 = *cp, zero-extended */
	incq	%rsi			/* advance cp */
	testb	%cl, (%r8, %rdx)	/* table[*cp] & mask */
	je	.scanc_next		/* zero: keep scanning */
	decq	%rsi			/* step back onto the matching byte */
.scanc_exit:
	movl	%edi, %eax
	subl	%esi, %eax		/* return (end - cp) */
	ret
	SET_SIZE(scanc)

#elif defined(__i386)

	ENTRY(scanc)
	pushl	%edi
	pushl	%esi
	/*
	 * With two registers saved the arguments sit at:
	 *	12(%esp) size, 16(%esp) cp, 20(%esp) table, 24(%esp) mask
	 */
	movl	16(%esp), %esi		/* %esi = cp */
	movl	20(%esp), %edx		/* %edx = table */
	movb	24(%esp), %cl		/* %cl = mask */
	movl	%esi, %edi
	addl	12(%esp), %edi		/* %edi = end = cp + size */
.scanc_next:
	cmpl	%edi, %esi
	jae	.scanc_exit		/* cp reached end: no match */
	movzbl	(%esi), %eax		/* %eax = *cp, zero-extended */
	incl	%esi			/* advance cp */
	testb	%cl, (%edx, %eax)	/* table[*cp] & mask */
	je	.scanc_next		/* zero: keep scanning */
	decl	%esi			/* step back onto the matching byte */
.scanc_exit:
	movl	%edi, %eax
	subl	%esi, %eax		/* return (end - cp) */
	popl	%esi
	popl	%edi
	ret
	SET_SIZE(scanc)

#endif	/* __i386 */
#endif	/* __lint */
1920
1921/*
1922 * Replacement functions for ones that are normally inlined.
1923 * In addition to the copy in i86.il, they are defined here just in case.
1924 */
1925
#if defined(__lint)

ulong_t
intr_clear(void)
{ return (0); }

ulong_t
clear_int_flag(void)
{ return (0); }

#else	/* __lint */

/*
 * ulong_t intr_clear(void)
 * ulong_t clear_int_flag(void)
 *
 * Capture the flags register as the return value, then disable
 * interrupts through the CLI/CLIRET macros.  In the __xpv build, unless
 * xpv_panicking is set, the PS_IE bit of the returned value is rebuilt
 * from the event mask that CLIRET hands back.
 */
#if defined(__amd64)

	ENTRY(intr_clear)
	ENTRY(clear_int_flag)
	pushfq
	popq	%rax			/* %rax = flags on entry */
#if defined(__xpv)
	movl	xpv_panicking, %edi
	testl	%edi, %edi
	jnz	2f			/* panicking: take the plain CLI path */
	CLIRET(%rdi, %dl)	/* returns event mask in %dl */
	/*
	 * PS_IE in the return value mirrors the event mask:
	 * mask bit set => PS_IE clear, mask bit clear => PS_IE set.
	 */
	andq    $_BITNOT(PS_IE), %rax
	testb	$1, %dl
	jnz	1f
	orq	$PS_IE, %rax
1:
	ret
2:
#endif
	CLI(%rdi)
	ret
	SET_SIZE(clear_int_flag)
	SET_SIZE(intr_clear)

#elif defined(__i386)

	ENTRY(intr_clear)
	ENTRY(clear_int_flag)
	pushfl
	popl	%eax			/* %eax = flags on entry */
#if defined(__xpv)
	movl	xpv_panicking, %edx
	testl	%edx, %edx
	jnz	2f			/* panicking: take the plain CLI path */
	CLIRET(%edx, %cl)	/* returns event mask in %cl */
	/*
	 * PS_IE in the return value mirrors the event mask:
	 * mask bit set => PS_IE clear, mask bit clear => PS_IE set.
	 */
	andl    $_BITNOT(PS_IE), %eax
	testb	$1, %cl
	jnz	1f
	orl	$PS_IE, %eax
1:
	ret
2:
#endif
	CLI(%edx)
	ret
	SET_SIZE(clear_int_flag)
	SET_SIZE(intr_clear)

#endif	/* __i386 */
#endif	/* __lint */
1996
#if defined(__lint)

struct cpu *
curcpup(void)
{ return 0; }

#else	/* __lint */

/*
 * struct cpu *curcpup(void)
 *
 * Return the pointer stored at offset CPU_SELF of the %gs segment.
 */
#if defined(__amd64)

	ENTRY(curcpup)
	movq	%gs:CPU_SELF, %rax	/* return value */
	ret
	SET_SIZE(curcpup)

#elif defined(__i386)

	ENTRY(curcpup)
	movl	%gs:CPU_SELF, %eax	/* return value */
	ret
	SET_SIZE(curcpup)

#endif	/* __i386 */
#endif	/* __lint */
2021
#if defined(__lint)

/* ARGSUSED */
uint32_t
htonl(uint32_t i)
{ return (0); }

/* ARGSUSED */
uint32_t
ntohl(uint32_t i)
{ return (0); }

#else	/* __lint */

/*
 * uint32_t htonl(uint32_t i)
 * uint32_t ntohl(uint32_t i)
 *
 * Both names share one body: return i with its four bytes reversed.
 */
#if defined(__amd64)

	/* XX64 there must be shorter sequences for this */
	ENTRY(htonl)
	ALTENTRY(ntohl)
	bswap	%edi			/* reverse the bytes of the argument */
	movl	%edi, %eax		/* and hand it back */
	ret
	SET_SIZE(ntohl)
	SET_SIZE(htonl)

#elif defined(__i386)

	ENTRY(htonl)
	ALTENTRY(ntohl)
	movl	4(%esp), %eax		/* %eax = i */
	bswap	%eax			/* reverse its bytes */
	ret
	SET_SIZE(ntohl)
	SET_SIZE(htonl)

#endif	/* __i386 */
#endif	/* __lint */
2059
#if defined(__lint)

/* ARGSUSED */
uint16_t
htons(uint16_t i)
{ return (0); }

/* ARGSUSED */
uint16_t
ntohs(uint16_t i)
{ return (0); }


#else	/* __lint */

/*
 * uint16_t htons(uint16_t i)
 * uint16_t ntohs(uint16_t i)
 *
 * Both names share one body: return the low 16 bits of i with the two
 * bytes exchanged, zero-extended to 32 bits.
 */
#if defined(__amd64)

	ENTRY(htons)
	ALTENTRY(ntohs)
	movzwl	%di, %eax		/* %eax = low 16 bits of i */
	rolw	$8, %ax			/* exchange the two bytes */
	ret
	SET_SIZE(ntohs)
	SET_SIZE(htons)

#elif defined(__i386)

	ENTRY(htons)
	ALTENTRY(ntohs)
	movzwl	4(%esp), %eax		/* %eax = low 16 bits of i */
	rolw	$8, %ax			/* exchange the two bytes */
	ret
	SET_SIZE(ntohs)
	SET_SIZE(htons)

#endif	/* __i386 */
#endif	/* __lint */
2100
2101
#if defined(__lint)

/* ARGSUSED */
void
intr_restore(ulong_t i)
{ return; }

/* ARGSUSED */
void
restore_int_flag(ulong_t i)
{ return; }

#else	/* __lint */

/*
 * void intr_restore(ulong_t i)
 * void restore_int_flag(ulong_t i)
 *
 * Re-enable interrupts if, and only if, PS_IE is set in the saved flags
 * value i.  Nothing is done when PS_IE is clear.  In the __xpv build
 * nothing is done either when xpv_panicking is non-zero.
 */
#if defined(__amd64)

	ENTRY(intr_restore)
	ENTRY(restore_int_flag)
	testq	$PS_IE, %rdi
	je	1f			/* PS_IE clear: leave things alone */
#if defined(__xpv)
	movl	xpv_panicking, %esi
	testl	%esi, %esi
	jnz	1f
	/*
	 * The real IF bit cannot be changed from here because the
	 * kernel is running unprivileged; the virtual IF bit, the one
	 * CLI and STI manipulate, is what gets updated.
	 */
	IE_TO_EVENT_MASK(%rsi, %rdi)
#else
	sti
#endif
1:
	ret
	SET_SIZE(restore_int_flag)
	SET_SIZE(intr_restore)

#elif defined(__i386)

	ENTRY(intr_restore)
	ENTRY(restore_int_flag)
	testl	$PS_IE, 4(%esp)
	je	1f			/* PS_IE clear: leave things alone */
#if defined(__xpv)
	movl	xpv_panicking, %edx
	testl	%edx, %edx
	jnz	1f
	/*
	 * The real IF bit cannot be changed from here because the
	 * kernel is running unprivileged; the virtual IF bit, the one
	 * CLI and STI manipulate, is what gets updated.
	 */
	IE_TO_EVENT_MASK(%edx, 4(%esp))
#else
	sti
#endif
1:
	ret
	SET_SIZE(restore_int_flag)
	SET_SIZE(intr_restore)

#endif	/* __i386 */
#endif	/* __lint */
2168
#if defined(__lint)

void
sti(void)
{}

void
cli(void)
{}

#else	/* __lint */

	/*
	 * void cli(void)
	 * Expands the CLI macro, handing it a scratch register.
	 */
	ENTRY(cli)
#if defined(__amd64)
	CLI(%rax)
#elif defined(__i386)
	CLI(%eax)
#endif	/* __i386 */
	ret
	SET_SIZE(cli)

	/*
	 * void sti(void)
	 * Expands the STI macro.
	 */
	ENTRY(sti)
	STI
	ret
	SET_SIZE(sti)

#endif	/* __lint */
2196
#if defined(__lint)

dtrace_icookie_t
dtrace_interrupt_disable(void)
{ return (0); }

#else   /* __lint */

/*
 * dtrace_icookie_t dtrace_interrupt_disable(void)
 *
 * Return the flags register as the cookie and disable interrupts.
 * In the __xpv build, unless xpv_panicking is set, interrupts are
 * masked with CLIRET and the PS_IE bit of the cookie is rebuilt from
 * the event mask it returns; if xpv_panicking is set the flags are
 * returned untouched and nothing is masked.
 */
#if defined(__amd64)

	ENTRY(dtrace_interrupt_disable)
	pushfq
	popq	%rax			/* %rax = cookie */
#if defined(__xpv)
	movl	xpv_panicking, %edi
	testl	%edi, %edi
	jnz	1f
	CLIRET(%rdi, %dl)	/* returns event mask in %dl */
	/*
	 * PS_IE in the cookie mirrors the event mask:
	 * mask bit set => PS_IE clear, mask bit clear => PS_IE set.
	 */
	andq    $_BITNOT(PS_IE), %rax
	testb	$1, %dl
	jnz	1f
	orq	$PS_IE, %rax
1:
#else
	CLI(%rdx)
#endif
	ret
	SET_SIZE(dtrace_interrupt_disable)

#elif defined(__i386)

	ENTRY(dtrace_interrupt_disable)
	pushfl
	popl	%eax			/* %eax = cookie */
#if defined(__xpv)
	movl	xpv_panicking, %edx
	testl	%edx, %edx
	jnz	1f
	CLIRET(%edx, %cl)	/* returns event mask in %cl */
	/*
	 * PS_IE in the cookie mirrors the event mask:
	 * mask bit set => PS_IE clear, mask bit clear => PS_IE set.
	 */
	andl    $_BITNOT(PS_IE), %eax
	testb	$1, %cl
	jnz	1f
	orl	$PS_IE, %eax
1:
#else
	CLI(%edx)
#endif
	ret
	SET_SIZE(dtrace_interrupt_disable)

#endif	/* __i386 */
#endif	/* __lint */
2257
#if defined(__lint)

/*ARGSUSED*/
void
dtrace_interrupt_enable(dtrace_icookie_t cookie)
{}

#else	/* __lint */

/*
 * void dtrace_interrupt_enable(dtrace_icookie_t cookie)
 *
 * Load the flags register from cookie.  In the __xpv build, unless
 * xpv_panicking is set, the event mask is additionally updated from
 * the cookie through IE_TO_EVENT_MASK.
 *
 * The "1:" label in the __xpv section is the target of the
 * xpv_panicking test.  Numeric labels are not scoped to a function, so
 * without a label here the forward reference would bind to whichever
 * "1:" happens to come next in the file, inside a different routine.
 */
#if defined(__amd64)

	ENTRY(dtrace_interrupt_enable)
	pushq	%rdi
	popfq				/* flags = cookie */
#if defined(__xpv)
	leaq	xpv_panicking, %rdx
	movl	(%rdx), %edx
	cmpl	$0, %edx
	jne	1f
	/*
	 * Since we're -really- running unprivileged, our attempt
	 * to change the state of the IF bit will be ignored. The
	 * virtual IF bit is tweaked by CLI and STI.
	 */
	IE_TO_EVENT_MASK(%rdx, %rdi)
1:
#endif
	ret
	SET_SIZE(dtrace_interrupt_enable)

#elif defined(__i386)

	ENTRY(dtrace_interrupt_enable)
	movl	4(%esp), %eax		/* %eax = cookie */
	pushl	%eax
	popfl				/* flags = cookie */
#if defined(__xpv)
	leal	xpv_panicking, %edx
	movl	(%edx), %edx
	cmpl	$0, %edx
	jne	1f
	/*
	 * Since we're -really- running unprivileged, our attempt
	 * to change the state of the IF bit will be ignored. The
	 * virtual IF bit is tweaked by CLI and STI.
	 */
	IE_TO_EVENT_MASK(%edx, %eax)
1:
#endif
	ret
	SET_SIZE(dtrace_interrupt_enable)

#endif	/* __i386 */
#endif	/* __lint */
2310
2311
#if defined(__lint)

void
dtrace_membar_producer(void)
{}

void
dtrace_membar_consumer(void)
{}

#else	/* __lint */

	/*
	 * void dtrace_membar_producer(void)
	 * void dtrace_membar_consumer(void)
	 *
	 * Both routines consist of a return and nothing else.
	 */
	ENTRY(dtrace_membar_producer)
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(dtrace_membar_producer)

	ENTRY(dtrace_membar_consumer)
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(dtrace_membar_consumer)

#endif	/* __lint */
2335
#if defined(__lint)

kthread_id_t
threadp(void)
{ return ((kthread_id_t)0); }

#else	/* __lint */

/*
 * kthread_id_t threadp(void)
 *
 * Return the pointer stored at offset CPU_THREAD of the %gs segment.
 */
#if defined(__amd64)

	ENTRY(threadp)
	movq	%gs:CPU_THREAD, %rax	/* return value */
	ret
	SET_SIZE(threadp)

#elif defined(__i386)

	ENTRY(threadp)
	movl	%gs:CPU_THREAD, %eax	/* return value */
	ret
	SET_SIZE(threadp)

#endif	/* __i386 */
#endif	/* __lint */
2360
2361/*
2362 *   Checksum routine for Internet Protocol Headers
2363 */
2364
#if defined(__lint)

/* ARGSUSED */
unsigned int
ip_ocsum(
	ushort_t *address,	/* ptr to 1st message buffer */
	int halfword_count,	/* length of data */
	unsigned int sum)	/* partial checksum */
{
	int		i;
	unsigned int	psum = 0;	/* partial sum */

	for (i = 0; i < halfword_count; i++, address++) {
		psum += *address;
	}

	while ((psum >> 16) != 0) {
		psum = (psum & 0xffff) + (psum >> 16);
	}

	psum += sum;

	while ((psum >> 16) != 0) {
		psum = (psum & 0xffff) + (psum >> 16);
	}

	return (psum);
}

#else	/* __lint */

/*
 * unsigned int ip_ocsum(ushort_t *address, int halfword_count,
 *     unsigned int sum)
 *
 * Layout of the routine (identical for both architectures):
 *
 *   - If the address is not a multiple of 4, one leading halfword is
 *     added on its own before the main loop is entered.
 *   - The main loop consumes 32 halfwords (64 bytes) per pass as
 *     sixteen 32-bit add-with-carry steps, alternating between two
 *     accumulators.  The carry flag is live across the whole chain,
 *     so nothing that alters it may be placed between those steps.
 *   - A final partial pass (fewer than 32 halfwords) first adds a
 *     trailing odd halfword, then biases the data pointer and jumps
 *     into the middle of the chain through .ip_ocsum_jmptbl, indexed
 *     by the number of 32-bit words that remain.
 *   - At the end the two accumulators are combined and the 32-bit sum
 *     is folded to 16 bits.
 */
#if defined(__amd64)

	/*
	 * Register use:
	 *	%rsi	data pointer
	 *	%ecx	halfwords still to be added
	 *	%edx	accumulator, starts as the caller's partial sum
	 *	%eax	second accumulator, starts at zero
	 *	%rdi	scratch
	 */
	ENTRY(ip_ocsum)
	pushq	%rbp
	movq	%rsp, %rbp
#ifdef DEBUG
	cmpq	postbootkernelbase(%rip), %rdi
	jae	1f			/* address >= postbootkernelbase */
	movq	%rdi, %rsi		/* 2nd panic argument: the address */
	leaq	.ip_ocsum_panic_msg(%rip), %rdi
	xorl	%eax, %eax
	call	panic
	/*NOTREACHED*/
.ip_ocsum_panic_msg:
	.string	"ip_ocsum: address 0x%p below kernelbase\n"
1:
#endif
	movl	%esi, %ecx		/* %ecx = halfword_count */
	movq	%rdi, %rsi		/* %rsi = address */
					/* %edx already holds sum */
	xorl	%eax, %eax
	andl	%ecx, %ecx
	je	.ip_ocsum_done		/* nothing to add */
	testq	$3, %rsi
	jne	.ip_csum_notaligned
.ip_csum_aligned:	/* XX64 opportunities for 8-byte operations? */
.next_iter:
	/* XX64 opportunities for prefetch? */
	/* XX64 compute csum with 64 bit quantities? */
	subl	$32, %ecx		/* claim a full 64-byte pass */
	jl	.less_than_32		/* fewer than 32 halfwords remain */

	addl	(%rsi), %edx		/* start of the carry chain */
.only60:
	adcl	4(%rsi), %eax
.only56:
	adcl	8(%rsi), %edx
.only52:
	adcl	12(%rsi), %eax
.only48:
	adcl	16(%rsi), %edx
.only44:
	adcl	20(%rsi), %eax
.only40:
	adcl	24(%rsi), %edx
.only36:
	adcl	28(%rsi), %eax
.only32:
	adcl	32(%rsi), %edx
.only28:
	adcl	36(%rsi), %eax
.only24:
	adcl	40(%rsi), %edx
.only20:
	adcl	44(%rsi), %eax
.only16:
	adcl	48(%rsi), %edx
.only12:
	adcl	52(%rsi), %eax
.only8:
	adcl	56(%rsi), %edx
.only4:
	adcl	60(%rsi), %eax	/* could be adding -1 and -1 with a carry */
.only0:
	adcl	$0, %eax	/* could be adding -1 in eax with a carry */
	adcl	$0, %eax

	addq	$64, %rsi		/* step past the 64 bytes just added */
	andl	%ecx, %ecx
	jne	.next_iter		/* more halfwords to go */

.ip_ocsum_done:
	addl	%eax, %edx		/* merge the two accumulators */
	adcl	$0, %edx
	movl	%edx, %eax		/* fold the 32-bit sum to 16 bits: */
	shrl	$16, %eax		/* high half ... */
	addw	%dx, %ax		/* ... plus low half ... */
	adcw	$0, %ax			/* ... plus the carry out of that add */
	andl	$0xffff, %eax
	leave
	ret

.ip_csum_notaligned:
	movzwl	(%rsi), %edi		/* leading halfword */
	addl	%edi, %edx
	adcl	$0, %edx
	addq	$2, %rsi
	decl	%ecx
	jmp	.ip_csum_aligned

.less_than_32:
	addl	$32, %ecx		/* undo the subtraction: true count */
	testl	$1, %ecx
	je	.size_aligned		/* even number of halfwords */
	andl	$0xfe, %ecx		/* drop the odd one from the count */
	movzwl	(%rsi, %rcx, 2), %edi	/* and add that last halfword now */
	addl	%edi, %edx
	adcl	$0, %edx
.size_aligned:
	movl	%ecx, %edi
	shrl	$1, %ecx		/* %ecx = remaining 32-bit words */
	addl	%edi, %edi		/* %edi = remaining bytes */
	subq	$64, %rdi
	addq	%rdi, %rsi		/* bias so the data ends at 64(%rsi) */
	leaq    .ip_ocsum_jmptbl(%rip), %rdi
	movq	(%rdi, %rcx, 8), %rdi	/* entry point into the chain */
	xorl	%ecx, %ecx		/* count is now zero: last pass */
	clc				/* enter the chain without a carry */
	jmp 	*%rdi

	.align	8
.ip_ocsum_jmptbl:
	.quad	.only0, .only4, .only8, .only12, .only16, .only20
	.quad	.only24, .only28, .only32, .only36, .only40, .only44
	.quad	.only48, .only52, .only56, .only60
	SET_SIZE(ip_ocsum)

#elif defined(__i386)

	/*
	 * Register use:
	 *	%esi	data pointer
	 *	%ecx	halfwords still to be added
	 *	%edx	accumulator, starts as the caller's partial sum
	 *	%eax	second accumulator, starts at zero
	 *	%edi	scratch
	 */
	ENTRY(ip_ocsum)
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	8(%ebp), %esi		/* %esi = address */
	movl	12(%ebp), %ecx		/* %ecx = halfword_count */
	movl	16(%ebp), %edx		/* %edx = sum */
	xorl	%eax, %eax
	andl	%ecx, %ecx
	je	.ip_ocsum_done		/* nothing to add */

	testl	$3, %esi
	jne	.ip_csum_notaligned
.ip_csum_aligned:
.next_iter:
	subl	$32, %ecx		/* claim a full 64-byte pass */
	jl	.less_than_32		/* fewer than 32 halfwords remain */

	addl	(%esi), %edx		/* start of the carry chain */
.only60:
	adcl	4(%esi), %eax
.only56:
	adcl	8(%esi), %edx
.only52:
	adcl	12(%esi), %eax
.only48:
	adcl	16(%esi), %edx
.only44:
	adcl	20(%esi), %eax
.only40:
	adcl	24(%esi), %edx
.only36:
	adcl	28(%esi), %eax
.only32:
	adcl	32(%esi), %edx
.only28:
	adcl	36(%esi), %eax
.only24:
	adcl	40(%esi), %edx
.only20:
	adcl	44(%esi), %eax
.only16:
	adcl	48(%esi), %edx
.only12:
	adcl	52(%esi), %eax
.only8:
	adcl	56(%esi), %edx
.only4:
	adcl	60(%esi), %eax	/* We could be adding -1 and -1 with a carry */
.only0:
	adcl	$0, %eax	/* we could be adding -1 in eax with a carry */
	adcl	$0, %eax

	addl	$64, %esi		/* step past the 64 bytes just added */
	testl	%ecx, %ecx
	jne	.next_iter		/* more halfwords to go */

.ip_ocsum_done:
	addl	%eax, %edx		/* merge the two accumulators */
	adcl	$0, %edx
	movl	%edx, %eax		/* fold the 32-bit sum to 16 bits: */
	shrl	$16, %eax		/* high half ... */
	addw	%dx, %ax		/* ... plus low half ... */
	adcw	$0, %ax			/* ... plus the carry out of that add */
	andl	$0xffff, %eax
	popl	%edi			/* restore saved registers */
	popl	%esi
	popl	%ebx
	leave
	ret

.ip_csum_notaligned:
	movzwl	(%esi), %edi		/* leading halfword */
	addl	%edi, %edx
	adcl	$0, %edx
	addl	$2, %esi
	decl	%ecx
	jmp	.ip_csum_aligned

.less_than_32:
	addl	$32, %ecx		/* undo the subtraction: true count */
	testl	$1, %ecx
	je	.size_aligned		/* even number of halfwords */
	andl	$0xfe, %ecx		/* drop the odd one from the count */
	movzwl	(%esi, %ecx, 2), %edi	/* and add that last halfword now */
	addl	%edi, %edx
	adcl	$0, %edx
.size_aligned:
	movl	%ecx, %edi
	shrl	$1, %ecx		/* %ecx = remaining 32-bit words */
	addl	%edi, %edi		/* %edi = remaining bytes */
	subl	$64, %edi
	addl	%edi, %esi		/* bias so the data ends at 64(%esi) */
	movl	.ip_ocsum_jmptbl(, %ecx, 4), %edi /* entry into the chain */
	xorl	%ecx, %ecx		/* count is now zero: last pass */
	clc				/* enter the chain without a carry */
	jmp 	*%edi
	SET_SIZE(ip_ocsum)

	.data
	.align	4

.ip_ocsum_jmptbl:
	.long	.only0, .only4, .only8, .only12, .only16, .only20
	.long	.only24, .only28, .only32, .only36, .only40, .only44
	.long	.only48, .only52, .only56, .only60


#endif	/* __i386 */
#endif	/* __lint */
2631
/*
 * Multiply two 32-bit unsigned numbers and yield a 64-bit u_longlong_t
 * result, callable from C.  Provided to manipulate hrtime_t values.
 */
#if defined(__lint)

/* result = a * b; */

/* ARGSUSED */
unsigned long long
mul32(uint_t a, uint_t b)
{ return (0); }

#else	/* __lint */

/*
 * unsigned long long mul32(uint_t a, uint_t b)
 *
 * Return the full 64-bit product of the two 32-bit unsigned arguments.
 */
#if defined(__amd64)

	ENTRY(mul32)
	movl	%edi, %eax		/* %rax = a, zero-extended */
	movl	%esi, %ecx		/* %rcx = b, zero-extended */
	imulq	%rcx, %rax		/* product of two 32-bit values */
					/* always fits in 64 bits */
	ret
	SET_SIZE(mul32)

#elif defined(__i386)

	ENTRY(mul32)
	movl	4(%esp), %eax		/* %eax = a */
	mull	8(%esp)			/* %edx:%eax = a * b */
	ret
	SET_SIZE(mul32)

#endif	/* __i386 */
#endif	/* __lint */
2669
#if defined(notused)
#if defined(__lint)
/* ARGSUSED */
void
load_pte64(uint64_t *pte, uint64_t pte_value)
{}
#else	/* __lint */
	/*
	 * void load_pte64(uint64_t *pte, uint64_t pte_value)
	 *
	 * Store the 64-bit pte_value at *pte as two 32-bit writes, the
	 * upper half first and the lower half second.  Arguments are
	 * taken from the stack: 4(%esp) = pte, 8(%esp) = low half,
	 * 12(%esp) = high half.
	 */
	.globl load_pte64
load_pte64:
	movl	4(%esp), %edx		/* %edx = pte */
	movl	12(%esp), %ecx		/* %ecx = upper 32 bits */
	movl	8(%esp), %eax		/* %eax = lower 32 bits */
	movl	%ecx, 4(%edx)		/* upper half is written first */
	movl	%eax, (%edx)		/* lower half is written last */
	ret
#endif	/* __lint */
#endif	/* notused */
2687
#if defined(__lint)

/*ARGSUSED*/
void
scan_memory(caddr_t addr, size_t size)
{}

#else	/* __lint */

/*
 * void scan_memory(caddr_t addr, size_t size)
 *
 * Read through the memory range starting at addr with a repeated
 * string load; the data itself is discarded.  size is in bytes and is
 * rounded down to whole quadwords (amd64) or whole 32-bit words
 * (i386); if that leaves nothing, no memory is touched.
 */
#if defined(__amd64)

	ENTRY(scan_memory)
	movq	%rsi, %rcx
	shrq	$3, %rcx		/* %rcx = size / 8 = quadword count */
	jz	.scan_memory_done	/* less than one quadword */
	movq	%rdi, %rsi		/* lods reads through %rsi */
	rep lodsq			/* walk the range */
.scan_memory_done:
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(scan_memory)

#elif defined(__i386)

	ENTRY(scan_memory)
	pushl	%ecx
	pushl	%esi
	/* two registers saved: addr is at 12(%esp), size at 16(%esp) */
	movl	16(%esp), %ecx
	shrl	$2, %ecx		/* %ecx = size / 4 = 32-bit word count */
	jz	.scan_memory_done	/* less than one word */
	movl	12(%esp), %esi		/* lods reads through %esi */
	.byte	0xf3		/* rep prefix.  lame assembler.  sigh. */
	lodsl
.scan_memory_done:
	popl	%esi
	popl	%ecx
	ret
	SET_SIZE(scan_memory)

#endif	/* __i386 */
#endif	/* __lint */
2729
2730
#if defined(__lint)

/*ARGSUSED */
int
lowbit(ulong_t i)
{ return (0); }

#else	/* __lint */

/*
 * int lowbit(ulong_t i)
 *
 * Return the 1-based position of the least significant set bit of i,
 * or 0 when i is zero.
 *
 * The zero case is decided from ZF, which the bit scan sets when its
 * source is zero.  The scan's destination register is not relied upon
 * in that case, since its contents are not architecturally guaranteed
 * on every processor.
 */
#if defined(__amd64)

	ENTRY(lowbit)
	xorl	%eax, %eax		/* return value for i == 0 */
	bsfq	%rdi, %rcx		/* %rcx = bit index; ZF set if i == 0 */
	jz	1f
	leal	1(%rcx), %eax		/* convert to 1-based position */
1:
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(lowbit)

#elif defined(__i386)

	ENTRY(lowbit)
	xorl	%eax, %eax		/* return value for i == 0 */
	bsfl	4(%esp), %ecx		/* %ecx = bit index; ZF set if i == 0 */
	jz	1f
	leal	1(%ecx), %eax		/* convert to 1-based position */
1:
	ret
	SET_SIZE(lowbit)

#endif	/* __i386 */
#endif	/* __lint */
2760
#if defined(__lint)

/*ARGSUSED*/
int
highbit(ulong_t i)
{ return (0); }

#else	/* __lint */

/*
 * int highbit(ulong_t i)
 *
 * Return the 1-based position of the most significant set bit of i,
 * or 0 when i is zero.
 *
 * The zero case is decided from ZF, which the bit scan sets when its
 * source is zero.  The scan's destination register is not relied upon
 * in that case, since its contents are not architecturally guaranteed
 * on every processor.
 */
#if defined(__amd64)

	ENTRY(highbit)
	xorl	%eax, %eax		/* return value for i == 0 */
	bsrq	%rdi, %rcx		/* %rcx = bit index; ZF set if i == 0 */
	jz	1f
	leal	1(%rcx), %eax		/* convert to 1-based position */
1:
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(highbit)

#elif defined(__i386)

	ENTRY(highbit)
	xorl	%eax, %eax		/* return value for i == 0 */
	bsrl	4(%esp), %ecx		/* %ecx = bit index; ZF set if i == 0 */
	jz	1f
	leal	1(%ecx), %eax		/* convert to 1-based position */
1:
	ret
	SET_SIZE(highbit)

#endif	/* __i386 */
#endif	/* __lint */
2790
#if defined(__lint)

/*ARGSUSED*/
uint64_t
rdmsr(uint_t r)
{ return (0); }

/*ARGSUSED*/
void
wrmsr(uint_t r, const uint64_t val)
{}

/*ARGSUSED*/
uint64_t
xrdmsr(uint_t r)
{ return (0); }

/*ARGSUSED*/
void
xwrmsr(uint_t r, const uint64_t val)
{}

void
invalidate_cache(void)
{}

#else  /* __lint */

/*
 * Model-specific register access.
 *
 *	rdmsr(r)	return the 64-bit contents of MSR r
 *	wrmsr(r, val)	write val to MSR r
 *	xrdmsr(r)	as rdmsr, with XMSR_ACCESS_VAL loaded in %edi
 *	xwrmsr(r, val)	as wrmsr, with XMSR_ACCESS_VAL loaded in %edi
 *
 * The instructions take the MSR number in %ecx and the value split
 * across %edx (upper 32 bits) and %eax (lower 32 bits).
 */
#define	XMSR_ACCESS_VAL		$0x9c5a203a

#if defined(__amd64)

	ENTRY(rdmsr)
	movl	%edi, %ecx		/* %ecx = r */
	rdmsr
	shlq	$32, %rdx		/* assemble %edx:%eax into %rax */
	orq	%rdx, %rax
	ret
	SET_SIZE(rdmsr)

	ENTRY(wrmsr)
	movl	%edi, %ecx		/* %ecx = r */
	movl	%esi, %eax		/* %eax = lower half of val */
	movq	%rsi, %rdx
	shrq	$32, %rdx		/* %edx = upper half of val */
	wrmsr
	ret
	SET_SIZE(wrmsr)

	ENTRY(xrdmsr)
	pushq	%rbp
	movq	%rsp, %rbp
	movl	%edi, %ecx		/* %ecx = r (before %edi is reused) */
	movl	XMSR_ACCESS_VAL, %edi	/* this value is needed to access MSR */
	rdmsr
	shlq	$32, %rdx		/* assemble %edx:%eax into %rax */
	orq	%rdx, %rax
	leave
	ret
	SET_SIZE(xrdmsr)

	ENTRY(xwrmsr)
	pushq	%rbp
	movq	%rsp, %rbp
	movl	%edi, %ecx		/* %ecx = r (before %edi is reused) */
	movl	XMSR_ACCESS_VAL, %edi	/* this value is needed to access MSR */
	movl	%esi, %eax		/* %eax = lower half of val */
	movq	%rsi, %rdx
	shrq	$32, %rdx		/* %edx = upper half of val */
	wrmsr
	leave
	ret
	SET_SIZE(xwrmsr)

#elif defined(__i386)

	ENTRY(rdmsr)
	movl	4(%esp), %ecx		/* %ecx = r */
	rdmsr				/* result returned in %edx:%eax */
	ret
	SET_SIZE(rdmsr)

	ENTRY(wrmsr)
	movl	12(%esp), %edx		/* %edx = upper half of val */
	movl	8(%esp), %eax		/* %eax = lower half of val */
	movl	4(%esp), %ecx		/* %ecx = r */
	wrmsr
	ret
	SET_SIZE(wrmsr)

	ENTRY(xrdmsr)
	pushl	%ebp
	movl	%esp, %ebp
	movl	8(%ebp), %ecx		/* %ecx = r */
	pushl	%edi			/* %edi must be preserved */
	movl	XMSR_ACCESS_VAL, %edi	/* this value is needed to access MSR */
	rdmsr				/* result returned in %edx:%eax */
	popl	%edi
	leave
	ret
	SET_SIZE(xrdmsr)

	ENTRY(xwrmsr)
	pushl	%ebp
	movl	%esp, %ebp
	movl	16(%ebp), %edx		/* %edx = upper half of val */
	movl	12(%ebp), %eax		/* %eax = lower half of val */
	movl	8(%ebp), %ecx		/* %ecx = r */
	pushl	%edi			/* %edi must be preserved */
	movl	XMSR_ACCESS_VAL, %edi	/* this value is needed to access MSR */
	wrmsr
	popl	%edi
	leave
	ret
	SET_SIZE(xwrmsr)

#endif	/* __i386 */

	/*
	 * void invalidate_cache(void)
	 * Execute wbinvd.
	 */
	ENTRY(invalidate_cache)
	wbinvd
	ret
	SET_SIZE(invalidate_cache)

#endif	/* __lint */
2915
#if defined(__lint)

/*ARGSUSED*/
void
getcregs(struct cregs *crp)
{}

#else	/* __lint */

/*
 * void getcregs(struct cregs *crp)
 *
 * Snapshot control and descriptor-table registers into *crp, using the
 * CREG_* field offsets.  In the __xpv build the structure is zeroed
 * with bzero and only %cr0, %cr2, %cr3 and %cr4 are recorded.
 */
#if defined(__amd64)

	ENTRY_NP(getcregs)
#if defined(__xpv)
	/*
	 * Most of the hardware control registers and descriptor tables
	 * cannot be read directly here, so start from an all-zero
	 * structure.
	 *
	 * XXPV	Perhaps it would be helpful for the hypervisor to return
	 *	virtualized versions of these for post-mortem use.
	 *	(Need to reevaluate - perhaps it already does!)
	 */
	pushq	%rdi		/* keep crp across the call */
	movq	$CREGSZ, %rsi
	call	bzero
	popq	%rdi

	/*
	 * Record the registers that can be read.
	 */
	movq	%cr0, %rax
	movq	%rax, CREG_CR0(%rdi)
	movq	%cr2, %rax
	movq	%rax, CREG_CR2(%rdi)
	movq	%cr3, %rax
	movq	%rax, CREG_CR3(%rdi)
	movq	%cr4, %rax
	movq	%rax, CREG_CR4(%rdi)

#else	/* __xpv */

/* Read MSR r and store its 64-bit value at offset off from register d. */
#define	GETMSR(r, off, d)	\
	movl	$r, %ecx;	\
	rdmsr;			\
	movl	%eax, off(d);	\
	movl	%edx, off+4(d)

	/*
	 * Each table/selector store is narrower than the space that is
	 * cleared for it, so the zero is written first and the store
	 * second.
	 */
	xorl	%eax, %eax
	movq	%rax, CREG_GDT+8(%rdi)
	sgdt	CREG_GDT(%rdi)		/* stores 10 bytes */
	movq	%rax, CREG_IDT+8(%rdi)
	sidt	CREG_IDT(%rdi)		/* stores 10 bytes */
	movq	%rax, CREG_LDT(%rdi)
	sldt	CREG_LDT(%rdi)		/* stores 2 bytes */
	movq	%rax, CREG_TASKR(%rdi)
	str	CREG_TASKR(%rdi)	/* stores 2 bytes */
	movq	%cr0, %rax
	movq	%rax, CREG_CR0(%rdi)
	movq	%cr2, %rax
	movq	%rax, CREG_CR2(%rdi)
	movq	%cr3, %rax
	movq	%rax, CREG_CR3(%rdi)
	movq	%cr4, %rax
	movq	%rax, CREG_CR4(%rdi)
	movq	%cr8, %rax
	movq	%rax, CREG_CR8(%rdi)
	GETMSR(MSR_AMD_KGSBASE, CREG_KGSBASE, %rdi)
	GETMSR(MSR_AMD_EFER, CREG_EFER, %rdi)
#endif	/* __xpv */
	ret
	SET_SIZE(getcregs)

#undef GETMSR

#elif defined(__i386)

	ENTRY_NP(getcregs)
#if defined(__xpv)
	/*
	 * Most of the hardware control registers and descriptor tables
	 * cannot be read directly here, so start from an all-zero
	 * structure.
	 *
	 * XXPV	Perhaps it would be helpful for the hypervisor to return
	 *	virtualized versions of these for post-mortem use.
	 *	(Need to reevaluate - perhaps it already does!)
	 */
	movl	4(%esp), %edx		/* %edx = crp */
	pushl	$CREGSZ
	pushl	%edx
	call	bzero
	addl	$8, %esp		/* discard the two arguments */
	movl	4(%esp), %edx		/* reload crp after the call */

	/*
	 * Record the registers that can be read.
	 */
	movl	%cr0, %eax
	movl	%eax, CREG_CR0(%edx)
	movl	%cr2, %eax
	movl	%eax, CREG_CR2(%edx)
	movl	%cr3, %eax
	movl	%eax, CREG_CR3(%edx)
	movl	%cr4, %eax
	movl	%eax, CREG_CR4(%edx)

#else	/* __xpv */

	movl	4(%esp), %edx		/* %edx = crp */
	movw	$0, CREG_GDT+6(%edx)	/* clear the 2 bytes past each */
	movw	$0, CREG_IDT+6(%edx)	/* 6-byte table descriptor */
	sgdt	CREG_GDT(%edx)
	sidt	CREG_IDT(%edx)
	sldt	CREG_LDT(%edx)
	str	CREG_TASKR(%edx)
	movl	%cr0, %eax
	movl	%eax, CREG_CR0(%edx)
	movl	%cr2, %eax
	movl	%eax, CREG_CR2(%edx)
	movl	%cr3, %eax
	movl	%eax, CREG_CR3(%edx)
	/*
	 * %cr4 is read only when X86_LARGEPAGE is set in x86_feature;
	 * otherwise zero is recorded in its place.
	 */
	xorl	%eax, %eax
	testl	$X86_LARGEPAGE, x86_feature
	jz	.getcregs_cr4
	movl	%cr4, %eax
.getcregs_cr4:
	movl	%eax, CREG_CR4(%edx)
#endif
	ret
	SET_SIZE(getcregs)

#endif	/* __i386 */
#endif	/* __lint */
3049
3050
3051/*
3052 * A panic trigger is a word which is updated atomically and can only be set
3053 * once.  We atomically store 0xDEFACEDD and load the old value.  If the
3054 * previous value was 0, we succeed and return 1; otherwise return 0.
3055 * This allows a partially corrupt trigger to still trigger correctly.  DTrace
3056 * has its own version of this function to allow it to panic correctly from
3057 * probe context.
3058 */
3059#if defined(__lint)
3060
/*ARGSUSED*/
int
panic_trigger(int *tp)
{
	/* lint-only stand-in for the assembly routine */
	return (0);
}
3065
/*ARGSUSED*/
int
dtrace_panic_trigger(int *tp)
{
	/* lint-only stand-in for the assembly routine */
	return (0);
}
3070
3071#else	/* __lint */
3072
3073#if defined(__amd64)
3074
3075	ENTRY_NP(panic_trigger)
3076	xorl	%eax, %eax
3077	movl	$0xdefacedd, %edx
3078	lock
3079	  xchgl	%edx, (%rdi)
3080	cmpl	$0, %edx
3081	je	0f
3082	movl	$0, %eax
3083	ret
30840:	movl	$1, %eax
3085	ret
3086	SET_SIZE(panic_trigger)
3087
3088	ENTRY_NP(dtrace_panic_trigger)
3089	xorl	%eax, %eax
3090	movl	$0xdefacedd, %edx
3091	lock
3092	  xchgl	%edx, (%rdi)
3093	cmpl	$0, %edx
3094	je	0f
3095	movl	$0, %eax
3096	ret
30970:	movl	$1, %eax
3098	ret
3099	SET_SIZE(dtrace_panic_trigger)
3100
3101#elif defined(__i386)
3102
3103	ENTRY_NP(panic_trigger)
3104	movl	4(%esp), %edx		/ %edx = address of trigger
3105	movl	$0xdefacedd, %eax	/ %eax = 0xdefacedd
3106	lock				/ assert lock
3107	xchgl %eax, (%edx)		/ exchange %eax and the trigger
3108	cmpl	$0, %eax		/ if (%eax == 0x0)
3109	je	0f			/   return (1);
3110	movl	$0, %eax		/ else
3111	ret				/   return (0);
31120:	movl	$1, %eax
3113	ret
3114	SET_SIZE(panic_trigger)
3115
3116	ENTRY_NP(dtrace_panic_trigger)
3117	movl	4(%esp), %edx		/ %edx = address of trigger
3118	movl	$0xdefacedd, %eax	/ %eax = 0xdefacedd
3119	lock				/ assert lock
3120	xchgl %eax, (%edx)		/ exchange %eax and the trigger
3121	cmpl	$0, %eax		/ if (%eax == 0x0)
3122	je	0f			/   return (1);
3123	movl	$0, %eax		/ else
3124	ret				/   return (0);
31250:	movl	$1, %eax
3126	ret
3127	SET_SIZE(dtrace_panic_trigger)
3128
3129#endif	/* __i386 */
3130#endif	/* __lint */
3131
3132/*
3133 * The panic() and cmn_err() functions invoke vpanic() as a common entry point
3134 * into the panic code implemented in panicsys().  vpanic() is responsible
3135 * for passing through the format string and arguments, and constructing a
3136 * regs structure on the stack into which it saves the current register
3137 * values.  If we are not dying due to a fatal trap, these registers will
3138 * then be preserved in panicbuf as the current processor state.  Before
3139 * invoking panicsys(), vpanic() activates the first panic trigger (see
3140 * common/os/panic.c) and switches to the panic_stack if successful.  Note that
3141 * DTrace takes a slightly different panic path if it must panic from probe
3142 * context.  Instead of calling panic, it calls into dtrace_vpanic(), which
3143 * sets up the initial stack as vpanic does, calls dtrace_panic_trigger(), and
3144 * branches back into vpanic().
3145 */
3146#if defined(__lint)
3147
/*ARGSUSED*/
void
vpanic(const char *format, va_list alist)
{
	/* lint-only stub: intentionally empty */
}
3152
/*ARGSUSED*/
void
dtrace_vpanic(const char *format, va_list alist)
{
	/* lint-only stub: intentionally empty */
}
3157
3158#else	/* __lint */
3159
3160#if defined(__amd64)
3161
3162	ENTRY_NP(vpanic)			/* Initial stack layout: */
3163
3164	pushq	%rbp				/* | %rip | 	0x60	*/
3165	movq	%rsp, %rbp			/* | %rbp |	0x58	*/
3166	pushfq					/* | rfl  |	0x50	*/
3167	pushq	%r11				/* | %r11 |	0x48	*/
3168	pushq	%r10				/* | %r10 |	0x40	*/
3169	pushq	%rbx				/* | %rbx |	0x38	*/
3170	pushq	%rax				/* | %rax |	0x30	*/
3171	pushq	%r9				/* | %r9  |	0x28	*/
3172	pushq	%r8				/* | %r8  |	0x20	*/
3173	pushq	%rcx				/* | %rcx |	0x18	*/
3174	pushq	%rdx				/* | %rdx |	0x10	*/
3175	pushq	%rsi				/* | %rsi |	0x8 alist */
3176	pushq	%rdi				/* | %rdi |	0x0 format */
3177
3178	movq	%rsp, %rbx			/* %rbx = current %rsp */
3179
3180	leaq	panic_quiesce(%rip), %rdi	/* %rdi = &panic_quiesce */
3181	call	panic_trigger			/* %eax = panic_trigger() */
3182
3183vpanic_common:
3184	/*
3185	 * The panic_trigger result is in %eax from the call above, and
3186	 * dtrace_panic places it in %eax before branching here.
3187	 * The rdmsr instructions that follow below will clobber %eax so
3188	 * we stash the panic_trigger result in %r11d.
3189	 */
3190	movl	%eax, %r11d
3191	cmpl	$0, %r11d
3192	je	0f
3193
3194	/*
3195	 * If panic_trigger() was successful, we are the first to initiate a
3196	 * panic: we now switch to the reserved panic_stack before continuing.
3197	 */
3198	leaq	panic_stack(%rip), %rsp
3199	addq	$PANICSTKSIZE, %rsp
32000:	subq	$REGSIZE, %rsp
3201	/*
3202	 * Now that we've got everything set up, store the register values as
3203	 * they were when we entered vpanic() to the designated location in
3204	 * the regs structure we allocated on the stack.
3205	 */
3206	movq	0x0(%rbx), %rcx
3207	movq	%rcx, REGOFF_RDI(%rsp)
3208	movq	0x8(%rbx), %rcx
3209	movq	%rcx, REGOFF_RSI(%rsp)
3210	movq	0x10(%rbx), %rcx
3211	movq	%rcx, REGOFF_RDX(%rsp)
3212	movq	0x18(%rbx), %rcx
3213	movq	%rcx, REGOFF_RCX(%rsp)
3214	movq	0x20(%rbx), %rcx
3215
3216	movq	%rcx, REGOFF_R8(%rsp)
3217	movq	0x28(%rbx), %rcx
3218	movq	%rcx, REGOFF_R9(%rsp)
3219	movq	0x30(%rbx), %rcx
3220	movq	%rcx, REGOFF_RAX(%rsp)
3221	movq	0x38(%rbx), %rcx
3222	movq	%rcx, REGOFF_RBX(%rsp)
3223	movq	0x58(%rbx), %rcx
3224
3225	movq	%rcx, REGOFF_RBP(%rsp)
3226	movq	0x40(%rbx), %rcx
3227	movq	%rcx, REGOFF_R10(%rsp)
3228	movq	0x48(%rbx), %rcx
3229	movq	%rcx, REGOFF_R11(%rsp)
3230	movq	%r12, REGOFF_R12(%rsp)
3231
3232	movq	%r13, REGOFF_R13(%rsp)
3233	movq	%r14, REGOFF_R14(%rsp)
3234	movq	%r15, REGOFF_R15(%rsp)
3235
3236	xorl	%ecx, %ecx
3237	movw	%ds, %cx
3238	movq	%rcx, REGOFF_DS(%rsp)
3239	movw	%es, %cx
3240	movq	%rcx, REGOFF_ES(%rsp)
3241	movw	%fs, %cx
3242	movq	%rcx, REGOFF_FS(%rsp)
3243	movw	%gs, %cx
3244	movq	%rcx, REGOFF_GS(%rsp)
3245
3246	movq	$0, REGOFF_TRAPNO(%rsp)
3247
3248	movq	$0, REGOFF_ERR(%rsp)
3249	leaq	vpanic(%rip), %rcx
3250	movq	%rcx, REGOFF_RIP(%rsp)
3251	movw	%cs, %cx
3252	movzwq	%cx, %rcx
3253	movq	%rcx, REGOFF_CS(%rsp)
3254	movq	0x50(%rbx), %rcx
3255	movq	%rcx, REGOFF_RFL(%rsp)
3256	movq	%rbx, %rcx
3257	addq	$0x60, %rcx
3258	movq	%rcx, REGOFF_RSP(%rsp)
3259	movw	%ss, %cx
3260	movzwq	%cx, %rcx
3261	movq	%rcx, REGOFF_SS(%rsp)
3262
3263	/*
3264	 * panicsys(format, alist, rp, on_panic_stack)
3265	 */
3266	movq	REGOFF_RDI(%rsp), %rdi		/* format */
3267	movq	REGOFF_RSI(%rsp), %rsi		/* alist */
3268	movq	%rsp, %rdx			/* struct regs */
3269	movl	%r11d, %ecx			/* on_panic_stack */
3270	call	panicsys
3271	addq	$REGSIZE, %rsp
3272	popq	%rdi
3273	popq	%rsi
3274	popq	%rdx
3275	popq	%rcx
3276	popq	%r8
3277	popq	%r9
3278	popq	%rax
3279	popq	%rbx
3280	popq	%r10
3281	popq	%r11
3282	popfq
3283	leave
3284	ret
3285	SET_SIZE(vpanic)
3286
3287	ENTRY_NP(dtrace_vpanic)			/* Initial stack layout: */
3288
3289	pushq	%rbp				/* | %rip | 	0x60	*/
3290	movq	%rsp, %rbp			/* | %rbp |	0x58	*/
3291	pushfq					/* | rfl  |	0x50	*/
3292	pushq	%r11				/* | %r11 |	0x48	*/
3293	pushq	%r10				/* | %r10 |	0x40	*/
3294	pushq	%rbx				/* | %rbx |	0x38	*/
3295	pushq	%rax				/* | %rax |	0x30	*/
3296	pushq	%r9				/* | %r9  |	0x28	*/
3297	pushq	%r8				/* | %r8  |	0x20	*/
3298	pushq	%rcx				/* | %rcx |	0x18	*/
3299	pushq	%rdx				/* | %rdx |	0x10	*/
3300	pushq	%rsi				/* | %rsi |	0x8 alist */
3301	pushq	%rdi				/* | %rdi |	0x0 format */
3302
3303	movq	%rsp, %rbx			/* %rbx = current %rsp */
3304
3305	leaq	panic_quiesce(%rip), %rdi	/* %rdi = &panic_quiesce */
3306	call	dtrace_panic_trigger	/* %eax = dtrace_panic_trigger() */
3307	jmp	vpanic_common
3308
3309	SET_SIZE(dtrace_vpanic)
3310
3311#elif defined(__i386)
3312
3313	ENTRY_NP(vpanic)			/ Initial stack layout:
3314
3315	pushl	%ebp				/ | %eip | 20
3316	movl	%esp, %ebp			/ | %ebp | 16
3317	pushl	%eax				/ | %eax | 12
3318	pushl	%ebx				/ | %ebx |  8
3319	pushl	%ecx				/ | %ecx |  4
3320	pushl	%edx				/ | %edx |  0
3321
3322	movl	%esp, %ebx			/ %ebx = current stack pointer
3323
3324	lea	panic_quiesce, %eax		/ %eax = &panic_quiesce
3325	pushl	%eax				/ push &panic_quiesce
3326	call	panic_trigger			/ %eax = panic_trigger()
3327	addl	$4, %esp			/ reset stack pointer
3328
3329vpanic_common:
3330	cmpl	$0, %eax			/ if (%eax == 0)
3331	je	0f				/   goto 0f;
3332
3333	/*
3334	 * If panic_trigger() was successful, we are the first to initiate a
3335	 * panic: we now switch to the reserved panic_stack before continuing.
3336	 */
3337	lea	panic_stack, %esp		/ %esp  = panic_stack
3338	addl	$PANICSTKSIZE, %esp		/ %esp += PANICSTKSIZE
3339
33400:	subl	$REGSIZE, %esp			/ allocate struct regs
3341
3342	/*
3343	 * Now that we've got everything set up, store the register values as
3344	 * they were when we entered vpanic() to the designated location in
3345	 * the regs structure we allocated on the stack.
3346	 */
3347#if !defined(__GNUC_AS__)
3348	movw	%gs, %edx
3349	movl	%edx, REGOFF_GS(%esp)
3350	movw	%fs, %edx
3351	movl	%edx, REGOFF_FS(%esp)
3352	movw	%es, %edx
3353	movl	%edx, REGOFF_ES(%esp)
3354	movw	%ds, %edx
3355	movl	%edx, REGOFF_DS(%esp)
3356#else	/* __GNUC_AS__ */
3357	mov	%gs, %edx
3358	mov	%edx, REGOFF_GS(%esp)
3359	mov	%fs, %edx
3360	mov	%edx, REGOFF_FS(%esp)
3361	mov	%es, %edx
3362	mov	%edx, REGOFF_ES(%esp)
3363	mov	%ds, %edx
3364	mov	%edx, REGOFF_DS(%esp)
3365#endif	/* __GNUC_AS__ */
3366	movl	%edi, REGOFF_EDI(%esp)
3367	movl	%esi, REGOFF_ESI(%esp)
3368	movl	16(%ebx), %ecx
3369	movl	%ecx, REGOFF_EBP(%esp)
3370	movl	%ebx, %ecx
3371	addl	$20, %ecx
3372	movl	%ecx, REGOFF_ESP(%esp)
3373	movl	8(%ebx), %ecx
3374	movl	%ecx, REGOFF_EBX(%esp)
3375	movl	0(%ebx), %ecx
3376	movl	%ecx, REGOFF_EDX(%esp)
3377	movl	4(%ebx), %ecx
3378	movl	%ecx, REGOFF_ECX(%esp)
3379	movl	12(%ebx), %ecx
3380	movl	%ecx, REGOFF_EAX(%esp)
3381	movl	$0, REGOFF_TRAPNO(%esp)
3382	movl	$0, REGOFF_ERR(%esp)
3383	lea	vpanic, %ecx
3384	movl	%ecx, REGOFF_EIP(%esp)
3385#if !defined(__GNUC_AS__)
3386	movw	%cs, %edx
3387#else	/* __GNUC_AS__ */
3388	mov	%cs, %edx
3389#endif	/* __GNUC_AS__ */
3390	movl	%edx, REGOFF_CS(%esp)
3391	pushfl
3392	popl	%ecx
3393#if defined(__xpv)
3394	/*
3395	 * Synthesize the PS_IE bit from the event mask bit
3396	 */
3397	CURTHREAD(%edx)
3398	KPREEMPT_DISABLE(%edx)
3399	EVENT_MASK_TO_IE(%edx, %ecx)
3400	CURTHREAD(%edx)
3401	KPREEMPT_ENABLE_NOKP(%edx)
3402#endif
3403	movl	%ecx, REGOFF_EFL(%esp)
3404	movl	$0, REGOFF_UESP(%esp)
3405#if !defined(__GNUC_AS__)
3406	movw	%ss, %edx
3407#else	/* __GNUC_AS__ */
3408	mov	%ss, %edx
3409#endif	/* __GNUC_AS__ */
3410	movl	%edx, REGOFF_SS(%esp)
3411
3412	movl	%esp, %ecx			/ %ecx = &regs
3413	pushl	%eax				/ push on_panic_stack
3414	pushl	%ecx				/ push &regs
3415	movl	12(%ebp), %ecx			/ %ecx = alist
3416	pushl	%ecx				/ push alist
3417	movl	8(%ebp), %ecx			/ %ecx = format
3418	pushl	%ecx				/ push format
3419	call	panicsys			/ panicsys();
3420	addl	$16, %esp			/ pop arguments
3421
3422	addl	$REGSIZE, %esp
3423	popl	%edx
3424	popl	%ecx
3425	popl	%ebx
3426	popl	%eax
3427	leave
3428	ret
3429	SET_SIZE(vpanic)
3430
3431	ENTRY_NP(dtrace_vpanic)			/ Initial stack layout:
3432
3433	pushl	%ebp				/ | %eip | 20
3434	movl	%esp, %ebp			/ | %ebp | 16
3435	pushl	%eax				/ | %eax | 12
3436	pushl	%ebx				/ | %ebx |  8
3437	pushl	%ecx				/ | %ecx |  4
3438	pushl	%edx				/ | %edx |  0
3439
3440	movl	%esp, %ebx			/ %ebx = current stack pointer
3441
3442	lea	panic_quiesce, %eax		/ %eax = &panic_quiesce
3443	pushl	%eax				/ push &panic_quiesce
3444	call	dtrace_panic_trigger		/ %eax = dtrace_panic_trigger()
3445	addl	$4, %esp			/ reset stack pointer
3446	jmp	vpanic_common			/ jump back to common code
3447
3448	SET_SIZE(dtrace_vpanic)
3449
3450#endif	/* __i386 */
3451#endif	/* __lint */
3452
3453#if defined(__lint)
3454
void
hres_tick(void)
{
	/* lint-only stub: intentionally empty */
}
3458
3459int64_t timedelta;
3460hrtime_t hres_last_tick;
3461volatile timestruc_t hrestime;
3462int64_t hrestime_adj;
3463volatile int hres_lock;
3464hrtime_t hrtime_base;
3465
3466#else	/* __lint */
3467
3468	DGDEF3(hrestime, _MUL(2, CLONGSIZE), 8)
3469	.NWORD	0, 0
3470
3471	DGDEF3(hrestime_adj, 8, 8)
3472	.long	0, 0
3473
3474	DGDEF3(hres_last_tick, 8, 8)
3475	.long	0, 0
3476
3477	DGDEF3(timedelta, 8, 8)
3478	.long	0, 0
3479
3480	DGDEF3(hres_lock, 4, 8)
3481	.long	0
3482
3483	/*
3484	 * initialized to a non zero value to make pc_gethrtime()
3485	 * work correctly even before clock is initialized
3486	 */
3487	DGDEF3(hrtime_base, 8, 8)
3488	.long	_MUL(NSEC_PER_CLOCK_TICK, 6), 0
3489
3490	DGDEF3(adj_shift, 4, 4)
3491	.long	ADJ_SHIFT
3492
3493#if defined(__amd64)
3494
3495	ENTRY_NP(hres_tick)
3496	pushq	%rbp
3497	movq	%rsp, %rbp
3498
3499	/*
3500	 * We need to call *gethrtimef before picking up CLOCK_LOCK (obviously,
3501	 * hres_last_tick can only be modified while holding CLOCK_LOCK).
3502	 * At worst, performing this now instead of under CLOCK_LOCK may
3503	 * introduce some jitter in pc_gethrestime().
3504	 */
3505	call	*gethrtimef(%rip)
3506	movq	%rax, %r8
3507
3508	leaq	hres_lock(%rip), %rax
3509	movb	$-1, %dl
3510.CL1:
3511	xchgb	%dl, (%rax)
3512	testb	%dl, %dl
3513	jz	.CL3			/* got it */
3514.CL2:
3515	cmpb	$0, (%rax)		/* possible to get lock? */
3516	pause
3517	jne	.CL2
3518	jmp	.CL1			/* yes, try again */
3519.CL3:
3520	/*
3521	 * compute the interval since last time hres_tick was called
3522	 * and adjust hrtime_base and hrestime accordingly
3523	 * hrtime_base is an 8 byte value (in nsec), hrestime is
3524	 * a timestruc_t (sec, nsec)
3525	 */
3526	leaq	hres_last_tick(%rip), %rax
3527	movq	%r8, %r11
3528	subq	(%rax), %r8
3529	addq	%r8, hrtime_base(%rip)	/* add interval to hrtime_base */
3530	addq	%r8, hrestime+8(%rip)	/* add interval to hrestime.tv_nsec */
3531	/*
3532	 * Now that we have CLOCK_LOCK, we can update hres_last_tick
3533	 */
3534	movq	%r11, (%rax)
3535
3536	call	__adj_hrestime
3537
3538	/*
3539	 * release the hres_lock
3540	 */
3541	incl	hres_lock(%rip)
3542	leave
3543	ret
3544	SET_SIZE(hres_tick)
3545
3546#elif defined(__i386)
3547
3548	ENTRY_NP(hres_tick)
3549	pushl	%ebp
3550	movl	%esp, %ebp
3551	pushl	%esi
3552	pushl	%ebx
3553
3554	/*
3555	 * We need to call *gethrtimef before picking up CLOCK_LOCK (obviously,
3556	 * hres_last_tick can only be modified while holding CLOCK_LOCK).
3557	 * At worst, performing this now instead of under CLOCK_LOCK may
3558	 * introduce some jitter in pc_gethrestime().
3559	 */
3560	call	*gethrtimef
3561	movl	%eax, %ebx
3562	movl	%edx, %esi
3563
3564	movl	$hres_lock, %eax
3565	movl	$-1, %edx
3566.CL1:
3567	xchgb	%dl, (%eax)
3568	testb	%dl, %dl
3569	jz	.CL3			/ got it
3570.CL2:
3571	cmpb	$0, (%eax)		/ possible to get lock?
3572	pause
3573	jne	.CL2
3574	jmp	.CL1			/ yes, try again
3575.CL3:
3576	/*
3577	 * compute the interval since last time hres_tick was called
3578	 * and adjust hrtime_base and hrestime accordingly
3579	 * hrtime_base is an 8 byte value (in nsec), hrestime is
3580	 * timestruc_t (sec, nsec)
3581	 */
3582
3583	lea	hres_last_tick, %eax
3584
3585	movl	%ebx, %edx
3586	movl	%esi, %ecx
3587
3588	subl 	(%eax), %edx
3589	sbbl 	4(%eax), %ecx
3590
3591	addl	%edx, hrtime_base	/ add interval to hrtime_base
3592	adcl	%ecx, hrtime_base+4
3593
3594	addl 	%edx, hrestime+4	/ add interval to hrestime.tv_nsec
3595
3596	/
3597	/ Now that we have CLOCK_LOCK, we can update hres_last_tick.
3598	/
3599	movl	%ebx, (%eax)
3600	movl	%esi,  4(%eax)
3601
3602	/ get hrestime at this moment. used as base for pc_gethrestime
3603	/
3604	/ Apply adjustment, if any
3605	/
3606	/ #define HRES_ADJ	(NSEC_PER_CLOCK_TICK >> ADJ_SHIFT)
3607	/ (max_hres_adj)
3608	/
3609	/ void
3610	/ adj_hrestime()
3611	/ {
3612	/	long long adj;
3613	/
3614	/	if (hrestime_adj == 0)
3615	/		adj = 0;
3616	/	else if (hrestime_adj > 0) {
3617	/		if (hrestime_adj < HRES_ADJ)
3618	/			adj = hrestime_adj;
3619	/		else
3620	/			adj = HRES_ADJ;
3621	/	}
3622	/	else {
3623	/		if (hrestime_adj < -(HRES_ADJ))
3624	/			adj = -(HRES_ADJ);
3625	/		else
3626	/			adj = hrestime_adj;
3627	/	}
3628	/
3629	/	timedelta -= adj;
3630	/	hrestime_adj = timedelta;
3631	/	hrestime.tv_nsec += adj;
3632	/
3633	/	while (hrestime.tv_nsec >= NANOSEC) {
3634	/		one_sec++;
3635	/		hrestime.tv_sec++;
3636	/		hrestime.tv_nsec -= NANOSEC;
3637	/	}
3638	/ }
3639__adj_hrestime:
3640	movl	hrestime_adj, %esi	/ if (hrestime_adj == 0)
3641	movl	hrestime_adj+4, %edx
3642	andl	%esi, %esi
3643	jne	.CL4			/ no
3644	andl	%edx, %edx
3645	jne	.CL4			/ no
3646	subl	%ecx, %ecx		/ yes, adj = 0;
3647	subl	%edx, %edx
3648	jmp	.CL5
3649.CL4:
3650	subl	%ecx, %ecx
3651	subl	%eax, %eax
3652	subl	%esi, %ecx
3653	sbbl	%edx, %eax
3654	andl	%eax, %eax		/ if (hrestime_adj > 0)
3655	jge	.CL6
3656
3657	/ In the following comments, HRES_ADJ is used, while in the code
3658	/ max_hres_adj is used.
3659	/
3660	/ The test for "hrestime_adj < HRES_ADJ" is complicated because
3661	/ hrestime_adj is 64-bits, while HRES_ADJ is 32-bits.  We rely
3662	/ on the logical equivalence of:
3663	/
3664	/	!(hrestime_adj < HRES_ADJ)
3665	/
3666	/ and the two step sequence:
3667	/
3668	/	(HRES_ADJ - lsw(hrestime_adj)) generates a Borrow/Carry
3669	/
3670	/ which computes whether or not the least significant 32-bits
3671	/ of hrestime_adj is greater than HRES_ADJ, followed by:
3672	/
3673	/	Previous Borrow/Carry + -1 + msw(hrestime_adj) generates a Carry
3674	/
3675	/ which generates a carry whenever step 1 is true or the most
3676	/ significant long of the longlong hrestime_adj is non-zero.
3677
3678	movl	max_hres_adj, %ecx	/ hrestime_adj is positive
3679	subl	%esi, %ecx
3680	movl	%edx, %eax
3681	adcl	$-1, %eax
3682	jnc	.CL7
3683	movl	max_hres_adj, %ecx	/ adj = HRES_ADJ;
3684	subl	%edx, %edx
3685	jmp	.CL5
3686
3687	/ The following computation is similar to the one above.
3688	/
3689	/ The test for "hrestime_adj < -(HRES_ADJ)" is complicated because
3690	/ hrestime_adj is 64-bits, while HRES_ADJ is 32-bits.  We rely
3691	/ on the logical equivalence of:
3692	/
3693	/	(hrestime_adj > -HRES_ADJ)
3694	/
3695	/ and the two step sequence:
3696	/
3697	/	(HRES_ADJ + lsw(hrestime_adj)) generates a Carry
3698	/
3699	/ which means the least significant 32-bits of hrestime_adj is
3700	/ greater than -HRES_ADJ, followed by:
3701	/
3702	/	Previous Carry + 0 + msw(hrestime_adj) generates a Carry
3703	/
3704	/ which generates a carry only when step 1 is true and the most
3705	/ significant long of the longlong hrestime_adj is -1.
3706
3707.CL6:					/ hrestime_adj is negative
3708	movl	%esi, %ecx
3709	addl	max_hres_adj, %ecx
3710	movl	%edx, %eax
3711	adcl	$0, %eax
3712	jc	.CL7
3713	xor	%ecx, %ecx
3714	subl	max_hres_adj, %ecx	/ adj = -(HRES_ADJ);
3715	movl	$-1, %edx
3716	jmp	.CL5
3717.CL7:
3718	movl	%esi, %ecx		/ adj = hrestime_adj;
3719.CL5:
3720	movl	timedelta, %esi
3721	subl	%ecx, %esi
3722	movl	timedelta+4, %eax
3723	sbbl	%edx, %eax
3724	movl	%esi, timedelta
3725	movl	%eax, timedelta+4	/ timedelta -= adj;
3726	movl	%esi, hrestime_adj
3727	movl	%eax, hrestime_adj+4	/ hrestime_adj = timedelta;
3728	addl	hrestime+4, %ecx
3729
3730	movl	%ecx, %eax		/ eax = tv_nsec
37311:
3732	cmpl	$NANOSEC, %eax		/ if ((unsigned long)tv_nsec >= NANOSEC)
3733	jb	.CL8			/ no
3734	incl	one_sec			/ yes,  one_sec++;
3735	incl	hrestime		/ hrestime.tv_sec++;
3736	addl	$-NANOSEC, %eax		/ tv_nsec -= NANOSEC
3737	jmp	1b			/ check for more seconds
3738
3739.CL8:
3740	movl	%eax, hrestime+4	/ store final into hrestime.tv_nsec
3741	incl	hres_lock		/ release the hres_lock
3742
3743	popl	%ebx
3744	popl	%esi
3745	leave
3746	ret
3747	SET_SIZE(hres_tick)
3748
3749#endif	/* __i386 */
3750#endif	/* __lint */
3751
3752/*
3753 * void prefetch_smap_w(void *)
3754 *
3755 * Prefetch ahead within a linear list of smap structures.
3756 * Not implemented for ia32.  Stub for compatibility.
3757 */
3758
3759#if defined(__lint)
3760
/*ARGSUSED*/
void
prefetch_smap_w(void *smp)
{
	/* lint-only stub: intentionally empty */
}
3764
3765#else	/* __lint */
3766
3767	ENTRY(prefetch_smap_w)
3768	rep;	ret	/* use 2 byte return instruction when branch target */
3769			/* AMD Software Optimization Guide - Section 6.2 */
3770	SET_SIZE(prefetch_smap_w)
3771
3772#endif	/* __lint */
3773
/*
 * void prefetch_page_r(void *)
 *
 * Intended to issue prefetch instructions for a page_t.
 * Not implemented here; the routine is an empty stub kept for compatibility.
 */
3778#if defined(__lint)
3779
/*ARGSUSED*/
void
prefetch_page_r(void *pp)
{
	/* lint-only stub: intentionally empty */
}
3784
3785#else	/* __lint */
3786
3787	ENTRY(prefetch_page_r)
3788	rep;	ret	/* use 2 byte return instruction when branch target */
3789			/* AMD Software Optimization Guide - Section 6.2 */
3790	SET_SIZE(prefetch_page_r)
3791
3792#endif	/* __lint */
3793
3794#if defined(__lint)
3795
/*ARGSUSED*/
int
bcmp(const void *s1, const void *s2, size_t count)
{
	/* lint-only stand-in for the assembly routine */
	return (0);
}
3800
3801#else   /* __lint */
3802
3803#if defined(__amd64)
3804
3805	ENTRY(bcmp)
3806	pushq	%rbp
3807	movq	%rsp, %rbp
3808#ifdef DEBUG
3809	movq	postbootkernelbase(%rip), %r11
3810	cmpq	%r11, %rdi
3811	jb	0f
3812	cmpq	%r11, %rsi
3813	jnb	1f
38140:	leaq	.bcmp_panic_msg(%rip), %rdi
3815	xorl	%eax, %eax
3816	call	panic
38171:
3818#endif	/* DEBUG */
3819	call	memcmp
3820	testl	%eax, %eax
3821	setne	%dl
3822	leave
3823	movzbl	%dl, %eax
3824	ret
3825	SET_SIZE(bcmp)
3826
3827#elif defined(__i386)
3828
/* offsets of bcmp()'s stack arguments from %ebp */
#define	ARG_S1		8
#define	ARG_S2		12
#define	ARG_LENGTH	16

	/*
	 * int bcmp(const void *s1, const void *s2, size_t count)
	 *
	 * Out:	%eax = 0 if the two buffers are identical (or s1 == s2),
	 *	       1 otherwise
	 *
	 *	%eax = cursor in s1, %ecx = cursor in s2,
	 *	%edi = bytes still to compare
	 *
	 * Whole 32-bit words are compared while at least four bytes remain,
	 * then any tail is compared byte by byte.  DEBUG kernels panic
	 * first if either pointer is below postbootkernelbase.
	 */
	ENTRY(bcmp)
	pushl	%ebp
	movl	%esp, %ebp
#ifdef DEBUG
	movl    postbootkernelbase, %eax
	cmpl    %eax, ARG_S1(%ebp)	/* s1 below the kernel base? */
	jb	0f
	cmpl    %eax, ARG_S2(%ebp)	/* s2 at or above it? */
	jnb	1f
0:	pushl   $.bcmp_panic_msg
	call    panic
1:
#endif	/* DEBUG */

	pushl	%edi			/* callee-saved; used as the counter */
	movl	ARG_S1(%ebp), %eax
	movl	ARG_S2(%ebp), %ecx
	cmpl	%eax, %ecx
	je	.bcmp_same		/* same address: trivially equal */
	movl	ARG_LENGTH(%ebp), %edi
	cmpl	$4, %edi
	jb	.bcmp_tail		/* fewer than four bytes in total */
	.align	4
.bcmp_words:
	movl	(%ecx), %edx
	subl	$4, %edi
	cmpl	(%eax), %edx
	jne	.bcmp_differ		/* a differing word settles it */
	addl	$4, %ecx
	addl	$4, %eax
	cmpl	$4, %edi
	jae	.bcmp_words
.bcmp_tail:
	testl	%edi, %edi
	jz	.bcmp_same		/* nothing left over */
	.align	4
.bcmp_bytes:
	movb	(%ecx),	%dl
	cmpb	%dl, (%eax)
	jne	.bcmp_differ
	incl	%ecx
	incl	%eax
	decl	%edi
	jnz	.bcmp_bytes
.bcmp_same:
	xorl	%eax, %eax		/* return (0) */
	popl	%edi
	leave
	ret
	.align	4
.bcmp_differ:
	movl	$1, %eax		/* return (1) */
	popl	%edi
	leave
	ret
	SET_SIZE(bcmp)
3892
3893#endif	/* __i386 */
3894
3895#ifdef DEBUG
3896	.text
3897.bcmp_panic_msg:
3898	.string "bcmp: arguments below kernelbase"
3899#endif	/* DEBUG */
3900
3901#endif	/* __lint */
3902
3903#if defined(__lint)
3904
3905uint_t
3906bsrw_insn(uint16_t mask)
3907{
3908	uint_t index = sizeof (mask) * NBBY - 1;
3909
3910	while ((mask & (1 << index)) == 0)
3911		index--;
3912	return (index);
3913}
3914
3915#else	/* __lint */
3916
3917#if defined(__amd64)
3918
3919	ENTRY_NP(bsrw_insn)
3920	xorl	%eax, %eax
3921	bsrw	%di, %ax
3922	ret
3923	SET_SIZE(bsrw_insn)
3924
3925#elif defined(__i386)
3926
3927	ENTRY_NP(bsrw_insn)
3928	movw	4(%esp), %cx
3929	xorl	%eax, %eax
3930	bsrw	%cx, %ax
3931	ret
3932	SET_SIZE(bsrw_insn)
3933
3934#endif	/* __i386 */
3935#endif	/* __lint */
3936
3937#if defined(__lint)
3938
3939uint_t
3940atomic_btr32(uint32_t *pending, uint_t pil)
3941{
3942	return (*pending &= ~(1 << pil));
3943}
3944
3945#else	/* __lint */
3946
3947#if defined(__i386)
3948
3949	ENTRY_NP(atomic_btr32)
3950	movl	4(%esp), %ecx
3951	movl	8(%esp), %edx
3952	xorl	%eax, %eax
3953	lock
3954	btrl	%edx, (%ecx)
3955	setc	%al
3956	ret
3957	SET_SIZE(atomic_btr32)
3958
3959#endif	/* __i386 */
3960#endif	/* __lint */
3961
3962#if defined(__lint)
3963
3964/*ARGSUSED*/
3965void
3966switch_sp_and_call(void *newsp, void (*func)(uint_t, uint_t), uint_t arg1,
3967	    uint_t arg2)
3968{}
3969
3970#else	/* __lint */
3971
3972#if defined(__amd64)
3973
3974	ENTRY_NP(switch_sp_and_call)
3975	pushq	%rbp
3976	movq	%rsp, %rbp		/* set up stack frame */
3977	movq	%rdi, %rsp		/* switch stack pointer */
3978	movq	%rdx, %rdi		/* pass func arg 1 */
3979	movq	%rsi, %r11		/* save function to call */
3980	movq	%rcx, %rsi		/* pass func arg 2 */
3981	call	*%r11			/* call function */
3982	leave				/* restore stack */
3983	ret
3984	SET_SIZE(switch_sp_and_call)
3985
3986#elif defined(__i386)
3987
3988	ENTRY_NP(switch_sp_and_call)
3989	pushl	%ebp
3990	mov	%esp, %ebp		/* set up stack frame */
3991	movl	8(%ebp), %esp		/* switch stack pointer */
3992	pushl	20(%ebp)		/* push func arg 2 */
3993	pushl	16(%ebp)		/* push func arg 1 */
3994	call	*12(%ebp)		/* call function */
3995	addl	$8, %esp		/* pop arguments */
3996	leave				/* restore stack */
3997	ret
3998	SET_SIZE(switch_sp_and_call)
3999
4000#endif	/* __i386 */
4001#endif	/* __lint */
4002
4003#if defined(__lint)
4004
void
kmdb_enter(void)
{
	/* lint-only stub: intentionally empty */
}
4008
4009#else	/* __lint */
4010
4011#if defined(__amd64)
4012
4013	ENTRY_NP(kmdb_enter)
4014	pushq	%rbp
4015	movq	%rsp, %rbp
4016
4017	/*
4018	 * Save flags, do a 'cli' then return the saved flags
4019	 */
4020	call	intr_clear
4021
4022	int	$T_DBGENTR
4023
4024	/*
4025	 * Restore the saved flags
4026	 */
4027	movq	%rax, %rdi
4028	call	intr_restore
4029
4030	leave
4031	ret
4032	SET_SIZE(kmdb_enter)
4033
4034#elif defined(__i386)
4035
4036	ENTRY_NP(kmdb_enter)
4037	pushl	%ebp
4038	movl	%esp, %ebp
4039
4040	/*
4041	 * Save flags, do a 'cli' then return the saved flags
4042	 */
4043	call	intr_clear
4044
4045	int	$T_DBGENTR
4046
4047	/*
4048	 * Restore the saved flags
4049	 */
4050	pushl	%eax
4051	call	intr_restore
4052	addl	$4, %esp
4053
4054	leave
4055	ret
4056	SET_SIZE(kmdb_enter)
4057
4058#endif	/* __i386 */
4059#endif	/* __lint */
4060
4061#if defined(__lint)
4062
void
return_instr(void)
{
	/* lint-only stub: intentionally empty */
}
4066
4067#else	/* __lint */
4068
4069	ENTRY_NP(return_instr)
4070	rep;	ret	/* use 2 byte instruction when branch target */
4071			/* AMD Software Optimization Guide - Section 6.2 */
4072	SET_SIZE(return_instr)
4073
4074#endif	/* __lint */
4075
4076#if defined(__lint)
4077
4078ulong_t
4079getflags(void)
4080{
4081	return (0);
4082}
4083
4084#else	/* __lint */
4085
4086#if defined(__amd64)
4087
4088	ENTRY(getflags)
4089	pushfq
4090	popq	%rax
4091#if defined(__xpv)
4092	CURTHREAD(%rdi)
4093	KPREEMPT_DISABLE(%rdi)
4094	/*
4095	 * Synthesize the PS_IE bit from the event mask bit
4096	 */
4097	CURVCPU(%r11)
4098	andq    $_BITNOT(PS_IE), %rax
4099	XEN_TEST_UPCALL_MASK(%r11)
4100	jnz	1f
4101	orq	$PS_IE, %rax
41021:
4103	KPREEMPT_ENABLE_NOKP(%rdi)
4104#endif
4105	ret
4106	SET_SIZE(getflags)
4107
4108#elif defined(__i386)
4109
4110	ENTRY(getflags)
4111	pushfl
4112	popl	%eax
4113#if defined(__xpv)
4114	CURTHREAD(%ecx)
4115	KPREEMPT_DISABLE(%ecx)
4116	/*
4117	 * Synthesize the PS_IE bit from the event mask bit
4118	 */
4119	CURVCPU(%edx)
4120	andl    $_BITNOT(PS_IE), %eax
4121	XEN_TEST_UPCALL_MASK(%edx)
4122	jnz	1f
4123	orl	$PS_IE, %eax
41241:
4125	KPREEMPT_ENABLE_NOKP(%ecx)
4126#endif
4127	ret
4128	SET_SIZE(getflags)
4129
4130#endif	/* __i386 */
4131
4132#endif	/* __lint */
4133
4134#if defined(__lint)
4135
4136ftrace_icookie_t
4137ftrace_interrupt_disable(void)
4138{ return (0); }
4139
4140#else   /* __lint */
4141
4142#if defined(__amd64)
4143
4144	ENTRY(ftrace_interrupt_disable)
4145	pushfq
4146	popq	%rax
4147	CLI(%rdx)
4148	ret
4149	SET_SIZE(ftrace_interrupt_disable)
4150
4151#elif defined(__i386)
4152
4153	ENTRY(ftrace_interrupt_disable)
4154	pushfl
4155	popl	%eax
4156	CLI(%edx)
4157	ret
4158	SET_SIZE(ftrace_interrupt_disable)
4159
4160#endif	/* __i386 */
4161#endif	/* __lint */
4162
4163#if defined(__lint)
4164
4165/*ARGSUSED*/
4166void
4167ftrace_interrupt_enable(ftrace_icookie_t cookie)
4168{}
4169
4170#else	/* __lint */
4171
4172#if defined(__amd64)
4173
4174	ENTRY(ftrace_interrupt_enable)
4175	pushq	%rdi
4176	popfq
4177	ret
4178	SET_SIZE(ftrace_interrupt_enable)
4179
4180#elif defined(__i386)
4181
4182	ENTRY(ftrace_interrupt_enable)
4183	movl	4(%esp), %eax
4184	pushl	%eax
4185	popfl
4186	ret
4187	SET_SIZE(ftrace_interrupt_enable)
4188
4189#endif	/* __i386 */
4190#endif	/* __lint */
4191