xref: /titanic_50/usr/src/uts/intel/ia32/ml/i86_subr.s (revision 9cd928fe5e3ea4e05f64cfb380beb54b2623e7dc)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27/*
28 *  Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.
29 *  Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T
30 *    All Rights Reserved
31 */
32
33/*
34 * General assembly language routines.
35 * It is the intent of this file to contain routines that are
36 * independent of the specific kernel architecture, and those that are
37 * common across kernel architectures.
38 * As architectures diverge, and implementations of specific
39 * architecture-dependent routines change, the routines should be moved
40 * from this file into the respective ../`arch -k`/subr.s file.
41 */
42
43#include <sys/asm_linkage.h>
44#include <sys/asm_misc.h>
45#include <sys/panic.h>
46#include <sys/ontrap.h>
47#include <sys/regset.h>
48#include <sys/privregs.h>
49#include <sys/reboot.h>
50#include <sys/psw.h>
51#include <sys/x86_archext.h>
52
53#if defined(__lint)
54#include <sys/types.h>
55#include <sys/systm.h>
56#include <sys/thread.h>
57#include <sys/archsystm.h>
58#include <sys/byteorder.h>
59#include <sys/dtrace.h>
60#include <sys/ftrace.h>
61#else	/* __lint */
62#include "assym.h"
63#endif	/* __lint */
64#include <sys/dditypes.h>
65
66/*
67 * on_fault()
68 * Catch lofault faults. Like setjmp except it returns one
69 * if code following causes uncorrectable fault. Turned off
70 * by calling no_fault().
71 */
72
#if defined(__lint)

/* ARGSUSED */
int
on_fault(label_t *ljb)
{
	return (0);
}

void
no_fault(void)
{
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * int on_fault(label_t *ljb)
	 *
	 * In: %rdi = ljb.  Records ljb in curthread->t_onfault and the
	 * address of catch_fault in curthread->t_lofault, then tail-jumps
	 * to setjmp with %rdi still holding ljb.
	 */
	ENTRY(on_fault)
	leaq	catch_fault(%rip), %rdx		/* %rdx = &catch_fault */
	movq	%gs:CPU_THREAD, %rsi		/* %rsi = curthread */
	movq	%rdi, T_ONFAULT(%rsi)		/* t_onfault = ljb */
	movq	%rdx, T_LOFAULT(%rsi)		/* t_lofault = catch_fault */
	jmp	setjmp				/* setjmp(ljb) finishes up */

	/*
	 * Target stored in t_lofault: reload the jump buffer from
	 * t_onfault, clear both fields and longjmp() to it.
	 */
catch_fault:
	xorl	%eax, %eax			/* %rax = 0 */
	movq	%gs:CPU_THREAD, %rsi		/* %rsi = curthread */
	movq	T_ONFAULT(%rsi), %rdi		/* %rdi = saved jump buffer */
	movq	%rax, T_ONFAULT(%rsi)		/* t_onfault = NULL */
	movq	%rax, T_LOFAULT(%rsi)		/* t_lofault = NULL */
	jmp	longjmp				/* longjmp(jump buffer) */
	SET_SIZE(on_fault)

	/*
	 * void no_fault(void)
	 *
	 * Clears curthread->t_onfault and curthread->t_lofault.
	 */
	ENTRY(no_fault)
	xorl	%eax, %eax			/* %rax = 0 */
	movq	%gs:CPU_THREAD, %rsi		/* %rsi = curthread */
	movq	%rax, T_ONFAULT(%rsi)		/* t_onfault = NULL */
	movq	%rax, T_LOFAULT(%rsi)		/* t_lofault = NULL */
	ret
	SET_SIZE(no_fault)

#elif defined(__i386)

	/*
	 * int on_fault(label_t *ljb)
	 *
	 * In: 4(%esp) = ljb.  Same contract as the 64-bit version; the
	 * argument is left on the stack for setjmp to pick up.
	 */
	ENTRY(on_fault)
	movl	4(%esp), %eax			/* %eax = ljb */
	movl	%gs:CPU_THREAD, %edx		/* %edx = curthread */
	leal	catch_fault, %ecx		/* %ecx = &catch_fault */
	movl	%eax, T_ONFAULT(%edx)		/* t_onfault = ljb */
	movl	%ecx, T_LOFAULT(%edx)		/* t_lofault = catch_fault */
	jmp	setjmp				/* setjmp(ljb) finishes up */

	/*
	 * Target stored in t_lofault: reload the jump buffer from
	 * t_onfault, clear both fields and call longjmp() on it.
	 */
catch_fault:
	movl	%gs:CPU_THREAD, %edx		/* %edx = curthread */
	movl	T_ONFAULT(%edx), %ecx		/* %ecx = saved jump buffer */
	xorl	%eax, %eax			/* %eax = 0 */
	movl	%eax, T_ONFAULT(%edx)		/* t_onfault = NULL */
	movl	%eax, T_LOFAULT(%edx)		/* t_lofault = NULL */
	pushl	%ecx				/* argument for longjmp */
	call	longjmp
	SET_SIZE(on_fault)

	/*
	 * void no_fault(void)
	 *
	 * Clears curthread->t_onfault and curthread->t_lofault.
	 */
	ENTRY(no_fault)
	xorl	%eax, %eax			/* %eax = 0 */
	movl	%gs:CPU_THREAD, %edx		/* %edx = curthread */
	movl	%eax, T_ONFAULT(%edx)		/* t_onfault = NULL */
	movl	%eax, T_LOFAULT(%edx)		/* t_lofault = NULL */
	ret
	SET_SIZE(no_fault)

#endif	/* __i386 */
#endif	/* __lint */
142
143/*
144 * Default trampoline code for on_trap() (see <sys/ontrap.h>).  We just
145 * do a longjmp(&curthread->t_ontrap->ot_jmpbuf) if this is ever called.
146 */
147
#if defined(__lint)

void
on_trap_trampoline(void)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void on_trap_trampoline(void)
	 *
	 * Forms &curthread->t_ontrap->ot_jmpbuf in %rdi and tail-jumps
	 * to longjmp with it.
	 */
	ENTRY(on_trap_trampoline)
	movq	%gs:CPU_THREAD, %rsi		/* %rsi = curthread */
	movq	T_ONTRAP(%rsi), %rdi		/* %rdi = curthread->t_ontrap */
	addq	$OT_JMPBUF, %rdi		/* %rdi = &t_ontrap->ot_jmpbuf */
	jmp	longjmp
	SET_SIZE(on_trap_trampoline)

#elif defined(__i386)

	/*
	 * void on_trap_trampoline(void)
	 *
	 * Forms &curthread->t_ontrap->ot_jmpbuf in %eax, pushes it as the
	 * argument and calls longjmp.
	 */
	ENTRY(on_trap_trampoline)
	movl	%gs:CPU_THREAD, %eax		/* %eax = curthread */
	movl	T_ONTRAP(%eax), %eax		/* %eax = curthread->t_ontrap */
	addl	$OT_JMPBUF, %eax		/* %eax = &t_ontrap->ot_jmpbuf */
	pushl	%eax				/* argument for longjmp */
	call	longjmp
	SET_SIZE(on_trap_trampoline)

#endif	/* __i386 */
#endif	/* __lint */
177
178/*
179 * Push a new element on to the t_ontrap stack.  Refer to <sys/ontrap.h> for
180 * more information about the on_trap() mechanism.  If the on_trap_data is the
181 * same as the topmost stack element, we just modify that element.
182 */
#if defined(__lint)

/*ARGSUSED*/
int
on_trap(on_trap_data_t *otp, uint_t prot)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * int on_trap(on_trap_data_t *otp, uint_t prot)
	 *
	 * In: %rdi = otp, %si = prot (low 16 bits are stored).
	 * Initialises *otp, links it in front of curthread->t_ontrap unless
	 * it is already the topmost element, then tail-jumps to setjmp on
	 * &otp->ot_jmpbuf.
	 */
	ENTRY(on_trap)
	movw	%si, OT_PROT(%rdi)		/* ot_prot = prot */
	movw	$0, OT_TRAP(%rdi)		/* ot_trap = 0 */
	leaq	on_trap_trampoline(%rip), %rdx	/* rdx = &on_trap_trampoline */
	movq	%rdx, OT_TRAMPOLINE(%rdi)	/* ot_trampoline = rdx */
	xorl	%ecx, %ecx
	movq	%rcx, OT_HANDLE(%rdi)		/* ot_handle = NULL */
	movq	%rcx, OT_PAD1(%rdi)		/* ot_pad1 = NULL */
	movq	%gs:CPU_THREAD, %rdx		/* rdx = curthread */
	movq	T_ONTRAP(%rdx), %rcx		/* rcx = curthread->t_ontrap */
	cmpq	%rdi, %rcx			/* if (otp == %rcx)	*/
	je	0f				/*	don't modify t_ontrap */

	movq	%rcx, OT_PREV(%rdi)		/* ot_prev = t_ontrap */
	movq	%rdi, T_ONTRAP(%rdx)		/* curthread->t_ontrap = otp */

0:	addq	$OT_JMPBUF, %rdi		/* &ot_jmpbuf */
	jmp	setjmp
	SET_SIZE(on_trap)

#elif defined(__i386)

	/*
	 * int on_trap(on_trap_data_t *otp, uint_t prot)
	 *
	 * In: 4(%esp) = otp, 8(%esp) = prot (low 16 bits are stored).
	 * Same contract as the 64-bit version; the first stack argument is
	 * overwritten with &otp->ot_jmpbuf before jumping to setjmp.
	 */
	ENTRY(on_trap)
	movl	4(%esp), %eax			/* %eax = otp */
	movl	8(%esp), %edx			/* %edx = prot */

	movw	%dx, OT_PROT(%eax)		/* ot_prot = prot */
	movw	$0, OT_TRAP(%eax)		/* ot_trap = 0 */
	leal	on_trap_trampoline, %edx	/* %edx = &on_trap_trampoline */
	movl	%edx, OT_TRAMPOLINE(%eax)	/* ot_trampoline = %edx */
	movl	$0, OT_HANDLE(%eax)		/* ot_handle = NULL */
	movl	$0, OT_PAD1(%eax)		/* ot_pad1 = NULL */
	movl	%gs:CPU_THREAD, %edx		/* %edx = curthread */
	movl	T_ONTRAP(%edx), %ecx		/* %ecx = curthread->t_ontrap */
	cmpl	%eax, %ecx			/* if (otp == %ecx) */
	je	0f				/*    don't modify t_ontrap */

	movl	%ecx, OT_PREV(%eax)		/* ot_prev = t_ontrap */
	movl	%eax, T_ONTRAP(%edx)		/* curthread->t_ontrap = otp */

0:	addl	$OT_JMPBUF, %eax		/* %eax = &ot_jmpbuf */
	movl	%eax, 4(%esp)			/* put %eax back on the stack */
	jmp	setjmp				/* let setjmp do the rest */
	SET_SIZE(on_trap)

#endif	/* __i386 */
#endif	/* __lint */
241
242/*
243 * Setjmp and longjmp implement non-local gotos using state vectors
244 * type label_t.
245 */
246
#if defined(__lint)

/* ARGSUSED */
int
setjmp(label_t *lp)
{
	return (0);
}

/* ARGSUSED */
void
longjmp(label_t *lp)
{
}

#else	/* __lint */

#if LABEL_PC != 0
#error LABEL_PC MUST be defined as 0 for setjmp/longjmp to work as coded
#endif	/* LABEL_PC != 0 */

#if defined(__amd64)

	/*
	 * int setjmp(label_t *lp)
	 *
	 * In:  %rdi = lp
	 * Out: %eax = 0
	 * Stores %rsp, %rbp, %rbx, %r12-%r15 and the return address in *lp.
	 */
	ENTRY(setjmp)
	movq	%r15, LABEL_R15(%rdi)
	movq	%r14, LABEL_R14(%rdi)
	movq	%r13, LABEL_R13(%rdi)
	movq	%r12, LABEL_R12(%rdi)
	movq	%rbx, LABEL_RBX(%rdi)
	movq	%rbp, LABEL_RBP(%rdi)
	movq	%rsp, LABEL_SP(%rdi)
	movq	(%rsp), %rdx		/* %rdx = our return address */
	movq	%rdx, (%rdi)		/* saved at offset LABEL_PC == 0 */
	xorl	%eax, %eax		/* return 0 */
	ret
	SET_SIZE(setjmp)

	/*
	 * void longjmp(label_t *lp)
	 *
	 * In:  %rdi = lp
	 * Reloads the registers saved by setjmp, plants the saved return
	 * address on the restored stack and returns there with %eax = 1.
	 */
	ENTRY(longjmp)
	movq	LABEL_SP(%rdi), %rsp
	movq	LABEL_R15(%rdi), %r15
	movq	LABEL_R14(%rdi), %r14
	movq	LABEL_R13(%rdi), %r13
	movq	LABEL_R12(%rdi), %r12
	movq	LABEL_RBX(%rdi), %rbx
	movq	LABEL_RBP(%rdi), %rbp
	movq	(%rdi), %rdx		/* saved pc; LABEL_PC is 0 */
	movq	%rdx, (%rsp)		/* becomes our return address */
	xorl	%eax, %eax
	incl	%eax			/* return 1 */
	ret
	SET_SIZE(longjmp)

#elif defined(__i386)

	/*
	 * int setjmp(label_t *lp)
	 *
	 * In:  4(%esp) = lp
	 * Out: %eax = 0
	 * Stores %ebp, %ebx, %esi, %edi, %esp and the return address in *lp.
	 */
	ENTRY(setjmp)
	movl	4(%esp), %edx		/* %edx = lp */
	movl	%edi, LABEL_EDI(%edx)
	movl	%esi, LABEL_ESI(%edx)
	movl	%ebx, LABEL_EBX(%edx)
	movl	%ebp, LABEL_EBP(%edx)
	movl	%esp, 4(%edx)		/* stack pointer slot */
	movl	(%esp), %ecx		/* %ecx = our return address */
	movl	%ecx, (%edx)		/* saved at offset LABEL_PC == 0 */
	subl	%eax, %eax		/* return 0 */
	ret
	SET_SIZE(setjmp)

	/*
	 * void longjmp(label_t *lp)
	 *
	 * In:  4(%esp) = lp
	 * Reloads the registers saved by setjmp, discards the return
	 * address slot on the restored stack and jumps to the saved pc
	 * with %eax = 1.
	 */
	ENTRY(longjmp)
	movl	4(%esp), %edx		/* %edx = lp */
	movl	LABEL_EDI(%edx), %edi
	movl	LABEL_ESI(%edx), %esi
	movl	LABEL_EBX(%edx), %ebx
	movl	LABEL_EBP(%edx), %ebp
	movl	4(%edx), %esp		/* restore saved stack pointer */
	movl	(%edx), %ecx		/* saved pc; LABEL_PC is 0 */
	movl	$1, %eax		/* return 1 */
	addl	$4, %esp		/* pop the return address slot */
	jmp	*%ecx			/* resume after the setjmp call */
	SET_SIZE(longjmp)

#endif	/* __i386 */
#endif	/* __lint */
326
327/*
328 * if a() calls b() calls caller(),
329 * caller() returns return address in a().
330 * (Note: We assume a() and b() are C routines which do the normal entry/exit
331 *  sequence.)
332 */
333
#if defined(__lint)

caddr_t
caller(void)
{
	return (0);
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * caddr_t caller(void)
	 *
	 * Out: %rax = the word at 8(%rbp), i.e. the return pc stored just
	 * above the frame pointer that is live when we are called.
	 */
	ENTRY(caller)
	movq	8(%rbp), %rax		/* return pc of our caller's frame */
	ret
	SET_SIZE(caller)

#elif defined(__i386)

	/*
	 * caddr_t caller(void)
	 *
	 * Out: %eax = the word at 4(%ebp), i.e. the return pc stored just
	 * above the frame pointer that is live when we are called.
	 */
	ENTRY(caller)
	movl	4(%ebp), %eax		/* return pc of our caller's frame */
	ret
	SET_SIZE(caller)

#endif	/* __i386 */
#endif	/* __lint */
358
359/*
360 * if a() calls callee(), callee() returns the
361 * return address in a();
362 */
363
#if defined(__lint)

caddr_t
callee(void)
{
	return (0);
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * caddr_t callee(void)
	 *
	 * Out: %rax = our own return address, read from the top of stack.
	 */
	ENTRY(callee)
	movq	(%rsp), %rax		/* pc we will return to */
	ret
	SET_SIZE(callee)

#elif defined(__i386)

	/*
	 * caddr_t callee(void)
	 *
	 * Out: %eax = our own return address, read from the top of stack.
	 */
	ENTRY(callee)
	movl	(%esp), %eax		/* pc we will return to */
	ret
	SET_SIZE(callee)

#endif	/* __i386 */
#endif	/* __lint */
388
389/*
390 * return the current frame pointer
391 */
392
#if defined(__lint)

greg_t
getfp(void)
{
	return (0);
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * greg_t getfp(void)
	 *
	 * Out: %rax = current value of %rbp.
	 */
	ENTRY(getfp)
	movq	%rbp, %rax
	ret
	SET_SIZE(getfp)

#elif defined(__i386)

	/*
	 * greg_t getfp(void)
	 *
	 * Out: %eax = current value of %ebp.
	 */
	ENTRY(getfp)
	movl	%ebp, %eax
	ret
	SET_SIZE(getfp)

#endif	/* __i386 */
#endif	/* __lint */
417
418/*
419 * Invalidate a single page table entry in the TLB
420 */
421
#if defined(__lint)

/* ARGSUSED */
void
mmu_tlbflush_entry(caddr_t m)
{
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void mmu_tlbflush_entry(caddr_t m)
	 *
	 * In: %rdi = m.  Issues invlpg on that address.
	 */
	ENTRY(mmu_tlbflush_entry)
	invlpg	(%rdi)
	ret
	SET_SIZE(mmu_tlbflush_entry)

#elif defined(__i386)

	/*
	 * void mmu_tlbflush_entry(caddr_t m)
	 *
	 * In: 4(%esp) = m.  Issues invlpg on that address.
	 */
	ENTRY(mmu_tlbflush_entry)
	movl	4(%esp), %eax		/* %eax = m */
	invlpg	(%eax)
	ret
	SET_SIZE(mmu_tlbflush_entry)

#endif	/* __i386 */
#endif	/* __lint */
448
449
450/*
451 * Get/Set the value of various control registers
452 */
453
454#if defined(__lint)
455
456ulong_t
457getcr0(void)
458{ return (0); }
459
460/* ARGSUSED */
461void
462setcr0(ulong_t value)
463{}
464
465ulong_t
466getcr2(void)
467{ return (0); }
468
469ulong_t
470getcr3(void)
471{ return (0); }
472
473#if !defined(__xpv)
474/* ARGSUSED */
475void
476setcr3(ulong_t val)
477{}
478
479void
480reload_cr3(void)
481{}
482#endif
483
484ulong_t
485getcr4(void)
486{ return (0); }
487
488/* ARGSUSED */
489void
490setcr4(ulong_t val)
491{}
492
493#if defined(__amd64)
494
495ulong_t
496getcr8(void)
497{ return (0); }
498
499/* ARGSUSED */
500void
501setcr8(ulong_t val)
502{}
503
504#endif	/* __amd64 */
505
506#else	/* __lint */
507
#if defined(__amd64)

	/*
	 * 64-bit accessors.  Getters return the register in %rax; setters
	 * take the new value in %rdi.
	 */

	ENTRY(getcr0)
	movq	%cr0, %rax
	ret
	SET_SIZE(getcr0)

	ENTRY(setcr0)
	movq	%rdi, %cr0
	ret
	SET_SIZE(setcr0)

	/*
	 * Under __xpv the value is read from the per-CPU vcpu_info
	 * structure instead of from %cr2.
	 */
	ENTRY(getcr2)
#if defined(__xpv)
	movq	%gs:CPU_VCPU_INFO, %rax
	movq	VCPU_INFO_ARCH_CR2(%rax), %rax
#else
	movq	%cr2, %rax
#endif
	ret
	SET_SIZE(getcr2)

	ENTRY(getcr3)
	movq	%cr3, %rax
	ret
	SET_SIZE(getcr3)

#if !defined(__xpv)

	ENTRY(setcr3)
	movq	%rdi, %cr3
	ret
	SET_SIZE(setcr3)

	/* Writes %cr3 back with the value just read from it. */
	ENTRY(reload_cr3)
	movq	%cr3, %rdi
	movq	%rdi, %cr3
	ret
	SET_SIZE(reload_cr3)

#endif	/* __xpv */

	ENTRY(getcr4)
	movq	%cr4, %rax
	ret
	SET_SIZE(getcr4)

	ENTRY(setcr4)
	movq	%rdi, %cr4
	ret
	SET_SIZE(setcr4)

	ENTRY(getcr8)
	movq	%cr8, %rax
	ret
	SET_SIZE(getcr8)

	ENTRY(setcr8)
	movq	%rdi, %cr8
	ret
	SET_SIZE(setcr8)

#elif defined(__i386)

	/*
	 * 32-bit accessors.  Getters return the register in %eax; setters
	 * take the new value from 4(%esp).
	 */

	ENTRY(getcr0)
	movl	%cr0, %eax
	ret
	SET_SIZE(getcr0)

	ENTRY(setcr0)
	movl	4(%esp), %eax
	movl	%eax, %cr0
	ret
	SET_SIZE(setcr0)

	/*
	 * getcr8/setcr8 use "lock mov %cr0", which is only valid on
	 * processors that advertise support for it through CPUID.
	 * Normally the 32 bit TPR is accessed via the local APIC instead.
	 */
	ENTRY(getcr8)
	lock
	movl	%cr0, %eax
	ret
	SET_SIZE(getcr8)

	ENTRY(setcr8)
	movl	4(%esp), %eax
	lock
	movl	%eax, %cr0
	ret
	SET_SIZE(setcr8)

	/*
	 * Under __xpv the value is read from the per-CPU vcpu_info
	 * structure instead of from %cr2.
	 */
	ENTRY(getcr2)
#if defined(__xpv)
	movl	%gs:CPU_VCPU_INFO, %eax
	movl	VCPU_INFO_ARCH_CR2(%eax), %eax
#else
	movl	%cr2, %eax
#endif
	ret
	SET_SIZE(getcr2)

	ENTRY(getcr3)
	movl	%cr3, %eax
	ret
	SET_SIZE(getcr3)

#if !defined(__xpv)

	ENTRY(setcr3)
	movl	4(%esp), %eax
	movl	%eax, %cr3
	ret
	SET_SIZE(setcr3)

	/* Writes %cr3 back with the value just read from it. */
	ENTRY(reload_cr3)
	movl	%cr3, %eax
	movl	%eax, %cr3
	ret
	SET_SIZE(reload_cr3)

#endif	/* __xpv */

	ENTRY(getcr4)
	movl	%cr4, %eax
	ret
	SET_SIZE(getcr4)

	ENTRY(setcr4)
	movl	4(%esp), %eax
	movl	%eax, %cr4
	ret
	SET_SIZE(setcr4)

#endif	/* __i386 */
644#endif	/* __lint */
645
#if defined(__lint)

/*ARGSUSED*/
uint32_t
__cpuid_insn(struct cpuid_regs *regs)
{
	return (0);
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * uint32_t __cpuid_insn(struct cpuid_regs *regs)
	 *
	 * In:  %rdi = regs
	 * Out: %eax = resulting cp_eax (left in place after cpuid)
	 * Loads the four 32-bit fields at offsets 0x0/0x4/0x8/0xc into
	 * %eax/%ebx/%ecx/%edx, executes cpuid and stores the results back.
	 * %rbx, %rcx and %rdx are parked in %r8, %r9 and %r11 meanwhile.
	 */
	ENTRY(__cpuid_insn)
	movq	%rdx, %r11		/* park %rdx */
	movq	%rcx, %r9		/* park %rcx */
	movq	%rbx, %r8		/* park %rbx */
	movl	0xc(%rdi), %edx		/* %edx = regs->cp_edx */
	movl	0x8(%rdi), %ecx		/* %ecx = regs->cp_ecx */
	movl	0x4(%rdi), %ebx		/* %ebx = regs->cp_ebx */
	movl	(%rdi), %eax		/* %eax = regs->cp_eax */
	cpuid
	movl	%edx, 0xc(%rdi)		/* regs->cp_edx = %edx */
	movl	%ecx, 0x8(%rdi)		/* regs->cp_ecx = %ecx */
	movl	%ebx, 0x4(%rdi)		/* regs->cp_ebx = %ebx */
	movl	%eax, (%rdi)		/* regs->cp_eax = %eax */
	movq	%r11, %rdx		/* recover %rdx */
	movq	%r9, %rcx		/* recover %rcx */
	movq	%r8, %rbx		/* recover %rbx */
	ret
	SET_SIZE(__cpuid_insn)

#elif defined(__i386)

	/*
	 * uint32_t __cpuid_insn(struct cpuid_regs *regs)
	 *
	 * In:  first stack argument = regs
	 * Out: %eax = resulting cp_eax (left in place after cpuid)
	 * %ebp is used as the pointer to *regs; %ebp, %ebx, %ecx and %edx
	 * are saved on the stack and restored before returning.
	 */
	ENTRY(__cpuid_insn)
	pushl	%ebp
	movl	0x8(%esp), %ebp		/* %ebp = regs (past saved %ebp) */
	pushl	%ebx
	pushl	%ecx
	pushl	%edx
	movl	0xc(%ebp), %edx		/* %edx = regs->cp_edx */
	movl	0x8(%ebp), %ecx		/* %ecx = regs->cp_ecx */
	movl	0x4(%ebp), %ebx		/* %ebx = regs->cp_ebx */
	movl	(%ebp), %eax		/* %eax = regs->cp_eax */
	cpuid
	movl	%edx, 0xc(%ebp)		/* regs->cp_edx = %edx */
	movl	%ecx, 0x8(%ebp)		/* regs->cp_ecx = %ecx */
	movl	%ebx, 0x4(%ebp)		/* regs->cp_ebx = %ebx */
	movl	%eax, (%ebp)		/* regs->cp_eax = %eax */
	popl	%edx
	popl	%ecx
	popl	%ebx
	popl	%ebp
	ret
	SET_SIZE(__cpuid_insn)

#endif	/* __i386 */
#endif	/* __lint */
702
#if defined(__lint)

/*ARGSUSED*/
void
i86_monitor(volatile uint32_t *addr, uint32_t extensions, uint32_t hints)
{
}

#else   /* __lint */

#if defined(__amd64)

	/*
	 * void i86_monitor(volatile uint32_t *addr, uint32_t extensions,
	 *     uint32_t hints)
	 *
	 * In: %rdi = addr, %rsi = extensions, %rdx = hints.
	 * Flushes the cache line at addr, then executes MONITOR with
	 * %rax = addr, %rcx = extensions, %rdx = hints.
	 */
	ENTRY_NP(i86_monitor)
	pushq	%rbp
	movq	%rsp, %rbp
	movq	%rsi, %rcx		/* %rcx = extensions */
	movq	%rdi, %rax		/* %rax = addr */
	/* hints already live in %rdx as the third argument */
	clflush	(%rax)
	.byte	0x0f, 0x01, 0xc8	/* monitor */
	leave
	ret
	SET_SIZE(i86_monitor)

#elif defined(__i386)

	/*
	 * void i86_monitor(volatile uint32_t *addr, uint32_t extensions,
	 *     uint32_t hints)
	 *
	 * Arguments are read from the frame at 0x8/0xc/0x10(%ebp).
	 * Flushes the cache line at addr, then executes MONITOR with
	 * %eax = addr, %ecx = extensions, %edx = hints.
	 */
	ENTRY_NP(i86_monitor)
	pushl	%ebp
	movl	%esp, %ebp
	movl	0x10(%ebp), %edx	/* %edx = hints */
	movl	0xc(%ebp), %ecx		/* %ecx = extensions */
	movl	0x8(%ebp), %eax		/* %eax = addr */
	clflush	(%eax)
	.byte	0x0f, 0x01, 0xc8	/* monitor */
	leave
	ret
	SET_SIZE(i86_monitor)

#endif	/* __i386 */
#endif	/* __lint */
742
#if defined(__lint)

/*ARGSUSED*/
void
i86_mwait(uint32_t data, uint32_t extensions)
{
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void i86_mwait(uint32_t data, uint32_t extensions)
	 *
	 * In: %rdi = data, %rsi = extensions.
	 * Executes MWAIT with %rax = data and %rcx = extensions.
	 */
	ENTRY_NP(i86_mwait)
	pushq	%rbp
	movq	%rsp, %rbp
	movq	%rsi, %rcx		/* %rcx = extensions */
	movq	%rdi, %rax		/* %rax = data */
	.byte	0x0f, 0x01, 0xc9	/* mwait */
	leave
	ret
	SET_SIZE(i86_mwait)

#elif defined(__i386)

	/*
	 * void i86_mwait(uint32_t data, uint32_t extensions)
	 *
	 * Arguments are read from the frame at 0x8/0xc(%ebp).
	 * Executes MWAIT with %eax = data and %ecx = extensions.
	 */
	ENTRY_NP(i86_mwait)
	pushl	%ebp
	movl	%esp, %ebp
	movl	0xc(%ebp), %ecx		/* %ecx = extensions */
	movl	0x8(%ebp), %eax		/* %eax = data */
	.byte	0x0f, 0x01, 0xc9	/* mwait */
	leave
	ret
	SET_SIZE(i86_mwait)

#endif	/* __i386 */
#endif	/* __lint */
778
779#if defined(__xpv)
780	/*
781	 * Defined in C
782	 */
783#else
784
785#if defined(__lint)
786
787hrtime_t
788tsc_read(void)
789{
790	return (0);
791}
792
793#else	/* __lint */
794
795#if defined(__amd64)
796
	/*
	 * hrtime_t tsc_read(void)
	 *
	 * Default body: executes cpuid with %eax = 0 ahead of rdtsc, then
	 * merges %edx:%eax into a single 64-bit value in %rax.  %rbx is
	 * kept in %r11 across cpuid and restored afterwards.
	 *
	 * The _tsc_mfence_*, _tscp_*, _no_rdtsc_* and _tsc_lfence_*
	 * start/end label pairs below bracket alternative bodies (mfence or
	 * lfence ahead of rdtsc, rdtscp, or a constant zero result).
	 * NOTE(review): presumably one of these is copied over the default
	 * body at boot according to CPU capabilities -- confirm against the
	 * code that references these symbols before changing any sizes.
	 */
797	ENTRY_NP(tsc_read)
798	movq	%rbx, %r11
799	movl	$0, %eax
800	cpuid
801	rdtsc
802	movq	%r11, %rbx
803	shlq	$32, %rdx
804	orq	%rdx, %rax
805	ret
	/* Alternative body: mfence ahead of rdtsc. */
806	.globl _tsc_mfence_start
807_tsc_mfence_start:
808	mfence
809	rdtsc
810	shlq	$32, %rdx
811	orq	%rdx, %rax
812	ret
813	.globl _tsc_mfence_end
814_tsc_mfence_end:
	/* Alternative body: rdtscp, emitted as raw bytes. */
815	.globl _tscp_start
816_tscp_start:
817	.byte	0x0f, 0x01, 0xf9	/* rdtscp instruction */
818	shlq	$32, %rdx
819	orq	%rdx, %rax
820	ret
821	.globl _tscp_end
822_tscp_end:
	/* Alternative body: no TSC read at all, result is zero. */
823	.globl _no_rdtsc_start
824_no_rdtsc_start:
825	xorl	%edx, %edx
826	xorl	%eax, %eax
827	ret
828	.globl _no_rdtsc_end
829_no_rdtsc_end:
	/* Alternative body: lfence ahead of rdtsc. */
830	.globl _tsc_lfence_start
831_tsc_lfence_start:
832	lfence
833	rdtsc
834	shlq	$32, %rdx
835	orq	%rdx, %rax
836	ret
837	.globl _tsc_lfence_end
838_tsc_lfence_end:
839	SET_SIZE(tsc_read)
840
841#else /* __i386 */
842
	/*
	 * hrtime_t tsc_read(void)
	 *
	 * 32-bit version: same structure as the 64-bit one, but the result
	 * is left in %edx:%eax exactly as rdtsc produces it, and %ebx is
	 * saved on the stack across cpuid.
	 */
843	ENTRY_NP(tsc_read)
844	pushl	%ebx
845	movl	$0, %eax
846	cpuid
847	rdtsc
848	popl	%ebx
849	ret
	/* Alternative body: mfence ahead of rdtsc. */
850	.globl _tsc_mfence_start
851_tsc_mfence_start:
852	mfence
853	rdtsc
854	ret
855	.globl _tsc_mfence_end
856_tsc_mfence_end:
	/* Alternative body: rdtscp, emitted as raw bytes. */
857	.globl	_tscp_start
858_tscp_start:
859	.byte	0x0f, 0x01, 0xf9	/* rdtscp instruction */
860	ret
861	.globl _tscp_end
862_tscp_end:
	/* Alternative body: no TSC read at all, result is zero. */
863	.globl _no_rdtsc_start
864_no_rdtsc_start:
865	xorl	%edx, %edx
866	xorl	%eax, %eax
867	ret
868	.globl _no_rdtsc_end
869_no_rdtsc_end:
	/* Alternative body: lfence ahead of rdtsc. */
870	.globl _tsc_lfence_start
871_tsc_lfence_start:
872	lfence
873	rdtsc
874	ret
875	.globl _tsc_lfence_end
876_tsc_lfence_end:
877	SET_SIZE(tsc_read)
878
879#endif	/* __i386 */
880
881#endif	/* __lint */
882
883
884#endif	/* __xpv */
885
#ifdef __lint
/*
 * Do not use this function for obtaining the clock tick.  It exists for
 * callers that do not need a guaranteed-correct tick value.  The proper
 * routine to use is tsc_read().
 */
hrtime_t
randtick(void)
{
	return (0);
}
#else
#if defined(__amd64)
	/*
	 * hrtime_t randtick(void)
	 *
	 * Out: %rax = (%rdx << 32) | %rax as produced by a bare rdtsc.
	 */
	ENTRY_NP(randtick)
	rdtsc
	shlq	$32, %rdx		/* move the high half into place */
	orq	%rdx, %rax		/* combine with the low half */
	ret
	SET_SIZE(randtick)
#else
	/*
	 * hrtime_t randtick(void)
	 *
	 * Out: %edx:%eax exactly as produced by a bare rdtsc.
	 */
	ENTRY_NP(randtick)
	rdtsc
	ret
	SET_SIZE(randtick)
#endif /* __i386 */
#endif /* __lint */
912/*
913 * Insert entryp after predp in a doubly linked list.
914 */
915
#if defined(__lint)

/*ARGSUSED*/
void
_insque(caddr_t entryp, caddr_t predp)
{
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void _insque(caddr_t entryp, caddr_t predp)
	 *
	 * In: %rdi = entryp, %rsi = predp.
	 * The forward link is at offset 0 and the back link at offset
	 * CPTRSIZE of each element.
	 */
	ENTRY(_insque)
	movq	(%rsi), %rax		/* %rax = old predp->forw */
	movq	%rax, (%rdi)		/* entryp->forw = old predp->forw */
	movq	%rsi, CPTRSIZE(%rdi)	/* entryp->back = predp */
	movq	%rdi, (%rsi)		/* predp->forw = entryp */
	movq	%rdi, CPTRSIZE(%rax)	/* old successor's back = entryp */
	ret
	SET_SIZE(_insque)

#elif defined(__i386)

	/*
	 * void _insque(caddr_t entryp, caddr_t predp)
	 *
	 * In: 4(%esp) = entryp, 8(%esp) = predp.
	 * The forward link is at offset 0 and the back link at offset
	 * CPTRSIZE of each element.
	 */
	ENTRY(_insque)
	movl	4(%esp), %ecx		/* %ecx = entryp */
	movl	8(%esp), %edx		/* %edx = predp */
	movl	(%edx), %eax		/* %eax = old predp->forw */
	movl	%eax, (%ecx)		/* entryp->forw = old predp->forw */
	movl	%edx, CPTRSIZE(%ecx)	/* entryp->back = predp */
	movl	%ecx, (%edx)		/* predp->forw = entryp */
	movl	%ecx, CPTRSIZE(%eax)	/* old successor's back = entryp */
	ret
	SET_SIZE(_insque)

#endif	/* __i386 */
#endif	/* __lint */
951
952/*
953 * Remove entryp from a doubly linked list
954 */
955
#if defined(__lint)

/*ARGSUSED*/
void
_remque(caddr_t entryp)
{
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void _remque(caddr_t entryp)
	 *
	 * In: %rdi = entryp.
	 * Links the element's neighbours to each other; the element's own
	 * forw/back fields are left untouched.
	 */
	ENTRY(_remque)
	movq	CPTRSIZE(%rdi), %rdx	/* %rdx = entry->back */
	movq	(%rdi), %rax		/* %rax = entry->forw */
	movq	%rax, (%rdx)		/* entry->back->forw = entry->forw */
	movq	%rdx, CPTRSIZE(%rax)	/* entry->forw->back = entry->back */
	ret
	SET_SIZE(_remque)

#elif defined(__i386)

	/*
	 * void _remque(caddr_t entryp)
	 *
	 * In: 4(%esp) = entryp.
	 * Links the element's neighbours to each other; the element's own
	 * forw/back fields are left untouched.
	 */
	ENTRY(_remque)
	movl	4(%esp), %ecx		/* %ecx = entryp */
	movl	CPTRSIZE(%ecx), %edx	/* %edx = entry->back */
	movl	(%ecx), %eax		/* %eax = entry->forw */
	movl	%eax, (%edx)		/* entry->back->forw = entry->forw */
	movl	%edx, CPTRSIZE(%eax)	/* entry->forw->back = entry->back */
	ret
	SET_SIZE(_remque)

#endif	/* __i386 */
#endif	/* __lint */
988
989/*
990 * Returns the number of
991 * non-NULL bytes in string argument.
992 */
993
#if defined(__lint)

/* ARGSUSED */
size_t
strlen(const char *str)
{
	return (0);
}

#else	/* __lint */

#if defined(__amd64)

/*
 * size_t strlen(const char *s)
 *
 * In:  %rdi = s
 * Out: %rax = number of bytes preceding the terminating NUL
 *
 * A byte-at-a-time scan, close to a transliteration of:
 *
 *	const char *s0;
 *	for (s0 = s; *s; s++)
 *		;
 *	return (s - s0);
 *
 * On DEBUG kernels the routine first panics if s lies below
 * postbootkernelbase.  As it stands this gains nothing over a C
 * version; it should either become one or be made significantly faster.
 */

	ENTRY(strlen)
#ifdef DEBUG
	movq	postbootkernelbase(%rip), %rax
	cmpq	%rax, %rdi		/* s >= postbootkernelbase? */
	jae	str_valid
	pushq	%rbp
	movq	%rsp, %rbp
	leaq	.str_panic_msg(%rip), %rdi
	xorl	%eax, %eax
	call	panic
#endif	/* DEBUG */
str_valid:
	movq	%rdi, %rax		/* %rax = s0, start of string */
	cmpb	$0, (%rdi)		/* empty string? */
	je	.null_found
	.align	4
.strlen_loop:
	incq	%rdi			/* s++ */
	cmpb	$0, (%rdi)
	jne	.strlen_loop
.null_found:
	subq	%rax, %rdi		/* %rdi = s - s0 */
	movq	%rdi, %rax
	ret
	SET_SIZE(strlen)

#elif defined(__i386)

/*
 * size_t strlen(const char *s)
 *
 * In:  4(%esp) = s
 * Out: %eax = number of bytes preceding the terminating NUL
 *
 * Bytes are examined singly until the pointer is 4-byte aligned, then a
 * word at a time.  For a word w,
 *	(((w & 0x7f7f7f7f) + 0x7f7f7f7f) | w) & 0x80808080
 * equals 0x80808080 exactly when none of its four bytes is zero.  Once a
 * word fails that test the pointer is backed up and the byte loop
 * locates the NUL.
 */

	ENTRY(strlen)
#ifdef DEBUG
	movl	postbootkernelbase, %eax
	cmpl	%eax, 4(%esp)		/* s >= postbootkernelbase? */
	jae	str_valid
	pushl	%ebp
	movl	%esp, %ebp
	pushl	$.str_panic_msg
	call	panic
#endif /* DEBUG */

str_valid:
	movl	4(%esp), %eax		/* %eax = s */
	testl	$3, %eax		/* already word aligned? */
	jnz	.not_word_aligned	/* no: start with single bytes */
	.align	4
.word_aligned:
	movl	$0x7f7f7f7f, %ecx
	movl	(%eax), %edx		/* %edx = next four bytes */
	addl	$4, %eax		/* advance past this word */
	andl	%edx, %ecx		/* %ecx = w & 0x7f7f7f7f */
	addl	$0x7f7f7f7f, %ecx	/* carry into bit 7 of non-zero bytes */
	orl	%edx, %ecx		/* fold in each byte's own bit 7 */
	andl	$0x80808080, %ecx	/* keep only the per-byte flags */
	cmpl	$0x80808080, %ecx	/* all four bytes non-zero? */
	je	.word_aligned		/* yes: examine the next word */
	subl	$4, %eax		/* no: back up to this word */
.not_word_aligned:
	cmpb	$0, (%eax)		/* NUL byte here? */
	je	.null_found
	incl	%eax			/* next byte */
	testl	$3, %eax		/* reached a word boundary? */
	jnz	.not_word_aligned	/* not yet: keep going bytewise */
	jmp	.word_aligned		/* yes: switch to word scan */
	.align	4
.null_found:
	subl	4(%esp), %eax		/* %eax = end - s */
	ret
	SET_SIZE(strlen)

#endif	/* __i386 */

#ifdef DEBUG
	.text
.str_panic_msg:
	.string "strlen: argument below kernelbase"
#endif /* DEBUG */

#endif	/* __lint */
1101
1102	/*
1103	 * Berkeley 4.3 introduced symbolically named interrupt levels
1104	 * as a way to deal with priority in a machine independent fashion.
1105	 * Numbered priorities are machine specific, and should be
1106	 * discouraged where possible.
1107	 *
1108	 * Note, for the machine specific priorities there are
1109	 * examples listed for devices that use a particular priority.
1110	 * It should not be construed that all devices of that
1111	 * type should be at that priority.  It is currently where
1112	 * the current devices fit into the priority scheme based
1113	 * upon time criticalness.
1114	 *
1115	 * The underlying assumption of these assignments is that
1116	 * IPL 10 is the highest level from which a device
1117	 * routine can call wakeup.  Devices that interrupt from higher
1118	 * levels are restricted in what they can do.  If they need
1119	 * kernel services they should schedule a routine at a lower
1120	 * level (via software interrupt) to do the required
1121	 * processing.
1122	 *
1123	 * Examples of this higher usage:
1124	 *	Level	Usage
1125	 *	14	Profiling clock (and PROM uart polling clock)
1126	 *	12	Serial ports
1127	 *
1128	 * The serial ports request lower level processing on level 6.
1129	 *
1130	 * Also, almost all splN routines (where N is a number or a
1131	 * mnemonic) will do a RAISE(), on the assumption that they are
1132	 * never used to lower our priority.
1133	 * The exceptions are:
1134	 *	spl8()		Because you can't be above 15 to begin with!
1135	 *	splzs()		Because this is used at boot time to lower our
1136	 *			priority, to allow the PROM to poll the uart.
1137	 *	spl0()		Used to lower priority to 0.
1138	 */
1139
1140#if defined(__lint)
1141
int
spl0(void)
{
	return (0);
}

int
spl6(void)
{
	return (0);
}

int
spl7(void)
{
	return (0);
}

int
spl8(void)
{
	return (0);
}

int
splhigh(void)
{
	return (0);
}

int
splhi(void)
{
	return (0);
}

int
splzs(void)
{
	return (0);
}

/* ARGSUSED */
void
splx(int level)
{
}
1154
1155#else	/* __lint */
1156
#if defined(__amd64)

/*
 * SETPRI(level) and RAISE(level) load the literal level into %edi and
 * tail-jump to do_splx and splr respectively.
 */
#define	SETPRI(level) \
	movl	$/**/level, %edi;	/* %edi = requested level */	\
	jmp	do_splx			/* tail-jump to do_splx */

#define	RAISE(level) \
	movl	$/**/level, %edi;	/* %edi = requested level */	\
	jmp	splr			/* tail-jump to splr */

#elif defined(__i386)

/*
 * SETPRI(level) and RAISE(level) push the literal level, call do_splx
 * and splr respectively, drop the argument and return.
 */
#define	SETPRI(level) \
	pushl	$/**/level;	/* argument: requested level */		\
	call	do_splx;	/* common splx code */			\
	addl	$4, %esp;	/* drop the argument */			\
	ret

#define	RAISE(level) \
	pushl	$/**/level;	/* argument: requested level */		\
	call	splr;		/* common splr code */			\
	addl	$4, %esp;	/* drop the argument */			\
	ret

#endif	/* __i386 */

	/* spl8: set level 15 -- locks out all interrupts, memory errors too */
	ENTRY(spl8)
	SETPRI(15)
	SET_SIZE(spl8)

	/* spl7: raise to level 13, just below where profiling runs */
	ENTRY(spl7)
	RAISE(13)
	SET_SIZE(spl7)

	/*
	 * splzs: set level 12 (sun specific - highest priority onboard
	 * serial i/o asy ports).  Must be SETPRI rather than RAISE because
	 * it is also used to lower the priority.
	 */
	ENTRY(splzs)
	SETPRI(12)
	SET_SIZE(splzs)

	/* splhi and its aliases: raise to DISP_LEVEL */
	ENTRY(splhi)
	ALTENTRY(splhigh)
	ALTENTRY(spl6)
	ALTENTRY(i_ddi_splhigh)

	RAISE(DISP_LEVEL)

	SET_SIZE(i_ddi_splhigh)
	SET_SIZE(spl6)
	SET_SIZE(splhigh)
	SET_SIZE(splhi)

	/* spl0: set level 0 -- allow all interrupts */
	ENTRY(spl0)
	SETPRI(0)
	SET_SIZE(spl0)


	/* splx: hand the caller's argument straight to do_splx */
	ENTRY(splx)
	jmp	do_splx
	SET_SIZE(splx)
1220
1221#endif	/* __lint */
1222
#if defined(__i386)

/*
 * Accessors for the %gs segment register (32-bit kernels only).
 */

#if defined(__lint)

/*ARGSUSED*/
uint16_t
getgs(void)
{
	return (0);
}

/*ARGSUSED*/
void
setgs(uint16_t sel)
{
}

#else	/* __lint */

	/*
	 * uint16_t getgs(void)
	 *
	 * Out: %eax = %gs selector in the low 16 bits, upper bits cleared.
	 */
	ENTRY(getgs)
	clr	%eax			/* clear all of %eax first */
	movw	%gs, %ax		/* %ax = %gs */
	ret
	SET_SIZE(getgs)

	/*
	 * void setgs(uint16_t sel)
	 *
	 * In: 4(%esp) = sel.  Loads %gs from the argument.
	 */
	ENTRY(setgs)
	movw	4(%esp), %gs		/* %gs = sel */
	ret
	SET_SIZE(setgs)

#endif	/* __lint */
#endif	/* __i386 */
1256
#if defined(__lint)

void
pc_reset(void)
{
}

void
efi_reset(void)
{
}

#else	/* __lint */

	/*
	 * wait_500ms: calls tenmicrosec 50000 times.  The loop counter is
	 * kept in %ebx, which is saved and restored around the loop.
	 */
	ENTRY(wait_500ms)
#if defined(__amd64)
	pushq	%rbx
#elif defined(__i386)
	push	%ebx
#endif
	movl	$50000, %ebx		/* iterations remaining */
1:
	call	tenmicrosec
	decl	%ebx
	jnz	1b
#if defined(__amd64)
	popq	%rbx
#elif defined(__i386)
	pop	%ebx
#endif
	ret
	SET_SIZE(wait_500ms)

/* Bits of pc_reset_methods selecting which reset methods are attempted. */
#define	RESET_METHOD_KBC	1
#define	RESET_METHOD_PORT92	2
#define	RESET_METHOD_PCI	4

	DGDEF3(pc_reset_methods, 4, 8)
	.long RESET_METHOD_KBC|RESET_METHOD_PORT92|RESET_METHOD_PCI;

	/*
	 * void pc_reset(void)
	 *
	 * Tries each reset method enabled in pc_reset_methods in turn
	 * (keyboard controller, port 0x92, PCI port 0xcf9), waiting 500ms
	 * after each attempt, and finally falls through into efi_reset.
	 */
	ENTRY(pc_reset)

#if defined(__i386)
	testl	$RESET_METHOD_KBC, pc_reset_methods
#elif defined(__amd64)
	testl	$RESET_METHOD_KBC, pc_reset_methods(%rip)
#endif
	jz	1f

	/*
	 * Try the classic keyboard controller-triggered reset.
	 */
	movw	$0x64, %dx
	movb	$0xfe, %al
	outb	(%dx)

	/*
	 * Wait up to 500 milliseconds here for the keyboard controller
	 * to pull the reset line.  On some systems where the keyboard
	 * controller is slow to pull the reset line, the next reset method
	 * may be executed (which may be bad if those systems hang when the
	 * next reset method is used, e.g. Ferrari 3400 (doesn't like
	 * port 92), and Ferrari 4000 (doesn't like the cf9 reset method)).
	 */
	call	wait_500ms

1:
#if defined(__i386)
	testl	$RESET_METHOD_PORT92, pc_reset_methods
#elif defined(__amd64)
	testl	$RESET_METHOD_PORT92, pc_reset_methods(%rip)
#endif
	jz	3f

	/*
	 * Try port 0x92 fast reset.
	 */
	movw	$0x92, %dx
	inb	(%dx)
	cmpb	$0xff, %al	/* an absent port should read back 0xFF */
	je	1f
	testb	$1, %al		/* if bit 0 is already clear, */
	jz	2f		/* go straight to the reset */
	andb	$0xfe, %al	/* otherwise clear bit 0 first, */
	outb	(%dx)		/* writing that back, then */
2:
	orb	$1, %al		/* set bit 0 */
	outb	(%dx)		/* and reset the system */
1:

	call	wait_500ms

3:
#if defined(__i386)
	testl	$RESET_METHOD_PCI, pc_reset_methods
#elif defined(__amd64)
	testl	$RESET_METHOD_PCI, pc_reset_methods(%rip)
#endif
	jz	4f

	/*
	 * Try the PCI (soft) reset vector (should work on all modern
	 * systems, but has been shown to cause problems on 450NX systems,
	 * and some newer systems (e.g. ATI IXP400-equipped systems)).
	 * When resetting via this method, 2 writes are required.  The
	 * first targets bit 1 (0=hard reset without power cycle, 1=hard
	 * reset with power cycle).  The reset occurs on the second write,
	 * during bit 2's transition from 0->1.
	 */
	movw	$0xcf9, %dx
	movb	$0x2, %al	/* reset mode = hard, no power cycle */
	outb	(%dx)
	movb	$0x6, %al
	outb	(%dx)

	call	wait_500ms

4:
	/*
	 * port 0xcf9 failed also.  Last-ditch effort is to
	 * triple-fault the CPU.
	 * Also, use triple fault for EFI firmware.
	 */
	ENTRY(efi_reset)
#if defined(__amd64)
	pushq	$0x0
	pushq	$0x0		/* IDT base of 0, limit of 0 + 2 unused bytes */
	lidt	(%rsp)
#elif defined(__i386)
	pushl	$0x0
	pushl	$0x0		/* IDT base of 0, limit of 0 + 2 unused bytes */
	lidt	(%esp)
#endif
	int	$0x0		/* trigger interrupt, generate triple-fault */

	cli
	hlt			/* wait forever */
	/*NOTREACHED*/
	SET_SIZE(efi_reset)
	SET_SIZE(pc_reset)

#endif	/* __lint */
1395
1396/*
1397 * C callable in and out routines
1398 */
1399
#if defined(__lint)

/* ARGSUSED */
void
outl(int port_address, uint32_t val)
{}

#else	/* __lint */

/*
 * void outl(int port_address, uint32_t val)
 *
 * Write the 32-bit value val to I/O port port_address.
 */

#if defined(__amd64)

	ENTRY(outl)
	movl	%esi, %eax		/* %eax = val (2nd argument) */
	movw	%di, %dx		/* %dx = port (1st argument) */
	outl	(%dx)
	ret
	SET_SIZE(outl)

#elif defined(__i386)

	/*
	 * Stack offsets of the two arguments on entry; nothing is
	 * pushed, so only the return address lies below them.  The
	 * simple in/out routines after this one reuse these symbols.
	 */
	.set	PORT, 4
	.set	VAL, 8

	ENTRY(outl)
	movl	VAL(%esp), %eax		/* %eax = val */
	movw	PORT(%esp), %dx		/* %dx = port */
	outl	(%dx)
	ret
	SET_SIZE(outl)

#endif	/* __i386 */
#endif	/* __lint */
1432
#if defined(__lint)

/* ARGSUSED */
void
outw(int port_address, uint16_t val)
{}

#else	/* __lint */

/*
 * void outw(int port_address, uint16_t val)
 *
 * Write the 16-bit value val to I/O port port_address.  The store is
 * spelled "D16 outl"; D16 comes from the included headers and is
 * presumably the operand-size override that selects the 16-bit form.
 */

#if defined(__amd64)

	ENTRY(outw)
	movw	%si, %ax		/* %ax = val (2nd argument) */
	movw	%di, %dx		/* %dx = port (1st argument) */
	D16 outl (%dx)
	ret
	SET_SIZE(outw)

#elif defined(__i386)

	ENTRY(outw)
	movw	VAL(%esp), %ax		/* %ax = val */
	movw	PORT(%esp), %dx		/* %dx = port */
	D16 outl (%dx)
	ret
	SET_SIZE(outw)

#endif	/* __i386 */
#endif	/* __lint */
1462
#if defined(__lint)

/* ARGSUSED */
void
outb(int port_address, uint8_t val)
{}

#else	/* __lint */

/*
 * void outb(int port_address, uint8_t val)
 *
 * Write the byte val to I/O port port_address.
 */

#if defined(__amd64)

	ENTRY(outb)
	movb	%sil, %al		/* %al = val (2nd argument) */
	movw	%di, %dx		/* %dx = port (1st argument) */
	outb	(%dx)
	ret
	SET_SIZE(outb)

#elif defined(__i386)

	ENTRY(outb)
	movb	VAL(%esp), %al		/* %al = val */
	movw	PORT(%esp), %dx		/* %dx = port */
	outb	(%dx)
	ret
	SET_SIZE(outb)

#endif	/* __i386 */
#endif	/* __lint */
1492
#if defined(__lint)

/* ARGSUSED */
uint32_t
inl(int port_address)
{ return (0); }

#else	/* __lint */

/*
 * uint32_t inl(int port_address)
 *
 * Read a 32-bit value from I/O port port_address and return it.
 */

#if defined(__amd64)

	ENTRY(inl)
	movw	%di, %dx		/* %dx = port (1st argument) */
	xorl	%eax, %eax		/* clear %rax before the read */
	inl	(%dx)			/* %eax = value read */
	ret
	SET_SIZE(inl)

#elif defined(__i386)

	ENTRY(inl)
	movw	PORT(%esp), %dx		/* %dx = port */
	inl	(%dx)			/* %eax = value read */
	ret
	SET_SIZE(inl)

#endif	/* __i386 */
#endif	/* __lint */
1521
#if defined(__lint)

/* ARGSUSED */
uint16_t
inw(int port_address)
{ return (0); }

#else	/* __lint */

/*
 * uint16_t inw(int port_address)
 *
 * Read a 16-bit value from I/O port port_address.  The return
 * register is zeroed first, so the result comes back zero-extended.
 */

#if defined(__amd64)

	ENTRY(inw)
	movw	%di, %dx		/* %dx = port (1st argument) */
	xorl	%eax, %eax		/* upper bits of the result = 0 */
	D16 inl	(%dx)			/* %ax = value read */
	ret
	SET_SIZE(inw)

#elif defined(__i386)

	ENTRY(inw)
	movw	PORT(%esp), %dx		/* %dx = port */
	subl	%eax, %eax		/* upper bits of the result = 0 */
	D16 inl	(%dx)			/* %ax = value read */
	ret
	SET_SIZE(inw)

#endif	/* __i386 */
#endif	/* __lint */
1551
1552
#if defined(__lint)

/* ARGSUSED */
uint8_t
inb(int port_address)
{ return (0); }

#else	/* __lint */

/*
 * uint8_t inb(int port_address)
 *
 * Read one byte from I/O port port_address.  The return register is
 * zeroed first, so the result comes back zero-extended.
 */

#if defined(__amd64)

	ENTRY(inb)
	movw	%di, %dx		/* %dx = port (1st argument) */
	xorl	%eax, %eax		/* upper bits of the result = 0 */
	inb	(%dx)			/* %al = value read */
	ret
	SET_SIZE(inb)

#elif defined(__i386)

	ENTRY(inb)
	movw	PORT(%esp), %dx		/* %dx = port */
	subl	%eax, %eax		/* upper bits of the result = 0 */
	inb	(%dx)			/* %al = value read */
	ret
	SET_SIZE(inb)

#endif	/* __i386 */
#endif	/* __lint */
1582
1583
#if defined(__lint)

/* ARGSUSED */
void
repoutsw(int port, uint16_t *addr, int cnt)
{}

#else	/* __lint */

/*
 * void repoutsw(int port, uint16_t *addr, int cnt)
 *
 * Write cnt 16-bit words, read from the buffer at addr, to I/O port
 * port using a repeated string output instruction.
 */

#if defined(__amd64)

	ENTRY(repoutsw)
	movl	%edx, %ecx		/* %ecx = cnt; must precede the %dx load */
	movw	%di, %dx		/* %dx = port */
	rep				/* addr is already in %rsi (2nd argument) */
	  D16 outsl
	ret
	SET_SIZE(repoutsw)

#elif defined(__i386)

	/*
	 * Stack layout once the callee-saved index register has been
	 * pushed:
	 *
	 *      |  cnt   |  +16
	 *      | *addr  |  +12
	 *      |  port  |  +8
	 *      |  eip   |  +4
	 *      | saved  |  <-- %esp
	 *
	 * Pushing anything else means these offsets must change too.
	 */
	.set	PORT, 8
	.set	ADDR, 12
	.set	COUNT, 16

	ENTRY(repoutsw)
	pushl	%esi			/* %esi is the string source register */
	movl	COUNT(%esp), %ecx	/* %ecx = cnt */
	movl	ADDR(%esp), %esi	/* %esi = addr */
	movl	PORT(%esp), %edx	/* %dx = port */
	rep
	  D16 outsl
	popl	%esi
	ret
	SET_SIZE(repoutsw)

#endif	/* __i386 */
#endif	/* __lint */
1633
1634
#if defined(__lint)

/* ARGSUSED */
void
repinsw(int port_addr, uint16_t *addr, int cnt)
{}

#else	/* __lint */

/*
 * void repinsw(int port_addr, uint16_t *addr, int cnt)
 *
 * Read cnt 16-bit words from I/O port port_addr into the buffer at
 * addr using a repeated string input instruction.
 */

#if defined(__amd64)

	ENTRY(repinsw)
	movl	%edx, %ecx		/* %ecx = cnt; must precede the %dx load */
	movw	%di, %dx		/* %dx = port; must precede the %rdi load */
	/*
	 * The string input instruction stores through %rdi, but addr
	 * arrives in %rsi (2nd argument).  Without this move the data
	 * would be written through the port number left in %rdi.
	 */
	movq	%rsi, %rdi		/* %rdi = addr */
	rep
	  D16 insl
	ret
	SET_SIZE(repinsw)

#elif defined(__i386)

	/*
	 * PORT, ADDR and COUNT are the stack offsets (8, 12, 16) set
	 * earlier in this file; they assume exactly one register has
	 * been pushed below the return address.
	 */
	ENTRY(repinsw)
	pushl	%edi			/* %edi is the string destination register */
	movl	PORT(%esp), %edx	/* %dx = port */
	movl	ADDR(%esp), %edi	/* %edi = addr */
	movl	COUNT(%esp), %ecx	/* %ecx = cnt */
	rep
	  D16 insl
	popl	%edi
	ret
	SET_SIZE(repinsw)

#endif	/* __i386 */
#endif	/* __lint */
1669
1670
#if defined(__lint)

/* ARGSUSED */
void
repinsb(int port, uint8_t *addr, int count)
{}

#else	/* __lint */

/*
 * void repinsb(int port, uint8_t *addr, int count)
 *
 * Read count bytes from I/O port port into the buffer at addr.
 */

#if defined(__amd64)

	ENTRY(repinsb)
	movl	%edx, %ecx		/* %ecx = count; must precede the %dx load */
	movw	%di, %dx		/* %dx = port; must precede the %rdi load */
	movq	%rsi, %rdi		/* %rdi = addr (string destination) */
	rep
	  insb
	ret
	SET_SIZE(repinsb)

#elif defined(__i386)

	/*
	 * Stack layout once the callee-saved index register (%edi here,
	 * %esi in the output routines) has been pushed:
	 *
	 *      | count  |  +16
	 *      | *addr  |  +12
	 *      |  port  |  +8
	 *      |  eip   |  +4
	 *      | saved  |  <-- %esp
	 *
	 * Pushing anything else means these offsets must change too.
	 */
	.set	IO_PORT, 8
	.set	IO_ADDR, 12
	.set	IO_COUNT, 16

	ENTRY(repinsb)
	pushl	%edi			/* %edi is the string destination register */
	movl	IO_PORT(%esp), %edx	/* %dx = port */
	movl	IO_COUNT(%esp), %ecx	/* %ecx = count */
	movl	IO_ADDR(%esp), %edi	/* %edi = addr */
	rep
	  insb
	popl	%edi
	ret
	SET_SIZE(repinsb)

#endif	/* __i386 */
#endif	/* __lint */
1721
1722
/*
 * void repinsd(int port, uint32_t *addr, int count)
 *
 * Read a stream of 32-bit words from I/O port port into the buffer
 * at addr.  NOTE: count is in DWORDs, not bytes.
 */
#if defined(__lint)

/* ARGSUSED */
void
repinsd(int port, uint32_t *addr, int count)
{}

#else	/* __lint */

#if defined(__amd64)

	ENTRY(repinsd)
	movl	%edx, %ecx		/* %ecx = count; must precede the %dx load */
	movw	%di, %dx		/* %dx = port; must precede the %rdi load */
	movq	%rsi, %rdi		/* %rdi = addr (string destination) */
	rep
	  insl
	ret
	SET_SIZE(repinsd)

#elif defined(__i386)

	/* IO_PORT, IO_ADDR and IO_COUNT assume exactly one pushed register. */
	ENTRY(repinsd)
	pushl	%edi			/* %edi is the string destination register */
	movl	IO_PORT(%esp), %edx	/* %dx = port */
	movl	IO_COUNT(%esp), %ecx	/* %ecx = count */
	movl	IO_ADDR(%esp), %edi	/* %edi = addr */
	rep
	  insl
	popl	%edi
	ret
	SET_SIZE(repinsd)

#endif	/* __i386 */
#endif	/* __lint */
1762
/*
 * void repoutsb(int port, uint8_t *addr, int count)
 *
 * Write a stream of bytes from the buffer at addr to I/O port port.
 * NOTE: count is in bytes.
 */
#if defined(__lint)

/* ARGSUSED */
void
repoutsb(int port, uint8_t *addr, int count)
{}

#else	/* __lint */

#if defined(__amd64)

	ENTRY(repoutsb)
	movl	%edx, %ecx		/* %ecx = count; must precede the %dx load */
	movw	%di, %dx		/* %dx = port */
	rep				/* addr is already in %rsi (2nd argument) */
	  outsb
	ret
	SET_SIZE(repoutsb)

#elif defined(__i386)

	/* IO_PORT, IO_ADDR and IO_COUNT assume exactly one pushed register. */
	ENTRY(repoutsb)
	pushl	%esi			/* %esi is the string source register */
	movl	IO_PORT(%esp), %edx	/* %dx = port */
	movl	IO_COUNT(%esp), %ecx	/* %ecx = count */
	movl	IO_ADDR(%esp), %esi	/* %esi = addr */
	rep
	  outsb
	popl	%esi
	ret
	SET_SIZE(repoutsb)

#endif	/* __i386 */
#endif	/* __lint */
1801
/*
 * void repoutsd(int port, uint32_t *addr, int count)
 *
 * Write a stream of 32-bit words from the buffer at addr to I/O port
 * port.  NOTE: count is in DWORDs, not bytes.
 */
#if defined(__lint)

/* ARGSUSED */
void
repoutsd(int port, uint32_t *addr, int count)
{}

#else	/* __lint */

#if defined(__amd64)

	ENTRY(repoutsd)
	movl	%edx, %ecx		/* %ecx = count; must precede the %dx load */
	movw	%di, %dx		/* %dx = port */
	rep				/* addr is already in %rsi (2nd argument) */
	  outsl
	ret
	SET_SIZE(repoutsd)

#elif defined(__i386)

	/* IO_PORT, IO_ADDR and IO_COUNT assume exactly one pushed register. */
	ENTRY(repoutsd)
	pushl	%esi			/* %esi is the string source register */
	movl	IO_PORT(%esp), %edx	/* %dx = port */
	movl	IO_COUNT(%esp), %ecx	/* %ecx = count */
	movl	IO_ADDR(%esp), %esi	/* %esi = addr */
	rep
	  outsl
	popl	%esi
	ret
	SET_SIZE(repoutsd)

#endif	/* __i386 */
#endif	/* __lint */
1840
/*
 * Software-interrupt helpers, each callable from C:
 *
 *	void int3(void)		raises vector T_BPTFLT
 *	void int18(void)	raises vector T_MCE
 *	void int20(void)	raises vector T_DBGENTR, but only when
 *				RB_DEBUG is set in boothowto
 *	void int_cmci(void)	raises vector T_ENOEXTFLT
 */

#if defined(__lint)

void
int3(void)
{}

void
int18(void)
{}

void
int20(void)
{}

void
int_cmci(void)
{}

#else	/* __lint */

	ENTRY(int3)
	int	$T_BPTFLT
	ret
	SET_SIZE(int3)

	ENTRY(int18)
	int	$T_MCE
	ret
	SET_SIZE(int18)

	ENTRY(int20)
	movl	boothowto, %eax		/* %eax = boot flags */
	andl	$RB_DEBUG, %eax		/* isolate the RB_DEBUG bit */
	jz	1f			/* clear: skip the interrupt */

	int	$T_DBGENTR
1:
	/*
	 * This return is a branch target, so the two-byte "rep; ret"
	 * form is used (AMD Software Optimization Guide, Section 6.2).
	 */
	rep;	ret
	SET_SIZE(int20)

	ENTRY(int_cmci)
	int	$T_ENOEXTFLT
	ret
	SET_SIZE(int_cmci)

#endif	/* __lint */
1895
#if defined(__lint)

/* ARGSUSED */
int
scanc(size_t size, uchar_t *cp, uchar_t *table, uchar_t mask)
{ return (0); }

#else	/* __lint */

/*
 * int scanc(size_t size, uchar_t *cp, uchar_t *table, uchar_t mask)
 *
 * Walk the size bytes at cp and stop at the first byte c for which
 * (table[c] & mask) is non-zero.  Returns the number of bytes from
 * that byte to the end of the buffer, or 0 if no byte matched.
 */

#if defined(__amd64)

	ENTRY(scanc)
	/* On entry: %rdi = size, %rsi = cp, %rdx = table, %cl = mask */
	addq	%rsi, %rdi		/* %rdi = end = cp + size */
.scanc_next:
	cmpq	%rdi, %rsi
	jae	.scanc_out		/* cp reached end: nothing matched */
	movzbq	(%rsi), %r8		/* %r8 = *cp, zero-extended index */
	incq	%rsi			/* advance cp past this byte */
	testb	%cl, (%r8, %rdx)	/* table[*cp] & mask */
	jz	.scanc_next		/* no bits in common: keep going */
	decq	%rsi			/* step back onto the matching byte */
.scanc_out:
	movl	%edi, %eax
	subl	%esi, %eax		/* result = end - cp */
	ret
	SET_SIZE(scanc)

#elif defined(__i386)

	ENTRY(scanc)
	pushl	%edi
	pushl	%esi
	/* Two registers pushed, so the arguments start at 12(%esp). */
	movl	16(%esp), %esi		/* %esi = cp */
	movl	20(%esp), %edx		/* %edx = table */
	movb	24(%esp), %cl		/* %cl = mask */
	movl	%esi, %edi
	addl	12(%esp), %edi		/* %edi = end = cp + size */
.scanc_next:
	cmpl	%edi, %esi
	jae	.scanc_out		/* cp reached end: nothing matched */
	movzbl	(%esi),  %eax		/* %eax = *cp, zero-extended index */
	incl	%esi			/* advance cp past this byte */
	movb	(%edx,  %eax), %al	/* %al = table[*cp] */
	testb	%al, %cl
	jz	.scanc_next		/* no bits in common: keep going */
	dec	%esi			/* step back onto the matching byte */
.scanc_out:
	movl	%edi, %eax
	subl	%esi, %eax		/* result = end - cp */
	popl	%esi
	popl	%edi
	ret
	SET_SIZE(scanc)

#endif	/* __i386 */
#endif	/* __lint */
1956
/*
 * Out-of-line versions of routines that are normally inlined from
 * i86.il; they are kept here as a fallback.
 *
 * ulong_t intr_clear(void)
 * ulong_t clear_int_flag(void)
 *
 * Disable interrupts and return the flags register as it was on
 * entry.  Under __xpv the PS_IE bit of the return value is rebuilt
 * from the event mask that CLIRET hands back, unless xpv_panicking
 * is non-zero, in which case the plain CLI path is taken.
 */

#if defined(__lint)

ulong_t
intr_clear(void)
{ return (0); }

ulong_t
clear_int_flag(void)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	ENTRY(intr_clear)
	ENTRY(clear_int_flag)
	pushfq
	popq	%rax			/* %rax = flags on entry (return value) */
#if defined(__xpv)
	leaq	xpv_panicking, %rdi
	movl	(%rdi), %edi		/* %edi = xpv_panicking */
	testl	%edi, %edi
	jnz	2f			/* panicking: use the plain CLI below */
	CLIRET(%rdi, %dl)		/* event mask comes back in %dl */
	andq    $_BITNOT(PS_IE), %rax	/* assume "disabled" ... */
	testb	$1, %dl
	jnz	1f			/* ... mask bit set: leave PS_IE clear */
	orq	$PS_IE, %rax		/* mask bit clear: report PS_IE set */
1:
	ret
2:
#endif
	CLI(%rdi)
	ret
	SET_SIZE(clear_int_flag)
	SET_SIZE(intr_clear)

#elif defined(__i386)

	ENTRY(intr_clear)
	ENTRY(clear_int_flag)
	pushfl
	popl	%eax			/* %eax = flags on entry (return value) */
#if defined(__xpv)
	leal	xpv_panicking, %edx
	movl	(%edx), %edx		/* %edx = xpv_panicking */
	testl	%edx, %edx
	jnz	2f			/* panicking: use the plain CLI below */
	CLIRET(%edx, %cl)		/* event mask comes back in %cl */
	andl    $_BITNOT(PS_IE), %eax	/* assume "disabled" ... */
	testb	$1, %cl
	jnz	1f			/* ... mask bit set: leave PS_IE clear */
	orl	$PS_IE, %eax		/* mask bit clear: report PS_IE set */
1:
	ret
2:
#endif
	CLI(%edx)
	ret
	SET_SIZE(clear_int_flag)
	SET_SIZE(intr_clear)

#endif	/* __i386 */
#endif	/* __lint */
2032
#if defined(__lint)

struct cpu *
curcpup(void)
{ return 0; }

#else	/* __lint */

/*
 * struct cpu *curcpup(void)
 *
 * Return the pointer stored at offset CPU_SELF of the %gs segment.
 */

#if defined(__amd64)

	ENTRY(curcpup)
	movq	%gs:CPU_SELF, %rax	/* %rax = value at %gs:CPU_SELF */
	ret
	SET_SIZE(curcpup)

#elif defined(__i386)

	ENTRY(curcpup)
	movl	%gs:CPU_SELF, %eax	/* %eax = value at %gs:CPU_SELF */
	ret
	SET_SIZE(curcpup)

#endif	/* __i386 */
#endif	/* __lint */
2057
/*
 * htonll(), ntohll(), htonl(), ntohl(), htons(), ntohs()
 *
 * Each routine reverses the byte order of its argument and returns
 * the result, converting between host byte order (little endian)
 * and network byte order (big endian).  The two directions are the
 * same operation, so each pair shares one body.
 */

#if defined(__lint)

uint64_t
htonll(uint64_t i)
{ return (i); }

uint64_t
ntohll(uint64_t i)
{ return (i); }

uint32_t
htonl(uint32_t i)
{ return (i); }

uint32_t
ntohl(uint32_t i)
{ return (i); }

uint16_t
htons(uint16_t i)
{ return (i); }

uint16_t
ntohs(uint16_t i)
{ return (i); }

#else	/* __lint */

#if defined(__amd64)

	ENTRY(htonll)
	ALTENTRY(ntohll)
	movq	%rdi, %rax		/* %rax = i */
	bswapq	%rax			/* reverse all eight bytes */
	ret
	SET_SIZE(ntohll)
	SET_SIZE(htonll)

	ENTRY(htonl)
	ALTENTRY(ntohl)
	movl	%edi, %eax		/* %eax = i */
	bswap	%eax			/* reverse all four bytes */
	ret
	SET_SIZE(ntohl)
	SET_SIZE(htonl)

	ENTRY(htons)
	ALTENTRY(ntohs)
	movl	%edi, %eax		/* %eax = i */
	bswap	%eax			/* swapped low half is now in bits 31..16 */
	shrl	$16, %eax		/* bring it down; upper bits become 0 */
	ret
	SET_SIZE(ntohs)
	SET_SIZE(htons)

#elif defined(__i386)

	ENTRY(htonll)
	ALTENTRY(ntohll)
	/*
	 * The low word of the argument becomes the high word of the
	 * result (%edx) and vice versa, each with its bytes reversed.
	 */
	movl	8(%esp), %eax		/* %eax = high word of i */
	movl	4(%esp), %edx		/* %edx = low word of i */
	bswap	%eax
	bswap	%edx
	ret
	SET_SIZE(ntohll)
	SET_SIZE(htonll)

	ENTRY(htonl)
	ALTENTRY(ntohl)
	movl	4(%esp), %eax		/* %eax = i */
	bswap	%eax			/* reverse all four bytes */
	ret
	SET_SIZE(ntohl)
	SET_SIZE(htonl)

	ENTRY(htons)
	ALTENTRY(ntohs)
	movl	4(%esp), %eax		/* %eax = i */
	bswap	%eax			/* swapped low half is now in bits 31..16 */
	shrl	$16, %eax		/* bring it down; upper bits become 0 */
	ret
	SET_SIZE(ntohs)
	SET_SIZE(htons)

#endif	/* __i386 */
#endif	/* __lint */
2152
2153
#if defined(__lint)

/* ARGSUSED */
void
intr_restore(ulong_t i)
{ return; }

/* ARGSUSED */
void
restore_int_flag(ulong_t i)
{ return; }

#else	/* __lint */

/*
 * void intr_restore(ulong_t i)
 * void restore_int_flag(ulong_t i)
 *
 * Re-enable interrupts if PS_IE is set in the saved flags value i;
 * otherwise do nothing.  Under __xpv nothing is done either when
 * xpv_panicking is non-zero.
 */

#if defined(__amd64)

	ENTRY(intr_restore)
	ENTRY(restore_int_flag)
	testq	$PS_IE, %rdi		/* were interrupts enabled in i? */
	jz	1f			/* no: leave them disabled */
#if defined(__xpv)
	leaq	xpv_panicking, %rsi
	movl	(%rsi), %esi		/* %esi = xpv_panicking */
	testl	%esi, %esi
	jnz	1f			/* panicking: leave things alone */
	/*
	 * We really run unprivileged here, so a write to the hardware
	 * IF bit would be ignored; the virtual IF bit is changed
	 * through the event mask instead.
	 */
	IE_TO_EVENT_MASK(%rsi, %rdi)
#else
	sti
#endif
1:
	ret
	SET_SIZE(restore_int_flag)
	SET_SIZE(intr_restore)

#elif defined(__i386)

	ENTRY(intr_restore)
	ENTRY(restore_int_flag)
	testl	$PS_IE, 4(%esp)		/* were interrupts enabled in i? */
	jz	1f			/* no: leave them disabled */
#if defined(__xpv)
	leal	xpv_panicking, %edx
	movl	(%edx), %edx		/* %edx = xpv_panicking */
	testl	%edx, %edx
	jnz	1f			/* panicking: leave things alone */
	/*
	 * We really run unprivileged here, so a write to the hardware
	 * IF bit would be ignored; the virtual IF bit is changed
	 * through the event mask instead.
	 */
	IE_TO_EVENT_MASK(%edx, 4(%esp))
#else
	sti
#endif
1:
	ret
	SET_SIZE(restore_int_flag)
	SET_SIZE(intr_restore)

#endif	/* __i386 */
#endif	/* __lint */
2220
#if defined(__lint)

void
sti(void)
{}

void
cli(void)
{}

#else	/* __lint */

/*
 * void sti(void)
 * void cli(void)
 *
 * C-callable wrappers around the STI and CLI macros supplied by the
 * included headers.  CLI takes a register argument; the natural-width
 * accumulator is passed for it.
 */

	ENTRY(sti)
	STI
	ret
	SET_SIZE(sti)

	ENTRY(cli)
#if defined(__amd64)
	CLI(%rax)			/* 64-bit register argument */
#elif defined(__i386)
	CLI(%eax)			/* 32-bit register argument */
#endif	/* __i386 */
	ret
	SET_SIZE(cli)

#endif	/* __lint */
2248
#if defined(__lint)

dtrace_icookie_t
dtrace_interrupt_disable(void)
{ return (0); }

#else   /* __lint */

/*
 * dtrace_icookie_t dtrace_interrupt_disable(void)
 *
 * Disable interrupts and return the flags register as it was on
 * entry.  Under __xpv the PS_IE bit of the cookie is rebuilt from
 * the event mask returned by CLIRET; if xpv_panicking is non-zero
 * the routine returns the captured flags without doing anything
 * else.
 */

#if defined(__amd64)

	ENTRY(dtrace_interrupt_disable)
	pushfq
	popq	%rax			/* %rax = flags on entry (the cookie) */
#if defined(__xpv)
	leaq	xpv_panicking, %rdi
	movl	(%rdi), %edi		/* %edi = xpv_panicking */
	testl	%edi, %edi
	jnz	.dtrace_interrupt_disable_done
	CLIRET(%rdi, %dl)		/* event mask comes back in %dl */
	andq    $_BITNOT(PS_IE), %rax	/* assume "disabled" ... */
	testb	$1, %dl
	jnz	.dtrace_interrupt_disable_done
	orq	$PS_IE, %rax		/* mask bit clear: report PS_IE set */
#else
	CLI(%rdx)
#endif
.dtrace_interrupt_disable_done:
	ret
	SET_SIZE(dtrace_interrupt_disable)

#elif defined(__i386)

	ENTRY(dtrace_interrupt_disable)
	pushfl
	popl	%eax			/* %eax = flags on entry (the cookie) */
#if defined(__xpv)
	leal	xpv_panicking, %edx
	movl	(%edx), %edx		/* %edx = xpv_panicking */
	testl	%edx, %edx
	jnz	.dtrace_interrupt_disable_done
	CLIRET(%edx, %cl)		/* event mask comes back in %cl */
	andl    $_BITNOT(PS_IE), %eax	/* assume "disabled" ... */
	testb	$1, %cl
	jnz	.dtrace_interrupt_disable_done
	orl	$PS_IE, %eax		/* mask bit clear: report PS_IE set */
#else
	CLI(%edx)
#endif
.dtrace_interrupt_disable_done:
	ret
	SET_SIZE(dtrace_interrupt_disable)

#endif	/* __i386 */
#endif	/* __lint */
2309
#if defined(__lint)

/*ARGSUSED*/
void
dtrace_interrupt_enable(dtrace_icookie_t cookie)
{}

#else	/* __lint */

/*
 * void dtrace_interrupt_enable(dtrace_icookie_t cookie)
 *
 * Load the flags register from cookie.  Under __xpv the event mask
 * is additionally updated from the cookie via IE_TO_EVENT_MASK,
 * unless xpv_panicking is non-zero.
 */

#if defined(__amd64)

	ENTRY(dtrace_interrupt_enable)
	pushq	%rdi			/* cookie -> stack -> flags */
	popfq
#if defined(__xpv)
	leaq	xpv_panicking, %rdx
	movl	(%rdx), %edx		/* %edx = xpv_panicking */
	testl	%edx, %edx
	jnz	.dtrace_interrupt_enable_done
	/*
	 * We really run unprivileged here, so the popf above cannot
	 * change the hardware IF bit; the virtual IF bit is changed
	 * through the event mask instead.
	 */
	IE_TO_EVENT_MASK(%rdx, %rdi)
#endif
.dtrace_interrupt_enable_done:
	ret
	SET_SIZE(dtrace_interrupt_enable)

#elif defined(__i386)

	ENTRY(dtrace_interrupt_enable)
	movl	4(%esp), %eax		/* %eax = cookie */
	pushl	%eax			/* cookie -> stack -> flags */
	popfl
#if defined(__xpv)
	leal	xpv_panicking, %edx
	movl	(%edx), %edx		/* %edx = xpv_panicking */
	testl	%edx, %edx
	jnz	.dtrace_interrupt_enable_done
	/*
	 * We really run unprivileged here, so the popf above cannot
	 * change the hardware IF bit; the virtual IF bit is changed
	 * through the event mask instead.
	 */
	IE_TO_EVENT_MASK(%edx, %eax)
#endif
.dtrace_interrupt_enable_done:
	ret
	SET_SIZE(dtrace_interrupt_enable)

#endif	/* __i386 */
#endif	/* __lint */
2364
2365
/*
 * void dtrace_membar_producer(void)
 * void dtrace_membar_consumer(void)
 *
 * Both routines return immediately without issuing any barrier
 * instruction.
 * NOTE(review): presumably the x86 memory ordering rules make an
 * explicit barrier unnecessary here -- confirm before relying on it.
 *
 * The guard tests __lint, like every other lint stub in this file
 * and like the #else / #endif comments below.
 */
#if defined(__lint)

void
dtrace_membar_producer(void)
{}

void
dtrace_membar_consumer(void)
{}

#else	/* __lint */

	ENTRY(dtrace_membar_producer)
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(dtrace_membar_producer)

	ENTRY(dtrace_membar_consumer)
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(dtrace_membar_consumer)

#endif	/* __lint */
2389
#if defined(__lint)

kthread_id_t
threadp(void)
{ return ((kthread_id_t)0); }

#else	/* __lint */

/*
 * kthread_id_t threadp(void)
 *
 * Return the pointer stored at offset CPU_THREAD of the %gs segment.
 */

#if defined(__amd64)

	ENTRY(threadp)
	movq	%gs:CPU_THREAD, %rax	/* %rax = value at %gs:CPU_THREAD */
	ret
	SET_SIZE(threadp)

#elif defined(__i386)

	ENTRY(threadp)
	movl	%gs:CPU_THREAD, %eax	/* %eax = value at %gs:CPU_THREAD */
	ret
	SET_SIZE(threadp)

#endif	/* __i386 */
#endif	/* __lint */
2414
2415/*
2416 *   Checksum routine for Internet Protocol Headers
2417 */
2418
2419#if defined(__lint)
2420
/*
 * Lint-only C rendition of ip_ocsum(): add up halfword_count 16-bit
 * words starting at address, fold the total down to 16 bits, add the
 * caller's partial checksum sum, and fold once more.
 */
/* ARGSUSED */
unsigned int
ip_ocsum(
	ushort_t *address,	/* ptr to 1st message buffer */
	int halfword_count,	/* length of data */
	unsigned int sum)	/* partial checksum */
{
	int		i;
	unsigned int	psum = 0;	/* partial sum */

	/* Accumulate every 16-bit word of the buffer. */
	for (i = 0; i < halfword_count; i++, address++) {
		psum += *address;
	}

	/* Fold the bits above 16 back into the low 16 until none remain. */
	while ((psum >> 16) != 0) {
		psum = (psum & 0xffff) + (psum >> 16);
	}

	/* Merge in the caller's partial checksum. */
	psum += sum;

	/* Fold again, since the addition may have carried past bit 15. */
	while ((psum >> 16) != 0) {
		psum = (psum & 0xffff) + (psum >> 16);
	}

	return (psum);
}
2447
2448#else	/* __lint */
2449
2450#if defined(__amd64)
2451
	/*
	 * unsigned int ip_ocsum(ushort_t *address, int halfword_count,
	 *     unsigned int sum)
	 *
	 * On entry: %rdi = address, %esi = halfword_count, %edx = sum.
	 * Returns the folded 16-bit sum in %eax.
	 *
	 * The data is added 32 bits at a time into two accumulators,
	 * %edx and %eax, linked by one carry chain.  A full pass of the
	 * unrolled chain consumes 64 bytes (32 halfwords); a shorter
	 * tail enters the chain part-way through via a jump table.
	 */
	ENTRY(ip_ocsum)
	pushq	%rbp
	movq	%rsp, %rbp
#ifdef DEBUG
	/* DEBUG only: panic if address lies below postbootkernelbase. */
	movq	postbootkernelbase(%rip), %rax
	cmpq	%rax, %rdi
	jnb	1f
	xorl	%eax, %eax
	movq	%rdi, %rsi
	leaq	.ip_ocsum_panic_msg(%rip), %rdi
	call	panic
	/*NOTREACHED*/
.ip_ocsum_panic_msg:
	.string	"ip_ocsum: address 0x%p below kernelbase\n"
1:
#endif
	movl	%esi, %ecx	/* halfword_count */
	movq	%rdi, %rsi	/* address */
				/* partial sum in %edx */
	xorl	%eax, %eax
	testl	%ecx, %ecx
	jz	.ip_ocsum_done
	/*
	 * If either of the low two address bits is set, consume one
	 * halfword first.
	 * NOTE(review): that only reaches 4-byte alignment when the
	 * address was already 2-byte aligned -- confirm callers
	 * guarantee it.
	 */
	testq	$3, %rsi
	jnz	.ip_csum_notaligned
.ip_csum_aligned:	/* XX64 opportunities for 8-byte operations? */
.next_iter:
	/* XX64 opportunities for prefetch? */
	/* XX64 compute csum with 64 bit quantities? */
	/* Fewer than 32 halfwords left?  Then take the tail path. */
	subl	$32, %ecx
	jl	.less_than_32

	/*
	 * Unrolled chain.  Label .onlyN is where execution enters when
	 * N bytes remain; the tail path has already biased %rsi so
	 * that the fixed displacements address those last N bytes.
	 */
	addl	0(%rsi), %edx
.only60:
	adcl	4(%rsi), %eax
.only56:
	adcl	8(%rsi), %edx
.only52:
	adcl	12(%rsi), %eax
.only48:
	adcl	16(%rsi), %edx
.only44:
	adcl	20(%rsi), %eax
.only40:
	adcl	24(%rsi), %edx
.only36:
	adcl	28(%rsi), %eax
.only32:
	adcl	32(%rsi), %edx
.only28:
	adcl	36(%rsi), %eax
.only24:
	adcl	40(%rsi), %edx
.only20:
	adcl	44(%rsi), %eax
.only16:
	adcl	48(%rsi), %edx
.only12:
	adcl	52(%rsi), %eax
.only8:
	adcl	56(%rsi), %edx
.only4:
	adcl	60(%rsi), %eax	/* could be adding -1 and -1 with a carry */
.only0:
	adcl	$0, %eax	/* could be adding -1 in eax with a carry */
	adcl	$0, %eax

	addq	$64, %rsi
	testl	%ecx, %ecx
	jnz	.next_iter

.ip_ocsum_done:
	/* Combine the two accumulators, then fold 32 bits down to 16. */
	addl	%eax, %edx
	adcl	$0, %edx
	movl	%edx, %eax	/* form a 16 bit checksum by */
	shrl	$16, %eax	/* adding two halves of 32 bit checksum */
	addw	%dx, %ax
	adcw	$0, %ax
	andl	$0xffff, %eax
	leave
	ret

.ip_csum_notaligned:
	/* Add the leading halfword on its own, then rejoin the main path. */
	xorl	%edi, %edi
	movw	(%rsi), %di
	addl	%edi, %edx
	adcl	$0, %edx
	addq	$2, %rsi
	decl	%ecx
	jmp	.ip_csum_aligned

.less_than_32:
	/*
	 * Undo the subtraction: %ecx = halfwords remaining (0..31).
	 * An odd trailing halfword is added separately so that what
	 * is left is a whole number of 32-bit words.
	 */
	addl	$32, %ecx
	testl	$1, %ecx
	jz	.size_aligned
	andl	$0xfe, %ecx
	movzwl	(%rsi, %rcx, 2), %edi
	addl	%edi, %edx
	adcl	$0, %edx
.size_aligned:
	/*
	 * %rsi += (bytes remaining - 64), so the chain's displacements
	 * land on the remaining data.  %ecx becomes the word count and
	 * indexes the jump table; it is then zeroed so the loop test
	 * after .only0 falls through to .ip_ocsum_done.  clc makes the
	 * first adcl of the partial chain start without a carry.
	 */
	movl	%ecx, %edi
	shrl	$1, %ecx
	shl	$1, %edi
	subq	$64, %rdi
	addq	%rdi, %rsi
	leaq    .ip_ocsum_jmptbl(%rip), %rdi
	leaq	(%rdi, %rcx, 8), %rdi
	xorl	%ecx, %ecx
	clc
	jmp 	*(%rdi)

	/* Entry points into the unrolled chain, indexed by words remaining. */
	.align	8
.ip_ocsum_jmptbl:
	.quad	.only0, .only4, .only8, .only12, .only16, .only20
	.quad	.only24, .only28, .only32, .only36, .only40, .only44
	.quad	.only48, .only52, .only56, .only60
	SET_SIZE(ip_ocsum)
2568
2569#elif defined(__i386)
2570
	/*
	 * unsigned int ip_ocsum(ushort_t *address, int halfword_count,
	 *     unsigned int sum)
	 *
	 * Arguments are read from the frame: 8(%ebp) = address,
	 * 12(%ebp) = halfword_count, 16(%ebp) = sum.  Returns the
	 * folded 16-bit sum in %eax.
	 *
	 * The data is added 32 bits at a time into two accumulators,
	 * %edx and %eax, linked by one carry chain.  A full pass of the
	 * unrolled chain consumes 64 bytes (32 halfwords); a shorter
	 * tail enters the chain part-way through via a jump table.
	 */
	ENTRY(ip_ocsum)
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	12(%ebp), %ecx	/* count of half words */
	movl	16(%ebp), %edx	/* partial checksum */
	movl	8(%ebp), %esi
	xorl	%eax, %eax
	testl	%ecx, %ecx
	jz	.ip_ocsum_done

	/*
	 * If either of the low two address bits is set, consume one
	 * halfword first.
	 * NOTE(review): that only reaches 4-byte alignment when the
	 * address was already 2-byte aligned -- confirm callers
	 * guarantee it.
	 */
	testl	$3, %esi
	jnz	.ip_csum_notaligned
.ip_csum_aligned:
.next_iter:
	/* Fewer than 32 halfwords left?  Then take the tail path. */
	subl	$32, %ecx
	jl	.less_than_32

	/*
	 * Unrolled chain.  Label .onlyN is where execution enters when
	 * N bytes remain; the tail path has already biased %esi so
	 * that the fixed displacements address those last N bytes.
	 */
	addl	0(%esi), %edx
.only60:
	adcl	4(%esi), %eax
.only56:
	adcl	8(%esi), %edx
.only52:
	adcl	12(%esi), %eax
.only48:
	adcl	16(%esi), %edx
.only44:
	adcl	20(%esi), %eax
.only40:
	adcl	24(%esi), %edx
.only36:
	adcl	28(%esi), %eax
.only32:
	adcl	32(%esi), %edx
.only28:
	adcl	36(%esi), %eax
.only24:
	adcl	40(%esi), %edx
.only20:
	adcl	44(%esi), %eax
.only16:
	adcl	48(%esi), %edx
.only12:
	adcl	52(%esi), %eax
.only8:
	adcl	56(%esi), %edx
.only4:
	adcl	60(%esi), %eax	/* We could be adding -1 and -1 with a carry */
.only0:
	adcl	$0, %eax	/* we could be adding -1 in eax with a carry */
	adcl	$0, %eax

	addl	$64, %esi
	andl	%ecx, %ecx
	jnz	.next_iter

.ip_ocsum_done:
	/* Combine the two accumulators, then fold 32 bits down to 16. */
	addl	%eax, %edx
	adcl	$0, %edx
	movl	%edx, %eax	/* form a 16 bit checksum by */
	shrl	$16, %eax	/* adding two halves of 32 bit checksum */
	addw	%dx, %ax
	adcw	$0, %ax
	andl	$0xffff, %eax
	popl	%edi		/* restore registers */
	popl	%esi
	popl	%ebx
	leave
	ret

.ip_csum_notaligned:
	/* Add the leading halfword on its own, then rejoin the main path. */
	xorl	%edi, %edi
	movw	(%esi), %di
	addl	%edi, %edx
	adcl	$0, %edx
	addl	$2, %esi
	decl	%ecx
	jmp	.ip_csum_aligned

.less_than_32:
	/*
	 * Undo the subtraction: %ecx = halfwords remaining (0..31).
	 * An odd trailing halfword is added separately so that what
	 * is left is a whole number of 32-bit words.
	 */
	addl	$32, %ecx
	testl	$1, %ecx
	jz	.size_aligned
	andl	$0xfe, %ecx
	movzwl	(%esi, %ecx, 2), %edi
	addl	%edi, %edx
	adcl	$0, %edx
.size_aligned:
	/*
	 * %esi += (bytes remaining - 64), so the chain's displacements
	 * land on the remaining data.  %ecx becomes the word count and
	 * indexes the jump table; it is then zeroed so the loop test
	 * after .only0 falls through to .ip_ocsum_done.  clc makes the
	 * first adcl of the partial chain start without a carry.
	 */
	movl	%ecx, %edi
	shrl	$1, %ecx
	shl	$1, %edi
	subl	$64, %edi
	addl	%edi, %esi
	movl	$.ip_ocsum_jmptbl, %edi
	lea	(%edi, %ecx, 4), %edi
	xorl	%ecx, %ecx
	clc
	jmp 	*(%edi)
	SET_SIZE(ip_ocsum)

	/* Entry points into the unrolled chain, indexed by words remaining. */
	.data
	.align	4

.ip_ocsum_jmptbl:
	.long	.only0, .only4, .only8, .only12, .only16, .only20
	.long	.only24, .only28, .only32, .only36, .only40, .only44
	.long	.only48, .only52, .only56, .only60
2681
2682
2683#endif	/* __i386 */
2684#endif	/* __lint */
2685
/*
 * unsigned long long mul32(uint_t a, uint_t b)
 *
 * Multiply two unsigned 32-bit values and return the full 64-bit
 * product.  C callable; provided for manipulating hrtime_t values.
 */
#if defined(__lint)

/* result = a * b; */

/* ARGSUSED */
unsigned long long
mul32(uint_t a, uint_t b)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	ENTRY(mul32)
	movl	%edi, %eax		/* %eax = a */
	xorl	%edx, %edx		/* defensive; mull rewrites %edx anyway */
	mull	%esi			/* %edx:%eax = a * b */
	shlq	$32, %rdx		/* move the high half into bits 63..32 */
	orq	%rdx, %rax		/* %rax = full 64-bit product */
	ret
	SET_SIZE(mul32)

#elif defined(__i386)

	ENTRY(mul32)
	movl	4(%esp), %ecx		/* %ecx = a */
	movl	8(%esp), %eax		/* %eax = b */
	mull	%ecx			/* %edx:%eax = a * b, returned as is */
	ret
	SET_SIZE(mul32)

#endif	/* __i386 */
#endif	/* __lint */
2723
#if defined(notused)
/*
 * void load_pte64(uint64_t *pte, uint64_t pte_value)
 *
 * Store the 64-bit pte_value at *pte as two 32-bit writes, upper
 * half first and lower half second.  Only built when "notused" is
 * defined.
 */
#if defined(__lint)
/* ARGSUSED */
void
load_pte64(uint64_t *pte, uint64_t pte_value)
{}
#else	/* __lint */
	.globl load_pte64
load_pte64:
	movl	12(%esp), %edx		/* %edx = upper half of pte_value */
	movl	8(%esp), %ecx		/* %ecx = lower half of pte_value */
	movl	4(%esp), %eax		/* %eax = pte */
	movl	%edx, 4(%eax)		/* write the upper half first */
	movl	%ecx, (%eax)		/* then the lower half */
	ret
#endif	/* __lint */
#endif	/* notused */
2741
#if defined(__lint)

/*ARGSUSED*/
void
scan_memory(caddr_t addr, size_t size)
{}

#else	/* __lint */

/*
 * void scan_memory(caddr_t addr, size_t size)
 *
 * Read through the memory range starting at addr with a repeated
 * string load, discarding the data.  size is in bytes; it is rounded
 * down to whole quadwords (amd64) or longwords (i386), and nothing is
 * read when that rounds to zero.
 */

#if defined(__amd64)

	ENTRY(scan_memory)
	shrq	$3, %rsi		/* %rsi = size / 8 (quadword count) */
	jz	.scan_memory_out	/* under 8 bytes: nothing to do */
	movq	%rsi, %rcx		/* %rcx = repeat count */
	movq	%rdi, %rsi		/* %rsi = addr (string source) */
	rep lodsq			/* touch every quadword in the range */
.scan_memory_out:
	/*
	 * This return is a branch target, so the two-byte "rep; ret"
	 * form is used (AMD Software Optimization Guide, Section 6.2).
	 */
	rep;	ret
	SET_SIZE(scan_memory)

#elif defined(__i386)

	ENTRY(scan_memory)
	pushl	%ecx
	pushl	%esi
	/* Two registers pushed: addr is at 12(%esp), size at 16(%esp). */
	movl	16(%esp), %ecx		/* %ecx = size in bytes */
	shrl	$2, %ecx		/* %ecx = size / 4 (longword count) */
	jz	.scan_memory_out	/* under 4 bytes: nothing to do */
	movl	12(%esp), %esi		/* %esi = addr (string source) */
	.byte	0xf3			/* rep prefix, emitted by hand */
	lodsl				/* touch every longword in the range */
.scan_memory_out:
	popl	%esi
	popl	%ecx
	ret
	SET_SIZE(scan_memory)

#endif	/* __i386 */
#endif	/* __lint */
2783
2784
#if defined(__lint)

/*ARGSUSED */
int
lowbit(ulong_t i)
{ return (0); }

#else	/* __lint */

/*
 * int lowbit(ulong_t i)
 *
 * Return the 1-based position of the least significant set bit of i,
 * or 0 when i is 0.
 *
 * BSF leaves its destination architecturally undefined when the
 * source is zero, so the zero case is decided from ZF (which BSF
 * does define) rather than by preloading the destination and hoping
 * it survives.
 */

#if defined(__amd64)

	ENTRY(lowbit)
	movl	$-1, %eax		/* result for i == 0, before the incl */
	bsfq	%rdi, %rdi		/* ZF = (i == 0); else %rdi = bit index */
	cmovnz	%edi, %eax		/* take the index only when i != 0 */
	incl	%eax			/* convert to 1-based; -1 becomes 0 */
	ret
	SET_SIZE(lowbit)

#elif defined(__i386)

	ENTRY(lowbit)
	bsfl	4(%esp), %eax		/* ZF = (i == 0); else %eax = bit index */
	jz	0f			/* i == 0: %eax is not trustworthy */
	incl	%eax			/* convert to 1-based */
	ret
0:
	xorl	%eax, %eax		/* no bit set: return 0 */
	ret
	SET_SIZE(lowbit)

#endif	/* __i386 */
#endif	/* __lint */
2814
#if defined(__lint)

/*ARGSUSED*/
int
highbit(ulong_t i)
{ return (0); }

#else	/* __lint */

/*
 * int highbit(ulong_t i)
 *
 * Return the 1-based position of the most significant set bit of i,
 * or 0 when i is 0.
 *
 * BSR leaves its destination architecturally undefined when the
 * source is zero, so the zero case is decided from ZF (which BSR
 * does define) rather than by preloading the destination and hoping
 * it survives.
 */

#if defined(__amd64)

	ENTRY(highbit)
	movl	$-1, %eax		/* result for i == 0, before the incl */
	bsrq	%rdi, %rdi		/* ZF = (i == 0); else %rdi = bit index */
	cmovnz	%edi, %eax		/* take the index only when i != 0 */
	incl	%eax			/* convert to 1-based; -1 becomes 0 */
	ret
	SET_SIZE(highbit)

#elif defined(__i386)

	ENTRY(highbit)
	bsrl	4(%esp), %eax		/* ZF = (i == 0); else %eax = bit index */
	jz	0f			/* i == 0: %eax is not trustworthy */
	incl	%eax			/* convert to 1-based */
	ret
0:
	xorl	%eax, %eax		/* no bit set: return 0 */
	ret
	SET_SIZE(highbit)

#endif	/* __i386 */
#endif	/* __lint */
2844
#if defined(__lint)

/*ARGSUSED*/
uint64_t
rdmsr(uint_t r)
{ return (0); }

/*ARGSUSED*/
void
wrmsr(uint_t r, const uint64_t val)
{}

/*ARGSUSED*/
uint64_t
xrdmsr(uint_t r)
{ return (0); }

/*ARGSUSED*/
void
xwrmsr(uint_t r, const uint64_t val)
{}

void
invalidate_cache(void)
{}

#else  /* __lint */

/*
 * uint64_t rdmsr(uint_t r)			read MSR r
 * void wrmsr(uint_t r, const uint64_t val)	write val to MSR r
 * uint64_t xrdmsr(uint_t r)			as rdmsr, with the access
 * void xwrmsr(uint_t r, const uint64_t val)	value below loaded into
 *						%edi first
 * void invalidate_cache(void)			execute wbinvd
 *
 * The MSR instructions take the register number in %ecx and move the
 * data through %edx:%eax.
 */

#define	XMSR_ACCESS_VAL		$0x9c5a203a

#if defined(__amd64)

	ENTRY(rdmsr)
	movl	%edi, %ecx		/* %ecx = r */
	rdmsr				/* %edx:%eax = MSR contents */
	shlq	$32, %rdx
	orq	%rdx, %rax		/* %rax = (high << 32) | low */
	ret
	SET_SIZE(rdmsr)

	ENTRY(wrmsr)
	movl	%edi, %ecx		/* %ecx = r */
	movl	%esi, %eax		/* %eax = low half of val */
	movq	%rsi, %rdx
	shrq	$32, %rdx		/* %edx = high half of val */
	wrmsr
	ret
	SET_SIZE(wrmsr)

	ENTRY(xrdmsr)
	pushq	%rbp
	movq	%rsp, %rbp
	movl	%edi, %ecx		/* %ecx = r; frees %edi */
	movl	XMSR_ACCESS_VAL, %edi	/* access value the MSR requires */
	rdmsr				/* %edx:%eax = MSR contents */
	shlq	$32, %rdx
	orq	%rdx, %rax		/* %rax = (high << 32) | low */
	leave
	ret
	SET_SIZE(xrdmsr)

	ENTRY(xwrmsr)
	pushq	%rbp
	movq	%rsp, %rbp
	movl	%edi, %ecx		/* %ecx = r; frees %edi */
	movl	XMSR_ACCESS_VAL, %edi	/* access value the MSR requires */
	movl	%esi, %eax		/* %eax = low half of val */
	movq	%rsi, %rdx
	shrq	$32, %rdx		/* %edx = high half of val */
	wrmsr
	leave
	ret
	SET_SIZE(xwrmsr)

#elif defined(__i386)

	ENTRY(rdmsr)
	movl	4(%esp), %ecx		/* %ecx = r */
	rdmsr				/* %edx:%eax is returned as is */
	ret
	SET_SIZE(rdmsr)

	ENTRY(wrmsr)
	movl	12(%esp), %edx		/* %edx = high half of val */
	movl	8(%esp), %eax		/* %eax = low half of val */
	movl	4(%esp), %ecx		/* %ecx = r */
	wrmsr
	ret
	SET_SIZE(wrmsr)

	ENTRY(xrdmsr)
	pushl	%ebp
	movl	%esp, %ebp
	movl	8(%esp), %ecx		/* %ecx = r (read before the push below) */
	pushl	%edi			/* %edi is about to be overwritten */
	movl	XMSR_ACCESS_VAL, %edi	/* access value the MSR requires */
	rdmsr				/* %edx:%eax is returned as is */
	popl	%edi
	leave
	ret
	SET_SIZE(xrdmsr)

	ENTRY(xwrmsr)
	pushl	%ebp
	movl	%esp, %ebp
	/* All three loads happen before %edi is pushed. */
	movl	16(%esp), %edx		/* %edx = high half of val */
	movl	12(%esp), %eax		/* %eax = low half of val */
	movl	8(%esp), %ecx		/* %ecx = r */
	pushl	%edi			/* %edi is about to be overwritten */
	movl	XMSR_ACCESS_VAL, %edi	/* access value the MSR requires */
	wrmsr
	popl	%edi
	leave
	ret
	SET_SIZE(xwrmsr)

#endif	/* __i386 */

	ENTRY(invalidate_cache)
	wbinvd
	ret
	SET_SIZE(invalidate_cache)

#endif	/* __lint */
2969
2970#if defined(__lint)
2971
2972/*ARGSUSED*/
2973void
2974getcregs(struct cregs *crp)
2975{}
2976
2977#else	/* __lint */
2978
2979#if defined(__amd64)
2980
	/*
	 * void getcregs(struct cregs *crp)
	 *
	 * Fill *crp (pointer in %rdi) with the current control
	 * register and descriptor-table state.  The CREG_* offsets and
	 * CREGSZ come from the included headers.
	 */
	ENTRY_NP(getcregs)
#if defined(__xpv)
	/*
	 * Only a few of the hardware control registers or descriptor tables
	 * are directly accessible to us, so just zero the structure.
	 *
	 * XXPV	Perhaps it would be helpful for the hypervisor to return
	 *	virtualized versions of these for post-mortem use.
	 *	(Need to reevaluate - perhaps it already does!)
	 */
	/* bzero(crp, CREGSZ); crp is kept on the stack across the call */
	pushq	%rdi		/* save *crp */
	movq	$CREGSZ, %rsi
	call	bzero
	popq	%rdi

	/*
	 * Dump what limited information we can
	 */
	movq	%cr0, %rax
	movq	%rax, CREG_CR0(%rdi)	/* cr0 */
	movq	%cr2, %rax
	movq	%rax, CREG_CR2(%rdi)	/* cr2 */
	movq	%cr3, %rax
	movq	%rax, CREG_CR3(%rdi)	/* cr3 */
	movq	%cr4, %rax
	movq	%rax, CREG_CR4(%rdi)	/* cr4 */

#else	/* __xpv */

/*
 * GETMSR(r, off, d): read MSR r and store its low half at off(d) and
 * its high half at off+4(d).  Uses %ecx, %eax and %edx.
 */
#define	GETMSR(r, off, d)	\
	movl	$r, %ecx;	\
	rdmsr;			\
	movl	%eax, off(d);	\
	movl	%edx, off+4(d)

	/*
	 * Each descriptor-table slot is cleared with a zero from %rax
	 * before the store instruction writes its 10 (gdt, idt) or
	 * 2 (ldt, task register) bytes, so whatever the instruction
	 * does not cover within the cleared quadword reads back as
	 * zero.
	 */
	xorl	%eax, %eax
	movq	%rax, CREG_GDT+8(%rdi)
	sgdt	CREG_GDT(%rdi)		/* 10 bytes */
	movq	%rax, CREG_IDT+8(%rdi)
	sidt	CREG_IDT(%rdi)		/* 10 bytes */
	movq	%rax, CREG_LDT(%rdi)
	sldt	CREG_LDT(%rdi)		/* 2 bytes */
	movq	%rax, CREG_TASKR(%rdi)
	str	CREG_TASKR(%rdi)	/* 2 bytes */
	/* Control registers can only be moved through a general register. */
	movq	%cr0, %rax
	movq	%rax, CREG_CR0(%rdi)	/* cr0 */
	movq	%cr2, %rax
	movq	%rax, CREG_CR2(%rdi)	/* cr2 */
	movq	%cr3, %rax
	movq	%rax, CREG_CR3(%rdi)	/* cr3 */
	movq	%cr4, %rax
	movq	%rax, CREG_CR4(%rdi)	/* cr4 */
	movq	%cr8, %rax
	movq	%rax, CREG_CR8(%rdi)	/* cr8 */
	/* Finally the two MSRs recorded in the structure. */
	GETMSR(MSR_AMD_KGSBASE, CREG_KGSBASE, %rdi)
	GETMSR(MSR_AMD_EFER, CREG_EFER, %rdi)
#endif	/* __xpv */
	ret
	SET_SIZE(getcregs)
3040
3041#undef GETMSR
3042
3043#elif defined(__i386)
3044
3045	ENTRY_NP(getcregs)
3046#if defined(__xpv)
3047	/*
3048	 * Only a few of the hardware control registers or descriptor tables
3049	 * are directly accessible to us, so just zero the structure.
3050	 *
3051	 * XXPV	Perhaps it would be helpful for the hypervisor to return
3052	 *	virtualized versions of these for post-mortem use.
3053	 *	(Need to reevaluate - perhaps it already does!)
3054	 */
3055	movl	4(%esp), %edx
3056	pushl	$CREGSZ
3057	pushl	%edx
3058	call	bzero
3059	addl	$8, %esp
3060	movl	4(%esp), %edx
3061
3062	/*
3063	 * Dump what limited information we can
3064	 */
3065	movl	%cr0, %eax
3066	movl	%eax, CREG_CR0(%edx)	/* cr0 */
3067	movl	%cr2, %eax
3068	movl	%eax, CREG_CR2(%edx)	/* cr2 */
3069	movl	%cr3, %eax
3070	movl	%eax, CREG_CR3(%edx)	/* cr3 */
3071	movl	%cr4, %eax
3072	movl	%eax, CREG_CR4(%edx)	/* cr4 */
3073
3074#else	/* __xpv */
3075
3076	movl	4(%esp), %edx
3077	movw	$0, CREG_GDT+6(%edx)
3078	movw	$0, CREG_IDT+6(%edx)
3079	sgdt	CREG_GDT(%edx)		/* gdt */
3080	sidt	CREG_IDT(%edx)		/* idt */
3081	sldt	CREG_LDT(%edx)		/* ldt */
3082	str	CREG_TASKR(%edx)	/* task */
3083	movl	%cr0, %eax
3084	movl	%eax, CREG_CR0(%edx)	/* cr0 */
3085	movl	%cr2, %eax
3086	movl	%eax, CREG_CR2(%edx)	/* cr2 */
3087	movl	%cr3, %eax
3088	movl	%eax, CREG_CR3(%edx)	/* cr3 */
3089	testl	$X86_LARGEPAGE, x86_feature
3090	jz	.nocr4
3091	movl	%cr4, %eax
3092	movl	%eax, CREG_CR4(%edx)	/* cr4 */
3093	jmp	.skip
3094.nocr4:
3095	movl	$0, CREG_CR4(%edx)
3096.skip:
3097#endif
3098	ret
3099	SET_SIZE(getcregs)
3100
3101#endif	/* __i386 */
3102#endif	/* __lint */
3103
3104
3105/*
3106 * A panic trigger is a word which is updated atomically and can only be set
3107 * once.  We atomically store 0xDEFACEDD and load the old value.  If the
3108 * previous value was 0, we succeed and return 1; otherwise return 0.
3109 * This allows a partially corrupt trigger to still trigger correctly.  DTrace
3110 * has its own version of this function to allow it to panic correctly from
3111 * probe context.
3112 */
3113#if defined(__lint)
3114
/*
 * Lint-only placeholders for the assembly trigger routines.  Neither
 * touches *tp; both simply report 0.
 */
/*ARGSUSED*/
int
panic_trigger(int *tp)
{
	return (0);
}

/*ARGSUSED*/
int
dtrace_panic_trigger(int *tp)
{
	return (0);
}
3124
3125#else	/* __lint */
3126
3127#if defined(__amd64)
3128
3129	ENTRY_NP(panic_trigger)
3130	xorl	%eax, %eax
3131	movl	$0xdefacedd, %edx
3132	lock
3133	  xchgl	%edx, (%rdi)
3134	cmpl	$0, %edx
3135	je	0f
3136	movl	$0, %eax
3137	ret
31380:	movl	$1, %eax
3139	ret
3140	SET_SIZE(panic_trigger)
3141
3142	ENTRY_NP(dtrace_panic_trigger)
3143	xorl	%eax, %eax
3144	movl	$0xdefacedd, %edx
3145	lock
3146	  xchgl	%edx, (%rdi)
3147	cmpl	$0, %edx
3148	je	0f
3149	movl	$0, %eax
3150	ret
31510:	movl	$1, %eax
3152	ret
3153	SET_SIZE(dtrace_panic_trigger)
3154
3155#elif defined(__i386)
3156
3157	ENTRY_NP(panic_trigger)
3158	movl	4(%esp), %edx		/ %edx = address of trigger
3159	movl	$0xdefacedd, %eax	/ %eax = 0xdefacedd
3160	lock				/ assert lock
3161	xchgl %eax, (%edx)		/ exchange %eax and the trigger
3162	cmpl	$0, %eax		/ if (%eax == 0x0)
3163	je	0f			/   return (1);
3164	movl	$0, %eax		/ else
3165	ret				/   return (0);
31660:	movl	$1, %eax
3167	ret
3168	SET_SIZE(panic_trigger)
3169
3170	ENTRY_NP(dtrace_panic_trigger)
3171	movl	4(%esp), %edx		/ %edx = address of trigger
3172	movl	$0xdefacedd, %eax	/ %eax = 0xdefacedd
3173	lock				/ assert lock
3174	xchgl %eax, (%edx)		/ exchange %eax and the trigger
3175	cmpl	$0, %eax		/ if (%eax == 0x0)
3176	je	0f			/   return (1);
3177	movl	$0, %eax		/ else
3178	ret				/   return (0);
31790:	movl	$1, %eax
3180	ret
3181	SET_SIZE(dtrace_panic_trigger)
3182
3183#endif	/* __i386 */
3184#endif	/* __lint */
3185
3186/*
3187 * The panic() and cmn_err() functions invoke vpanic() as a common entry point
3188 * into the panic code implemented in panicsys().  vpanic() is responsible
3189 * for passing through the format string and arguments, and constructing a
3190 * regs structure on the stack into which it saves the current register
3191 * values.  If we are not dying due to a fatal trap, these registers will
3192 * then be preserved in panicbuf as the current processor state.  Before
3193 * invoking panicsys(), vpanic() activates the first panic trigger (see
3194 * common/os/panic.c) and switches to the panic_stack if successful.  Note that
3195 * DTrace takes a slightly different panic path if it must panic from probe
3196 * context.  Instead of calling panic, it calls into dtrace_vpanic(), which
3197 * sets up the initial stack as vpanic does, calls dtrace_panic_trigger(), and
3198 * branches back into vpanic().
3199 */
3200#if defined(__lint)
3201
/*
 * Lint-only placeholders for the assembly vpanic() and dtrace_vpanic();
 * both are empty.
 */
/*ARGSUSED*/
void
vpanic(const char *format, va_list alist)
{
}

/*ARGSUSED*/
void
dtrace_vpanic(const char *format, va_list alist)
{
}
3211
3212#else	/* __lint */
3213
3214#if defined(__amd64)
3215
3216	ENTRY_NP(vpanic)			/* Initial stack layout: */
3217
3218	pushq	%rbp				/* | %rip | 	0x60	*/
3219	movq	%rsp, %rbp			/* | %rbp |	0x58	*/
3220	pushfq					/* | rfl  |	0x50	*/
3221	pushq	%r11				/* | %r11 |	0x48	*/
3222	pushq	%r10				/* | %r10 |	0x40	*/
3223	pushq	%rbx				/* | %rbx |	0x38	*/
3224	pushq	%rax				/* | %rax |	0x30	*/
3225	pushq	%r9				/* | %r9  |	0x28	*/
3226	pushq	%r8				/* | %r8  |	0x20	*/
3227	pushq	%rcx				/* | %rcx |	0x18	*/
3228	pushq	%rdx				/* | %rdx |	0x10	*/
3229	pushq	%rsi				/* | %rsi |	0x8 alist */
3230	pushq	%rdi				/* | %rdi |	0x0 format */
3231
3232	movq	%rsp, %rbx			/* %rbx = current %rsp */
3233
3234	leaq	panic_quiesce(%rip), %rdi	/* %rdi = &panic_quiesce */
3235	call	panic_trigger			/* %eax = panic_trigger() */
3236
3237vpanic_common:
3238	/*
3239	 * The panic_trigger result is in %eax from the call above, and
3240	 * dtrace_panic places it in %eax before branching here.
3241	 * The rdmsr instructions that follow below will clobber %eax so
3242	 * we stash the panic_trigger result in %r11d.
3243	 */
3244	movl	%eax, %r11d
3245	cmpl	$0, %r11d
3246	je	0f
3247
3248	/*
3249	 * If panic_trigger() was successful, we are the first to initiate a
3250	 * panic: we now switch to the reserved panic_stack before continuing.
3251	 */
3252	leaq	panic_stack(%rip), %rsp
3253	addq	$PANICSTKSIZE, %rsp
32540:	subq	$REGSIZE, %rsp
3255	/*
3256	 * Now that we've got everything set up, store the register values as
3257	 * they were when we entered vpanic() to the designated location in
3258	 * the regs structure we allocated on the stack.
3259	 */
3260	movq	0x0(%rbx), %rcx
3261	movq	%rcx, REGOFF_RDI(%rsp)
3262	movq	0x8(%rbx), %rcx
3263	movq	%rcx, REGOFF_RSI(%rsp)
3264	movq	0x10(%rbx), %rcx
3265	movq	%rcx, REGOFF_RDX(%rsp)
3266	movq	0x18(%rbx), %rcx
3267	movq	%rcx, REGOFF_RCX(%rsp)
3268	movq	0x20(%rbx), %rcx
3269
3270	movq	%rcx, REGOFF_R8(%rsp)
3271	movq	0x28(%rbx), %rcx
3272	movq	%rcx, REGOFF_R9(%rsp)
3273	movq	0x30(%rbx), %rcx
3274	movq	%rcx, REGOFF_RAX(%rsp)
3275	movq	0x38(%rbx), %rcx
3276	movq	%rcx, REGOFF_RBX(%rsp)
3277	movq	0x58(%rbx), %rcx
3278
3279	movq	%rcx, REGOFF_RBP(%rsp)
3280	movq	0x40(%rbx), %rcx
3281	movq	%rcx, REGOFF_R10(%rsp)
3282	movq	0x48(%rbx), %rcx
3283	movq	%rcx, REGOFF_R11(%rsp)
3284	movq	%r12, REGOFF_R12(%rsp)
3285
3286	movq	%r13, REGOFF_R13(%rsp)
3287	movq	%r14, REGOFF_R14(%rsp)
3288	movq	%r15, REGOFF_R15(%rsp)
3289
3290	xorl	%ecx, %ecx
3291	movw	%ds, %cx
3292	movq	%rcx, REGOFF_DS(%rsp)
3293	movw	%es, %cx
3294	movq	%rcx, REGOFF_ES(%rsp)
3295	movw	%fs, %cx
3296	movq	%rcx, REGOFF_FS(%rsp)
3297	movw	%gs, %cx
3298	movq	%rcx, REGOFF_GS(%rsp)
3299
3300	movq	$0, REGOFF_TRAPNO(%rsp)
3301
3302	movq	$0, REGOFF_ERR(%rsp)
3303	leaq	vpanic(%rip), %rcx
3304	movq	%rcx, REGOFF_RIP(%rsp)
3305	movw	%cs, %cx
3306	movzwq	%cx, %rcx
3307	movq	%rcx, REGOFF_CS(%rsp)
3308	movq	0x50(%rbx), %rcx
3309	movq	%rcx, REGOFF_RFL(%rsp)
3310	movq	%rbx, %rcx
3311	addq	$0x60, %rcx
3312	movq	%rcx, REGOFF_RSP(%rsp)
3313	movw	%ss, %cx
3314	movzwq	%cx, %rcx
3315	movq	%rcx, REGOFF_SS(%rsp)
3316
3317	/*
3318	 * panicsys(format, alist, rp, on_panic_stack)
3319	 */
3320	movq	REGOFF_RDI(%rsp), %rdi		/* format */
3321	movq	REGOFF_RSI(%rsp), %rsi		/* alist */
3322	movq	%rsp, %rdx			/* struct regs */
3323	movl	%r11d, %ecx			/* on_panic_stack */
3324	call	panicsys
3325	addq	$REGSIZE, %rsp
3326	popq	%rdi
3327	popq	%rsi
3328	popq	%rdx
3329	popq	%rcx
3330	popq	%r8
3331	popq	%r9
3332	popq	%rax
3333	popq	%rbx
3334	popq	%r10
3335	popq	%r11
3336	popfq
3337	leave
3338	ret
3339	SET_SIZE(vpanic)
3340
3341	ENTRY_NP(dtrace_vpanic)			/* Initial stack layout: */
3342
3343	pushq	%rbp				/* | %rip | 	0x60	*/
3344	movq	%rsp, %rbp			/* | %rbp |	0x58	*/
3345	pushfq					/* | rfl  |	0x50	*/
3346	pushq	%r11				/* | %r11 |	0x48	*/
3347	pushq	%r10				/* | %r10 |	0x40	*/
3348	pushq	%rbx				/* | %rbx |	0x38	*/
3349	pushq	%rax				/* | %rax |	0x30	*/
3350	pushq	%r9				/* | %r9  |	0x28	*/
3351	pushq	%r8				/* | %r8  |	0x20	*/
3352	pushq	%rcx				/* | %rcx |	0x18	*/
3353	pushq	%rdx				/* | %rdx |	0x10	*/
3354	pushq	%rsi				/* | %rsi |	0x8 alist */
3355	pushq	%rdi				/* | %rdi |	0x0 format */
3356
3357	movq	%rsp, %rbx			/* %rbx = current %rsp */
3358
3359	leaq	panic_quiesce(%rip), %rdi	/* %rdi = &panic_quiesce */
3360	call	dtrace_panic_trigger	/* %eax = dtrace_panic_trigger() */
3361	jmp	vpanic_common
3362
3363	SET_SIZE(dtrace_vpanic)
3364
3365#elif defined(__i386)
3366
3367	ENTRY_NP(vpanic)			/ Initial stack layout:
3368
3369	pushl	%ebp				/ | %eip | 20
3370	movl	%esp, %ebp			/ | %ebp | 16
3371	pushl	%eax				/ | %eax | 12
3372	pushl	%ebx				/ | %ebx |  8
3373	pushl	%ecx				/ | %ecx |  4
3374	pushl	%edx				/ | %edx |  0
3375
3376	movl	%esp, %ebx			/ %ebx = current stack pointer
3377
3378	lea	panic_quiesce, %eax		/ %eax = &panic_quiesce
3379	pushl	%eax				/ push &panic_quiesce
3380	call	panic_trigger			/ %eax = panic_trigger()
3381	addl	$4, %esp			/ reset stack pointer
3382
3383vpanic_common:
3384	cmpl	$0, %eax			/ if (%eax == 0)
3385	je	0f				/   goto 0f;
3386
3387	/*
3388	 * If panic_trigger() was successful, we are the first to initiate a
3389	 * panic: we now switch to the reserved panic_stack before continuing.
3390	 */
3391	lea	panic_stack, %esp		/ %esp  = panic_stack
3392	addl	$PANICSTKSIZE, %esp		/ %esp += PANICSTKSIZE
3393
33940:	subl	$REGSIZE, %esp			/ allocate struct regs
3395
3396	/*
3397	 * Now that we've got everything set up, store the register values as
3398	 * they were when we entered vpanic() to the designated location in
3399	 * the regs structure we allocated on the stack.
3400	 */
3401#if !defined(__GNUC_AS__)
3402	movw	%gs, %edx
3403	movl	%edx, REGOFF_GS(%esp)
3404	movw	%fs, %edx
3405	movl	%edx, REGOFF_FS(%esp)
3406	movw	%es, %edx
3407	movl	%edx, REGOFF_ES(%esp)
3408	movw	%ds, %edx
3409	movl	%edx, REGOFF_DS(%esp)
3410#else	/* __GNUC_AS__ */
3411	mov	%gs, %edx
3412	mov	%edx, REGOFF_GS(%esp)
3413	mov	%fs, %edx
3414	mov	%edx, REGOFF_FS(%esp)
3415	mov	%es, %edx
3416	mov	%edx, REGOFF_ES(%esp)
3417	mov	%ds, %edx
3418	mov	%edx, REGOFF_DS(%esp)
3419#endif	/* __GNUC_AS__ */
3420	movl	%edi, REGOFF_EDI(%esp)
3421	movl	%esi, REGOFF_ESI(%esp)
3422	movl	16(%ebx), %ecx
3423	movl	%ecx, REGOFF_EBP(%esp)
3424	movl	%ebx, %ecx
3425	addl	$20, %ecx
3426	movl	%ecx, REGOFF_ESP(%esp)
3427	movl	8(%ebx), %ecx
3428	movl	%ecx, REGOFF_EBX(%esp)
3429	movl	0(%ebx), %ecx
3430	movl	%ecx, REGOFF_EDX(%esp)
3431	movl	4(%ebx), %ecx
3432	movl	%ecx, REGOFF_ECX(%esp)
3433	movl	12(%ebx), %ecx
3434	movl	%ecx, REGOFF_EAX(%esp)
3435	movl	$0, REGOFF_TRAPNO(%esp)
3436	movl	$0, REGOFF_ERR(%esp)
3437	lea	vpanic, %ecx
3438	movl	%ecx, REGOFF_EIP(%esp)
3439#if !defined(__GNUC_AS__)
3440	movw	%cs, %edx
3441#else	/* __GNUC_AS__ */
3442	mov	%cs, %edx
3443#endif	/* __GNUC_AS__ */
3444	movl	%edx, REGOFF_CS(%esp)
3445	pushfl
3446	popl	%ecx
3447#if defined(__xpv)
3448	/*
3449	 * Synthesize the PS_IE bit from the event mask bit
3450	 */
3451	CURTHREAD(%edx)
3452	KPREEMPT_DISABLE(%edx)
3453	EVENT_MASK_TO_IE(%edx, %ecx)
3454	CURTHREAD(%edx)
3455	KPREEMPT_ENABLE_NOKP(%edx)
3456#endif
3457	movl	%ecx, REGOFF_EFL(%esp)
3458	movl	$0, REGOFF_UESP(%esp)
3459#if !defined(__GNUC_AS__)
3460	movw	%ss, %edx
3461#else	/* __GNUC_AS__ */
3462	mov	%ss, %edx
3463#endif	/* __GNUC_AS__ */
3464	movl	%edx, REGOFF_SS(%esp)
3465
3466	movl	%esp, %ecx			/ %ecx = &regs
3467	pushl	%eax				/ push on_panic_stack
3468	pushl	%ecx				/ push &regs
3469	movl	12(%ebp), %ecx			/ %ecx = alist
3470	pushl	%ecx				/ push alist
3471	movl	8(%ebp), %ecx			/ %ecx = format
3472	pushl	%ecx				/ push format
3473	call	panicsys			/ panicsys();
3474	addl	$16, %esp			/ pop arguments
3475
3476	addl	$REGSIZE, %esp
3477	popl	%edx
3478	popl	%ecx
3479	popl	%ebx
3480	popl	%eax
3481	leave
3482	ret
3483	SET_SIZE(vpanic)
3484
3485	ENTRY_NP(dtrace_vpanic)			/ Initial stack layout:
3486
3487	pushl	%ebp				/ | %eip | 20
3488	movl	%esp, %ebp			/ | %ebp | 16
3489	pushl	%eax				/ | %eax | 12
3490	pushl	%ebx				/ | %ebx |  8
3491	pushl	%ecx				/ | %ecx |  4
3492	pushl	%edx				/ | %edx |  0
3493
3494	movl	%esp, %ebx			/ %ebx = current stack pointer
3495
3496	lea	panic_quiesce, %eax		/ %eax = &panic_quiesce
3497	pushl	%eax				/ push &panic_quiesce
3498	call	dtrace_panic_trigger		/ %eax = dtrace_panic_trigger()
3499	addl	$4, %esp			/ reset stack pointer
3500	jmp	vpanic_common			/ jump back to common code
3501
3502	SET_SIZE(dtrace_vpanic)
3503
3504#endif	/* __i386 */
3505#endif	/* __lint */
3506
3507#if defined(__lint)
3508
3509void
3510hres_tick(void)
3511{}
3512
3513int64_t timedelta;
3514hrtime_t hres_last_tick;
3515volatile timestruc_t hrestime;
3516int64_t hrestime_adj;
3517volatile int hres_lock;
3518hrtime_t hrtime_base;
3519
3520#else	/* __lint */
3521
3522	DGDEF3(hrestime, _MUL(2, CLONGSIZE), 8)
3523	.NWORD	0, 0
3524
3525	DGDEF3(hrestime_adj, 8, 8)
3526	.long	0, 0
3527
3528	DGDEF3(hres_last_tick, 8, 8)
3529	.long	0, 0
3530
3531	DGDEF3(timedelta, 8, 8)
3532	.long	0, 0
3533
3534	DGDEF3(hres_lock, 4, 8)
3535	.long	0
3536
3537	/*
3538	 * initialized to a non zero value to make pc_gethrtime()
3539	 * work correctly even before clock is initialized
3540	 */
3541	DGDEF3(hrtime_base, 8, 8)
3542	.long	_MUL(NSEC_PER_CLOCK_TICK, 6), 0
3543
3544	DGDEF3(adj_shift, 4, 4)
3545	.long	ADJ_SHIFT
3546
3547#if defined(__amd64)
3548
3549	ENTRY_NP(hres_tick)
3550	pushq	%rbp
3551	movq	%rsp, %rbp
3552
3553	/*
3554	 * We need to call *gethrtimef before picking up CLOCK_LOCK (obviously,
3555	 * hres_last_tick can only be modified while holding CLOCK_LOCK).
3556	 * At worst, performing this now instead of under CLOCK_LOCK may
3557	 * introduce some jitter in pc_gethrestime().
3558	 */
3559	call	*gethrtimef(%rip)
3560	movq	%rax, %r8
3561
3562	leaq	hres_lock(%rip), %rax
3563	movb	$-1, %dl
3564.CL1:
3565	xchgb	%dl, (%rax)
3566	testb	%dl, %dl
3567	jz	.CL3			/* got it */
3568.CL2:
3569	cmpb	$0, (%rax)		/* possible to get lock? */
3570	pause
3571	jne	.CL2
3572	jmp	.CL1			/* yes, try again */
3573.CL3:
3574	/*
3575	 * compute the interval since last time hres_tick was called
3576	 * and adjust hrtime_base and hrestime accordingly
3577	 * hrtime_base is an 8 byte value (in nsec), hrestime is
3578	 * a timestruc_t (sec, nsec)
3579	 */
3580	leaq	hres_last_tick(%rip), %rax
3581	movq	%r8, %r11
3582	subq	(%rax), %r8
3583	addq	%r8, hrtime_base(%rip)	/* add interval to hrtime_base */
3584	addq	%r8, hrestime+8(%rip)	/* add interval to hrestime.tv_nsec */
3585	/*
3586	 * Now that we have CLOCK_LOCK, we can update hres_last_tick
3587	 */
3588	movq	%r11, (%rax)
3589
3590	call	__adj_hrestime
3591
3592	/*
3593	 * release the hres_lock
3594	 */
3595	incl	hres_lock(%rip)
3596	leave
3597	ret
3598	SET_SIZE(hres_tick)
3599
3600#elif defined(__i386)
3601
3602	ENTRY_NP(hres_tick)
3603	pushl	%ebp
3604	movl	%esp, %ebp
3605	pushl	%esi
3606	pushl	%ebx
3607
3608	/*
3609	 * We need to call *gethrtimef before picking up CLOCK_LOCK (obviously,
3610	 * hres_last_tick can only be modified while holding CLOCK_LOCK).
3611	 * At worst, performing this now instead of under CLOCK_LOCK may
3612	 * introduce some jitter in pc_gethrestime().
3613	 */
3614	call	*gethrtimef
3615	movl	%eax, %ebx
3616	movl	%edx, %esi
3617
3618	movl	$hres_lock, %eax
3619	movl	$-1, %edx
3620.CL1:
3621	xchgb	%dl, (%eax)
3622	testb	%dl, %dl
3623	jz	.CL3			/ got it
3624.CL2:
3625	cmpb	$0, (%eax)		/ possible to get lock?
3626	pause
3627	jne	.CL2
3628	jmp	.CL1			/ yes, try again
3629.CL3:
3630	/*
3631	 * compute the interval since last time hres_tick was called
3632	 * and adjust hrtime_base and hrestime accordingly
3633	 * hrtime_base is an 8 byte value (in nsec), hrestime is
3634	 * timestruc_t (sec, nsec)
3635	 */
3636
3637	lea	hres_last_tick, %eax
3638
3639	movl	%ebx, %edx
3640	movl	%esi, %ecx
3641
3642	subl 	(%eax), %edx
3643	sbbl 	4(%eax), %ecx
3644
3645	addl	%edx, hrtime_base	/ add interval to hrtime_base
3646	adcl	%ecx, hrtime_base+4
3647
3648	addl 	%edx, hrestime+4	/ add interval to hrestime.tv_nsec
3649
3650	/
3651	/ Now that we have CLOCK_LOCK, we can update hres_last_tick.
3652	/
3653	movl	%ebx, (%eax)
3654	movl	%esi,  4(%eax)
3655
3656	/ get hrestime at this moment. used as base for pc_gethrestime
3657	/
3658	/ Apply adjustment, if any
3659	/
3660	/ #define HRES_ADJ	(NSEC_PER_CLOCK_TICK >> ADJ_SHIFT)
3661	/ (max_hres_adj)
3662	/
3663	/ void
3664	/ adj_hrestime()
3665	/ {
3666	/	long long adj;
3667	/
3668	/	if (hrestime_adj == 0)
3669	/		adj = 0;
3670	/	else if (hrestime_adj > 0) {
3671	/		if (hrestime_adj < HRES_ADJ)
3672	/			adj = hrestime_adj;
3673	/		else
3674	/			adj = HRES_ADJ;
3675	/	}
3676	/	else {
3677	/		if (hrestime_adj < -(HRES_ADJ))
3678	/			adj = -(HRES_ADJ);
3679	/		else
3680	/			adj = hrestime_adj;
3681	/	}
3682	/
3683	/	timedelta -= adj;
3684	/	hrestime_adj = timedelta;
3685	/	hrestime.tv_nsec += adj;
3686	/
3687	/	while (hrestime.tv_nsec >= NANOSEC) {
3688	/		one_sec++;
3689	/		hrestime.tv_sec++;
3690	/		hrestime.tv_nsec -= NANOSEC;
3691	/	}
3692	/ }
3693__adj_hrestime:
3694	movl	hrestime_adj, %esi	/ if (hrestime_adj == 0)
3695	movl	hrestime_adj+4, %edx
3696	andl	%esi, %esi
3697	jne	.CL4			/ no
3698	andl	%edx, %edx
3699	jne	.CL4			/ no
3700	subl	%ecx, %ecx		/ yes, adj = 0;
3701	subl	%edx, %edx
3702	jmp	.CL5
3703.CL4:
3704	subl	%ecx, %ecx
3705	subl	%eax, %eax
3706	subl	%esi, %ecx
3707	sbbl	%edx, %eax
3708	andl	%eax, %eax		/ if (hrestime_adj > 0)
3709	jge	.CL6
3710
3711	/ In the following comments, HRES_ADJ is used, while in the code
3712	/ max_hres_adj is used.
3713	/
3714	/ The test for "hrestime_adj < HRES_ADJ" is complicated because
3715	/ hrestime_adj is 64-bits, while HRES_ADJ is 32-bits.  We rely
3716	/ on the logical equivalence of:
3717	/
3718	/	!(hrestime_adj < HRES_ADJ)
3719	/
3720	/ and the two step sequence:
3721	/
3722	/	(HRES_ADJ - lsw(hrestime_adj)) generates a Borrow/Carry
3723	/
3724	/ which computes whether or not the least significant 32-bits
3725	/ of hrestime_adj is greater than HRES_ADJ, followed by:
3726	/
3727	/	Previous Borrow/Carry + -1 + msw(hrestime_adj) generates a Carry
3728	/
3729	/ which generates a carry whenever step 1 is true or the most
3730	/ significant long of the longlong hrestime_adj is non-zero.
3731
3732	movl	max_hres_adj, %ecx	/ hrestime_adj is positive
3733	subl	%esi, %ecx
3734	movl	%edx, %eax
3735	adcl	$-1, %eax
3736	jnc	.CL7
3737	movl	max_hres_adj, %ecx	/ adj = HRES_ADJ;
3738	subl	%edx, %edx
3739	jmp	.CL5
3740
3741	/ The following computation is similar to the one above.
3742	/
3743	/ The test for "hrestime_adj < -(HRES_ADJ)" is complicated because
3744	/ hrestime_adj is 64-bits, while HRES_ADJ is 32-bits.  We rely
3745	/ on the logical equivalence of:
3746	/
3747	/	(hrestime_adj > -HRES_ADJ)
3748	/
3749	/ and the two step sequence:
3750	/
3751	/	(HRES_ADJ + lsw(hrestime_adj)) generates a Carry
3752	/
3753	/ which means the least significant 32-bits of hrestime_adj is
3754	/ greater than -HRES_ADJ, followed by:
3755	/
3756	/	Previous Carry + 0 + msw(hrestime_adj) generates a Carry
3757	/
3758	/ which generates a carry only when step 1 is true and the most
3759	/ significant long of the longlong hrestime_adj is -1.
3760
3761.CL6:					/ hrestime_adj is negative
3762	movl	%esi, %ecx
3763	addl	max_hres_adj, %ecx
3764	movl	%edx, %eax
3765	adcl	$0, %eax
3766	jc	.CL7
3767	xor	%ecx, %ecx
3768	subl	max_hres_adj, %ecx	/ adj = -(HRES_ADJ);
3769	movl	$-1, %edx
3770	jmp	.CL5
3771.CL7:
3772	movl	%esi, %ecx		/ adj = hrestime_adj;
3773.CL5:
3774	movl	timedelta, %esi
3775	subl	%ecx, %esi
3776	movl	timedelta+4, %eax
3777	sbbl	%edx, %eax
3778	movl	%esi, timedelta
3779	movl	%eax, timedelta+4	/ timedelta -= adj;
3780	movl	%esi, hrestime_adj
3781	movl	%eax, hrestime_adj+4	/ hrestime_adj = timedelta;
3782	addl	hrestime+4, %ecx
3783
3784	movl	%ecx, %eax		/ eax = tv_nsec
37851:
3786	cmpl	$NANOSEC, %eax		/ if ((unsigned long)tv_nsec >= NANOSEC)
3787	jb	.CL8			/ no
3788	incl	one_sec			/ yes,  one_sec++;
3789	incl	hrestime		/ hrestime.tv_sec++;
3790	addl	$-NANOSEC, %eax		/ tv_nsec -= NANOSEC
3791	jmp	1b			/ check for more seconds
3792
3793.CL8:
3794	movl	%eax, hrestime+4	/ store final into hrestime.tv_nsec
3795	incl	hres_lock		/ release the hres_lock
3796
3797	popl	%ebx
3798	popl	%esi
3799	leave
3800	ret
3801	SET_SIZE(hres_tick)
3802
3803#endif	/* __i386 */
3804#endif	/* __lint */
3805
3806/*
3807 * void prefetch_smap_w(void *)
3808 *
3809 * Prefetch ahead within a linear list of smap structures.
3810 * Not implemented for ia32.  Stub for compatibility.
3811 */
3812
#if defined(__lint)

/* Lint-only placeholder; the real routine is the stub below. */
/*ARGSUSED*/
void
prefetch_smap_w(void *smp)
{
}

#else	/* __lint */

	/*
	 * Stub: returns immediately without looking at its argument.
	 */
	ENTRY(prefetch_smap_w)
	rep;	ret	/* 2 byte return, for when it is a branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(prefetch_smap_w)

#endif	/* __lint */
3827
3828/*
3829 * prefetch_page_r(page_t *)
3830 * issue prefetch instructions for a page_t
3831 */
#if defined(__lint)

/* Lint-only placeholder; the real routine is the stub below. */
/*ARGSUSED*/
void
prefetch_page_r(void *pp)
{
}

#else	/* __lint */

	/*
	 * As written here this returns immediately; no prefetch
	 * instruction is issued and the argument is not examined.
	 */
	ENTRY(prefetch_page_r)
	rep;	ret	/* 2 byte return, for when it is a branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(prefetch_page_r)

#endif	/* __lint */
3847
3848#if defined(__lint)
3849
/*
 * Lint-only placeholder for the assembly bcmp(); always reports 0.
 */
/*ARGSUSED*/
int
bcmp(const void *s1, const void *s2, size_t count)
{
	return (0);
}
3854
3855#else   /* __lint */
3856
3857#if defined(__amd64)
3858
3859	ENTRY(bcmp)
3860	pushq	%rbp
3861	movq	%rsp, %rbp
3862#ifdef DEBUG
3863	movq	postbootkernelbase(%rip), %r11
3864	cmpq	%r11, %rdi
3865	jb	0f
3866	cmpq	%r11, %rsi
3867	jnb	1f
38680:	leaq	.bcmp_panic_msg(%rip), %rdi
3869	xorl	%eax, %eax
3870	call	panic
38711:
3872#endif	/* DEBUG */
3873	call	memcmp
3874	testl	%eax, %eax
3875	setne	%dl
3876	leave
3877	movzbl	%dl, %eax
3878	ret
3879	SET_SIZE(bcmp)
3880
3881#elif defined(__i386)
3882
/*
 * Offsets of the bcmp() arguments from %ebp once the frame is set up.
 */
#define	ARG_S1		8
#define	ARG_S2		12
#define	ARG_LENGTH	16

	/*
	 * bcmp(s1, s2, count), ia32: returns 0 when the two buffers hold
	 * the same bytes (or are the same buffer, or count is 0) and 1
	 * otherwise.  Compares 4 bytes at a time while at least 4 remain,
	 * then byte by byte.  DEBUG kernels first panic if either pointer
	 * lies below postbootkernelbase.
	 *
	 * %eax = cursor in s1, %ecx = cursor in s2, %edi = bytes left.
	 */
	ENTRY(bcmp)
	pushl	%ebp
	movl	%esp, %ebp		/* create new stack frame */
#ifdef DEBUG
	movl    postbootkernelbase, %eax
	cmpl    %eax, ARG_S1(%ebp)
	jb	0f			/* s1 below the limit */
	cmpl    %eax, ARG_S2(%ebp)
	jnb	1f			/* s2 is fine as well */
0:	pushl   $.bcmp_panic_msg
	call    panic
1:
#endif	/* DEBUG */

	pushl	%edi			/* callee-saved, used as the counter */
	movl	ARG_S1(%ebp), %eax
	movl	ARG_S2(%ebp), %ecx
	cmpl	%eax, %ecx		/* same buffer? */
	je	.equal			/* then trivially equal */
	movl	ARG_LENGTH(%ebp), %edi
	cmpl	$4, %edi		/* fewer than 4 bytes in total? */
	jb	.byte_check		/* skip the word loop */
	.align	4
.word_loop:
	movl	(%ecx), %edx		/* next 4 bytes of s2 */
	leal	-4(%edi), %edi		/* count them as done (flags kept) */
	cmpl	(%eax), %edx		/* against the next 4 bytes of s1 */
	jne	.word_not_equal
	leal	4(%ecx), %ecx		/* advance both cursors */
	leal	4(%eax), %eax
	cmpl	$4, %edi		/* another full word left? */
	jae	.word_loop
.byte_check:
	testl	%edi, %edi		/* nothing left over? */
	jz	.equal
	jmp	.byte_loop		/* finish the tail byte by byte */
.word_not_equal:
	leal	4(%edi), %edi		/* undo the early decrement */
	.align	4
.byte_loop:
	movb	(%ecx),	%dl		/* next byte of s2 */
	cmpb	%dl, (%eax)		/* against the next byte of s1 */
	jne	.not_equal
	incl	%ecx			/* advance both cursors */
	incl	%eax
	decl	%edi
	jnz	.byte_loop		/* until the count runs out */
.equal:
	xorl	%eax, %eax		/* return 0: no difference found */
	popl	%edi
	leave
	ret
	.align	4
.not_equal:
	movl	$1, %eax		/* return 1: buffers differ */
	popl	%edi
	leave
	ret
	SET_SIZE(bcmp)
3946
3947#endif	/* __i386 */
3948
3949#ifdef DEBUG
3950	.text
3951.bcmp_panic_msg:
3952	.string "bcmp: arguments below kernelbase"
3953#endif	/* DEBUG */
3954
3955#endif	/* __lint */
3956
#if defined(__lint)

/*
 * C model of the assembly below: index of the highest bit set in the
 * 16-bit mask.
 *
 * The scan stops at bit 0, so a zero mask yields 0 instead of running
 * the index below zero (which made the shift count out of range and
 * the loop non-terminating).  The bsr instruction itself leaves its
 * destination undefined for a zero source, so callers should not rely
 * on the result for mask == 0.
 */
uint_t
bsrw_insn(uint16_t mask)
{
	uint_t index = sizeof (mask) * NBBY - 1;

	while (index > 0 && (mask & (1U << index)) == 0)
		index--;
	return (index);
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * %di = mask.  %eax is cleared first so that the upper bits of
	 * the return value are zero; bsrw then writes the bit index into
	 * %ax.
	 */
	ENTRY_NP(bsrw_insn)
	xorl	%eax, %eax
	bsrw	%di, %ax
	ret
	SET_SIZE(bsrw_insn)

#elif defined(__i386)

	/*
	 * The mask is the low 16 bits of the first stack argument.  %eax
	 * is cleared first so that the upper bits of the return value are
	 * zero; bsrw then writes the bit index into %ax.
	 */
	ENTRY_NP(bsrw_insn)
	movw	4(%esp), %cx
	xorl	%eax, %eax
	bsrw	%cx, %ax
	ret
	SET_SIZE(bsrw_insn)

#endif	/* __i386 */
#endif	/* __lint */
3990
#if defined(__lint)

/*
 * C model of the assembly below: clear bit `pil' in *pending and
 * return the previous state of that bit (1 if it was set, 0 if not),
 * which is what btr/setc produce.  The earlier model returned the
 * updated mask instead, and shifted a signed 1.
 *
 * NOTE(review): the model assumes pil < 32; confirm against callers.
 */
uint_t
atomic_btr32(uint32_t *pending, uint_t pil)
{
	uint32_t bit = 1U << pil;
	uint_t was_set = (*pending & bit) != 0;

	*pending &= ~bit;
	return (was_set);
}

#else	/* __lint */

#if defined(__i386)

	/*
	 * Stack arguments: address of the 32-bit mask, then the bit
	 * number.  The locked btrl clears the bit and leaves its old
	 * state in the carry flag, which setc turns into the 0/1 return
	 * value in %eax.
	 */
	ENTRY_NP(atomic_btr32)
	movl	4(%esp), %ecx		/* %ecx = pending */
	movl	8(%esp), %edx		/* %edx = pil */
	xorl	%eax, %eax		/* upper bits of the result are 0 */
	lock
	btrl	%edx, (%ecx)
	setc	%al
	ret
	SET_SIZE(atomic_btr32)

#endif	/* __i386 */
#endif	/* __lint */
4015
#if defined(__lint)

/* Lint-only placeholder for the assembly routine below. */
/*ARGSUSED*/
void
switch_sp_and_call(void *newsp, void (*func)(uint_t, uint_t), uint_t arg1,
	    uint_t arg2)
{
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * switch_sp_and_call(newsp, func, arg1, arg2): point the stack
	 * pointer at newsp, call func(arg1, arg2) there, and return on
	 * the original stack (recovered from the frame pointer by leave).
	 */
	ENTRY_NP(switch_sp_and_call)
	pushq	%rbp
	movq	%rsp, %rbp		/* frame pointer remembers old stack */
	movq	%rsi, %r11		/* %r11 = func */
	movq	%rdi, %rsp		/* run on newsp from here on */
	movq	%rdx, %rdi		/* first argument for func */
	movq	%rcx, %rsi		/* second argument for func */
	call	*%r11
	leave				/* back to the old stack */
	ret
	SET_SIZE(switch_sp_and_call)

#elif defined(__i386)

	/*
	 * switch_sp_and_call(newsp, func, arg1, arg2): point the stack
	 * pointer at newsp, call func(arg1, arg2) there, and return on
	 * the original stack (recovered from the frame pointer by leave).
	 * The arguments stay reachable through %ebp after the switch.
	 */
	ENTRY_NP(switch_sp_and_call)
	pushl	%ebp
	movl	%esp, %ebp		/* frame pointer remembers old stack */
	movl	8(%ebp), %esp		/* run on newsp from here on */
	pushl	20(%ebp)		/* arg2 */
	pushl	16(%ebp)		/* arg1 */
	call	*12(%ebp)		/* func(arg1, arg2) */
	addl	$8, %esp		/* drop the two arguments */
	leave				/* back to the old stack */
	ret
	SET_SIZE(switch_sp_and_call)

#endif	/* __i386 */
#endif	/* __lint */
4056
#if defined(__lint)

/* Lint-only placeholder for the assembly routine below. */
void
kmdb_enter(void)
{
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * kmdb_enter(): raise the T_DBGENTR software interrupt with
	 * interrupts disabled, then put the interrupt state back.
	 */
	ENTRY_NP(kmdb_enter)
	pushq	%rbp
	movq	%rsp, %rbp

	/*
	 * intr_clear saves the flags, does a 'cli' and returns the
	 * saved flags
	 */
	call	intr_clear

	int	$T_DBGENTR

	/*
	 * Hand whatever is in %rax at this point to intr_restore as
	 * the flags to restore
	 */
	movq	%rax, %rdi
	call	intr_restore

	leave
	ret
	SET_SIZE(kmdb_enter)

#elif defined(__i386)

	/*
	 * kmdb_enter(): raise the T_DBGENTR software interrupt with
	 * interrupts disabled, then put the interrupt state back.
	 */
	ENTRY_NP(kmdb_enter)
	pushl	%ebp
	movl	%esp, %ebp

	/*
	 * intr_clear saves the flags, does a 'cli' and returns the
	 * saved flags
	 */
	call	intr_clear

	int	$T_DBGENTR

	/*
	 * Hand whatever is in %eax at this point to intr_restore as
	 * the flags to restore
	 */
	pushl	%eax
	call	intr_restore
	addl	$4, %esp		/* drop the argument */

	leave
	ret
	SET_SIZE(kmdb_enter)

#endif	/* __i386 */
#endif	/* __lint */
4114
#if defined(__lint)

/* Lint-only placeholder for the assembly routine below. */
void
return_instr(void)
{
}

#else	/* __lint */

	/*
	 * return_instr(): nothing but a return.
	 */
	ENTRY_NP(return_instr)
	rep;	ret	/* 2 byte form, for when it is a branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(return_instr)

#endif	/* __lint */
4129
#if defined(__lint)

/* Lint-only placeholder for the assembly routine below. */
ulong_t
getflags(void)
{
	return (0);
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * getflags(): return the flags register in %rax.  Under __xpv the
	 * PS_IE bit of the result is replaced by one derived from the
	 * upcall mask of the current vcpu.
	 */
	ENTRY(getflags)
	pushfq
	popq	%rax			/* %rax = flags, via the stack */
#if defined(__xpv)
	CURTHREAD(%rdi)
	KPREEMPT_DISABLE(%rdi)
	/*
	 * Synthesize the PS_IE bit from the event mask bit: clear it,
	 * then set it again unless the upcall mask test is non-zero
	 */
	CURVCPU(%r11)
	andq    $_BITNOT(PS_IE), %rax
	XEN_TEST_UPCALL_MASK(%r11)
	jnz	1f
	orq	$PS_IE, %rax
1:
	KPREEMPT_ENABLE_NOKP(%rdi)
#endif
	ret
	SET_SIZE(getflags)

#elif defined(__i386)

	/*
	 * getflags(): return the flags register in %eax.  Under __xpv the
	 * PS_IE bit of the result is replaced by one derived from the
	 * upcall mask of the current vcpu.
	 */
	ENTRY(getflags)
	pushfl
	popl	%eax			/* %eax = flags, via the stack */
#if defined(__xpv)
	CURTHREAD(%ecx)
	KPREEMPT_DISABLE(%ecx)
	/*
	 * Synthesize the PS_IE bit from the event mask bit: clear it,
	 * then set it again unless the upcall mask test is non-zero
	 */
	CURVCPU(%edx)
	andl    $_BITNOT(PS_IE), %eax
	XEN_TEST_UPCALL_MASK(%edx)
	jnz	1f
	orl	$PS_IE, %eax
1:
	KPREEMPT_ENABLE_NOKP(%ecx)
#endif
	ret
	SET_SIZE(getflags)

#endif	/* __i386 */

#endif	/* __lint */
4187
#if defined(__lint)

/* Lint-only placeholder for the assembly routine below. */
ftrace_icookie_t
ftrace_interrupt_disable(void)
{
	return (0);
}

#else   /* __lint */

#if defined(__amd64)

	/*
	 * ftrace_interrupt_disable(): capture the flags register as the
	 * return value, then invoke the CLI macro with %rdx as its
	 * register argument.
	 */
	ENTRY(ftrace_interrupt_disable)
	pushfq
	popq	%rax			/* %rax = flags before the CLI */
	CLI(%rdx)
	ret
	SET_SIZE(ftrace_interrupt_disable)

#elif defined(__i386)

	/*
	 * ftrace_interrupt_disable(): capture the flags register as the
	 * return value, then invoke the CLI macro with %edx as its
	 * register argument.
	 */
	ENTRY(ftrace_interrupt_disable)
	pushfl
	popl	%eax			/* %eax = flags before the CLI */
	CLI(%edx)
	ret
	SET_SIZE(ftrace_interrupt_disable)

#endif	/* __i386 */
#endif	/* __lint */
4216
#if defined(__lint)

/* Lint-only placeholder for the assembly routine below. */
/*ARGSUSED*/
void
ftrace_interrupt_enable(ftrace_icookie_t cookie)
{
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * ftrace_interrupt_enable(cookie): load the flags register from
	 * the cookie passed in %rdi.
	 */
	ENTRY(ftrace_interrupt_enable)
	pushq	%rdi
	popfq				/* flags = cookie, via the stack */
	ret
	SET_SIZE(ftrace_interrupt_enable)

#elif defined(__i386)

	/*
	 * ftrace_interrupt_enable(cookie): load the flags register from
	 * the cookie passed as the first stack argument.
	 */
	ENTRY(ftrace_interrupt_enable)
	movl	4(%esp), %eax		/* %eax = cookie */
	pushl	%eax
	popfl				/* flags = cookie, via the stack */
	ret
	SET_SIZE(ftrace_interrupt_enable)

#endif	/* __i386 */
#endif	/* __lint */
4245
#if defined (__lint)

/* Lint-only placeholder for the assembly routine below. */
/*ARGSUSED*/
void
iommu_cpu_nop(void)
{
}

#else /* __lint */

	/*
	 * iommu_cpu_nop(): execute a single "rep; nop" and return.
	 */
	ENTRY(iommu_cpu_nop)
	rep;	nop
	ret
	SET_SIZE(iommu_cpu_nop)

#endif /* __lint */
4261
#if defined (__lint)

/* Lint-only placeholder for the assembly routine below. */
/*ARGSUSED*/
void
clflush_insn(caddr_t addr)
{
}

#else /* __lint */

#if defined (__amd64)
	/*
	 * clflush_insn(addr): issue clflush on the address in %rdi.
	 */
	ENTRY(clflush_insn)
	clflush (%rdi)
	ret
	SET_SIZE(clflush_insn)
#elif defined (__i386)
	/*
	 * clflush_insn(addr): issue clflush on the address given as the
	 * first stack argument.
	 */
	ENTRY(clflush_insn)
	movl	4(%esp), %eax		/* %eax = addr */
	clflush (%eax)
	ret
	SET_SIZE(clflush_insn)

#endif /* __i386 */
#endif /* __lint */
4285
#if defined (__lint)
/* Lint-only placeholder for the assembly routine below. */
/*ARGSUSED*/
void
mfence_insn(void)
{
}

#else /* __lint */

#if defined (__amd64)
	/*
	 * mfence_insn(): execute one mfence instruction and return.
	 */
	ENTRY(mfence_insn)
	mfence
	ret
	SET_SIZE(mfence_insn)
#elif defined (__i386)
	/*
	 * mfence_insn(): execute one mfence instruction and return.
	 */
	ENTRY(mfence_insn)
	mfence
	ret
	SET_SIZE(mfence_insn)

#endif /* __i386 */
#endif /* __lint */
4307
/*
 * This is how VMware lets guests figure out that they are running
 * on top of the VMware platform:
 * Write 0xA in the ECX register and put the I/O port address value of
 * 0x564D5868 in the EAX register. Then read a 32-bit value from port 0x5658.
 * If VMware is installed then this code will be executed correctly and
 * the EBX register will contain the same I/O port address value of 0x564D5868.
 * If VMware is not installed then the OS will return an exception on port
 * access.
 */
#if defined(__lint)

/* Lint-only placeholder for the assembly routine below. */
int
vmware_platform(void)
{
	return (1);
}

#else

#if defined(__amd64)

	/*
	 * vmware_platform(): load 0x564d5868 into %eax and 0xa into %ecx,
	 * read a long from port 0x5658, and return 1 if %ebx (cleared
	 * beforehand) then holds 0x564d5868, 0 otherwise.  %rbx is
	 * preserved for the caller.
	 */
	ENTRY(vmware_platform)
	pushq	%rbx
	xorl	%ebx, %ebx		/* so a stale %ebx cannot match */
	movl	$0x564d5868, %eax
	movl	$0xa, %ecx
	movl	$0x5658, %edx
	inl	(%dx)
	xorl	%eax, %eax		/* result defaults to 0 */
	cmpl	$0x564d5868, %ebx
	sete	%al			/* 1 if %ebx came back as expected */
	popq	%rbx
	ret
	SET_SIZE(vmware_platform)

#elif defined(__i386)

	/*
	 * vmware_platform(): load 0x564d5868 into %eax and 0xa into %ecx,
	 * read a long from port 0x5658, and return 1 if %ebx (cleared
	 * beforehand) then holds 0x564d5868, 0 otherwise.  %ebx, %ecx and
	 * %edx are preserved for the caller.
	 */
	ENTRY(vmware_platform)
	pushl	%ebx
	pushl	%ecx
	pushl	%edx
	xorl	%ebx, %ebx		/* so a stale %ebx cannot match */
	movl	$0x564d5868, %eax
	movl	$0xa, %ecx
	movl	$0x5658, %edx
	inl	(%dx)
	xorl	%eax, %eax		/* result defaults to 0 */
	cmpl	$0x564d5868, %ebx
	sete	%al			/* 1 if %ebx came back as expected */
	popl	%edx
	popl	%ecx
	popl	%ebx
	ret
	SET_SIZE(vmware_platform)

#endif /* __i386 */
#endif /* __lint */
4368