xref: /titanic_52/usr/src/uts/intel/ia32/ml/i86_subr.s (revision 6e0cbcaa0c6f2bc34634a4cc17b099f9ecef03d1)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26/*
27 *  Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.
28 *  Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T
29 *    All Rights Reserved
30 */
31
32/*
33 * General assembly language routines.
34 * It is the intent of this file to contain routines that are
35 * independent of the specific kernel architecture, and those that are
36 * common across kernel architectures.
37 * As architectures diverge, and implementations of specific
38 * architecture-dependent routines change, the routines should be moved
39 * from this file into the respective ../`arch -k`/subr.s file.
40 */
41
42#include <sys/asm_linkage.h>
43#include <sys/asm_misc.h>
44#include <sys/panic.h>
45#include <sys/ontrap.h>
46#include <sys/regset.h>
47#include <sys/privregs.h>
48#include <sys/reboot.h>
49#include <sys/psw.h>
50#include <sys/x86_archext.h>
51
52#if defined(__lint)
53#include <sys/types.h>
54#include <sys/systm.h>
55#include <sys/thread.h>
56#include <sys/archsystm.h>
57#include <sys/byteorder.h>
58#include <sys/dtrace.h>
59#include <sys/ftrace.h>
60#else	/* __lint */
61#include "assym.h"
62#endif	/* __lint */
63#include <sys/dditypes.h>
64
/*
 * int on_fault(label_t *ljb)
 * void no_fault(void)
 *
 * on_fault() arms lofault protection for the current thread.  It behaves
 * like setjmp(): it returns 0 when called directly, and returns 1 (via
 * catch_fault/longjmp) if code that runs afterwards takes an
 * uncorrectable fault.  The protection stays armed until no_fault() is
 * called or until catch_fault has fired once.
 */

#if defined(__lint)

/* ARGSUSED */
int
on_fault(label_t *ljb)
{
	return (0);
}

void
no_fault(void)
{
}

#else	/* __lint */

#if defined(__amd64)

	ENTRY(on_fault)
	leaq	catch_fault(%rip), %rdx		/* %rdx = &catch_fault */
	movq	%gs:CPU_THREAD, %rsi		/* %rsi = curthread */
	movq	%rdi, T_ONFAULT(%rsi)		/* t_onfault = ljb */
	movq	%rdx, T_LOFAULT(%rsi)		/* t_lofault = catch_fault */
	jmp	setjmp				/* tail-call: setjmp(ljb) */

	/*
	 * Entered through t_lofault.  Disarm both hooks, then longjmp back
	 * to the label_t that on_fault() recorded in t_onfault.
	 */
catch_fault:
	xorl	%eax, %eax			/* %rax = 0 */
	movq	%gs:CPU_THREAD, %rsi		/* %rsi = curthread */
	movq	T_ONFAULT(%rsi), %rdi		/* %rdi = saved jump buffer */
	movq	%rax, T_ONFAULT(%rsi)		/* t_onfault = NULL */
	movq	%rax, T_LOFAULT(%rsi)		/* t_lofault = NULL */
	jmp	longjmp				/* tail-call: longjmp(ljb) */
	SET_SIZE(on_fault)

	ENTRY(no_fault)
	xorl	%eax, %eax			/* %rax = 0 */
	movq	%gs:CPU_THREAD, %rsi		/* %rsi = curthread */
	movq	%rax, T_ONFAULT(%rsi)		/* t_onfault = NULL */
	movq	%rax, T_LOFAULT(%rsi)		/* t_lofault = NULL */
	ret
	SET_SIZE(no_fault)

#elif defined(__i386)

	ENTRY(on_fault)
	movl	4(%esp), %eax			/* %eax = ljb */
	movl	%gs:CPU_THREAD, %edx		/* %edx = curthread */
	leal	catch_fault, %ecx		/* %ecx = &catch_fault */
	movl	%eax, T_ONFAULT(%edx)		/* t_onfault = ljb */
	movl	%ecx, T_LOFAULT(%edx)		/* t_lofault = catch_fault */
	jmp	setjmp				/* ljb is still at 4(%esp) */

	/*
	 * Entered through t_lofault.  Disarm both hooks, then longjmp back
	 * to the label_t that on_fault() recorded in t_onfault.
	 */
catch_fault:
	xorl	%eax, %eax			/* %eax = 0 */
	movl	%gs:CPU_THREAD, %edx		/* %edx = curthread */
	movl	T_ONFAULT(%edx), %ecx		/* %ecx = saved jump buffer */
	movl	%eax, T_ONFAULT(%edx)		/* t_onfault = NULL */
	movl	%eax, T_LOFAULT(%edx)		/* t_lofault = NULL */
	pushl	%ecx				/* argument for longjmp */
	call	longjmp				/* does not return here */
	SET_SIZE(on_fault)

	ENTRY(no_fault)
	xorl	%eax, %eax			/* %eax = 0 */
	movl	%gs:CPU_THREAD, %edx		/* %edx = curthread */
	movl	%eax, T_ONFAULT(%edx)		/* t_onfault = NULL */
	movl	%eax, T_LOFAULT(%edx)		/* t_lofault = NULL */
	ret
	SET_SIZE(no_fault)

#endif	/* __i386 */
#endif	/* __lint */
141
/*
 * void on_trap_trampoline(void)
 *
 * Default trampoline code for on_trap() (see <sys/ontrap.h>).  If it is
 * ever entered it simply performs
 *	longjmp(&curthread->t_ontrap->ot_jmpbuf)
 * so control resumes at the matching on_trap() call site.
 */

#if defined(__lint)

void
on_trap_trampoline(void)
{}

#else	/* __lint */

#if defined(__amd64)

	ENTRY(on_trap_trampoline)
	movq	%gs:CPU_THREAD, %rsi		/* %rsi = curthread */
	movq	T_ONTRAP(%rsi), %rdi		/* %rdi = curthread->t_ontrap */
	addq	$OT_JMPBUF, %rdi		/* %rdi = &t_ontrap->ot_jmpbuf */
	jmp	longjmp				/* tail-call longjmp */
	SET_SIZE(on_trap_trampoline)

#elif defined(__i386)

	ENTRY(on_trap_trampoline)
	movl	%gs:CPU_THREAD, %eax		/* %eax = curthread */
	movl	T_ONTRAP(%eax), %eax		/* %eax = curthread->t_ontrap */
	addl	$OT_JMPBUF, %eax		/* %eax = &t_ontrap->ot_jmpbuf */
	pushl	%eax				/* argument for longjmp */
	call	longjmp				/* does not return here */
	SET_SIZE(on_trap_trampoline)

#endif	/* __i386 */
#endif	/* __lint */
176
/*
 * int on_trap(on_trap_data_t *otp, uint_t prot)
 *
 * Push a new element on to the t_ontrap stack.  Refer to <sys/ontrap.h> for
 * more information about the on_trap() mechanism.  If the on_trap_data is the
 * same as the topmost stack element, we just modify that element.
 *
 * The element is initialized (ot_prot = prot, ot_trap = 0, ot_trampoline =
 * on_trap_trampoline, ot_handle = ot_pad1 = NULL) and the routine finishes
 * by tail-calling setjmp(&otp->ot_jmpbuf), so the return value is setjmp's.
 */
#if defined(__lint)

/*ARGSUSED*/
int
on_trap(on_trap_data_t *otp, uint_t prot)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	ENTRY(on_trap)
	movw	%si, OT_PROT(%rdi)		/* ot_prot = prot */
	movw	$0, OT_TRAP(%rdi)		/* ot_trap = 0 */
	leaq	on_trap_trampoline(%rip), %rdx	/* rdx = &on_trap_trampoline */
	movq	%rdx, OT_TRAMPOLINE(%rdi)	/* ot_trampoline = rdx */
	xorl	%ecx, %ecx
	movq	%rcx, OT_HANDLE(%rdi)		/* ot_handle = NULL */
	movq	%rcx, OT_PAD1(%rdi)		/* ot_pad1 = NULL */
	movq	%gs:CPU_THREAD, %rdx		/* rdx = curthread */
	movq	T_ONTRAP(%rdx), %rcx		/* rcx = curthread->t_ontrap */
	cmpq	%rdi, %rcx			/* if (otp == %rcx)	*/
	je	0f				/*	don't modify t_ontrap */

	/* otp is not already on top: link it in as the new top element */
	movq	%rcx, OT_PREV(%rdi)		/* ot_prev = t_ontrap */
	movq	%rdi, T_ONTRAP(%rdx)		/* curthread->t_ontrap = otp */

0:	addq	$OT_JMPBUF, %rdi		/* &ot_jmpbuf */
	jmp	setjmp				/* let setjmp do the rest */
	SET_SIZE(on_trap)

#elif defined(__i386)

	ENTRY(on_trap)
	movl	4(%esp), %eax			/* %eax = otp */
	movl	8(%esp), %edx			/* %edx = prot */

	movw	%dx, OT_PROT(%eax)		/* ot_prot = prot */
	movw	$0, OT_TRAP(%eax)		/* ot_trap = 0 */
	leal	on_trap_trampoline, %edx	/* %edx = &on_trap_trampoline */
	movl	%edx, OT_TRAMPOLINE(%eax)	/* ot_trampoline = %edx */
	movl	$0, OT_HANDLE(%eax)		/* ot_handle = NULL */
	movl	$0, OT_PAD1(%eax)		/* ot_pad1 = NULL */
	movl	%gs:CPU_THREAD, %edx		/* %edx = curthread */
	movl	T_ONTRAP(%edx), %ecx		/* %ecx = curthread->t_ontrap */
	cmpl	%eax, %ecx			/* if (otp == %ecx) */
	je	0f				/*    don't modify t_ontrap */

	/* otp is not already on top: link it in as the new top element */
	movl	%ecx, OT_PREV(%eax)		/* ot_prev = t_ontrap */
	movl	%eax, T_ONTRAP(%edx)		/* curthread->t_ontrap = otp */

0:	addl	$OT_JMPBUF, %eax		/* %eax = &ot_jmpbuf */
	movl	%eax, 4(%esp)			/* overwrite arg 0 for setjmp */
	jmp	setjmp				/* let setjmp do the rest */
	SET_SIZE(on_trap)

#endif	/* __i386 */
#endif	/* __lint */
240
/*
 * int setjmp(label_t *lp)
 * void longjmp(label_t *lp)
 *
 * Kernel non-local goto built on label_t state vectors.  setjmp() records
 * the stack pointer, the callee-saved registers and its own return address
 * in *lp and returns 0.  longjmp() reloads that state and resumes at the
 * recorded return address with a return value of 1.
 */

#if defined(__lint)

/* ARGSUSED */
int
setjmp(label_t *lp)
{
	return (0);
}

/* ARGSUSED */
void
longjmp(label_t *lp)
{
}

#else	/* __lint */

/* Both routines address the saved pc as (lp) with no displacement. */
#if LABEL_PC != 0
#error LABEL_PC MUST be defined as 0 for setjmp/longjmp to work as coded
#endif	/* LABEL_PC != 0 */

#if defined(__amd64)

	ENTRY(setjmp)
	movq	(%rsp), %rdx		/* %rdx = our return address */
	movq	%rdx, (%rdi)		/* lp->pc (LABEL_PC is 0) */
	movq	%rsp, LABEL_SP(%rdi)
	movq	%rbp, LABEL_RBP(%rdi)
	movq	%rbx, LABEL_RBX(%rdi)
	movq	%r12, LABEL_R12(%rdi)
	movq	%r13, LABEL_R13(%rdi)
	movq	%r14, LABEL_R14(%rdi)
	movq	%r15, LABEL_R15(%rdi)
	xorl	%eax, %eax		/* direct call returns 0 */
	ret
	SET_SIZE(setjmp)

	ENTRY(longjmp)
	movq	LABEL_SP(%rdi), %rsp	/* switch to the saved stack first */
	movq	LABEL_R15(%rdi), %r15
	movq	LABEL_R14(%rdi), %r14
	movq	LABEL_R13(%rdi), %r13
	movq	LABEL_R12(%rdi), %r12
	movq	LABEL_RBX(%rdi), %rbx
	movq	LABEL_RBP(%rdi), %rbp
	movq	(%rdi), %rdx		/* saved pc (LABEL_PC is 0) */
	movq	%rdx, (%rsp)		/* plant it as our return address */
	xorl	%eax, %eax
	incl	%eax			/* resumed setjmp returns 1 */
	ret
	SET_SIZE(longjmp)

#elif defined(__i386)

	ENTRY(setjmp)
	movl	4(%esp), %edx		/* %edx = lp */
	movl	(%esp), %ecx		/* %ecx = our return address */
	movl	%ecx, (%edx)		/* lp->pc (LABEL_PC is 0) */
	movl	%esp, 4(%edx)		/* stack pointer slot */
	movl	%ebp, LABEL_EBP(%edx)
	movl	%ebx, LABEL_EBX(%edx)
	movl	%esi, LABEL_ESI(%edx)
	movl	%edi, LABEL_EDI(%edx)
	subl	%eax, %eax		/* direct call returns 0 */
	ret
	SET_SIZE(setjmp)

	ENTRY(longjmp)
	movl	4(%esp), %edx		/* %edx = lp */
	movl	LABEL_EDI(%edx), %edi
	movl	LABEL_ESI(%edx), %esi
	movl	LABEL_EBX(%edx), %ebx
	movl	LABEL_EBP(%edx), %ebp
	movl	4(%edx), %esp		/* switch to the saved stack */
	movl	(%edx), %ecx		/* saved pc (LABEL_PC is 0) */
	movl	$1, %eax		/* resumed setjmp returns 1 */
	addl	$4, %esp		/* discard setjmp's return-address slot */
	jmp	*%ecx			/* resume after the setjmp call */
	SET_SIZE(longjmp)

#endif	/* __i386 */
#endif	/* __lint */
325
/*
 * caddr_t caller(void)
 *
 * Given the call chain a() -> b() -> caller(), return the address in a()
 * to which b() will return.
 *
 * The value is read from the return-address slot just above the saved
 * frame pointer, so this only works when a() and b() are C routines that
 * perform the normal frame-pointer entry/exit sequence.
 */

#if defined(__lint)

caddr_t
caller(void)
{
	return (0);
}

#else	/* __lint */

#if defined(__amd64)

	ENTRY(caller)
	movq	8(%rbp), %rax		/* return pc stored in b()'s frame */
	ret
	SET_SIZE(caller)

#elif defined(__i386)

	ENTRY(caller)
	movl	4(%ebp), %eax		/* return pc stored in b()'s frame */
	ret
	SET_SIZE(caller)

#endif	/* __i386 */
#endif	/* __lint */
357
/*
 * caddr_t callee(void)
 *
 * Given a() -> callee(), return the address in a() to which callee()
 * itself is about to return, i.e. the word on top of the stack at entry.
 */

#if defined(__lint)

caddr_t
callee(void)
{
	return (0);
}

#else	/* __lint */

#if defined(__amd64)

	ENTRY(callee)
	movq	(%rsp), %rax		/* our own return address */
	ret
	SET_SIZE(callee)

#elif defined(__i386)

	ENTRY(callee)
	movl	(%esp), %eax		/* our own return address */
	ret
	SET_SIZE(callee)

#endif	/* __i386 */
#endif	/* __lint */
387
/*
 * greg_t getfp(void)
 *
 * Return the frame pointer register as it stands on entry.  No frame is
 * built here, so the value is the caller's frame pointer.
 */

#if defined(__lint)

greg_t
getfp(void)
{
	return (0);
}

#else	/* __lint */

#if defined(__amd64)

	ENTRY(getfp)
	movq	%rbp, %rax		/* return %rbp */
	ret
	SET_SIZE(getfp)

#elif defined(__i386)

	ENTRY(getfp)
	movl	%ebp, %eax		/* return %ebp */
	ret
	SET_SIZE(getfp)

#endif	/* __i386 */
#endif	/* __lint */
416
/*
 * void mmu_tlbflush_entry(caddr_t m)
 *
 * Invalidate the TLB entry for the single page containing virtual
 * address m, using the invlpg instruction.
 */

#if defined(__lint)

/* ARGSUSED */
void
mmu_tlbflush_entry(caddr_t m)
{
}

#else	/* __lint */

#if defined(__amd64)

	ENTRY(mmu_tlbflush_entry)
	invlpg	(%rdi)			/* drop the translation for m */
	ret
	SET_SIZE(mmu_tlbflush_entry)

#elif defined(__i386)

	ENTRY(mmu_tlbflush_entry)
	movl	4(%esp), %eax		/* %eax = m */
	invlpg	(%eax)			/* drop the translation for m */
	ret
	SET_SIZE(mmu_tlbflush_entry)

#endif	/* __i386 */
#endif	/* __lint */
447
448
/*
 * Accessors for the processor control registers.
 *
 *	getcrN()	returns the current contents of %crN
 *	setcrN(v)	loads v into %crN
 *	reload_cr3()	writes %cr3 back with the value it already holds
 *
 * Under __xpv, getcr2() reads the value from the per-CPU vcpu_info area
 * instead of from %cr2, and setcr3()/reload_cr3() are not provided here.
 */

#if defined(__lint)

ulong_t
getcr0(void)
{ return (0); }

/* ARGSUSED */
void
setcr0(ulong_t value)
{}

ulong_t
getcr2(void)
{ return (0); }

ulong_t
getcr3(void)
{ return (0); }

#if !defined(__xpv)
/* ARGSUSED */
void
setcr3(ulong_t val)
{}

void
reload_cr3(void)
{}
#endif

ulong_t
getcr4(void)
{ return (0); }

/* ARGSUSED */
void
setcr4(ulong_t val)
{}

#if defined(__amd64)

ulong_t
getcr8(void)
{ return (0); }

/* ARGSUSED */
void
setcr8(ulong_t val)
{}

#endif	/* __amd64 */

#else	/* __lint */

#if defined(__amd64)

	ENTRY(getcr0)
	movq	%cr0, %rax
	ret
	SET_SIZE(getcr0)

	ENTRY(setcr0)
	movq	%rdi, %cr0
	ret
	SET_SIZE(setcr0)

	ENTRY(getcr2)
#if defined(__xpv)
	movq	%gs:CPU_VCPU_INFO, %rax		/* %rax = this CPU's vcpu_info */
	movq	VCPU_INFO_ARCH_CR2(%rax), %rax	/* cr2 value recorded there */
#else
	movq	%cr2, %rax
#endif
	ret
	SET_SIZE(getcr2)

	ENTRY(getcr3)
	movq	%cr3, %rax
	ret
	SET_SIZE(getcr3)

#if !defined(__xpv)

	ENTRY(setcr3)
	movq	%rdi, %cr3
	ret
	SET_SIZE(setcr3)

	ENTRY(reload_cr3)
	movq	%cr3, %rdi		/* read ... */
	movq	%rdi, %cr3		/* ... and write back unchanged */
	ret
	SET_SIZE(reload_cr3)

#endif	/* !__xpv */

	ENTRY(getcr4)
	movq	%cr4, %rax
	ret
	SET_SIZE(getcr4)

	ENTRY(setcr4)
	movq	%rdi, %cr4
	ret
	SET_SIZE(setcr4)

	ENTRY(getcr8)
	movq	%cr8, %rax
	ret
	SET_SIZE(getcr8)

	ENTRY(setcr8)
	movq	%rdi, %cr8
	ret
	SET_SIZE(setcr8)

#elif defined(__i386)

	ENTRY(getcr0)
	movl	%cr0, %eax
	ret
	SET_SIZE(getcr0)

	ENTRY(setcr0)
	movl	4(%esp), %eax		/* %eax = value */
	movl	%eax, %cr0
	ret
	SET_SIZE(setcr0)

	/*
	 * 32-bit code reaches %cr8 through the "lock mov %cr0" form, which
	 * is only available on processors that advertise it via CPUID.
	 * Normally the 32 bit TPR is accessed via the local APIC instead.
	 */
	ENTRY(getcr8)
	lock
	movl	%cr0, %eax
	ret
	SET_SIZE(getcr8)

	ENTRY(setcr8)
	movl	4(%esp), %eax		/* %eax = val */
	lock
	movl	%eax, %cr0
	ret
	SET_SIZE(setcr8)

	ENTRY(getcr2)
#if defined(__xpv)
	movl	%gs:CPU_VCPU_INFO, %eax		/* %eax = this CPU's vcpu_info */
	movl	VCPU_INFO_ARCH_CR2(%eax), %eax	/* cr2 value recorded there */
#else
	movl	%cr2, %eax
#endif
	ret
	SET_SIZE(getcr2)

	ENTRY(getcr3)
	movl	%cr3, %eax
	ret
	SET_SIZE(getcr3)

#if !defined(__xpv)

	ENTRY(setcr3)
	movl	4(%esp), %eax		/* %eax = val */
	movl	%eax, %cr3
	ret
	SET_SIZE(setcr3)

	ENTRY(reload_cr3)
	movl	%cr3, %eax		/* read ... */
	movl	%eax, %cr3		/* ... and write back unchanged */
	ret
	SET_SIZE(reload_cr3)

#endif	/* !__xpv */

	ENTRY(getcr4)
	movl	%cr4, %eax
	ret
	SET_SIZE(getcr4)

	ENTRY(setcr4)
	movl	4(%esp), %eax		/* %eax = val */
	movl	%eax, %cr4
	ret
	SET_SIZE(setcr4)

#endif	/* __i386 */
#endif	/* __lint */
644
/*
 * uint32_t __cpuid_insn(struct cpuid_regs *regs)
 *
 * Execute the cpuid instruction.  %eax, %ebx, %ecx and %edx are loaded
 * from the four consecutive 32-bit words at regs (cp_eax, cp_ebx, cp_ecx,
 * cp_edx), and the values cpuid leaves in them are stored back to the
 * same words.  The value returned is the %eax produced by cpuid.
 */
#if defined(__lint)

/*ARGSUSED*/
uint32_t
__cpuid_insn(struct cpuid_regs *regs)
{
	return (0);
}

#else	/* __lint */

#if defined(__amd64)

	ENTRY(__cpuid_insn)
	/* cpuid overwrites %rbx/%rcx/%rdx; park the incoming values */
	movq	%rbx, %r8
	movq	%rcx, %r9
	movq	%rdx, %r11
	movl	0xc(%rdi), %edx		/* %edx = regs->cp_edx */
	movl	0x8(%rdi), %ecx		/* %ecx = regs->cp_ecx */
	movl	0x4(%rdi), %ebx		/* %ebx = regs->cp_ebx */
	movl	(%rdi), %eax		/* %eax = regs->cp_eax */
	cpuid
	movl	%edx, 0xc(%rdi)		/* regs->cp_edx = %edx */
	movl	%ecx, 0x8(%rdi)		/* regs->cp_ecx = %ecx */
	movl	%ebx, 0x4(%rdi)		/* regs->cp_ebx = %ebx */
	movl	%eax, (%rdi)		/* regs->cp_eax = %eax */
	movq	%r11, %rdx
	movq	%r9, %rcx
	movq	%r8, %rbx
	ret
	SET_SIZE(__cpuid_insn)

#elif defined(__i386)

	ENTRY(__cpuid_insn)
	pushl	%ebp
	movl	0x8(%esp), %ebp		/* %ebp = regs (arg sits above saved %ebp) */
	pushl	%ebx
	pushl	%ecx
	pushl	%edx
	movl	0xc(%ebp), %edx		/* %edx = regs->cp_edx */
	movl	0x8(%ebp), %ecx		/* %ecx = regs->cp_ecx */
	movl	0x4(%ebp), %ebx		/* %ebx = regs->cp_ebx */
	movl	(%ebp), %eax		/* %eax = regs->cp_eax */
	cpuid
	movl	%edx, 0xc(%ebp)		/* regs->cp_edx = %edx */
	movl	%ecx, 0x8(%ebp)		/* regs->cp_ecx = %ecx */
	movl	%ebx, 0x4(%ebp)		/* regs->cp_ebx = %ebx */
	movl	%eax, (%ebp)		/* regs->cp_eax = %eax */
	popl	%edx
	popl	%ecx
	popl	%ebx
	popl	%ebp
	ret
	SET_SIZE(__cpuid_insn)

#endif	/* __i386 */
#endif	/* __lint */
701
/*
 * void i86_monitor(volatile uint32_t *addr, uint32_t extensions,
 *     uint32_t hints)
 *
 * Flush the cache line holding addr with clflush, then issue the monitor
 * instruction with %eax/%rax = addr, %ecx = extensions and %edx = hints.
 * The opcode is emitted as raw bytes rather than by mnemonic.
 */
#if defined(__lint)

/*ARGSUSED*/
void
i86_monitor(volatile uint32_t *addr, uint32_t extensions, uint32_t hints)
{
}

#else	/* __lint */

#if defined(__amd64)

	ENTRY_NP(i86_monitor)
	pushq	%rbp
	movq	%rsp, %rbp
	movq	%rsi, %rcx		/* %rcx = extensions */
	movq	%rdi, %rax		/* %rax = addr */
	/* hints already arrive in %rdx (third argument) */
	clflush	(%rax)
	.byte	0x0f, 0x01, 0xc8	/* monitor */
	leave
	ret
	SET_SIZE(i86_monitor)

#elif defined(__i386)

	ENTRY_NP(i86_monitor)
	pushl	%ebp
	movl	%esp, %ebp
	movl	0x10(%ebp), %edx	/* %edx = hints */
	movl	0xc(%ebp), %ecx		/* %ecx = extensions */
	movl	0x8(%ebp), %eax		/* %eax = addr */
	clflush	(%eax)
	.byte	0x0f, 0x01, 0xc8	/* monitor */
	leave
	ret
	SET_SIZE(i86_monitor)

#endif	/* __i386 */
#endif	/* __lint */
741
/*
 * void i86_mwait(uint32_t data, uint32_t extensions)
 *
 * Issue the mwait instruction with %eax/%rax = data and %ecx/%rcx =
 * extensions.  The opcode is emitted as raw bytes rather than by mnemonic.
 */
#if defined(__lint)

/*ARGSUSED*/
void
i86_mwait(uint32_t data, uint32_t extensions)
{
}

#else	/* __lint */

#if defined(__amd64)

	ENTRY_NP(i86_mwait)
	pushq	%rbp
	movq	%rsp, %rbp
	movq	%rsi, %rcx		/* %rcx = extensions */
	movq	%rdi, %rax		/* %rax = data */
	.byte	0x0f, 0x01, 0xc9	/* mwait */
	leave
	ret
	SET_SIZE(i86_mwait)

#elif defined(__i386)

	ENTRY_NP(i86_mwait)
	pushl	%ebp
	movl	%esp, %ebp
	movl	0xc(%ebp), %ecx		/* %ecx = extensions */
	movl	0x8(%ebp), %eax		/* %eax = data */
	.byte	0x0f, 0x01, 0xc9	/* mwait */
	leave
	ret
	SET_SIZE(i86_mwait)

#endif	/* __i386 */
#endif	/* __lint */
777
/*
 * hrtime_t tsc_read(void)
 *
 * Return the 64-bit time stamp counter.  The default body serializes with
 * cpuid before executing rdtsc.
 *
 * Following the default body, and still inside the extent covered by
 * SET_SIZE(tsc_read), are four alternative bodies, each bracketed by a
 * pair of global labels:
 *
 *	_tsc_mfence_start .. _tsc_mfence_end	mfence; rdtsc
 *	_tscp_start       .. _tscp_end		rdtscp
 *	_no_rdtsc_start   .. _no_rdtsc_end	always returns 0
 *	_tsc_lfence_start .. _tsc_lfence_end	lfence; rdtsc
 *
 * NOTE(review): the start/end label pairs look like they exist so that
 * code elsewhere can copy one of these bodies over the default one at
 * tsc_read -- confirm against the users of these symbols.  If so, the
 * exact instruction encodings and lengths here matter; do not "tidy"
 * them (e.g. movl $0 -> xorl) without checking.
 *
 * On amd64 the %edx:%eax result is merged into %rax; on i386 it is
 * returned as-is in %edx:%eax.
 *
 * Not built under __xpv.
 */
#if defined(__xpv)
	/*
	 * Defined in C
	 */
#else

#if defined(__lint)

hrtime_t
tsc_read(void)
{
	return (0);
}

#else	/* __lint */

#if defined(__amd64)

	ENTRY_NP(tsc_read)
	movq	%rbx, %r11		/* cpuid clobbers %rbx (callee-saved) */
	movl	$0, %eax		/* cpuid leaf 0 */
	cpuid				/* serialize */
	rdtsc
	movq	%r11, %rbx
	shlq	$32, %rdx
	orq	%rdx, %rax		/* %rax = (%edx << 32) | %eax */
	ret
	.globl _tsc_mfence_start
_tsc_mfence_start:
	mfence
	rdtsc
	shlq	$32, %rdx
	orq	%rdx, %rax
	ret
	.globl _tsc_mfence_end
_tsc_mfence_end:
	.globl _tscp_start
_tscp_start:
	.byte	0x0f, 0x01, 0xf9	/* rdtscp instruction */
	shlq	$32, %rdx
	orq	%rdx, %rax
	ret
	.globl _tscp_end
_tscp_end:
	.globl _no_rdtsc_start
_no_rdtsc_start:
	xorl	%edx, %edx
	xorl	%eax, %eax		/* return 0 */
	ret
	.globl _no_rdtsc_end
_no_rdtsc_end:
	.globl _tsc_lfence_start
_tsc_lfence_start:
	lfence
	rdtsc
	shlq	$32, %rdx
	orq	%rdx, %rax
	ret
	.globl _tsc_lfence_end
_tsc_lfence_end:
	SET_SIZE(tsc_read)

#elif defined(__i386)

	ENTRY_NP(tsc_read)
	pushl	%ebx			/* cpuid clobbers %ebx (callee-saved) */
	movl	$0, %eax		/* cpuid leaf 0 */
	cpuid				/* serialize */
	rdtsc				/* result in %edx:%eax */
	popl	%ebx
	ret
	.globl _tsc_mfence_start
_tsc_mfence_start:
	mfence
	rdtsc
	ret
	.globl _tsc_mfence_end
_tsc_mfence_end:
	.globl	_tscp_start
_tscp_start:
	.byte	0x0f, 0x01, 0xf9	/* rdtscp instruction */
	ret
	.globl _tscp_end
_tscp_end:
	.globl _no_rdtsc_start
_no_rdtsc_start:
	xorl	%edx, %edx
	xorl	%eax, %eax		/* return 0 */
	ret
	.globl _no_rdtsc_end
_no_rdtsc_end:
	.globl _tsc_lfence_start
_tsc_lfence_start:
	lfence
	rdtsc
	ret
	.globl _tsc_lfence_end
_tsc_lfence_end:
	SET_SIZE(tsc_read)

#endif	/* __i386 */

#endif	/* __lint */


#endif	/* __xpv */
884
/*
 * hrtime_t randtick(void)
 *
 * Return the raw result of a bare rdtsc, with no serializing instruction
 * in front of it.
 *
 * Do not use this function for obtaining clock tick.  This
 * is called by callers who do not need to have a guaranteed
 * correct tick value.  The proper routine to use is tsc_read().
 */
#if defined(__lint)

hrtime_t
randtick(void)
{
	return (0);
}

#else	/* __lint */

#if defined(__amd64)

	ENTRY_NP(randtick)
	rdtsc
	shlq    $32, %rdx
	orq     %rdx, %rax		/* %rax = (%edx << 32) | %eax */
	ret
	SET_SIZE(randtick)

#elif defined(__i386)

	ENTRY_NP(randtick)
	rdtsc				/* result in %edx:%eax */
	ret
	SET_SIZE(randtick)

#endif	/* __i386 */
#endif	/* __lint */

/*
 * void _insque(caddr_t entryp, caddr_t predp)
 *
 * Link entryp into a doubly linked list immediately after predp.  Each
 * element begins with its forward pointer, followed CPTRSIZE bytes later
 * by its back pointer.
 */

#if defined(__lint)

/*ARGSUSED*/
void
_insque(caddr_t entryp, caddr_t predp)
{
}

#else	/* __lint */

#if defined(__amd64)

	ENTRY(_insque)
	movq	(%rsi), %rax		/* %rax = succ = predp->forw	*/
	movq	%rax, (%rdi)		/* entryp->forw = succ		*/
	movq	%rsi, CPTRSIZE(%rdi)	/* entryp->back = predp		*/
	movq	%rdi, (%rsi)		/* predp->forw = entryp		*/
	movq	%rdi, CPTRSIZE(%rax)	/* succ->back = entryp		*/
	ret
	SET_SIZE(_insque)

#elif defined(__i386)

	ENTRY(_insque)
	movl	4(%esp), %ecx		/* %ecx = entryp		*/
	movl	8(%esp), %edx		/* %edx = predp			*/
	movl	(%edx), %eax		/* %eax = succ = predp->forw	*/
	movl	%eax, (%ecx)		/* entryp->forw = succ		*/
	movl	%edx, CPTRSIZE(%ecx)	/* entryp->back = predp		*/
	movl	%ecx, (%edx)		/* predp->forw = entryp		*/
	movl	%ecx, CPTRSIZE(%eax)	/* succ->back = entryp		*/
	ret
	SET_SIZE(_insque)

#endif	/* __i386 */
#endif	/* __lint */
950
/*
 * void _remque(caddr_t entryp)
 *
 * Unlink entryp from a doubly linked list by pointing its neighbours at
 * each other.  entryp's own forw/back pointers are left untouched.
 */

#if defined(__lint)

/*ARGSUSED*/
void
_remque(caddr_t entryp)
{
}

#else	/* __lint */

#if defined(__amd64)

	ENTRY(_remque)
	movq	CPTRSIZE(%rdi), %rdx	/* %rdx = pred = entry->back */
	movq	(%rdi), %rax		/* %rax = succ = entry->forw */
	movq	%rax, (%rdx)		/* pred->forw = succ */
	movq	%rdx, CPTRSIZE(%rax)	/* succ->back = pred */
	ret
	SET_SIZE(_remque)

#elif defined(__i386)

	ENTRY(_remque)
	movl	4(%esp), %ecx		/* %ecx = entry */
	movl	CPTRSIZE(%ecx), %edx	/* %edx = pred = entry->back */
	movl	(%ecx), %eax		/* %eax = succ = entry->forw */
	movl	%eax, (%edx)		/* pred->forw = succ */
	movl	%edx, CPTRSIZE(%eax)	/* succ->back = pred */
	ret
	SET_SIZE(_remque)

#endif	/* __i386 */
#endif	/* __lint */
987
/*
 * size_t strlen(const char *str)
 *
 * Return the number of bytes in str that precede its terminating NUL.
 *
 * In DEBUG kernels the argument is first compared against
 * postbootkernelbase, and the routine panics if the pointer lies below it.
 */

#if defined(__lint)

/* ARGSUSED */
size_t
strlen(const char *str)
{
	return (0);
}

#else	/* __lint */

#if defined(__amd64)

/*
 * The amd64 version is little more than a hand translation of the C
 * below.  It should either simply become C, or earn its place in
 * assembler by being made significantly faster.
 *
 * size_t
 * strlen(const char *s)
 * {
 *	const char *s0;
 * #if defined(DEBUG)
 *	if ((uintptr_t)s < KERNELBASE)
 *		panic(.str_panic_msg);
 * #endif
 *	for (s0 = s; *s; s++)
 *		;
 *	return (s - s0);
 * }
 */

	ENTRY(strlen)
#ifdef DEBUG
	movq	postbootkernelbase(%rip), %rax
	cmpq	%rax, %rdi		/* s >= postbootkernelbase? */
	jae	str_valid
	pushq	%rbp
	movq	%rsp, %rbp
	leaq	.str_panic_msg(%rip), %rdi
	xorl	%eax, %eax
	call	panic
#endif	/* DEBUG */
str_valid:
	movq	%rdi, %rax		/* %rax = s0 (mov leaves flags alone) */
	cmpb	$0, (%rdi)		/* empty string? */
	je	.null_found
	.align	4
.strlen_loop:
	incq	%rdi			/* s++ */
	cmpb	$0, (%rdi)
	jne	.strlen_loop		/* until *s == 0 */
.null_found:
	subq	%rax, %rdi		/* %rdi = s - s0 */
	movq	%rdi, %rax
	ret
	SET_SIZE(strlen)

#elif defined(__i386)

/*
 * The i386 version scans a byte at a time until the pointer is 4-byte
 * aligned, then a 32-bit word at a time.  For a word w,
 *	(((w & 0x7f7f7f7f) + 0x7f7f7f7f) | w) & 0x80808080
 * has the top bit of every byte lane set exactly when that byte of w is
 * non-zero, so a result of 0x80808080 means "no NUL in this word".
 * Otherwise the word is rescanned bytewise to locate the NUL.
 */

	ENTRY(strlen)
#ifdef DEBUG
	movl	postbootkernelbase, %eax
	cmpl	%eax, 4(%esp)		/* str >= postbootkernelbase? */
	jae	str_valid
	pushl	%ebp
	movl	%esp, %ebp
	pushl	$.str_panic_msg
	call	panic
#endif /* DEBUG */

str_valid:
	movl	4(%esp), %eax		/* %eax = cursor = str */
	testl	$3, %eax		/* cursor 4-byte aligned? */
	jnz	.not_word_aligned	/* no: go bytewise */
	.align	4
.word_aligned:
	movl	(%eax), %edx		/* %edx = w = next 4 bytes */
	addl	$4, %eax		/* cursor += 4 (undone below on a hit) */
	movl	$0x7f7f7f7f, %ecx
	andl	%edx, %ecx		/* %ecx = w & 0x7f7f7f7f */
	addl	$0x7f7f7f7f, %ecx	/* %ecx += 0x7f7f7f7f */
	orl	%edx, %ecx		/* %ecx |= w */
	andl	$0x80808080, %ecx	/* keep each lane's top bit */
	cmpl	$0x80808080, %ecx	/* all four bytes non-zero? */
	je	.word_aligned		/* yes: next word */
	subl	$4, %eax		/* no: back up and rescan this word */
.not_word_aligned:
	cmpb	$0, (%eax)		/* at the NUL? */
	je	.null_found
	incl	%eax			/* cursor++ */
	testl	$3, %eax		/* aligned yet? */
	jnz	.not_word_aligned	/* no: keep going bytewise */
	jmp	.word_aligned		/* yes: back to word-at-a-time */
	.align	4
.null_found:
	subl	4(%esp), %eax		/* length = cursor - str */
	ret
	SET_SIZE(strlen)

#endif	/* __i386 */

#ifdef DEBUG
	.text
.str_panic_msg:
	.string "strlen: argument below kernelbase"
#endif /* DEBUG */

#endif	/* __lint */
1100
1101	/*
1102	 * Berkeley 4.3 introduced symbolically named interrupt levels
1103	 * as a way deal with priority in a machine independent fashion.
1104	 * Numbered priorities are machine specific, and should be
1105	 * discouraged where possible.
1106	 *
1107	 * Note, for the machine specific priorities there are
1108	 * examples listed for devices that use a particular priority.
1109	 * It should not be construed that all devices of that
1110	 * type should be at that priority.  It is currently were
1111	 * the current devices fit into the priority scheme based
1112	 * upon time criticalness.
1113	 *
1114	 * The underlying assumption of these assignments is that
1115	 * IPL 10 is the highest level from which a device
1116	 * routine can call wakeup.  Devices that interrupt from higher
1117	 * levels are restricted in what they can do.  If they need
1118	 * kernels services they should schedule a routine at a lower
1119	 * level (via software interrupt) to do the required
1120	 * processing.
1121	 *
1122	 * Examples of this higher usage:
1123	 *	Level	Usage
1124	 *	14	Profiling clock (and PROM uart polling clock)
1125	 *	12	Serial ports
1126	 *
1127	 * The serial ports request lower level processing on level 6.
1128	 *
1129	 * Also, almost all splN routines (where N is a number or a
1130	 * mnemonic) will do a RAISE(), on the assumption that they are
1131	 * never used to lower our priority.
1132	 * The exceptions are:
1133	 *	spl8()		Because you can't be above 15 to begin with!
1134	 *	splzs()		Because this is used at boot time to lower our
1135	 *			priority, to allow the PROM to poll the uart.
1136	 *	spl0()		Used to lower priority to 0.
1137	 */
1138
1139#if defined(__lint)
1140
1141int spl0(void)		{ return (0); }
1142int spl6(void)		{ return (0); }
1143int spl7(void)		{ return (0); }
1144int spl8(void)		{ return (0); }
1145int splhigh(void)	{ return (0); }
1146int splhi(void)		{ return (0); }
1147int splzs(void)		{ return (0); }
1148
1149/* ARGSUSED */
1150void
1151splx(int level)
1152{}
1153
1154#else	/* __lint */
1155
1156#if defined(__amd64)
1157
1158#define	SETPRI(level) \
1159	movl	$/**/level, %edi;	/* new priority */		\
1160	jmp	do_splx			/* redirect to do_splx */
1161
1162#define	RAISE(level) \
1163	movl	$/**/level, %edi;	/* new priority */		\
1164	jmp	splr			/* redirect to splr */
1165
1166#elif defined(__i386)
1167
1168#define	SETPRI(level) \
1169	pushl	$/**/level;	/* new priority */			\
1170	call	do_splx;	/* invoke common splx code */		\
1171	addl	$4, %esp;	/* unstack arg */			\
1172	ret
1173
1174#define	RAISE(level) \
1175	pushl	$/**/level;	/* new priority */			\
1176	call	splr;		/* invoke common splr code */		\
1177	addl	$4, %esp;	/* unstack args */			\
1178	ret
1179
1180#endif	/* __i386 */
1181
1182	/* locks out all interrupts, including memory errors */
1183	ENTRY(spl8)
1184	SETPRI(15)
1185	SET_SIZE(spl8)
1186
1187	/* just below the level that profiling runs */
1188	ENTRY(spl7)
1189	RAISE(13)
1190	SET_SIZE(spl7)
1191
1192	/* sun specific - highest priority onboard serial i/o asy ports */
1193	ENTRY(splzs)
1194	SETPRI(12)	/* Can't be a RAISE, as it's used to lower us */
1195	SET_SIZE(splzs)
1196
1197	ENTRY(splhi)
1198	ALTENTRY(splhigh)
1199	ALTENTRY(spl6)
1200	ALTENTRY(i_ddi_splhigh)
1201
1202	RAISE(DISP_LEVEL)
1203
1204	SET_SIZE(i_ddi_splhigh)
1205	SET_SIZE(spl6)
1206	SET_SIZE(splhigh)
1207	SET_SIZE(splhi)
1208
1209	/* allow all interrupts */
1210	ENTRY(spl0)
1211	SETPRI(0)
1212	SET_SIZE(spl0)
1213
1214
1215	/* splx implementation */
1216	ENTRY(splx)
1217	jmp	do_splx		/* redirect to common splx code */
1218	SET_SIZE(splx)
1219
1220#endif	/* __lint */
1221
#if defined(__i386)

/*
 * uint16_t getgs(void)
 * void setgs(uint16_t sel)
 *
 * Read and write the %gs segment register (i386 only).  getgs() returns
 * the selector in the low 16 bits of %eax with the upper bits cleared.
 */

#if defined(__lint)

/*ARGSUSED*/
uint16_t
getgs(void)
{
	return (0);
}

/*ARGSUSED*/
void
setgs(uint16_t sel)
{
}

#else	/* __lint */

	ENTRY(getgs)
	clr	%eax			/* clear the upper half first */
	movw	%gs, %ax		/* %ax = current %gs selector */
	ret
	SET_SIZE(getgs)

	ENTRY(setgs)
	movw	4(%esp), %gs		/* %gs = sel */
	ret
	SET_SIZE(setgs)

#endif	/* __lint */
#endif	/* __i386 */
1255
/*
 * void pc_reset(void)
 * void efi_reset(void)
 *
 * pc_reset() tries to reset the machine, working through the methods
 * enabled in the pc_reset_methods bit mask in this order:
 *
 *	RESET_METHOD_KBC	write 0xfe to keyboard controller port 0x64
 *	RESET_METHOD_PORT92	set bit 0 of port 0x92 ("fast reset")
 *	RESET_METHOD_PCI	write 0x2 then 0x6 to port 0xcf9
 *
 * After each attempt it waits 500ms for the reset to take effect before
 * moving on.  If every enabled method fails it falls through into
 * efi_reset(), which loads an all-zero IDT descriptor and raises an
 * interrupt to triple-fault the CPU.  Neither routine returns.
 *
 * wait_500ms() is a local helper: 50000 calls to tenmicrosec().
 */
#if defined(__lint)

void
pc_reset(void)
{}

void
efi_reset(void)
{}

#else	/* __lint */

	ENTRY(wait_500ms)
#if defined(__amd64)
	pushq	%rbx			/* %rbx is callee-saved; used as counter */
#elif defined(__i386)
	push	%ebx			/* %ebx is callee-saved; used as counter */
#endif
	movl	$50000, %ebx		/* 50000 * 10us = 500ms */
1:
	call	tenmicrosec
	decl	%ebx
	jnz	1b
#if defined(__amd64)
	popq	%rbx
#elif defined(__i386)
	pop	%ebx
#endif
	ret
	SET_SIZE(wait_500ms)

#define	RESET_METHOD_KBC	1
#define	RESET_METHOD_PORT92	2
#define RESET_METHOD_PCI	4

	/* bit mask of the reset methods pc_reset() may try; all on by default */
	DGDEF3(pc_reset_methods, 4, 8)
	.long RESET_METHOD_KBC|RESET_METHOD_PORT92|RESET_METHOD_PCI;

	ENTRY(pc_reset)

#if defined(__i386)
	testl	$RESET_METHOD_KBC, pc_reset_methods
#elif defined(__amd64)
	testl	$RESET_METHOD_KBC, pc_reset_methods(%rip)
#endif
	jz	1f

	/*
	 * Try the classic keyboard controller-triggered reset.
	 */
	movw	$0x64, %dx
	movb	$0xfe, %al
	outb	(%dx)

	/*
	 * Wait up to 500 milliseconds here for the keyboard controller
	 * to pull the reset line.  On some systems where the keyboard
	 * controller is slow to pull the reset line, the next reset method
	 * may be executed (which may be bad if those systems hang when the
	 * next reset method is used, e.g. Ferrari 3400 (doesn't like port 92),
	 * and Ferrari 4000 (doesn't like the cf9 reset method))
	 */

	call	wait_500ms

1:
#if defined(__i386)
	testl	$RESET_METHOD_PORT92, pc_reset_methods
#elif defined(__amd64)
	testl	$RESET_METHOD_PORT92, pc_reset_methods(%rip)
#endif
	jz	3f

	/*
	 * Try port 0x92 fast reset
	 */
	movw	$0x92, %dx
	inb	(%dx)
	cmpb	$0xff, %al	/* If port's not there, we should get back 0xFF */
	je	1f
	testb	$1, %al		/* If bit 0 */
	jz	2f		/* is clear, jump to perform the reset */
	andb	$0xfe, %al	/* otherwise, */
	outb	(%dx)		/* clear bit 0 first, then */
2:
	orb	$1, %al		/* Set bit 0 */
	outb	(%dx)		/* and reset the system */
1:

	call	wait_500ms

3:
#if defined(__i386)
	testl	$RESET_METHOD_PCI, pc_reset_methods
#elif defined(__amd64)
	testl	$RESET_METHOD_PCI, pc_reset_methods(%rip)
#endif
	jz	4f

	/*
	 * Try the PCI (soft) reset vector (should work on all modern systems,
	 * but has been shown to cause problems on 450NX systems, and some newer
	 * systems (e.g. ATI IXP400-equipped systems))
	 * When resetting via this method, 2 writes are required.  The first
	 * targets bit 1 (0=hard reset without power cycle, 1=hard reset with
	 * power cycle).
	 * The reset occurs on the second write, during bit 2's transition from
	 * 0->1.
	 */
	movw	$0xcf9, %dx
	movb	$0x2, %al	/* Reset mode = hard, no power cycle */
	outb	(%dx)
	movb	$0x6, %al
	outb	(%dx)

	call	wait_500ms

4:
	/*
	 * port 0xcf9 failed also.  Last-ditch effort is to
	 * triple-fault the CPU.
	 * Also, use triple fault for EFI firmware
	 */
	ENTRY(efi_reset)
#if defined(__amd64)
	pushq	$0x0
	pushq	$0x0		/* 16 zero bytes: IDT limit 0, base 0 */
	lidt	(%rsp)		/* reads 2-byte limit + 8-byte base */
#elif defined(__i386)
	pushl	$0x0
	pushl	$0x0		/* IDT base of 0, limit of 0 + 2 unused bytes */
	lidt	(%esp)
#endif
	int	$0x0		/* Trigger interrupt, generate triple-fault */

	cli
	hlt			/* Wait forever */
	/*NOTREACHED*/
	SET_SIZE(efi_reset)
	SET_SIZE(pc_reset)

#endif	/* __lint */
1394
1395/*
1396 * C callable in and out routines
1397 */
1398
1399#if defined(__lint)
1400
1401/* ARGSUSED */
1402void
1403outl(int port_address, uint32_t val)
1404{}
1405
1406#else	/* __lint */
1407
1408#if defined(__amd64)
1409
	/*
	 * void outl(int port_address, uint32_t val)
	 * Emit the 32-bit val on an I/O port.  The port number is taken from
	 * %di and the data from %esi.
	 */
1410	ENTRY(outl)
1411	movl	%esi, %eax		/* data for the out instruction */
1412	movw	%di, %dx		/* 16-bit port number */
1413	outl	(%dx)
1414	ret
1415	SET_SIZE(outl)
1416
1417#elif defined(__i386)
1418
	/*
	 * Offsets of the two arguments from %esp on entry, i.e. with only
	 * the return address above them.
	 */
1419	.set	PORT, 4
1420	.set	VAL, 8
1421
1422	ENTRY(outl)
1423	movl	VAL(%esp), %eax		/* data for the out instruction */
1424	movw	PORT(%esp), %dx		/* 16-bit port number */
1425	outl	(%dx)
1426	ret
1427	SET_SIZE(outl)
1428
1429#endif	/* __i386 */
1430#endif	/* __lint */
1431
1432#if defined(__lint)
1433
1434/* ARGSUSED */
1435void
1436outw(int port_address, uint16_t val)
1437{}
1438
1439#else	/* __lint */
1440
1441#if defined(__amd64)
1442
	/*
	 * void outw(int port_address, uint16_t val)
	 * Emit a 16-bit value on an I/O port.  The instruction is written as
	 * outl behind the D16 macro (defined outside this file section;
	 * presumably the operand-size override that narrows it to 16 bits).
	 */
1443	ENTRY(outw)
1444	movw	%si, %ax		/* data */
1445	movw	%di, %dx		/* port number */
1446	D16 outl (%dx)		/* XX64 why not outw? */
1447	ret
1448	SET_SIZE(outw)
1449
1450#elif defined(__i386)
1451
1452	ENTRY(outw)
1453	movw	VAL(%esp), %ax		/* data */
1454	movw	PORT(%esp), %dx		/* port number */
1455	D16 outl (%dx)
1456	ret
1457	SET_SIZE(outw)
1458
1459#endif	/* __i386 */
1460#endif	/* __lint */
1461
1462#if defined(__lint)
1463
1464/* ARGSUSED */
1465void
1466outb(int port_address, uint8_t val)
1467{}
1468
1469#else	/* __lint */
1470
1471#if defined(__amd64)
1472
	/*
	 * void outb(int port_address, uint8_t val)
	 * Emit one byte on an I/O port.
	 */
1473	ENTRY(outb)
1474	movb	%sil, %al		/* data byte */
1475	movw	%di, %dx		/* port number */
1476	outb	(%dx)
1477	ret
1478	SET_SIZE(outb)
1479
1480#elif defined(__i386)
1481
1482	ENTRY(outb)
1483	movb	VAL(%esp), %al		/* data byte */
1484	movw	PORT(%esp), %dx		/* port number */
1485	outb	(%dx)
1486	ret
1487	SET_SIZE(outb)
1488
1489#endif	/* __i386 */
1490#endif	/* __lint */
1491
1492#if defined(__lint)
1493
1494/* ARGSUSED */
1495uint32_t
1496inl(int port_address)
1497{ return (0); }
1498
1499#else	/* __lint */
1500
1501#if defined(__amd64)
1502
	/*
	 * uint32_t inl(int port_address)
	 * Read 32 bits from an I/O port; the value is returned in %eax.
	 */
1503	ENTRY(inl)
1504	movw	%di, %dx		/* port number */
1505	xorl	%eax, %eax		/* start from a cleared result register */
1506	inl	(%dx)
1507	ret
1508	SET_SIZE(inl)
1509
1510#elif defined(__i386)
1511
1512	ENTRY(inl)
1513	movw	PORT(%esp), %dx		/* port number */
1514	inl	(%dx)			/* result lands in %eax */
1515	ret
1516	SET_SIZE(inl)
1517
1518#endif	/* __i386 */
1519#endif	/* __lint */
1520
1521#if defined(__lint)
1522
1523/* ARGSUSED */
1524uint16_t
1525inw(int port_address)
1526{ return (0); }
1527
1528#else	/* __lint */
1529
1530#if defined(__amd64)
1531
	/*
	 * uint16_t inw(int port_address)
	 * Read 16 bits from an I/O port.  %eax is cleared beforehand so that
	 * whatever the narrowed read leaves untouched comes back as zero.
	 */
1532	ENTRY(inw)
1533	movw	%di, %dx		/* port number */
1534	xorl	%eax, %eax		/* zero the whole result register */
1535	D16 inl	(%dx)
1536	ret
1537	SET_SIZE(inw)
1538
1539#elif defined(__i386)
1540
1541	ENTRY(inw)
1542	movw	PORT(%esp), %dx		/* port number */
1543	subl	%eax, %eax		/* zero the whole result register */
1544	D16 inl	(%dx)
1545	ret
1546	SET_SIZE(inw)
1547
1548#endif	/* __i386 */
1549#endif	/* __lint */
1550
1551
1552#if defined(__lint)
1553
1554/* ARGSUSED */
1555uint8_t
1556inb(int port_address)
1557{ return (0); }
1558
1559#else	/* __lint */
1560
1561#if defined(__amd64)
1562
	/*
	 * uint8_t inb(int port_address)
	 * Read one byte from an I/O port.  The byte arrives in %al; the rest
	 * of %eax is cleared first so the caller sees a zero-extended value.
	 */
1563	ENTRY(inb)
1564	movw	%di, %dx		/* port number */
1565	xorl	%eax, %eax		/* clear the bits inb does not write */
1566	inb	(%dx)
1567	ret
1568	SET_SIZE(inb)
1569
1570#elif defined(__i386)
1571
1572	ENTRY(inb)
1573	movw	PORT(%esp), %dx		/* port number */
1574	subl	%eax, %eax		/* clear the bits inb does not write */
1575	inb	(%dx)
1576	ret
1577	SET_SIZE(inb)
1578
1579#endif	/* __i386 */
1580#endif	/* __lint */
1581
1582
1583#if defined(__lint)
1584
1585/* ARGSUSED */
1586void
1587repoutsw(int port, uint16_t *addr, int cnt)
1588{}
1589
1590#else	/* __lint */
1591
1592#if defined(__amd64)
1593
	/*
	 * void repoutsw(int port, uint16_t *addr, int cnt)
	 * Write cnt items from the buffer at addr to an I/O port with a
	 * repeated string-output instruction.  addr is already in %rsi, the
	 * register outs reads from, so only the count and port are moved.
	 * D16 is defined outside this section; presumably it narrows outsl
	 * to 16-bit items.
	 */
1594	ENTRY(repoutsw)
1595	movl	%edx, %ecx		/* cnt -> rep counter (before %dx is reused) */
1596	movw	%di, %dx		/* port -> %dx */
1597	rep
1598	  D16 outsl
1599	ret
1600	SET_SIZE(repoutsw)
1601
1602#elif defined(__i386)
1603
1604	/*
1605	 * The arguments and saved registers are on the stack in the
1606	 *  following order:
1607	 *      |  cnt  |  +16
1608	 *      | *addr |  +12
1609	 *      | port  |  +8
1610	 *      |  eip  |  +4
1611	 *      |  esi  |  <-- %esp
1612	 * If additional values are pushed onto the stack, make sure
1613	 * to adjust the following constants accordingly.
1614	 */
1615	.set	PORT, 8
1616	.set	ADDR, 12
1617	.set	COUNT, 16
1618
1619	ENTRY(repoutsw)
1620	pushl	%esi			/* %esi is overwritten below; restored before ret */
1621	movl	PORT(%esp), %edx
1622	movl	ADDR(%esp), %esi	/* source buffer for outs */
1623	movl	COUNT(%esp), %ecx	/* rep counter */
1624	rep
1625	  D16 outsl
1626	popl	%esi
1627	ret
1628	SET_SIZE(repoutsw)
1629
1630#endif	/* __i386 */
1631#endif	/* __lint */
1632
1633
1634#if defined(__lint)
1635
1636/* ARGSUSED */
1637void
1638repinsw(int port_addr, uint16_t *addr, int cnt)
1639{}
1640
1641#else	/* __lint */
1642
1643#if defined(__amd64)
1644
	/*
	 * void repinsw(int port_addr, uint16_t *addr, int cnt)
	 * Read cnt items from an I/O port into the buffer at addr with a
	 * repeated string-input instruction.
	 *
	 * The ins instruction stores through %rdi, but on entry %rdi holds
	 * the port and the buffer pointer is in %rsi.  The pointer therefore
	 * has to be copied into %rdi once the port has been moved to %dx,
	 * exactly as repinsb and repinsd do; without that copy the data would
	 * be stored at the address formed by the port number.
	 * D16 is defined outside this section; presumably it narrows insl to
	 * 16-bit items.
	 */
1645	ENTRY(repinsw)
1646	movl	%edx, %ecx		/* cnt -> rep counter (before %dx is reused) */
1647	movw	%di, %dx		/* port -> %dx */
	movq	%rsi, %rdi		/* addr -> destination register for ins */
1648	rep
1649	  D16 insl
1650	ret
1651	SET_SIZE(repinsw)
1652
1653#elif defined(__i386)
1654
	/*
	 * PORT, ADDR and COUNT are the stack offsets set up for repoutsw;
	 * they assume exactly one register has been pushed.
	 */
1655	ENTRY(repinsw)
1656	pushl	%edi			/* %edi is overwritten below; restored before ret */
1657	movl	PORT(%esp), %edx
1658	movl	ADDR(%esp), %edi	/* destination buffer for ins */
1659	movl	COUNT(%esp), %ecx	/* rep counter */
1660	rep
1661	  D16 insl
1662	popl	%edi
1663	ret
1664	SET_SIZE(repinsw)
1665
1666#endif	/* __i386 */
1667#endif	/* __lint */
1668
1669
1670#if defined(__lint)
1671
1672/* ARGSUSED */
1673void
1674repinsb(int port, uint8_t *addr, int count)
1675{}
1676
1677#else	/* __lint */
1678
1679#if defined(__amd64)
1680
	/*
	 * void repinsb(int port, uint8_t *addr, int count)
	 * Read count bytes from an I/O port into the buffer at addr.
	 */
1681	ENTRY(repinsb)
1682	movl	%edx, %ecx		/* count -> rep counter (before %dx is reused) */
1683	movw	%di, %dx		/* port -> %dx (before %rdi is reused) */
1684	movq	%rsi, %rdi		/* addr -> destination register for ins */
1685	rep
1686	  insb
1687	ret
1688	SET_SIZE(repinsb)
1689
1690#elif defined(__i386)
1691
1692	/*
1693	 * The arguments and saved registers are on the stack in the
1694	 *  following order:
1695	 *      |  cnt  |  +16
1696	 *      | *addr |  +12
1697	 *      | port  |  +8
1698	 *      |  eip  |  +4
1699	 *      | saved |  <-- %esp   (%edi for repinsb/repinsd,
1700	 *                             %esi for repoutsb/repoutsd)
1701	 * If additional values are pushed onto the stack, make sure
1701	 * to adjust the following constants accordingly.
1702	 */
1703	.set	IO_PORT, 8
1704	.set	IO_ADDR, 12
1705	.set	IO_COUNT, 16
1706
1707	ENTRY(repinsb)
1708	pushl	%edi			/* %edi is overwritten below; restored before ret */
1709	movl	IO_ADDR(%esp), %edi	/* destination buffer for ins */
1710	movl	IO_COUNT(%esp), %ecx	/* rep counter */
1711	movl	IO_PORT(%esp), %edx
1712	rep
1713	  insb
1714	popl	%edi
1715	ret
1716	SET_SIZE(repinsb)
1717
1718#endif	/* __i386 */
1719#endif	/* __lint */
1720
1721
1722/*
1723 * Input a stream of 32-bit words.
1724 * NOTE: count is a DWORD count.
1725 */
1726#if defined(__lint)
1727
1728/* ARGSUSED */
1729void
1730repinsd(int port, uint32_t *addr, int count)
1731{}
1732
1733#else	/* __lint */
1734
1735#if defined(__amd64)
1736
	/*
	 * void repinsd(int port, uint32_t *addr, int count)
	 * Read count 32-bit items from an I/O port into the buffer at addr.
	 */
1737	ENTRY(repinsd)
1738	movl	%edx, %ecx		/* count -> rep counter (before %dx is reused) */
1739	movw	%di, %dx		/* port -> %dx (before %rdi is reused) */
1740	movq	%rsi, %rdi		/* addr -> destination register for ins */
1741	rep
1742	  insl
1743	ret
1744	SET_SIZE(repinsd)
1745
1746#elif defined(__i386)
1747
	/* IO_PORT/IO_ADDR/IO_COUNT assume exactly one pushed register. */
1748	ENTRY(repinsd)
1749	pushl	%edi			/* %edi is overwritten below; restored before ret */
1750	movl	IO_ADDR(%esp), %edi	/* destination buffer for ins */
1751	movl	IO_COUNT(%esp), %ecx	/* rep counter */
1752	movl	IO_PORT(%esp), %edx
1753	rep
1754	  insl
1755	popl	%edi
1756	ret
1757	SET_SIZE(repinsd)
1758
1759#endif	/* __i386 */
1760#endif	/* __lint */
1761
1762/*
1763 * Output a stream of bytes
1764 * NOTE: count is a byte count
1765 */
1766#if defined(__lint)
1767
1768/* ARGSUSED */
1769void
1770repoutsb(int port, uint8_t *addr, int count)
1771{}
1772
1773#else	/* __lint */
1774
1775#if defined(__amd64)
1776
	/*
	 * void repoutsb(int port, uint8_t *addr, int count)
	 * Write count bytes from the buffer at addr to an I/O port.  addr is
	 * already in %rsi, the register outs reads from.
	 */
1777	ENTRY(repoutsb)
1778	movl	%edx, %ecx		/* count -> rep counter (before %dx is reused) */
1779	movw	%di, %dx		/* port -> %dx */
1780	rep
1781	  outsb
1782	ret
1783	SET_SIZE(repoutsb)
1784
1785#elif defined(__i386)
1786
	/* IO_PORT/IO_ADDR/IO_COUNT assume exactly one pushed register. */
1787	ENTRY(repoutsb)
1788	pushl	%esi			/* %esi is overwritten below; restored before ret */
1789	movl	IO_ADDR(%esp), %esi	/* source buffer for outs */
1790	movl	IO_COUNT(%esp), %ecx	/* rep counter */
1791	movl	IO_PORT(%esp), %edx
1792	rep
1793	  outsb
1794	popl	%esi
1795	ret
1796	SET_SIZE(repoutsb)
1797
1798#endif	/* __i386 */
1799#endif	/* __lint */
1800
1801/*
1802 * Output a stream of 32-bit words
1803 * NOTE: count is a DWORD count
1804 */
1805#if defined(__lint)
1806
1807/* ARGSUSED */
1808void
1809repoutsd(int port, uint32_t *addr, int count)
1810{}
1811
1812#else	/* __lint */
1813
1814#if defined(__amd64)
1815
	/*
	 * void repoutsd(int port, uint32_t *addr, int count)
	 * Write count 32-bit items from the buffer at addr to an I/O port.
	 * addr is already in %rsi, the register outs reads from.
	 */
1816	ENTRY(repoutsd)
1817	movl	%edx, %ecx		/* count -> rep counter (before %dx is reused) */
1818	movw	%di, %dx		/* port -> %dx */
1819	rep
1820	  outsl
1821	ret
1822	SET_SIZE(repoutsd)
1823
1824#elif defined(__i386)
1825
	/* IO_PORT/IO_ADDR/IO_COUNT assume exactly one pushed register. */
1826	ENTRY(repoutsd)
1827	pushl	%esi			/* %esi is overwritten below; restored before ret */
1828	movl	IO_ADDR(%esp), %esi	/* source buffer for outs */
1829	movl	IO_COUNT(%esp), %ecx	/* rep counter */
1830	movl	IO_PORT(%esp), %edx
1831	rep
1832	  outsl
1833	popl	%esi
1834	ret
1835	SET_SIZE(repoutsd)
1836
1837#endif	/* __i386 */
1838#endif	/* __lint */
1839
1840/*
1841 * void int3(void)
1842 * void int18(void)
1843 * void int20(void)
1844 * void int_cmci(void)
1845 */
1846
1847#if defined(__lint)
1848
1849void
1850int3(void)
1851{}
1852
1853void
1854int18(void)
1855{}
1856
1857void
1858int20(void)
1859{}
1860
1861void
1862int_cmci(void)
1863{}
1864
1865#else	/* __lint */
1866
	/*
	 * C-callable wrappers that raise a software interrupt and return.
	 * The vector constants (T_BPTFLT, T_MCE, T_DBGENTR, T_ENOEXTFLT) are
	 * defined outside this section.
	 */
1867	ENTRY(int3)
1868	int	$T_BPTFLT
1869	ret
1870	SET_SIZE(int3)
1871
1872	ENTRY(int18)
1873	int	$T_MCE
1874	ret
1875	SET_SIZE(int18)
1876
	/*
	 * int20 raises T_DBGENTR only when the RB_DEBUG bit is set in
	 * boothowto; otherwise it returns without doing anything.
	 */
1877	ENTRY(int20)
1878	movl	boothowto, %eax
1879	andl	$RB_DEBUG, %eax		/* isolate the RB_DEBUG bit */
1880	jz	1f			/* bit clear: skip the interrupt */
1881
1882	int	$T_DBGENTR
18831:
1884	rep;	ret	/* use 2 byte return instruction when branch target */
1885			/* AMD Software Optimization Guide - Section 6.2 */
1886	SET_SIZE(int20)
1887
1888	ENTRY(int_cmci)
1889	int	$T_ENOEXTFLT
1890	ret
1891	SET_SIZE(int_cmci)
1892
1893#endif	/* __lint */
1894
1895#if defined(__lint)
1896
1897/* ARGSUSED */
1898int
1899scanc(size_t size, uchar_t *cp, uchar_t *table, uchar_t mask)
1900{ return (0); }
1901
1902#else	/* __lint */
1903
1904#if defined(__amd64)
1905
	/*
	 * int scanc(size_t size, uchar_t *cp, uchar_t *table, uchar_t mask)
	 * Walk the size bytes starting at cp and stop at the first byte b
	 * for which (table[b] & mask) is non-zero.  The return value is the
	 * number of bytes from the stopping point to the end of the range,
	 * so 0 means no byte matched.
	 */
1906	ENTRY(scanc)
1907					/* rdi == size */
1908					/* rsi == cp */
1909					/* rdx == table */
1910					/* rcx == mask */
1911	addq	%rsi, %rdi		/* end = &cp[size] */
1912.scanloop:
1913	cmpq	%rdi, %rsi		/* while (cp < end */
1914	jnb	.scandone
1915	movzbq	(%rsi), %r8		/* %r8 = *cp */
1916	incq	%rsi			/* cp++ */
1917	testb	%cl, (%r8, %rdx)
1918	jz	.scanloop		/*  && (table[*cp] & mask) == 0) */
1919	decq	%rsi			/* (fix post-increment) */
1920.scandone:
1921	movl	%edi, %eax
1922	subl	%esi, %eax		/* return (end - cp) */
1923	ret
1924	SET_SIZE(scanc)
1925
1926#elif defined(__i386)
1927
	/*
	 * Two registers are pushed first, so the arguments sit at
	 * 12(%esp) size, 16(%esp) cp, 20(%esp) table, 24(%esp) mask.
	 */
1928	ENTRY(scanc)
1929	pushl	%edi
1930	pushl	%esi
1931	movb	24(%esp), %cl		/* mask = %cl */
1932	movl	16(%esp), %esi		/* cp = %esi */
1933	movl	20(%esp), %edx		/* table = %edx */
1934	movl	%esi, %edi
1935	addl	12(%esp), %edi		/* end = &cp[size]; */
1936.scanloop:
1937	cmpl	%edi, %esi		/* while (cp < end */
1938	jnb	.scandone
1939	movzbl	(%esi),  %eax		/* %al = *cp */
1940	incl	%esi			/* cp++ */
1941	movb	(%edx,  %eax), %al	/* %al = table[*cp] */
1942	testb	%al, %cl
1943	jz	.scanloop		/*   && (table[*cp] & mask) == 0) */
1944	dec	%esi			/* post-incremented */
1945.scandone:
1946	movl	%edi, %eax
1947	subl	%esi, %eax		/* return (end - cp) */
1948	popl	%esi
1949	popl	%edi
1950	ret
1951	SET_SIZE(scanc)
1952
1953#endif	/* __i386 */
1954#endif	/* __lint */
1955
1956/*
1957 * Replacement functions for ones that are normally inlined.
1958 * In addition to the copy in i86.il, they are defined here just in case.
1959 */
1960
1961#if defined(__lint)
1962
1963ulong_t
1964intr_clear(void)
1965{ return (0); }
1966
1967ulong_t
1968clear_int_flag(void)
1969{ return (0); }
1970
1971#else	/* __lint */
1972
1973#if defined(__amd64)
1974
	/*
	 * ulong_t intr_clear(void) / ulong_t clear_int_flag(void)
	 * Both names label the same code.  The flags register as it was on
	 * entry is returned in %rax, and interrupts are then disabled through
	 * the CLI/CLIRET macros (defined outside this section).
	 *
	 * Under __xpv, unless xpv_panicking is non-zero, the PS_IE bit of the
	 * returned value is rebuilt from the event mask that CLIRET hands
	 * back: PS_IE is set only when bit 0 of that mask was clear.
	 */
1975	ENTRY(intr_clear)
1976	ENTRY(clear_int_flag)
1977	pushfq
1978	popq	%rax			/* %rax = flags on entry */
1979#if defined(__xpv)
1980	leaq	xpv_panicking, %rdi
1981	movl	(%rdi), %edi
1982	cmpl	$0, %edi
1983	jne	2f			/* panicking: take the plain CLI path */
1984	CLIRET(%rdi, %dl)	/* returns event mask in %dl */
1985	/*
1986	 * Synthesize the PS_IE bit from the event mask bit
1987	 */
1988	andq    $_BITNOT(PS_IE), %rax
1989	testb	$1, %dl
1990	jnz	1f			/* mask bit was set: leave PS_IE clear */
1991	orq	$PS_IE, %rax
19921:
1993	ret
19942:
1995#endif
1996	CLI(%rdi)
1997	ret
1998	SET_SIZE(clear_int_flag)
1999	SET_SIZE(intr_clear)
2000
2001#elif defined(__i386)
2002
	/* Same logic as the amd64 variant, using %eax/%edx/%cl. */
2003	ENTRY(intr_clear)
2004	ENTRY(clear_int_flag)
2005	pushfl
2006	popl	%eax			/* %eax = flags on entry */
2007#if defined(__xpv)
2008	leal	xpv_panicking, %edx
2009	movl	(%edx), %edx
2010	cmpl	$0, %edx
2011	jne	2f			/* panicking: take the plain CLI path */
2012	CLIRET(%edx, %cl)	/* returns event mask in %cl */
2013	/*
2014	 * Synthesize the PS_IE bit from the event mask bit
2015	 */
2016	andl    $_BITNOT(PS_IE), %eax
2017	testb	$1, %cl
2018	jnz	1f			/* mask bit was set: leave PS_IE clear */
2019	orl	$PS_IE, %eax
20201:
2021	ret
20222:
2023#endif
2024	CLI(%edx)
2025	ret
2026	SET_SIZE(clear_int_flag)
2027	SET_SIZE(intr_clear)
2028
2029#endif	/* __i386 */
2030#endif	/* __lint */
2031
2032#if defined(__lint)
2033
2034struct cpu *
2035curcpup(void)
2036{ return 0; }
2037
2038#else	/* __lint */
2039
2040#if defined(__amd64)
2041
	/*
	 * struct cpu *curcpup(void)
	 * Return the pointer stored at offset CPU_SELF of the %gs segment.
	 */
2042	ENTRY(curcpup)
2043	movq	%gs:CPU_SELF, %rax
2044	ret
2045	SET_SIZE(curcpup)
2046
2047#elif defined(__i386)
2048
2049	ENTRY(curcpup)
2050	movl	%gs:CPU_SELF, %eax
2051	ret
2052	SET_SIZE(curcpup)
2053
2054#endif	/* __i386 */
2055#endif	/* __lint */
2056
2057/* htonll(), ntohll(), htonl(), ntohl(), htons(), ntohs()
2058 * These functions reverse the byte order of the input parameter and return
2059 * the result.  This converts a value from host byte order
2060 * (little endian) to network byte order (big endian), or vice versa.
2061 */
2062
2063#if defined(__lint)
2064
2065uint64_t
2066htonll(uint64_t i)
2067{ return (i); }
2068
2069uint64_t
2070ntohll(uint64_t i)
2071{ return (i); }
2072
2073uint32_t
2074htonl(uint32_t i)
2075{ return (i); }
2076
2077uint32_t
2078ntohl(uint32_t i)
2079{ return (i); }
2080
2081uint16_t
2082htons(uint16_t i)
2083{ return (i); }
2084
2085uint16_t
2086ntohs(uint16_t i)
2087{ return (i); }
2088
2089#else	/* __lint */
2090
2091#if defined(__amd64)
2092
	/*
	 * Byte-swap helpers.  Each host-to-network name shares its code with
	 * the matching network-to-host name, since the swap is its own
	 * inverse.  The swap is done in the argument register and the result
	 * is then copied to the return register.
	 */
2093	ENTRY(htonll)
2094	ALTENTRY(ntohll)
2095	bswapq	%rdi			/* reverse all eight bytes */
2096	movq	%rdi, %rax
2097	ret
2098	SET_SIZE(ntohll)
2099	SET_SIZE(htonll)
2100
2101	/* XX64 there must be shorter sequences for this */
2102	ENTRY(htonl)
2103	ALTENTRY(ntohl)
2104	bswap	%edi			/* reverse the four bytes */
2105	movl	%edi, %eax
2106	ret
2107	SET_SIZE(ntohl)
2108	SET_SIZE(htonl)
2109
2110	/* XX64 there must be better sequences for this */
2111	ENTRY(htons)
2112	ALTENTRY(ntohs)
2113	bswap	%edi			/* swapped low half ends up in bits 31..16 */
2114	shrl	$16, %edi		/* bring it down; upper half becomes zero */
2115	movl	%edi, %eax
2116	ret
2117	SET_SIZE(ntohs)
2118	SET_SIZE(htons)
2119
2120#elif defined(__i386)
2121
	/*
	 * 64-bit variant: the word at 4(%esp) is swapped into %edx and the
	 * word at 8(%esp) into %eax, so the two halves trade places as well
	 * as having their bytes reversed.
	 */
2122	ENTRY(htonll)
2123	ALTENTRY(ntohll)
2124	movl	8(%esp), %eax
2125	bswap	%eax
2126	movl	4(%esp), %edx
2127	bswap	%edx
2128	ret
2129	SET_SIZE(ntohll)
2130	SET_SIZE(htonll)
2131
2132	ENTRY(htonl)
2133	ALTENTRY(ntohl)
2134	movl	4(%esp), %eax		/* fetch the argument */
2135	bswap	%eax			/* reverse the four bytes */
2136	ret
2137	SET_SIZE(ntohl)
2138	SET_SIZE(htonl)
2139
2140	ENTRY(htons)
2141	ALTENTRY(ntohs)
2142	movl	4(%esp), %eax		/* fetch the argument */
2143	bswap	%eax			/* swapped low half ends up in bits 31..16 */
2144	shrl	$16, %eax		/* bring it down; upper half becomes zero */
2145	ret
2146	SET_SIZE(ntohs)
2147	SET_SIZE(htons)
2148
2149#endif	/* __i386 */
2150#endif	/* __lint */
2151
2152
2153#if defined(__lint)
2154
2155/* ARGSUSED */
2156void
2157intr_restore(ulong_t i)
2158{ return; }
2159
2160/* ARGSUSED */
2161void
2162restore_int_flag(ulong_t i)
2163{ return; }
2164
2165#else	/* __lint */
2166
2167#if defined(__amd64)
2168
	/*
	 * void intr_restore(ulong_t i) / void restore_int_flag(ulong_t i)
	 * Both names label the same code.  If PS_IE is set in the argument,
	 * interrupts are re-enabled; if it is clear, the routine returns
	 * without touching anything.  Under __xpv the enable is done through
	 * IE_TO_EVENT_MASK (defined outside this section) and is skipped
	 * altogether when xpv_panicking is non-zero.
	 */
2169	ENTRY(intr_restore)
2170	ENTRY(restore_int_flag)
2171	testq	$PS_IE, %rdi
2172	jz	1f			/* PS_IE clear: nothing to restore */
2173#if defined(__xpv)
2174	leaq	xpv_panicking, %rsi
2175	movl	(%rsi), %esi
2176	cmpl	$0, %esi
2177	jne	1f
2178	/*
2179	 * Since we're -really- running unprivileged, our attempt
2180	 * to change the state of the IF bit will be ignored.
2181	 * The virtual IF bit is tweaked by CLI and STI.
2182	 */
2183	IE_TO_EVENT_MASK(%rsi, %rdi)
2184#else
2185	sti
2186#endif
21871:
2188	ret
2189	SET_SIZE(restore_int_flag)
2190	SET_SIZE(intr_restore)
2191
2192#elif defined(__i386)
2193
	/* Same logic as the amd64 variant; the argument is read at 4(%esp). */
2194	ENTRY(intr_restore)
2195	ENTRY(restore_int_flag)
2196	testl	$PS_IE, 4(%esp)
2197	jz	1f			/* PS_IE clear: nothing to restore */
2198#if defined(__xpv)
2199	leal	xpv_panicking, %edx
2200	movl	(%edx), %edx
2201	cmpl	$0, %edx
2202	jne	1f
2203	/*
2204	 * Since we're -really- running unprivileged, our attempt
2205	 * to change the state of the IF bit will be ignored.
2206	 * The virtual IF bit is tweaked by CLI and STI.
2207	 */
2208	IE_TO_EVENT_MASK(%edx, 4(%esp))
2209#else
2210	sti
2211#endif
22121:
2213	ret
2214	SET_SIZE(restore_int_flag)
2215	SET_SIZE(intr_restore)
2216
2217#endif	/* __i386 */
2218#endif	/* __lint */
2219
2220#if defined(__lint)
2221
2222void
2223sti(void)
2224{}
2225
2226void
2227cli(void)
2228{}
2229
2230#else	/* __lint */
2231
	/*
	 * void sti(void) / void cli(void)
	 * C-callable wrappers around the STI and CLI macros, which are
	 * defined outside this section.  CLI is handed a register
	 * (%rax or %eax); presumably scratch space for the macro.
	 */
2232	ENTRY(sti)
2233	STI
2234	ret
2235	SET_SIZE(sti)
2236
2237	ENTRY(cli)
2238#if defined(__amd64)
2239	CLI(%rax)
2240#elif defined(__i386)
2241	CLI(%eax)
2242#endif	/* __i386 */
2243	ret
2244	SET_SIZE(cli)
2245
2246#endif	/* __lint */
2247
2248#if defined(__lint)
2249
2250dtrace_icookie_t
2251dtrace_interrupt_disable(void)
2252{ return (0); }
2253
2254#else   /* __lint */
2255
2256#if defined(__amd64)
2257
	/*
	 * dtrace_icookie_t dtrace_interrupt_disable(void)
	 * Return the flags register as it was on entry (the "cookie") and
	 * disable interrupts.
	 *
	 * Under __xpv the disable is done by CLIRET and the PS_IE bit of the
	 * cookie is rebuilt from the event mask it returns.  Note that when
	 * xpv_panicking is non-zero this routine jumps straight to the
	 * return and runs neither CLIRET nor CLI.
	 */
2258	ENTRY(dtrace_interrupt_disable)
2259	pushfq
2260	popq	%rax			/* %rax = flags on entry */
2261#if defined(__xpv)
2262	leaq	xpv_panicking, %rdi
2263	movl	(%rdi), %edi
2264	cmpl	$0, %edi
2265	jne	.dtrace_interrupt_disable_done
2266	CLIRET(%rdi, %dl)	/* returns event mask in %dl */
2267	/*
2268	 * Synthesize the PS_IE bit from the event mask bit
2269	 */
2270	andq    $_BITNOT(PS_IE), %rax
2271	testb	$1, %dl
2272	jnz	.dtrace_interrupt_disable_done
2273	orq	$PS_IE, %rax
2274#else
2275	CLI(%rdx)
2276#endif
2277.dtrace_interrupt_disable_done:
2278	ret
2279	SET_SIZE(dtrace_interrupt_disable)
2280
2281#elif defined(__i386)
2282
	/* Same logic as the amd64 variant, using %eax/%edx/%cl. */
2283	ENTRY(dtrace_interrupt_disable)
2284	pushfl
2285	popl	%eax			/* %eax = flags on entry */
2286#if defined(__xpv)
2287	leal	xpv_panicking, %edx
2288	movl	(%edx), %edx
2289	cmpl	$0, %edx
2290	jne	.dtrace_interrupt_disable_done
2291	CLIRET(%edx, %cl)	/* returns event mask in %cl */
2292	/*
2293	 * Synthesize the PS_IE bit from the event mask bit
2294	 */
2295	andl    $_BITNOT(PS_IE), %eax
2296	testb	$1, %cl
2297	jnz	.dtrace_interrupt_disable_done
2298	orl	$PS_IE, %eax
2299#else
2300	CLI(%edx)
2301#endif
2302.dtrace_interrupt_disable_done:
2303	ret
2304	SET_SIZE(dtrace_interrupt_disable)
2305
2306#endif	/* __i386 */
2307#endif	/* __lint */
2308
2309#if defined(__lint)
2310
2311/*ARGSUSED*/
2312void
2313dtrace_interrupt_enable(dtrace_icookie_t cookie)
2314{}
2315
2316#else	/* __lint */
2317
2318#if defined(__amd64)
2319
	/*
	 * void dtrace_interrupt_enable(dtrace_icookie_t cookie)
	 * Load the cookie back into the flags register.  Under __xpv, unless
	 * xpv_panicking is non-zero, the cookie is additionally passed to
	 * IE_TO_EVENT_MASK (defined outside this section).
	 */
2320	ENTRY(dtrace_interrupt_enable)
2321	pushq	%rdi
2322	popfq				/* flags = cookie */
2323#if defined(__xpv)
2324	leaq	xpv_panicking, %rdx
2325	movl	(%rdx), %edx
2326	cmpl	$0, %edx
2327	jne	.dtrace_interrupt_enable_done
2328	/*
2329	 * Since we're -really- running unprivileged, our attempt
2330	 * to change the state of the IF bit will be ignored. The
2331	 * virtual IF bit is tweaked by CLI and STI.
2332	 */
2333	IE_TO_EVENT_MASK(%rdx, %rdi)
2334#endif
2335.dtrace_interrupt_enable_done:
2336	ret
2337	SET_SIZE(dtrace_interrupt_enable)
2338
2339#elif defined(__i386)
2340
2341	ENTRY(dtrace_interrupt_enable)
2342	movl	4(%esp), %eax		/* %eax = cookie */
2343	pushl	%eax
2344	popfl				/* flags = cookie */
2345#if defined(__xpv)
2346	leal	xpv_panicking, %edx
2347	movl	(%edx), %edx
2348	cmpl	$0, %edx
2349	jne	.dtrace_interrupt_enable_done
2350	/*
2351	 * Since we're -really- running unprivileged, our attempt
2352	 * to change the state of the IF bit will be ignored. The
2353	 * virtual IF bit is tweaked by CLI and STI.
2354	 */
2355	IE_TO_EVENT_MASK(%edx, %eax)
2356#endif
2357.dtrace_interrupt_enable_done:
2358	ret
2359	SET_SIZE(dtrace_interrupt_enable)
2360
2361#endif	/* __i386 */
2362#endif	/* __lint */
2363
2364
2365#if defined(__lint)
2366
2367void
2368dtrace_membar_producer(void)
2369{}
2370
2371void
2372dtrace_membar_consumer(void)
2373{}
2374
2375#else	/* __lint */
2376
	/*
	 * void dtrace_membar_producer(void)
	 * void dtrace_membar_consumer(void)
	 * Both routines return immediately; no fence instruction is issued.
	 * The guard above tests __lint, matching every other lint guard in
	 * this file and the #else/#endif annotations of this block.
	 */
2377	ENTRY(dtrace_membar_producer)
2378	rep;	ret	/* use 2 byte return instruction when branch target */
2379			/* AMD Software Optimization Guide - Section 6.2 */
2380	SET_SIZE(dtrace_membar_producer)
2381
2382	ENTRY(dtrace_membar_consumer)
2383	rep;	ret	/* use 2 byte return instruction when branch target */
2384			/* AMD Software Optimization Guide - Section 6.2 */
2385	SET_SIZE(dtrace_membar_consumer)
2386
2387#endif	/* __lint */
2388
2389#if defined(__lint)
2390
2391kthread_id_t
2392threadp(void)
2393{ return ((kthread_id_t)0); }
2394
2395#else	/* __lint */
2396
2397#if defined(__amd64)
2398
	/*
	 * kthread_id_t threadp(void)
	 * Return the pointer stored at offset CPU_THREAD of the %gs segment.
	 */
2399	ENTRY(threadp)
2400	movq	%gs:CPU_THREAD, %rax
2401	ret
2402	SET_SIZE(threadp)
2403
2404#elif defined(__i386)
2405
2406	ENTRY(threadp)
2407	movl	%gs:CPU_THREAD, %eax
2408	ret
2409	SET_SIZE(threadp)
2410
2411#endif	/* __i386 */
2412#endif	/* __lint */
2413
2414/*
2415 *   Checksum routine for Internet Protocol Headers
2416 */
2417
2418#if defined(__lint)
2419
2420/* ARGSUSED */
/*
 * C model of ip_ocsum, compiled only for lint.  Sums halfword_count 16-bit
 * words starting at address, folds the total to 16 bits, adds the caller's
 * partial checksum and folds again.  The result is not complemented.
 */
2421unsigned int
2422ip_ocsum(
2423	ushort_t *address,	/* ptr to 1st message buffer */
2424	int halfword_count,	/* length of data */
2425	unsigned int sum)	/* partial checksum */
2426{
2427	int		i;
2428	unsigned int	psum = 0;	/* partial sum */
2429
	/* accumulate every 16-bit word into a 32-bit total */
2430	for (i = 0; i < halfword_count; i++, address++) {
2431		psum += *address;
2432	}
2433
	/* fold the carries above bit 15 back into the low 16 bits */
2434	while ((psum >> 16) != 0) {
2435		psum = (psum & 0xffff) + (psum >> 16);
2436	}
2437
2438	psum += sum;
2439
	/* adding sum may have carried again, so fold once more */
2440	while ((psum >> 16) != 0) {
2441		psum = (psum & 0xffff) + (psum >> 16);
2442	}
2443
2444	return (psum);
2445}
2446
2447#else	/* __lint */
2448
2449#if defined(__amd64)
2450
	/*
	 * unsigned int ip_ocsum(ushort_t *address, int halfword_count,
	 *     unsigned int sum)
	 *
	 * amd64 register use after the set-up below:
	 *	%rsi	current data pointer
	 *	%ecx	halfwords still to be summed
	 *	%edx	one accumulator, seeded with the caller's partial sum
	 *	%eax	second accumulator, starts at zero
	 *
	 * The body is a chain of sixteen 32-bit add-with-carry instructions
	 * (64 bytes per pass) that alternate between the two accumulators.
	 * A final partial pass enters the chain part-way down through
	 * .ip_ocsum_jmptbl.  At .ip_ocsum_done the accumulators are added
	 * and folded to a 16-bit result in %eax.
	 */
2451	ENTRY(ip_ocsum)
2452	pushq	%rbp
2453	movq	%rsp, %rbp
2454#ifdef DEBUG
	/* DEBUG only: panic if address is below postbootkernelbase */
2455	movq	postbootkernelbase(%rip), %rax
2456	cmpq	%rax, %rdi
2457	jnb	1f
2458	xorl	%eax, %eax
2459	movq	%rdi, %rsi
2460	leaq	.ip_ocsum_panic_msg(%rip), %rdi
2461	call	panic
2462	/*NOTREACHED*/
2463.ip_ocsum_panic_msg:
2464	.string	"ip_ocsum: address 0x%p below kernelbase\n"
24651:
2466#endif
2467	movl	%esi, %ecx	/* halfword_count */
2468	movq	%rdi, %rsi	/* address */
2469				/* partial sum in %edx */
2470	xorl	%eax, %eax
2471	testl	%ecx, %ecx
2472	jz	.ip_ocsum_done
2473	testq	$3, %rsi
2474	jnz	.ip_csum_notaligned
2475.ip_csum_aligned:	/* XX64 opportunities for 8-byte operations? */
2476.next_iter:
2477	/* XX64 opportunities for prefetch? */
2478	/* XX64 compute csum with 64 bit quantities? */
2479	subl	$32, %ecx	/* a full pass consumes 32 halfwords */
2480	jl	.less_than_32
2481
2482	addl	0(%rsi), %edx
2483.only60:
2484	adcl	4(%rsi), %eax
2485.only56:
2486	adcl	8(%rsi), %edx
2487.only52:
2488	adcl	12(%rsi), %eax
2489.only48:
2490	adcl	16(%rsi), %edx
2491.only44:
2492	adcl	20(%rsi), %eax
2493.only40:
2494	adcl	24(%rsi), %edx
2495.only36:
2496	adcl	28(%rsi), %eax
2497.only32:
2498	adcl	32(%rsi), %edx
2499.only28:
2500	adcl	36(%rsi), %eax
2501.only24:
2502	adcl	40(%rsi), %edx
2503.only20:
2504	adcl	44(%rsi), %eax
2505.only16:
2506	adcl	48(%rsi), %edx
2507.only12:
2508	adcl	52(%rsi), %eax
2509.only8:
2510	adcl	56(%rsi), %edx
2511.only4:
2512	adcl	60(%rsi), %eax	/* could be adding -1 and -1 with a carry */
2513.only0:
2514	adcl	$0, %eax	/* could be adding -1 in eax with a carry */
2515	adcl	$0, %eax
2516
2517	addq	$64, %rsi
2518	testl	%ecx, %ecx
2519	jnz	.next_iter
2520
2521.ip_ocsum_done:
2522	addl	%eax, %edx
2523	adcl	$0, %edx
2524	movl	%edx, %eax	/* form a 16 bit checksum by */
2525	shrl	$16, %eax	/* adding two halves of 32 bit checksum */
2526	addw	%dx, %ax
2527	adcw	$0, %ax
2528	andl	$0xffff, %eax
2529	leave
2530	ret
2531
	/*
	 * The pointer has one of its two low bits set: add the first
	 * halfword on its own, step past it, and join the main path.
	 */
2532.ip_csum_notaligned:
2533	xorl	%edi, %edi
2534	movw	(%rsi), %di
2535	addl	%edi, %edx
2536	adcl	$0, %edx
2537	addq	$2, %rsi
2538	decl	%ecx
2539	jmp	.ip_csum_aligned
2540
	/*
	 * Fewer than 32 halfwords remain.  An odd trailing halfword is added
	 * separately.  %rsi is then moved back by (64 - remaining bytes) so
	 * the remaining dwords line up with the tail of the chain, and the
	 * chain is entered at the label picked from the table by the dword
	 * count.  %ecx is zeroed so the loop test after the chain falls
	 * through, and the carry flag is cleared for the first adcl.
	 */
2541.less_than_32:
2542	addl	$32, %ecx	/* undo the subtraction at .next_iter */
2543	testl	$1, %ecx
2544	jz	.size_aligned
2545	andl	$0xfe, %ecx
2546	movzwl	(%rsi, %rcx, 2), %edi
2547	addl	%edi, %edx
2548	adcl	$0, %edx
2549.size_aligned:
2550	movl	%ecx, %edi
2551	shrl	$1, %ecx	/* %ecx = number of dwords left */
2552	shl	$1, %edi	/* %edi = number of bytes left */
2553	subq	$64, %rdi
2554	addq	%rdi, %rsi
2555	leaq    .ip_ocsum_jmptbl(%rip), %rdi
2556	leaq	(%rdi, %rcx, 8), %rdi
2557	xorl	%ecx, %ecx
2558	clc
2559	jmp 	*(%rdi)
2560
2561	.align	8
2562.ip_ocsum_jmptbl:
2563	.quad	.only0, .only4, .only8, .only12, .only16, .only20
2564	.quad	.only24, .only28, .only32, .only36, .only40, .only44
2565	.quad	.only48, .only52, .only56, .only60
2566	SET_SIZE(ip_ocsum)
2567
2568#elif defined(__i386)
2569
	/*
	 * unsigned int ip_ocsum(ushort_t *address, int halfword_count,
	 *     unsigned int sum)
	 *
	 * i386 register use after the set-up below:
	 *	%esi	current data pointer		(from 8(%ebp))
	 *	%ecx	halfwords still to be summed	(from 12(%ebp))
	 *	%edx	one accumulator, seeded with sum (from 16(%ebp))
	 *	%eax	second accumulator, starts at zero
	 *
	 * The body is a chain of sixteen 32-bit add-with-carry instructions
	 * (64 bytes per pass) that alternate between the two accumulators.
	 * A final partial pass enters the chain part-way down through
	 * .ip_ocsum_jmptbl, which this variant places in .data.
	 */
2570	ENTRY(ip_ocsum)
2571	pushl	%ebp
2572	movl	%esp, %ebp
2573	pushl	%ebx
2574	pushl	%esi
2575	pushl	%edi
2576	movl	12(%ebp), %ecx	/* count of half words */
2577	movl	16(%ebp), %edx	/* partial checksum */
2578	movl	8(%ebp), %esi
2579	xorl	%eax, %eax
2580	testl	%ecx, %ecx
2581	jz	.ip_ocsum_done
2582
2583	testl	$3, %esi
2584	jnz	.ip_csum_notaligned
2585.ip_csum_aligned:
2586.next_iter:
2587	subl	$32, %ecx	/* a full pass consumes 32 halfwords */
2588	jl	.less_than_32
2589
2590	addl	0(%esi), %edx
2591.only60:
2592	adcl	4(%esi), %eax
2593.only56:
2594	adcl	8(%esi), %edx
2595.only52:
2596	adcl	12(%esi), %eax
2597.only48:
2598	adcl	16(%esi), %edx
2599.only44:
2600	adcl	20(%esi), %eax
2601.only40:
2602	adcl	24(%esi), %edx
2603.only36:
2604	adcl	28(%esi), %eax
2605.only32:
2606	adcl	32(%esi), %edx
2607.only28:
2608	adcl	36(%esi), %eax
2609.only24:
2610	adcl	40(%esi), %edx
2611.only20:
2612	adcl	44(%esi), %eax
2613.only16:
2614	adcl	48(%esi), %edx
2615.only12:
2616	adcl	52(%esi), %eax
2617.only8:
2618	adcl	56(%esi), %edx
2619.only4:
2620	adcl	60(%esi), %eax	/* We could be adding -1 and -1 with a carry */
2621.only0:
2622	adcl	$0, %eax	/* we could be adding -1 in eax with a carry */
2623	adcl	$0, %eax
2624
2625	addl	$64, %esi
2626	andl	%ecx, %ecx	/* sets ZF when no halfwords remain */
2627	jnz	.next_iter
2628
2629.ip_ocsum_done:
2630	addl	%eax, %edx
2631	adcl	$0, %edx
2632	movl	%edx, %eax	/* form a 16 bit checksum by */
2633	shrl	$16, %eax	/* adding two halves of 32 bit checksum */
2634	addw	%dx, %ax
2635	adcw	$0, %ax
2636	andl	$0xffff, %eax
2637	popl	%edi		/* restore registers */
2638	popl	%esi
2639	popl	%ebx
2640	leave
2641	ret
2642
	/*
	 * The pointer has one of its two low bits set: add the first
	 * halfword on its own, step past it, and join the main path.
	 */
2643.ip_csum_notaligned:
2644	xorl	%edi, %edi
2645	movw	(%esi), %di
2646	addl	%edi, %edx
2647	adcl	$0, %edx
2648	addl	$2, %esi
2649	decl	%ecx
2650	jmp	.ip_csum_aligned
2651
	/*
	 * Fewer than 32 halfwords remain.  An odd trailing halfword is added
	 * separately.  %esi is then moved back by (64 - remaining bytes) so
	 * the remaining dwords line up with the tail of the chain, and the
	 * chain is entered at the label picked from the table by the dword
	 * count.  %ecx is zeroed so the loop test after the chain falls
	 * through, and the carry flag is cleared for the first adcl.
	 */
2652.less_than_32:
2653	addl	$32, %ecx	/* undo the subtraction at .next_iter */
2654	testl	$1, %ecx
2655	jz	.size_aligned
2656	andl	$0xfe, %ecx
2657	movzwl	(%esi, %ecx, 2), %edi
2658	addl	%edi, %edx
2659	adcl	$0, %edx
2660.size_aligned:
2661	movl	%ecx, %edi
2662	shrl	$1, %ecx	/* %ecx = number of dwords left */
2663	shl	$1, %edi	/* %edi = number of bytes left */
2664	subl	$64, %edi
2665	addl	%edi, %esi
2666	movl	$.ip_ocsum_jmptbl, %edi
2667	lea	(%edi, %ecx, 4), %edi
2668	xorl	%ecx, %ecx
2669	clc
2670	jmp 	*(%edi)
2671	SET_SIZE(ip_ocsum)
2672
2673	.data
2674	.align	4
2675
2676.ip_ocsum_jmptbl:
2677	.long	.only0, .only4, .only8, .only12, .only16, .only20
2678	.long	.only24, .only28, .only32, .only36, .only40, .only44
2679	.long	.only48, .only52, .only56, .only60
2680
2681
2682#endif	/* __i386 */
2683#endif	/* __lint */
2684
2685/*
2686 * multiply two long numbers and yield a u_longlong_t result, callable from C.
2687 * Provided to manipulate hrtime_t values.
2688 */
2689#if defined(__lint)
2690
2691/* result = a * b; */
2692
2693/* ARGSUSED */
2694unsigned long long
2695mul32(uint_t a, uint_t b)
2696{ return (0); }
2697
2698#else	/* __lint */
2699
2700#if defined(__amd64)
2701
	/*
	 * unsigned long long mul32(uint_t a, uint_t b)
	 * Unsigned 32 x 32 -> 64 bit multiply.  mull leaves the product split
	 * across %edx:%eax, so the two halves are merged into %rax.
	 */
2702	ENTRY(mul32)
2703	movl	%edi, %eax		/* a is the implicit mull operand */
2704	xorl	%edx, %edx	/* XX64 joe, paranoia? */
2705	mull	%esi			/* %edx:%eax = a * b */
2706	shlq	$32, %rdx		/* move the high half into place */
2707	orq	%rdx, %rax		/* and combine it with the low half */
2708	ret
2709	SET_SIZE(mul32)
2710
2711#elif defined(__i386)
2712
	/* The product stays in %edx:%eax, which is how it is returned. */
2713	ENTRY(mul32)
2714	movl	4(%esp), %ecx		/* a */
2715	movl	8(%esp), %eax		/* b, the implicit mull operand */
2716	mull	%ecx
2717	ret
2718	SET_SIZE(mul32)
2719
2720#endif	/* __i386 */
2721#endif	/* __lint */
2722
2723#if defined(notused)
2724#if defined(__lint)
2725/* ARGSUSED */
2726void
2727load_pte64(uint64_t *pte, uint64_t pte_value)
2728{}
2729#else	/* __lint */
	/*
	 * void load_pte64(uint64_t *pte, uint64_t pte_value)
	 * Only assembled when "notused" is defined.  Stores the 64-bit value
	 * as two 32-bit writes: the upper word at pte+4 first, then the
	 * lower word at pte.
	 */
2730	.globl load_pte64
2731load_pte64:
2732	movl	4(%esp), %eax		/* %eax = pte */
2733	movl	8(%esp), %ecx		/* low 32 bits of pte_value */
2734	movl	12(%esp), %edx		/* high 32 bits of pte_value */
2735	movl	%edx, 4(%eax)
2736	movl	%ecx, (%eax)
2737	ret
2738#endif	/* __lint */
2739#endif	/* notused */
2740
2741#if defined(__lint)
2742
2743/*ARGSUSED*/
2744void
2745scan_memory(caddr_t addr, size_t size)
2746{}
2747
2748#else	/* __lint */
2749
2750#if defined(__amd64)
2751
	/*
	 * void scan_memory(caddr_t addr, size_t size)
	 * Read the range starting at addr with a repeated string load,
	 * discarding the data.  The size is converted to whole 8-byte units
	 * (4-byte units on i386), so any remainder bytes are not read.
	 * Nothing is read when that unit count is zero.
	 */
2752	ENTRY(scan_memory)
2753	shrq	$3, %rsi	/* convert %rsi from byte to quadword count */
2754	jz	.scanm_done
2755	movq	%rsi, %rcx	/* move count into rep control register */
2756	movq	%rdi, %rsi	/* move addr into lodsq control reg. */
2757	rep lodsq		/* scan the memory range */
2758.scanm_done:
2759	rep;	ret	/* use 2 byte return instruction when branch target */
2760			/* AMD Software Optimization Guide - Section 6.2 */
2761	SET_SIZE(scan_memory)
2762
2763#elif defined(__i386)
2764
	/*
	 * %ecx and %esi are saved and restored around the scan.  With those
	 * two pushes, addr is at 12(%esp) and size at 16(%esp).
	 */
2765	ENTRY(scan_memory)
2766	pushl	%ecx
2767	pushl	%esi
2768	movl	16(%esp), %ecx	/* move 2nd arg into rep control register */
2769	shrl	$2, %ecx	/* convert from byte count to 32-bit word count */
2770	jz	.scanm_done
2771	movl	12(%esp), %esi	/* move 1st arg into lodsl source register */
2772	.byte	0xf3		/* rep prefix.  lame assembler.  sigh. */
2773	lodsl
2774.scanm_done:
2775	popl	%esi
2776	popl	%ecx
2777	ret
2778	SET_SIZE(scan_memory)
2779
2780#endif	/* __i386 */
2781#endif	/* __lint */
2782
2783
2784#if defined(__lint)
2785
2786/*ARGSUSED */
2787int
2788lowbit(ulong_t i)
2789{ return (0); }
2790
2791#else	/* __lint */
2792
2793#if defined(__amd64)
2794
	/*
	 * int lowbit(ulong_t i)
	 * Return the 1-based position of the lowest set bit of i.
	 *
	 * %eax is preloaded with -1 so that, if bsf leaves its destination
	 * untouched for a zero operand, the increment yields 0.
	 * NOTE(review): that relies on processor behaviour for a zero source
	 * operand, which is not established by this file - confirm against
	 * the vendor manuals.
	 */
2795	ENTRY(lowbit)
2796	movl	$-1, %eax
2797	bsfq	%rdi, %rax		/* %rax = index of lowest set bit */
2798	incl	%eax			/* convert 0-based index to 1-based */
2799	ret
2800	SET_SIZE(lowbit)
2801
2802#elif defined(__i386)
2803
2804	ENTRY(lowbit)
2805	movl	$-1, %eax
2806	bsfl	4(%esp), %eax		/* %eax = index of lowest set bit */
2807	incl	%eax			/* convert 0-based index to 1-based */
2808	ret
2809	SET_SIZE(lowbit)
2810
2811#endif	/* __i386 */
2812#endif	/* __lint */
2813
2814#if defined(__lint)
2815
2816/*ARGSUSED*/
2817int
2818highbit(ulong_t i)
2819{ return (0); }
2820
2821#else	/* __lint */
2822
2823#if defined(__amd64)
2824
	/*
	 * int highbit(ulong_t i)
	 * Return the 1-based position of the highest set bit of i.
	 *
	 * %eax is preloaded with -1 so that, if bsr leaves its destination
	 * untouched for a zero operand, the increment yields 0.
	 * NOTE(review): that relies on processor behaviour for a zero source
	 * operand, which is not established by this file - confirm against
	 * the vendor manuals.
	 */
2825	ENTRY(highbit)
2826	movl	$-1, %eax
2827	bsrq	%rdi, %rax		/* %rax = index of highest set bit */
2828	incl	%eax			/* convert 0-based index to 1-based */
2829	ret
2830	SET_SIZE(highbit)
2831
2832#elif defined(__i386)
2833
2834	ENTRY(highbit)
2835	movl	$-1, %eax
2836	bsrl	4(%esp), %eax		/* %eax = index of highest set bit */
2837	incl	%eax			/* convert 0-based index to 1-based */
2838	ret
2839	SET_SIZE(highbit)
2840
2841#endif	/* __i386 */
2842#endif	/* __lint */
2843
2844#if defined(__lint)
2845
2846/*ARGSUSED*/
2847uint64_t
2848rdmsr(uint_t r)
2849{ return (0); }
2850
2851/*ARGSUSED*/
2852void
2853wrmsr(uint_t r, const uint64_t val)
2854{}
2855
2856/*ARGSUSED*/
2857uint64_t
2858xrdmsr(uint_t r)
2859{ return (0); }
2860
2861/*ARGSUSED*/
2862void
2863xwrmsr(uint_t r, const uint64_t val)
2864{}
2865
2866void
2867invalidate_cache(void)
2868{}
2869
2870#else  /* __lint */
2871
/*
 * Model-specific register access.
 *	uint64_t rdmsr(uint_t r)		read MSR r
 *	void wrmsr(uint_t r, uint64_t val)	write val to MSR r
 *	xrdmsr / xwrmsr				same, but with XMSR_ACCESS_VAL
 *						loaded into %edi beforehand
 *	void invalidate_cache(void)		execute wbinvd
 * The rdmsr/wrmsr instructions take the MSR number in %ecx and move the
 * value through %edx:%eax (high:low), which is why the amd64 variants
 * split and merge the 64-bit value around them.
 */
2872#define	XMSR_ACCESS_VAL		$0x9c5a203a
2873
2874#if defined(__amd64)
2875
2876	ENTRY(rdmsr)
2877	movl	%edi, %ecx		/* MSR number */
2878	rdmsr
2879	shlq	$32, %rdx		/* merge %edx:%eax into %rax */
2880	orq	%rdx, %rax
2881	ret
2882	SET_SIZE(rdmsr)
2883
2884	ENTRY(wrmsr)
2885	movq	%rsi, %rdx
2886	shrq	$32, %rdx		/* %edx = high 32 bits of val */
2887	movl	%esi, %eax		/* %eax = low 32 bits of val */
2888	movl	%edi, %ecx		/* MSR number */
2889	wrmsr
2890	ret
2891	SET_SIZE(wrmsr)
2892
2893	ENTRY(xrdmsr)
2894	pushq	%rbp
2895	movq	%rsp, %rbp
2896	movl	%edi, %ecx		/* MSR number, saved before %edi is reused */
2897	movl	XMSR_ACCESS_VAL, %edi	/* this value is needed to access MSR */
2898	rdmsr
2899	shlq	$32, %rdx		/* merge %edx:%eax into %rax */
2900	orq	%rdx, %rax
2901	leave
2902	ret
2903	SET_SIZE(xrdmsr)
2904
2905	ENTRY(xwrmsr)
2906	pushq	%rbp
2907	movq	%rsp, %rbp
2908	movl	%edi, %ecx		/* MSR number, saved before %edi is reused */
2909	movl	XMSR_ACCESS_VAL, %edi	/* this value is needed to access MSR */
2910	movq	%rsi, %rdx
2911	shrq	$32, %rdx		/* %edx = high 32 bits of val */
2912	movl	%esi, %eax		/* %eax = low 32 bits of val */
2913	wrmsr
2914	leave
2915	ret
2916	SET_SIZE(xwrmsr)
2917
2918#elif defined(__i386)
2919
	/* The 64-bit result is left in %edx:%eax, which is how it is returned. */
2920	ENTRY(rdmsr)
2921	movl	4(%esp), %ecx		/* MSR number */
2922	rdmsr
2923	ret
2924	SET_SIZE(rdmsr)
2925
2926	ENTRY(wrmsr)
2927	movl	4(%esp), %ecx		/* MSR number */
2928	movl	8(%esp), %eax		/* low 32 bits of val */
2929	movl	12(%esp), %edx		/* high 32 bits of val */
2930	wrmsr
2931	ret
2932	SET_SIZE(wrmsr)
2933
	/* %edi is saved and restored around the access in both x variants. */
2934	ENTRY(xrdmsr)
2935	pushl	%ebp
2936	movl	%esp, %ebp
2937	movl	8(%esp), %ecx		/* MSR number (after the %ebp push) */
2938	pushl	%edi
2939	movl	XMSR_ACCESS_VAL, %edi	/* this value is needed to access MSR */
2940	rdmsr
2941	popl	%edi
2942	leave
2943	ret
2944	SET_SIZE(xrdmsr)
2945
2946	ENTRY(xwrmsr)
2947	pushl	%ebp
2948	movl	%esp, %ebp
2949	movl	8(%esp), %ecx		/* MSR number (after the %ebp push) */
2950	movl	12(%esp), %eax		/* low 32 bits of val */
2951	movl	16(%esp), %edx		/* high 32 bits of val */
2952	pushl	%edi
2953	movl	XMSR_ACCESS_VAL, %edi	/* this value is needed to access MSR */
2954	wrmsr
2955	popl	%edi
2956	leave
2957	ret
2958	SET_SIZE(xwrmsr)
2959
2960#endif	/* __i386 */
2961
2962	ENTRY(invalidate_cache)
2963	wbinvd
2964	ret
2965	SET_SIZE(invalidate_cache)
2966
2967#endif	/* __lint */
2968
2969#if defined(__lint)
2970
2971/*ARGSUSED*/
2972void
2973getcregs(struct cregs *crp)
2974{}
2975
2976#else	/* __lint */
2977
2978#if defined(__amd64)
2979
	/*
	 * void getcregs(struct cregs *crp)
	 * Fill *crp (pointer in %rdi) with the current control-register
	 * state.  The CREG_* offsets and CREGSZ are defined outside this
	 * section.
	 *
	 * __xpv build: the structure is zeroed with bzero and only
	 * %cr0, %cr2, %cr3 and %cr4 are recorded.
	 * Otherwise: the GDT, IDT, LDT and task register values, %cr0, %cr2,
	 * %cr3, %cr4, %cr8 and the two MSRs named below are recorded.
	 */
2980	ENTRY_NP(getcregs)
2981#if defined(__xpv)
2982	/*
2983	 * Only a few of the hardware control registers or descriptor tables
2984	 * are directly accessible to us, so just zero the structure.
2985	 *
2986	 * XXPV	Perhaps it would be helpful for the hypervisor to return
2987	 *	virtualized versions of these for post-mortem use.
2988	 *	(Need to reevaluate - perhaps it already does!)
2989	 */
2990	pushq	%rdi		/* save *crp */
2991	movq	$CREGSZ, %rsi
2992	call	bzero
2993	popq	%rdi
2994
2995	/*
2996	 * Dump what limited information we can
2997	 */
2998	movq	%cr0, %rax
2999	movq	%rax, CREG_CR0(%rdi)	/* cr0 */
3000	movq	%cr2, %rax
3001	movq	%rax, CREG_CR2(%rdi)	/* cr2 */
3002	movq	%cr3, %rax
3003	movq	%rax, CREG_CR3(%rdi)	/* cr3 */
3004	movq	%cr4, %rax
3005	movq	%rax, CREG_CR4(%rdi)	/* cr4 */
3006
3007#else	/* __xpv */
3008
	/*
	 * GETMSR(r, off, d): read MSR r and store its low and high 32-bit
	 * halves at off(d) and off+4(d).  Clobbers %eax, %ecx and %edx.
	 */
3009#define	GETMSR(r, off, d)	\
3010	movl	$r, %ecx;	\
3011	rdmsr;			\
3012	movl	%eax, off(d);	\
3013	movl	%edx, off+4(d)
3014
	/*
	 * Each descriptor-table field is partly or wholly cleared before the
	 * store instruction runs, because the store writes fewer bytes (see
	 * the byte counts noted on each line) than the field being zeroed.
	 */
3015	xorl	%eax, %eax
3016	movq	%rax, CREG_GDT+8(%rdi)
3017	sgdt	CREG_GDT(%rdi)		/* 10 bytes */
3018	movq	%rax, CREG_IDT+8(%rdi)
3019	sidt	CREG_IDT(%rdi)		/* 10 bytes */
3020	movq	%rax, CREG_LDT(%rdi)
3021	sldt	CREG_LDT(%rdi)		/* 2 bytes */
3022	movq	%rax, CREG_TASKR(%rdi)
3023	str	CREG_TASKR(%rdi)	/* 2 bytes */
3024	movq	%cr0, %rax
3025	movq	%rax, CREG_CR0(%rdi)	/* cr0 */
3026	movq	%cr2, %rax
3027	movq	%rax, CREG_CR2(%rdi)	/* cr2 */
3028	movq	%cr3, %rax
3029	movq	%rax, CREG_CR3(%rdi)	/* cr3 */
3030	movq	%cr4, %rax
3031	movq	%rax, CREG_CR4(%rdi)	/* cr4 */
3032	movq	%cr8, %rax
3033	movq	%rax, CREG_CR8(%rdi)	/* cr8 */
3034	GETMSR(MSR_AMD_KGSBASE, CREG_KGSBASE, %rdi)
3035	GETMSR(MSR_AMD_EFER, CREG_EFER, %rdi)
3036#endif	/* __xpv */
3037	ret
3038	SET_SIZE(getcregs)
3039
3040#undef GETMSR
3041
3042#elif defined(__i386)
3043
/*
 * getcregs(struct cregs *crp), 32-bit kernel; crp is the single stack
 * argument and is kept in %edx throughout.
 *
 * Under __xpv the structure is first zeroed with bzero(crp, CREGSZ) and
 * %cr4 is read unconditionally.  Otherwise the GDT/IDT descriptors and the
 * LDT and task register selectors are stored as well, and %cr4 is read only
 * when X86FSET_LARGEPAGE is set in x86_featureset (0 is recorded if not).
 * %cr0, %cr2 and %cr3 are recorded in both configurations.
 */
	ENTRY_NP(getcregs)
	movl	4(%esp), %edx		/* %edx = crp */
#if defined(__xpv)
	/*
	 * Only a few of the hardware control registers or descriptor tables
	 * are directly accessible to us, so just zero the structure.
	 *
	 * XXPV	Perhaps it would be helpful for the hypervisor to return
	 *	virtualized versions of these for post-mortem use.
	 *	(Need to reevaluate - perhaps it already does!)
	 */
	pushl	$CREGSZ
	pushl	%edx
	call	bzero			/* bzero(crp, CREGSZ) */
	addl	$8, %esp
	movl	4(%esp), %edx		/* reload crp after the call */
#else	/* __xpv */
	movw	$0, CREG_GDT+6(%edx)	/* clear the 2 bytes past the */
	movw	$0, CREG_IDT+6(%edx)	/* 6-byte sgdt/sidt images */
	sgdt	CREG_GDT(%edx)		/* gdt */
	sidt	CREG_IDT(%edx)		/* idt */
	sldt	CREG_LDT(%edx)		/* ldt */
	str	CREG_TASKR(%edx)	/* task */
#endif	/* __xpv */

	/*
	 * Dump the control registers common to both configurations.
	 */
	movl	%cr0, %eax
	movl	%eax, CREG_CR0(%edx)	/* cr0 */
	movl	%cr2, %eax
	movl	%eax, CREG_CR2(%edx)	/* cr2 */
	movl	%cr3, %eax
	movl	%eax, CREG_CR3(%edx)	/* cr3 */

#if defined(__xpv)
	movl	%cr4, %eax
#else	/* __xpv */
	xorl	%eax, %eax		/* value recorded when %cr4 is not read */
	bt	$X86FSET_LARGEPAGE, x86_featureset
	jnc	1f
	movl	%cr4, %eax
1:
#endif	/* __xpv */
	movl	%eax, CREG_CR4(%edx)	/* cr4 */
	ret
	SET_SIZE(getcregs)
3099
3100#endif	/* __i386 */
3101#endif	/* __lint */
3102
3103
3104/*
3105 * A panic trigger is a word which is updated atomically and can only be set
3106 * once.  We atomically store 0xDEFACEDD and load the old value.  If the
3107 * previous value was 0, we succeed and return 1; otherwise return 0.
3108 * This allows a partially corrupt trigger to still trigger correctly.  DTrace
3109 * has its own version of this function to allow it to panic correctly from
3110 * probe context.
3111 */
3112#if defined(__lint)
3113
/*
 * Lint-only stubs for the two trigger routines.  Both simply return 0;
 * the real implementations are the assembly routines in this file.
 */
3114/*ARGSUSED*/
3115int
3116panic_trigger(int *tp)
3117{ return (0); }
3118
3119/*ARGSUSED*/
3120int
3121dtrace_panic_trigger(int *tp)
3122{ return (0); }
3123
3124#else	/* __lint */
3125
3126#if defined(__amd64)
3127
/*
 * int panic_trigger(int *tp), 64-bit kernel; tp arrives in %rdi.
 *
 * Atomically exchanges 0xdefacedd with *tp and returns 1 if the previous
 * contents were zero, 0 otherwise.  Uses %edx as scratch.
 */
	ENTRY_NP(panic_trigger)
	movl	$0xdefacedd, %edx
	lock
	  xchgl	%edx, (%rdi)		/* %edx = previous *tp */
	xorl	%eax, %eax
	testl	%edx, %edx
	sete	%al			/* %eax = (previous *tp == 0) */
	ret
	SET_SIZE(panic_trigger)
3140
/*
 * int dtrace_panic_trigger(int *tp), 64-bit kernel; tp arrives in %rdi.
 *
 * Same operation as panic_trigger, kept as a separate self-contained copy:
 * atomically exchanges 0xdefacedd with *tp and returns 1 if the previous
 * contents were zero, 0 otherwise.  Makes no calls; uses %edx as scratch.
 */
	ENTRY_NP(dtrace_panic_trigger)
	movl	$0xdefacedd, %edx
	lock
	  xchgl	%edx, (%rdi)		/* %edx = previous *tp */
	xorl	%eax, %eax
	testl	%edx, %edx
	sete	%al			/* %eax = (previous *tp == 0) */
	ret
	SET_SIZE(dtrace_panic_trigger)
3153
3154#elif defined(__i386)
3155
/*
 * int panic_trigger(int *tp), 32-bit kernel; tp is the stack argument.
 *
 * Atomically exchanges 0xdefacedd with *tp and returns 1 if the previous
 * contents were zero, 0 otherwise.  Uses %edx as scratch.
 */
	ENTRY_NP(panic_trigger)
	movl	4(%esp), %edx		/* %edx = address of trigger */
	movl	$0xdefacedd, %eax
	lock
	xchgl	%eax, (%edx)		/* %eax = previous trigger value */
	testl	%eax, %eax
	sete	%al			/* %al = (previous value == 0) */
	movzbl	%al, %eax		/* widen to the int return value */
	ret
	SET_SIZE(panic_trigger)
3168
/*
 * int dtrace_panic_trigger(int *tp), 32-bit kernel; tp is the stack
 * argument.
 *
 * Same operation as panic_trigger, kept as a separate self-contained copy:
 * atomically exchanges 0xdefacedd with *tp and returns 1 if the previous
 * contents were zero, 0 otherwise.  Makes no calls; uses %edx as scratch.
 */
	ENTRY_NP(dtrace_panic_trigger)
	movl	4(%esp), %edx		/* %edx = address of trigger */
	movl	$0xdefacedd, %eax
	lock
	xchgl	%eax, (%edx)		/* %eax = previous trigger value */
	testl	%eax, %eax
	sete	%al			/* %al = (previous value == 0) */
	movzbl	%al, %eax		/* widen to the int return value */
	ret
	SET_SIZE(dtrace_panic_trigger)
3181
3182#endif	/* __i386 */
3183#endif	/* __lint */
3184
3185/*
3186 * The panic() and cmn_err() functions invoke vpanic() as a common entry point
3187 * into the panic code implemented in panicsys().  vpanic() is responsible
3188 * for passing through the format string and arguments, and constructing a
3189 * regs structure on the stack into which it saves the current register
3190 * values.  If we are not dying due to a fatal trap, these registers will
3191 * then be preserved in panicbuf as the current processor state.  Before
3192 * invoking panicsys(), vpanic() activates the first panic trigger (see
3193 * common/os/panic.c) and switches to the panic_stack if successful.  Note that
3194 * DTrace takes a slightly different panic path if it must panic from probe
3195 * context.  Instead of calling panic, it calls into dtrace_vpanic(), which
3196 * sets up the initial stack as vpanic does, calls dtrace_panic_trigger(), and
3197 * branches back into vpanic().
3198 */
3199#if defined(__lint)
3200
/*
 * Lint-only stubs with empty bodies; the real vpanic() and
 * dtrace_vpanic() are the assembly routines in this file.
 */
3201/*ARGSUSED*/
3202void
3203vpanic(const char *format, va_list alist)
3204{}
3205
3206/*ARGSUSED*/
3207void
3208dtrace_vpanic(const char *format, va_list alist)
3209{}
3210
3211#else	/* __lint */
3212
3213#if defined(__amd64)
3214
/*
 * vpanic(format, alist), 64-bit kernel.
 *
 * Pushes the caller-visible registers (layout shown beside the pushes),
 * remembers that save area in %rbx, calls panic_trigger(&panic_quiesce),
 * optionally switches to panic_stack, builds a struct regs on the stack
 * from the saved values and calls panicsys(format, alist, rp,
 * on_panic_stack).  dtrace_vpanic() builds the identical save area and
 * jumps to vpanic_common, so the push order and the 0x.. offsets used
 * below must stay in step between the two routines.
 */
3215	ENTRY_NP(vpanic)			/* Initial stack layout: */
3216
3217	pushq	%rbp				/* | %rip | 	0x60	*/
3218	movq	%rsp, %rbp			/* | %rbp |	0x58	*/
3219	pushfq					/* | rfl  |	0x50	*/
3220	pushq	%r11				/* | %r11 |	0x48	*/
3221	pushq	%r10				/* | %r10 |	0x40	*/
3222	pushq	%rbx				/* | %rbx |	0x38	*/
3223	pushq	%rax				/* | %rax |	0x30	*/
3224	pushq	%r9				/* | %r9  |	0x28	*/
3225	pushq	%r8				/* | %r8  |	0x20	*/
3226	pushq	%rcx				/* | %rcx |	0x18	*/
3227	pushq	%rdx				/* | %rdx |	0x10	*/
3228	pushq	%rsi				/* | %rsi |	0x8 alist */
3229	pushq	%rdi				/* | %rdi |	0x0 format */
3230
3231	movq	%rsp, %rbx			/* %rbx = current %rsp */
3232
3233	leaq	panic_quiesce(%rip), %rdi	/* %rdi = &panic_quiesce */
3234	call	panic_trigger			/* %eax = panic_trigger() */
3235
3236vpanic_common:
3237	/*
3238	 * The panic_trigger result is in %eax from the call above, and
3239	 * dtrace_panic places it in %eax before branching here.
3240	 * We stash the result in %r11d so that it is still available
3241	 * when it is passed to panicsys() as on_panic_stack below.
3242	 */
3243	movl	%eax, %r11d
3244	cmpl	$0, %r11d
3245	je	0f
3246
3247	/*
3248	 * If panic_trigger() was successful, we are the first to initiate a
3249	 * panic: we now switch to the reserved panic_stack before continuing.
3250	 */
3251	leaq	panic_stack(%rip), %rsp
3252	addq	$PANICSTKSIZE, %rsp
32530:	subq	$REGSIZE, %rsp
3254	/*
3255	 * Now that we've got everything set up, store the register values as
3256	 * they were when we entered vpanic() to the designated location in
3257	 * the regs structure we allocated on the stack.
3258	 */
3259	movq	0x0(%rbx), %rcx
3260	movq	%rcx, REGOFF_RDI(%rsp)
3261	movq	0x8(%rbx), %rcx
3262	movq	%rcx, REGOFF_RSI(%rsp)
3263	movq	0x10(%rbx), %rcx
3264	movq	%rcx, REGOFF_RDX(%rsp)
3265	movq	0x18(%rbx), %rcx
3266	movq	%rcx, REGOFF_RCX(%rsp)
3267	movq	0x20(%rbx), %rcx
3268
3269	movq	%rcx, REGOFF_R8(%rsp)
3270	movq	0x28(%rbx), %rcx
3271	movq	%rcx, REGOFF_R9(%rsp)
3272	movq	0x30(%rbx), %rcx
3273	movq	%rcx, REGOFF_RAX(%rsp)
3274	movq	0x38(%rbx), %rcx
3275	movq	%rcx, REGOFF_RBX(%rsp)
3276	movq	0x58(%rbx), %rcx
3277
3278	movq	%rcx, REGOFF_RBP(%rsp)
3279	movq	0x40(%rbx), %rcx
3280	movq	%rcx, REGOFF_R10(%rsp)
3281	movq	0x48(%rbx), %rcx
3282	movq	%rcx, REGOFF_R11(%rsp)
	/* %r12-%r15 were never pushed or modified, so store them directly */
3283	movq	%r12, REGOFF_R12(%rsp)
3284
3285	movq	%r13, REGOFF_R13(%rsp)
3286	movq	%r14, REGOFF_R14(%rsp)
3287	movq	%r15, REGOFF_R15(%rsp)
3288
	/* segment selectors, zero-extended through %rcx */
3289	xorl	%ecx, %ecx
3290	movw	%ds, %cx
3291	movq	%rcx, REGOFF_DS(%rsp)
3292	movw	%es, %cx
3293	movq	%rcx, REGOFF_ES(%rsp)
3294	movw	%fs, %cx
3295	movq	%rcx, REGOFF_FS(%rsp)
3296	movw	%gs, %cx
3297	movq	%rcx, REGOFF_GS(%rsp)
3298
3299	movq	$0, REGOFF_TRAPNO(%rsp)
3300
3301	movq	$0, REGOFF_ERR(%rsp)
	/* the recorded %rip is the address of vpanic itself */
3302	leaq	vpanic(%rip), %rcx
3303	movq	%rcx, REGOFF_RIP(%rsp)
3304	movw	%cs, %cx
3305	movzwq	%cx, %rcx
3306	movq	%rcx, REGOFF_CS(%rsp)
3307	movq	0x50(%rbx), %rcx
3308	movq	%rcx, REGOFF_RFL(%rsp)
	/* recorded %rsp = save area + 0x60, i.e. %rsp on entry to vpanic */
3309	movq	%rbx, %rcx
3310	addq	$0x60, %rcx
3311	movq	%rcx, REGOFF_RSP(%rsp)
3312	movw	%ss, %cx
3313	movzwq	%cx, %rcx
3314	movq	%rcx, REGOFF_SS(%rsp)
3315
3316	/*
3317	 * panicsys(format, alist, rp, on_panic_stack)
3318	 */
3319	movq	REGOFF_RDI(%rsp), %rdi		/* format */
3320	movq	REGOFF_RSI(%rsp), %rsi		/* alist */
3321	movq	%rsp, %rdx			/* struct regs */
3322	movl	%r11d, %ecx			/* on_panic_stack */
3323	call	panicsys
	/* if panicsys() returns: drop struct regs, unwind the pushes above */
3324	addq	$REGSIZE, %rsp
3325	popq	%rdi
3326	popq	%rsi
3327	popq	%rdx
3328	popq	%rcx
3329	popq	%r8
3330	popq	%r9
3331	popq	%rax
3332	popq	%rbx
3333	popq	%r10
3334	popq	%r11
3335	popfq
3336	leave
3337	ret
3338	SET_SIZE(vpanic)
3339
/*
 * dtrace_vpanic(format, alist), 64-bit kernel.
 *
 * Builds exactly the same register save area as vpanic(), calls
 * dtrace_panic_trigger(&panic_quiesce) instead of panic_trigger(), and
 * then jumps to vpanic_common inside vpanic() with the trigger result in
 * %eax and the save-area pointer in %rbx.
 */
3340	ENTRY_NP(dtrace_vpanic)			/* Initial stack layout: */
3341
3342	pushq	%rbp				/* | %rip | 	0x60	*/
3343	movq	%rsp, %rbp			/* | %rbp |	0x58	*/
3344	pushfq					/* | rfl  |	0x50	*/
3345	pushq	%r11				/* | %r11 |	0x48	*/
3346	pushq	%r10				/* | %r10 |	0x40	*/
3347	pushq	%rbx				/* | %rbx |	0x38	*/
3348	pushq	%rax				/* | %rax |	0x30	*/
3349	pushq	%r9				/* | %r9  |	0x28	*/
3350	pushq	%r8				/* | %r8  |	0x20	*/
3351	pushq	%rcx				/* | %rcx |	0x18	*/
3352	pushq	%rdx				/* | %rdx |	0x10	*/
3353	pushq	%rsi				/* | %rsi |	0x8 alist */
3354	pushq	%rdi				/* | %rdi |	0x0 format */
3355
3356	movq	%rsp, %rbx			/* %rbx = current %rsp */
3357
3358	leaq	panic_quiesce(%rip), %rdi	/* %rdi = &panic_quiesce */
3359	call	dtrace_panic_trigger	/* %eax = dtrace_panic_trigger() */
3360	jmp	vpanic_common
3361
3362	SET_SIZE(dtrace_vpanic)
3363
3364#elif defined(__i386)
3365
/*
 * vpanic(format, alist), 32-bit kernel.
 *
 * Pushes %eax, %ebx, %ecx and %edx (layout shown beside the pushes),
 * remembers that save area in %ebx, calls panic_trigger(&panic_quiesce),
 * optionally switches to panic_stack, builds a struct regs on the stack
 * and calls panicsys(format, alist, &regs, on_panic_stack).  The trigger
 * result stays in %eax from the call until it is pushed as
 * on_panic_stack; only %ecx and %edx are used as scratch in between.
 * dtrace_vpanic() builds the identical save area and jumps to
 * vpanic_common.
 */
3366	ENTRY_NP(vpanic)			/ Initial stack layout:
3367
3368	pushl	%ebp				/ | %eip | 20
3369	movl	%esp, %ebp			/ | %ebp | 16
3370	pushl	%eax				/ | %eax | 12
3371	pushl	%ebx				/ | %ebx |  8
3372	pushl	%ecx				/ | %ecx |  4
3373	pushl	%edx				/ | %edx |  0
3374
3375	movl	%esp, %ebx			/ %ebx = current stack pointer
3376
3377	lea	panic_quiesce, %eax		/ %eax = &panic_quiesce
3378	pushl	%eax				/ push &panic_quiesce
3379	call	panic_trigger			/ %eax = panic_trigger()
3380	addl	$4, %esp			/ reset stack pointer
3381
3382vpanic_common:
3383	cmpl	$0, %eax			/ if (%eax == 0)
3384	je	0f				/   goto 0f;
3385
3386	/*
3387	 * If panic_trigger() was successful, we are the first to initiate a
3388	 * panic: we now switch to the reserved panic_stack before continuing.
3389	 */
3390	lea	panic_stack, %esp		/ %esp  = panic_stack
3391	addl	$PANICSTKSIZE, %esp		/ %esp += PANICSTKSIZE
3392
33930:	subl	$REGSIZE, %esp			/ allocate struct regs
3394
3395	/*
3396	 * Now that we've got everything set up, store the register values as
3397	 * they were when we entered vpanic() to the designated location in
3398	 * the regs structure we allocated on the stack.
3399	 */
3400#if !defined(__GNUC_AS__)
3401	movw	%gs, %edx
3402	movl	%edx, REGOFF_GS(%esp)
3403	movw	%fs, %edx
3404	movl	%edx, REGOFF_FS(%esp)
3405	movw	%es, %edx
3406	movl	%edx, REGOFF_ES(%esp)
3407	movw	%ds, %edx
3408	movl	%edx, REGOFF_DS(%esp)
3409#else	/* __GNUC_AS__ */
3410	mov	%gs, %edx
3411	mov	%edx, REGOFF_GS(%esp)
3412	mov	%fs, %edx
3413	mov	%edx, REGOFF_FS(%esp)
3414	mov	%es, %edx
3415	mov	%edx, REGOFF_ES(%esp)
3416	mov	%ds, %edx
3417	mov	%edx, REGOFF_DS(%esp)
3418#endif	/* __GNUC_AS__ */
	/* %edi and %esi were never pushed or modified; store them directly */
3419	movl	%edi, REGOFF_EDI(%esp)
3420	movl	%esi, REGOFF_ESI(%esp)
3421	movl	16(%ebx), %ecx
3422	movl	%ecx, REGOFF_EBP(%esp)
	/* recorded %esp = save area + 20, i.e. %esp on entry to vpanic */
3423	movl	%ebx, %ecx
3424	addl	$20, %ecx
3425	movl	%ecx, REGOFF_ESP(%esp)
3426	movl	8(%ebx), %ecx
3427	movl	%ecx, REGOFF_EBX(%esp)
3428	movl	0(%ebx), %ecx
3429	movl	%ecx, REGOFF_EDX(%esp)
3430	movl	4(%ebx), %ecx
3431	movl	%ecx, REGOFF_ECX(%esp)
3432	movl	12(%ebx), %ecx
3433	movl	%ecx, REGOFF_EAX(%esp)
3434	movl	$0, REGOFF_TRAPNO(%esp)
3435	movl	$0, REGOFF_ERR(%esp)
	/* the recorded %eip is the address of vpanic itself */
3436	lea	vpanic, %ecx
3437	movl	%ecx, REGOFF_EIP(%esp)
3438#if !defined(__GNUC_AS__)
3439	movw	%cs, %edx
3440#else	/* __GNUC_AS__ */
3441	mov	%cs, %edx
3442#endif	/* __GNUC_AS__ */
3443	movl	%edx, REGOFF_CS(%esp)
3444	pushfl
3445	popl	%ecx
3446#if defined(__xpv)
3447	/*
3448	 * Synthesize the PS_IE bit from the event mask bit
3449	 */
3450	CURTHREAD(%edx)
3451	KPREEMPT_DISABLE(%edx)
3452	EVENT_MASK_TO_IE(%edx, %ecx)
3453	CURTHREAD(%edx)
3454	KPREEMPT_ENABLE_NOKP(%edx)
3455#endif
3456	movl	%ecx, REGOFF_EFL(%esp)
3457	movl	$0, REGOFF_UESP(%esp)
3458#if !defined(__GNUC_AS__)
3459	movw	%ss, %edx
3460#else	/* __GNUC_AS__ */
3461	mov	%ss, %edx
3462#endif	/* __GNUC_AS__ */
3463	movl	%edx, REGOFF_SS(%esp)
3464
3465	movl	%esp, %ecx			/ %ecx = &regs
3466	pushl	%eax				/ push on_panic_stack
3467	pushl	%ecx				/ push &regs
3468	movl	12(%ebp), %ecx			/ %ecx = alist
3469	pushl	%ecx				/ push alist
3470	movl	8(%ebp), %ecx			/ %ecx = format
3471	pushl	%ecx				/ push format
3472	call	panicsys			/ panicsys();
3473	addl	$16, %esp			/ pop arguments
3474
	/* if panicsys() returns: drop struct regs, unwind the pushes above */
3475	addl	$REGSIZE, %esp
3476	popl	%edx
3477	popl	%ecx
3478	popl	%ebx
3479	popl	%eax
3480	leave
3481	ret
3482	SET_SIZE(vpanic)
3483
/*
 * dtrace_vpanic(format, alist), 32-bit kernel.
 *
 * Builds exactly the same register save area as vpanic(), calls
 * dtrace_panic_trigger(&panic_quiesce) instead of panic_trigger(), and
 * then jumps to vpanic_common inside vpanic() with the trigger result in
 * %eax and the save-area pointer in %ebx.
 */
3484	ENTRY_NP(dtrace_vpanic)			/ Initial stack layout:
3485
3486	pushl	%ebp				/ | %eip | 20
3487	movl	%esp, %ebp			/ | %ebp | 16
3488	pushl	%eax				/ | %eax | 12
3489	pushl	%ebx				/ | %ebx |  8
3490	pushl	%ecx				/ | %ecx |  4
3491	pushl	%edx				/ | %edx |  0
3492
3493	movl	%esp, %ebx			/ %ebx = current stack pointer
3494
3495	lea	panic_quiesce, %eax		/ %eax = &panic_quiesce
3496	pushl	%eax				/ push &panic_quiesce
3497	call	dtrace_panic_trigger		/ %eax = dtrace_panic_trigger()
3498	addl	$4, %esp			/ reset stack pointer
3499	jmp	vpanic_common			/ jump back to common code
3500
3501	SET_SIZE(dtrace_vpanic)
3502
3503#endif	/* __i386 */
3504#endif	/* __lint */
3505
3506#if defined(__lint)
3507
/*
 * Lint-only model: an empty hres_tick() plus C declarations for the
 * clock variables that the assembly build defines with DGDEF3.
 */
3508void
3509hres_tick(void)
3510{}
3511
3512int64_t timedelta;
3513hrtime_t hres_last_tick;
3514volatile timestruc_t hrestime;
3515int64_t hrestime_adj;
3516volatile int hres_lock;
3517hrtime_t hrtime_base;
3518
3519#else	/* __lint */
3520
/*
 * Clock state updated by hres_tick().  Each DGDEF3(name, size, align)
 * is followed by the initial value.  hres_lock is a 4-byte word whose
 * low byte is used as the lock by hres_tick().
 */
3521	DGDEF3(hrestime, _MUL(2, CLONGSIZE), 8)
3522	.NWORD	0, 0
3523
3524	DGDEF3(hrestime_adj, 8, 8)
3525	.long	0, 0
3526
3527	DGDEF3(hres_last_tick, 8, 8)
3528	.long	0, 0
3529
3530	DGDEF3(timedelta, 8, 8)
3531	.long	0, 0
3532
3533	DGDEF3(hres_lock, 4, 8)
3534	.long	0
3535
3536	/*
3537	 * initialized to a non zero value to make pc_gethrtime()
3538	 * work correctly even before clock is initialized
3539	 */
3540	DGDEF3(hrtime_base, 8, 8)
3541	.long	_MUL(NSEC_PER_CLOCK_TICK, 6), 0
3542
3543	DGDEF3(adj_shift, 4, 4)
3544	.long	ADJ_SHIFT
3545
3546#if defined(__amd64)
3547
/*
 * hres_tick(), 64-bit kernel.
 *
 * Reads the current high-resolution time through *gethrtimef, spins for
 * the byte lock in hres_lock, adds the interval since hres_last_tick to
 * hrtime_base and hrestime.tv_nsec, records the new hres_last_tick, calls
 * __adj_hrestime (defined elsewhere) and releases the lock.
 */
3548	ENTRY_NP(hres_tick)
3549	pushq	%rbp
3550	movq	%rsp, %rbp
3551
3552	/*
3553	 * We need to call *gethrtimef before picking up CLOCK_LOCK (obviously,
3554	 * hres_last_tick can only be modified while holding CLOCK_LOCK).
3555	 * At worst, performing this now instead of under CLOCK_LOCK may
3556	 * introduce some jitter in pc_gethrestime().
3557	 */
3558	call	*gethrtimef(%rip)
3559	movq	%rax, %r8
3560
	/*
	 * Acquire the lock: swap 0xff into the low byte of hres_lock; a
	 * previous value of zero means we got it.  Otherwise spin reading
	 * the byte (with pause) until it reads zero, then retry the swap.
	 */
3561	leaq	hres_lock(%rip), %rax
3562	movb	$-1, %dl
3563.CL1:
3564	xchgb	%dl, (%rax)
3565	testb	%dl, %dl
3566	jz	.CL3			/* got it */
3567.CL2:
3568	cmpb	$0, (%rax)		/* possible to get lock? */
3569	pause
3570	jne	.CL2
3571	jmp	.CL1			/* yes, try again */
3572.CL3:
3573	/*
3574	 * compute the interval since last time hres_tick was called
3575	 * and adjust hrtime_base and hrestime accordingly
3576	 * hrtime_base is an 8 byte value (in nsec), hrestime is
3577	 * a timestruc_t (sec, nsec)
3578	 */
3579	leaq	hres_last_tick(%rip), %rax
3580	movq	%r8, %r11
3581	subq	(%rax), %r8
3582	addq	%r8, hrtime_base(%rip)	/* add interval to hrtime_base */
3583	addq	%r8, hrestime+8(%rip)	/* add interval to hrestime.tv_nsec */
3584	/*
3585	 * Now that we have CLOCK_LOCK, we can update hres_last_tick
3586	 */
3587	movq	%r11, (%rax)
3588
3589	call	__adj_hrestime
3590
3591	/*
3592	 * release the hres_lock
	 *
	 * The low byte holds 0xff while locked, so incrementing the 32-bit
	 * word clears that byte and carries into the upper bytes.
3593	 */
3594	incl	hres_lock(%rip)
3595	leave
3596	ret
3597	SET_SIZE(hres_tick)
3598
3599#elif defined(__i386)
3600
/*
 * hres_tick(), 32-bit kernel.
 *
 * Reads the current high-resolution time through *gethrtimef (64-bit
 * result kept in %esi:%ebx), spins for the byte lock in hres_lock, adds
 * the interval since hres_last_tick to hrtime_base and hrestime.tv_nsec,
 * records the new hres_last_tick, then falls into the inline
 * __adj_hrestime code, which applies the clamped hrestime_adj adjustment,
 * normalizes tv_nsec into whole seconds and releases the lock.
 */
3601	ENTRY_NP(hres_tick)
3602	pushl	%ebp
3603	movl	%esp, %ebp
3604	pushl	%esi
3605	pushl	%ebx
3606
3607	/*
3608	 * We need to call *gethrtimef before picking up CLOCK_LOCK (obviously,
3609	 * hres_last_tick can only be modified while holding CLOCK_LOCK).
3610	 * At worst, performing this now instead of under CLOCK_LOCK may
3611	 * introduce some jitter in pc_gethrestime().
3612	 */
3613	call	*gethrtimef
3614	movl	%eax, %ebx
3615	movl	%edx, %esi
3616
	/*
	 * Acquire the lock: swap 0xff into the low byte of hres_lock; a
	 * previous value of zero means we got it.  Otherwise spin reading
	 * the byte (with pause) until it reads zero, then retry the swap.
	 */
3617	movl	$hres_lock, %eax
3618	movl	$-1, %edx
3619.CL1:
3620	xchgb	%dl, (%eax)
3621	testb	%dl, %dl
3622	jz	.CL3			/ got it
3623.CL2:
3624	cmpb	$0, (%eax)		/ possible to get lock?
3625	pause
3626	jne	.CL2
3627	jmp	.CL1			/ yes, try again
3628.CL3:
3629	/*
3630	 * compute the interval since last time hres_tick was called
3631	 * and adjust hrtime_base and hrestime accordingly
3632	 * hrtime_base is an 8 byte value (in nsec), hrestime is
3633	 * timestruc_t (sec, nsec)
3634	 */
3635
3636	lea	hres_last_tick, %eax
3637
3638	movl	%ebx, %edx
3639	movl	%esi, %ecx
3640
3641	subl 	(%eax), %edx
3642	sbbl 	4(%eax), %ecx
3643
3644	addl	%edx, hrtime_base	/ add interval to hrtime_base
3645	adcl	%ecx, hrtime_base+4
3646
3647	addl 	%edx, hrestime+4	/ add interval to hrestime.tv_nsec
3648
3649	/
3650	/ Now that we have CLOCK_LOCK, we can update hres_last_tick.
3651	/
3652	movl	%ebx, (%eax)
3653	movl	%esi,  4(%eax)
3654
3655	/ get hrestime at this moment. used as base for pc_gethrestime
3656	/
3657	/ Apply adjustment, if any
3658	/
3659	/ #define HRES_ADJ	(NSEC_PER_CLOCK_TICK >> ADJ_SHIFT)
3660	/ (max_hres_adj)
3661	/
3662	/ void
3663	/ adj_hrestime()
3664	/ {
3665	/	long long adj;
3666	/
3667	/	if (hrestime_adj == 0)
3668	/		adj = 0;
3669	/	else if (hrestime_adj > 0) {
3670	/		if (hrestime_adj < HRES_ADJ)
3671	/			adj = hrestime_adj;
3672	/		else
3673	/			adj = HRES_ADJ;
3674	/	}
3675	/	else {
3676	/		if (hrestime_adj < -(HRES_ADJ))
3677	/			adj = -(HRES_ADJ);
3678	/		else
3679	/			adj = hrestime_adj;
3680	/	}
3681	/
3682	/	timedelta -= adj;
3683	/	hrestime_adj = timedelta;
3684	/	hrestime.tv_nsec += adj;
3685	/
3686	/	while (hrestime.tv_nsec >= NANOSEC) {
3687	/		one_sec++;
3688	/		hrestime.tv_sec++;
3689	/		hrestime.tv_nsec -= NANOSEC;
3690	/	}
3691	/ }
	/*
	 * Register use below: %edx:%esi = hrestime_adj on entry to the
	 * comparison code, %edx:%ecx = adj once .CL5 is reached.
	 */
3692__adj_hrestime:
3693	movl	hrestime_adj, %esi	/ if (hrestime_adj == 0)
3694	movl	hrestime_adj+4, %edx
3695	andl	%esi, %esi
3696	jne	.CL4			/ no
3697	andl	%edx, %edx
3698	jne	.CL4			/ no
3699	subl	%ecx, %ecx		/ yes, adj = 0;
3700	subl	%edx, %edx
3701	jmp	.CL5
3702.CL4:
	/* %eax:%ecx = 0 - hrestime_adj; its sign selects the branch below */
3703	subl	%ecx, %ecx
3704	subl	%eax, %eax
3705	subl	%esi, %ecx
3706	sbbl	%edx, %eax
3707	andl	%eax, %eax		/ if (hrestime_adj > 0)
3708	jge	.CL6
3709
3710	/ In the following comments, HRES_ADJ is used, while in the code
3711	/ max_hres_adj is used.
3712	/
3713	/ The test for "hrestime_adj < HRES_ADJ" is complicated because
3714	/ hrestime_adj is 64-bits, while HRES_ADJ is 32-bits.  We rely
3715	/ on the logical equivalence of:
3716	/
3717	/	!(hrestime_adj < HRES_ADJ)
3718	/
3719	/ and the two step sequence:
3720	/
3721	/	(HRES_ADJ - lsw(hrestime_adj)) generates a Borrow/Carry
3722	/
3723	/ which computes whether or not the least significant 32-bits
3724	/ of hrestime_adj is greater than HRES_ADJ, followed by:
3725	/
3726	/	Previous Borrow/Carry + -1 + msw(hrestime_adj) generates a Carry
3727	/
3728	/ which generates a carry whenever step 1 is true or the most
3729	/ significant long of the longlong hrestime_adj is non-zero.
3730
3731	movl	max_hres_adj, %ecx	/ hrestime_adj is positive
3732	subl	%esi, %ecx
3733	movl	%edx, %eax
3734	adcl	$-1, %eax
3735	jnc	.CL7
3736	movl	max_hres_adj, %ecx	/ adj = HRES_ADJ;
3737	subl	%edx, %edx
3738	jmp	.CL5
3739
3740	/ The following computation is similar to the one above.
3741	/
3742	/ The test for "hrestime_adj < -(HRES_ADJ)" is complicated because
3743	/ hrestime_adj is 64-bits, while HRES_ADJ is 32-bits.  We rely
3744	/ on the logical equivalence of:
3745	/
3746	/	(hrestime_adj > -HRES_ADJ)
3747	/
3748	/ and the two step sequence:
3749	/
3750	/	(HRES_ADJ + lsw(hrestime_adj)) generates a Carry
3751	/
3752	/ which means the least significant 32-bits of hrestime_adj is
3753	/ greater than -HRES_ADJ, followed by:
3754	/
3755	/	Previous Carry + 0 + msw(hrestime_adj) generates a Carry
3756	/
3757	/ which generates a carry only when step 1 is true and the most
3758	/ significant long of the longlong hrestime_adj is -1.
3759
3760.CL6:					/ hrestime_adj is negative
3761	movl	%esi, %ecx
3762	addl	max_hres_adj, %ecx
3763	movl	%edx, %eax
3764	adcl	$0, %eax
3765	jc	.CL7
3766	xor	%ecx, %ecx
3767	subl	max_hres_adj, %ecx	/ adj = -(HRES_ADJ);
3768	movl	$-1, %edx
3769	jmp	.CL5
3770.CL7:
3771	movl	%esi, %ecx		/ adj = hrestime_adj;
3772.CL5:
3773	movl	timedelta, %esi
3774	subl	%ecx, %esi
3775	movl	timedelta+4, %eax
3776	sbbl	%edx, %eax
3777	movl	%esi, timedelta
3778	movl	%eax, timedelta+4	/ timedelta -= adj;
3779	movl	%esi, hrestime_adj
3780	movl	%eax, hrestime_adj+4	/ hrestime_adj = timedelta;
3781	addl	hrestime+4, %ecx
3782
3783	movl	%ecx, %eax		/ eax = tv_nsec
37841:
3785	cmpl	$NANOSEC, %eax		/ if ((unsigned long)tv_nsec >= NANOSEC)
3786	jb	.CL8			/ no
3787	incl	one_sec			/ yes,  one_sec++;
3788	incl	hrestime		/ hrestime.tv_sec++;
3789	addl	$-NANOSEC, %eax		/ tv_nsec -= NANOSEC
3790	jmp	1b			/ check for more seconds
3791
3792.CL8:
3793	movl	%eax, hrestime+4	/ store final into hrestime.tv_nsec
	/*
	 * The low byte of hres_lock holds 0xff while locked, so incrementing
	 * the 32-bit word clears that byte and carries into the upper bytes.
	 */
3794	incl	hres_lock		/ release the hres_lock
3795
3796	popl	%ebx
3797	popl	%esi
3798	leave
3799	ret
3800	SET_SIZE(hres_tick)
3801
3802#endif	/* __i386 */
3803#endif	/* __lint */
3804
3805/*
3806 * void prefetch_smap_w(void *)
3807 *
3808 * Prefetch ahead within a linear list of smap structures.
3809 * Not implemented for ia32.  Stub for compatibility.
3810 */
3811
/*
 * prefetch_smap_w: both the lint stub and the assembly entry point are
 * no-ops; the assembly version consists solely of a two-byte return.
 */
3812#if defined(__lint)
3813
3814/*ARGSUSED*/
3815void prefetch_smap_w(void *smp)
3816{}
3817
3818#else	/* __lint */
3819
3820	ENTRY(prefetch_smap_w)
3821	rep;	ret	/* use 2 byte return instruction when branch target */
3822			/* AMD Software Optimization Guide - Section 6.2 */
3823	SET_SIZE(prefetch_smap_w)
3824
3825#endif	/* __lint */
3826
3827/*
3828 * prefetch_page_r(page_t *)
3829 * issue prefetch instructions for a page_t
3830 */
/*
 * prefetch_page_r: as implemented here this issues no prefetch at all;
 * both the lint stub and the assembly entry point return immediately.
 */
3831#if defined(__lint)
3832
3833/*ARGSUSED*/
3834void
3835prefetch_page_r(void *pp)
3836{}
3837
3838#else	/* __lint */
3839
3840	ENTRY(prefetch_page_r)
3841	rep;	ret	/* use 2 byte return instruction when branch target */
3842			/* AMD Software Optimization Guide - Section 6.2 */
3843	SET_SIZE(prefetch_page_r)
3844
3845#endif	/* __lint */
3846
3847#if defined(__lint)
3848
/* Lint-only stub: always returns 0; the real bcmp() is in assembly. */
3849/*ARGSUSED*/
3850int
3851bcmp(const void *s1, const void *s2, size_t count)
3852{ return (0); }
3853
3854#else   /* __lint */
3855
3856#if defined(__amd64)
3857
/*
 * int bcmp(const void *s1, const void *s2, size_t count), 64-bit kernel.
 *
 * Passes its three arguments straight through to memcmp() and collapses
 * the result to 0 (equal) or 1 (different).
 *
 * DEBUG kernels first panic if either pointer is below
 * postbootkernelbase.
 */
	ENTRY(bcmp)
	pushq	%rbp
	movq	%rsp, %rbp
#ifdef DEBUG
	movq	postbootkernelbase(%rip), %r11
	cmpq	%r11, %rdi		/* s1 below postbootkernelbase? */
	jb	0f
	cmpq	%r11, %rsi		/* s2 at or above it as well? */
	jae	1f
0:	leaq	.bcmp_panic_msg(%rip), %rdi
	xorl	%eax, %eax		/* no vector registers used by the call */
	call	panic
1:
#endif	/* DEBUG */
	call	memcmp			/* %rdi, %rsi, %rdx are still the args */
	xorl	%edx, %edx
	testl	%eax, %eax
	setne	%dl			/* %edx = (memcmp() != 0) */
	movl	%edx, %eax
	leave
	ret
	SET_SIZE(bcmp)
3879
3880#elif defined(__i386)
3881
/*
 * int bcmp(const void *s1, const void *s2, size_t count), 32-bit kernel.
 *
 * Returns 0 when the two buffers hold the same bytes (or are the same
 * buffer) and 1 otherwise.  Buffers are compared four bytes at a time
 * while at least four bytes remain; the tail, or the word in which a
 * difference was seen, is then compared byte by byte.
 *
 * Register use: %eax = s1 cursor, %ecx = s2 cursor, %edi = bytes left,
 * %edx = scratch.  DEBUG kernels first panic if either pointer is below
 * postbootkernelbase.
 */
#define	ARG_S1		8
#define	ARG_S2		12
#define	ARG_LENGTH	16

	ENTRY(bcmp)
	pushl	%ebp
	movl	%esp, %ebp		/* create new stack frame */
#ifdef DEBUG
	movl	postbootkernelbase, %eax
	cmpl	%eax, ARG_S1(%ebp)	/* s1 below postbootkernelbase? */
	jb	0f
	cmpl	%eax, ARG_S2(%ebp)	/* s2 at or above it as well? */
	jae	1f
0:	pushl	$.bcmp_panic_msg
	call	panic
1:
#endif	/* DEBUG */

	pushl	%edi			/* callee-saved; used as the counter */
	movl	ARG_S1(%ebp), %eax
	movl	ARG_S2(%ebp), %ecx
	cmpl	%eax, %ecx		/* same buffer? */
	je	.equal
	movl	ARG_LENGTH(%ebp), %edi
	cmpl	$4, %edi
	jb	.byte_check		/* fewer than 4 bytes in total */
	.align	4
.word_loop:
	movl	(%ecx), %edx
	cmpl	(%eax), %edx
	jne	.byte_loop		/* mismatch: rescan these bytes singly */
	addl	$4, %ecx
	addl	$4, %eax
	subl	$4, %edi
	cmpl	$4, %edi
	jae	.word_loop		/* at least one full word left */
.byte_check:
	testl	%edi, %edi
	jz	.equal			/* nothing left to compare */
	.align	4
.byte_loop:
	movb	(%ecx), %dl
	cmpb	%dl, (%eax)
	jne	.not_equal
	incl	%ecx
	incl	%eax
	decl	%edi
	jnz	.byte_loop
.equal:
	xorl	%eax, %eax		/* return (0) */
	popl	%edi
	leave
	ret
	.align	4
.not_equal:
	movl	$1, %eax		/* return (1) */
	popl	%edi
	leave
	ret
	SET_SIZE(bcmp)
3945
3946#endif	/* __i386 */
3947
/* Message passed to panic() by the DEBUG-only argument check in bcmp(). */
3948#ifdef DEBUG
3949	.text
3950.bcmp_panic_msg:
3951	.string "bcmp: arguments below kernelbase"
3952#endif	/* DEBUG */
3953
3954#endif	/* __lint */
3955
3956#if defined(__lint)
3957
3958uint_t
3959bsrw_insn(uint16_t mask)
3960{
3961	uint_t index = sizeof (mask) * NBBY - 1;
3962
3963	while ((mask & (1 << index)) == 0)
3964		index--;
3965	return (index);
3966}
3967
3968#else	/* __lint */
3969
/*
 * bsrw_insn(uint16_t mask): execute bsrw on the 16-bit argument and
 * return the resulting bit index, zero-extended.  %eax is cleared first
 * so the upper 16 bits of the return value are zero after the 16-bit
 * destination write.
 */
3970#if defined(__amd64)
3971
3972	ENTRY_NP(bsrw_insn)
3973	xorl	%eax, %eax
3974	bsrw	%di, %ax
3975	ret
3976	SET_SIZE(bsrw_insn)
3977
3978#elif defined(__i386)
3979
3980	ENTRY_NP(bsrw_insn)
3981	movw	4(%esp), %cx
3982	xorl	%eax, %eax
3983	bsrw	%cx, %ax
3984	ret
3985	SET_SIZE(bsrw_insn)
3986
3987#endif	/* __i386 */
3988#endif	/* __lint */
3989
3990#if defined(__lint)
3991
3992uint_t
3993atomic_btr32(uint32_t *pending, uint_t pil)
3994{
3995	return (*pending &= ~(1 << pil));
3996}
3997
3998#else	/* __lint */
3999
4000#if defined(__i386)
4001
/*
 * atomic_btr32(uint32_t *pending, uint_t pil), 32-bit kernel only.
 * Atomically clears bit pil of *pending (lock btrl) and returns the
 * previous state of that bit, 0 or 1, taken from the carry flag.
 */
4002	ENTRY_NP(atomic_btr32)
4003	movl	4(%esp), %ecx
4004	movl	8(%esp), %edx
4005	xorl	%eax, %eax
4006	lock
4007	btrl	%edx, (%ecx)
4008	setc	%al
4009	ret
4010	SET_SIZE(atomic_btr32)
4011
4012#endif	/* __i386 */
4013#endif	/* __lint */
4014
/*
 * switch_sp_and_call(newsp, func, arg1, arg2): establish a frame on the
 * current stack, switch the stack pointer to newsp, call func(arg1, arg2)
 * there, and return on the original stack.  The frame pointer saved
 * before the switch is what lets "leave" restore the old stack pointer.
 */
4015#if defined(__lint)
4016
4017/*ARGSUSED*/
4018void
4019switch_sp_and_call(void *newsp, void (*func)(uint_t, uint_t), uint_t arg1,
4020	    uint_t arg2)
4021{}
4022
4023#else	/* __lint */
4024
4025#if defined(__amd64)
4026
4027	ENTRY_NP(switch_sp_and_call)
4028	pushq	%rbp
4029	movq	%rsp, %rbp		/* set up stack frame */
4030	movq	%rdi, %rsp		/* switch stack pointer */
4031	movq	%rdx, %rdi		/* pass func arg 1 */
4032	movq	%rsi, %r11		/* save function to call */
4033	movq	%rcx, %rsi		/* pass func arg 2 */
4034	call	*%r11			/* call function */
4035	leave				/* restore stack */
4036	ret
4037	SET_SIZE(switch_sp_and_call)
4038
4039#elif defined(__i386)
4040
4041	ENTRY_NP(switch_sp_and_call)
4042	pushl	%ebp
4043	mov	%esp, %ebp		/* set up stack frame */
4044	movl	8(%ebp), %esp		/* switch stack pointer */
4045	pushl	20(%ebp)		/* push func arg 2 */
4046	pushl	16(%ebp)		/* push func arg 1 */
4047	call	*12(%ebp)		/* call function */
4048	addl	$8, %esp		/* pop arguments */
4049	leave				/* restore stack */
4050	ret
4051	SET_SIZE(switch_sp_and_call)
4052
4053#endif	/* __i386 */
4054#endif	/* __lint */
4055
/*
 * kmdb_enter(): call intr_clear(), raise software interrupt T_DBGENTR,
 * then hand the value intr_clear() returned to intr_restore().
 * The return value of intr_clear() is expected to still be in
 * %rax / %eax after the int instruction returns.
 */
4056#if defined(__lint)
4057
4058void
4059kmdb_enter(void)
4060{}
4061
4062#else	/* __lint */
4063
4064#if defined(__amd64)
4065
4066	ENTRY_NP(kmdb_enter)
4067	pushq	%rbp
4068	movq	%rsp, %rbp
4069
4070	/*
4071	 * Save flags, do a 'cli' then return the saved flags
4072	 */
4073	call	intr_clear
4074
4075	int	$T_DBGENTR
4076
4077	/*
4078	 * Restore the saved flags
4079	 */
4080	movq	%rax, %rdi
4081	call	intr_restore
4082
4083	leave
4084	ret
4085	SET_SIZE(kmdb_enter)
4086
4087#elif defined(__i386)
4088
4089	ENTRY_NP(kmdb_enter)
4090	pushl	%ebp
4091	movl	%esp, %ebp
4092
4093	/*
4094	 * Save flags, do a 'cli' then return the saved flags
4095	 */
4096	call	intr_clear
4097
4098	int	$T_DBGENTR
4099
4100	/*
4101	 * Restore the saved flags
4102	 */
4103	pushl	%eax
4104	call	intr_restore
4105	addl	$4, %esp
4106
4107	leave
4108	ret
4109	SET_SIZE(kmdb_enter)
4110
4111#endif	/* __i386 */
4112#endif	/* __lint */
4113
/*
 * return_instr(): a function that does nothing but return.  The
 * assembly version is a single two-byte "rep; ret".
 */
4114#if defined(__lint)
4115
4116void
4117return_instr(void)
4118{}
4119
4120#else	/* __lint */
4121
4122	ENTRY_NP(return_instr)
4123	rep;	ret	/* use 2 byte instruction when branch target */
4124			/* AMD Software Optimization Guide - Section 6.2 */
4125	SET_SIZE(return_instr)
4126
4127#endif	/* __lint */
4128
/*
 * getflags(): return the current flags register (pushf / pop).
 *
 * Under __xpv the PS_IE bit of the returned value is replaced: it is
 * cleared, then set again only if XEN_TEST_UPCALL_MASK reports zero for
 * the current vcpu.  The sequence is bracketed by KPREEMPT_DISABLE /
 * KPREEMPT_ENABLE_NOKP on the current thread.  Those macros are defined
 * elsewhere; their exact effects are not visible here.
 */
4129#if defined(__lint)
4130
4131ulong_t
4132getflags(void)
4133{
4134	return (0);
4135}
4136
4137#else	/* __lint */
4138
4139#if defined(__amd64)
4140
4141	ENTRY(getflags)
4142	pushfq
4143	popq	%rax
4144#if defined(__xpv)
4145	CURTHREAD(%rdi)
4146	KPREEMPT_DISABLE(%rdi)
4147	/*
4148	 * Synthesize the PS_IE bit from the event mask bit
4149	 */
4150	CURVCPU(%r11)
4151	andq    $_BITNOT(PS_IE), %rax
4152	XEN_TEST_UPCALL_MASK(%r11)
4153	jnz	1f
4154	orq	$PS_IE, %rax
41551:
4156	KPREEMPT_ENABLE_NOKP(%rdi)
4157#endif
4158	ret
4159	SET_SIZE(getflags)
4160
4161#elif defined(__i386)
4162
4163	ENTRY(getflags)
4164	pushfl
4165	popl	%eax
4166#if defined(__xpv)
4167	CURTHREAD(%ecx)
4168	KPREEMPT_DISABLE(%ecx)
4169	/*
4170	 * Synthesize the PS_IE bit from the event mask bit
4171	 */
4172	CURVCPU(%edx)
4173	andl    $_BITNOT(PS_IE), %eax
4174	XEN_TEST_UPCALL_MASK(%edx)
4175	jnz	1f
4176	orl	$PS_IE, %eax
41771:
4178	KPREEMPT_ENABLE_NOKP(%ecx)
4179#endif
4180	ret
4181	SET_SIZE(getflags)
4182
4183#endif	/* __i386 */
4184
4185#endif	/* __lint */
4186
/*
 * ftrace_interrupt_disable(): capture the flags register as the return
 * value (the cookie) and then invoke the CLI() macro.
 * NOTE(review): CLI() is defined elsewhere; presumably it disables
 * interrupts using its register argument as scratch - confirm.
 */
4187#if defined(__lint)
4188
4189ftrace_icookie_t
4190ftrace_interrupt_disable(void)
4191{ return (0); }
4192
4193#else   /* __lint */
4194
4195#if defined(__amd64)
4196
4197	ENTRY(ftrace_interrupt_disable)
4198	pushfq
4199	popq	%rax
4200	CLI(%rdx)
4201	ret
4202	SET_SIZE(ftrace_interrupt_disable)
4203
4204#elif defined(__i386)
4205
4206	ENTRY(ftrace_interrupt_disable)
4207	pushfl
4208	popl	%eax
4209	CLI(%edx)
4210	ret
4211	SET_SIZE(ftrace_interrupt_disable)
4212
4213#endif	/* __i386 */
4214#endif	/* __lint */
4215
/*
 * ftrace_interrupt_enable(cookie): load the flags register from the
 * cookie argument (push / popf) and return.
 */
4216#if defined(__lint)
4217
4218/*ARGSUSED*/
4219void
4220ftrace_interrupt_enable(ftrace_icookie_t cookie)
4221{}
4222
4223#else	/* __lint */
4224
4225#if defined(__amd64)
4226
4227	ENTRY(ftrace_interrupt_enable)
4228	pushq	%rdi
4229	popfq
4230	ret
4231	SET_SIZE(ftrace_interrupt_enable)
4232
4233#elif defined(__i386)
4234
4235	ENTRY(ftrace_interrupt_enable)
4236	movl	4(%esp), %eax
4237	pushl	%eax
4238	popfl
4239	ret
4240	SET_SIZE(ftrace_interrupt_enable)
4241
4242#endif	/* __i386 */
4243#endif	/* __lint */
4244
/*
 * iommu_cpu_nop(): execute "rep; nop" (the encoding of the pause
 * instruction) once and return.
 */
4245#if defined (__lint)
4246
4247/*ARGSUSED*/
4248void
4249iommu_cpu_nop(void)
4250{}
4251
4252#else /* __lint */
4253
4254	ENTRY(iommu_cpu_nop)
4255	rep;	nop
4256	ret
4257	SET_SIZE(iommu_cpu_nop)
4258
4259#endif /* __lint */
4260
/*
 * clflush_insn(addr): execute clflush on the cache line containing addr
 * and return.
 */
4261#if defined (__lint)
4262
4263/*ARGSUSED*/
4264void
4265clflush_insn(caddr_t addr)
4266{}
4267
4268#else /* __lint */
4269
4270#if defined (__amd64)
4271	ENTRY(clflush_insn)
4272	clflush (%rdi)
4273	ret
4274	SET_SIZE(clflush_insn)
4275#elif defined (__i386)
4276	ENTRY(clflush_insn)
4277	movl	4(%esp), %eax
4278	clflush (%eax)
4279	ret
4280	SET_SIZE(clflush_insn)
4281
4282#endif /* __i386 */
4283#endif /* __lint */
4284
/*
 * mfence_insn(): execute a single mfence instruction and return.  The
 * 32-bit and 64-bit bodies are identical.
 */
4285#if defined (__lint)
4286/*ARGSUSED*/
4287void
4288mfence_insn(void)
4289{}
4290
4291#else /* __lint */
4292
4293#if defined (__amd64)
4294	ENTRY(mfence_insn)
4295	mfence
4296	ret
4297	SET_SIZE(mfence_insn)
4298#elif defined (__i386)
4299	ENTRY(mfence_insn)
4300	mfence
4301	ret
4302	SET_SIZE(mfence_insn)
4303
4304#endif /* __i386 */
4305#endif /* __lint */
4306
/*
 * This is how VMware lets guests determine that they are running on top
 * of the VMware platform:
 * Write 0xA into the ECX register and put the magic value 0x564D5868
 * into the EAX register.  Then read a word from I/O port 0x5658.
 * If VMware is present then this code executes correctly and the EBX
 * register will contain the same magic value, 0x564D5868.
 * If VMware is not present then the port access raises an exception.
 */
4316#if defined(__lint)
4317
/* Lint-only stub: unconditionally reports 1. */
4318int
4319vmware_platform(void) { return (1); }
4320
4321#else
4322
4323#if defined(__amd64)
4324
/*
 * int vmware_platform(void), 64-bit kernel.
 *
 * Loads 0x564d5868 into %eax and 0xa into %ecx, reads a long from I/O
 * port 0x5658 with %ebx cleared beforehand, and returns 1 if %ebx holds
 * 0x564d5868 afterwards, 0 otherwise.  %rbx is callee-saved and is
 * preserved for the caller.
 */
	ENTRY(vmware_platform)
	pushq	%rbx
	xorl	%ebx, %ebx		/* cleared so a stale value can't match */
	movl	$0x5658, %edx		/* I/O port to read */
	movl	$0xa, %ecx
	movl	$0x564d5868, %eax
	inl	(%dx)
	movl	$0x564d5868, %ecx
	xorl	%eax, %eax
	cmpl	%ecx, %ebx
	sete	%al			/* %eax = (%ebx == 0x564d5868) */
	popq	%rbx
	ret
	SET_SIZE(vmware_platform)
4341
4342#elif defined(__i386)
4343
/*
 * int vmware_platform(void), 32-bit kernel.
 *
 * Loads 0x564d5868 into %eax and 0xa into %ecx, reads a long from I/O
 * port 0x5658 with %ebx cleared beforehand, and returns 1 if %ebx holds
 * 0x564d5868 afterwards, 0 otherwise.  %ebx, %ecx and %edx are all saved
 * and restored around the probe.
 */
	ENTRY(vmware_platform)
	pushl	%ebx
	pushl	%ecx
	pushl	%edx
	xorl	%ebx, %ebx		/* cleared so a stale value can't match */
	movl	$0x5658, %edx		/* I/O port to read */
	movl	$0xa, %ecx
	movl	$0x564d5868, %eax
	inl	(%dx)
	xorl	%eax, %eax
	cmpl	$0x564d5868, %ebx
	sete	%al			/* %eax = (%ebx == 0x564d5868) */
	popl	%edx
	popl	%ecx
	popl	%ebx
	ret
	SET_SIZE(vmware_platform)
4364
4365#endif /* __i386 */
4366#endif /* __lint */
4367