xref: /titanic_44/usr/src/uts/intel/ia32/ml/i86_subr.s (revision 72612f86fafbe2510a166b48e158c9031e0dd63b)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27/*
28 *  Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.
29 *  Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T
30 *    All Rights Reserved
31 */
32
33/*
34 * General assembly language routines.
35 * It is the intent of this file to contain routines that are
36 * independent of the specific kernel architecture, and those that are
37 * common across kernel architectures.
38 * As architectures diverge, and implementations of specific
39 * architecture-dependent routines change, the routines should be moved
40 * from this file into the respective ../`arch -k`/subr.s file.
41 */
42
43#include <sys/asm_linkage.h>
44#include <sys/asm_misc.h>
45#include <sys/panic.h>
46#include <sys/ontrap.h>
47#include <sys/regset.h>
48#include <sys/privregs.h>
49#include <sys/reboot.h>
50#include <sys/psw.h>
51#include <sys/x86_archext.h>
52
53#if defined(__lint)
54#include <sys/types.h>
55#include <sys/systm.h>
56#include <sys/thread.h>
57#include <sys/archsystm.h>
58#include <sys/byteorder.h>
59#include <sys/dtrace.h>
60#include <sys/ftrace.h>
61#else	/* __lint */
62#include "assym.h"
63#endif	/* __lint */
64#include <sys/dditypes.h>
65
66/*
67 * on_fault()
68 * Catch lofault faults. Like setjmp except it returns one
69 * if code following causes uncorrectable fault. Turned off
70 * by calling no_fault().
71 */
72
#if defined(__lint)

/* ARGSUSED */
int
on_fault(label_t *ljb)
{ return (0); }

void
no_fault(void)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * int on_fault(label_t *ljb)
	 *
	 * Records ljb in curthread->t_onfault and the address of catch_fault
	 * in curthread->t_lofault, then tail-calls setjmp(ljb), so the
	 * caller sees setjmp's return value of 0.
	 */
	ENTRY(on_fault)
	movq	%gs:CPU_THREAD, %rsi		/* %rsi = curthread */
	movq	%rdi, T_ONFAULT(%rsi)		/* t_onfault = ljb */
	leaq	catch_fault(%rip), %rdx
	movq	%rdx, T_LOFAULT(%rsi)		/* t_lofault = catch_fault */
	jmp	setjmp				/* tail-call setjmp(ljb) */

	/*
	 * Target installed in t_lofault above.  Clears both hooks and
	 * longjmp()s to the saved jump buffer, so on_fault() appears to
	 * return a second time with the value 1.
	 */
catch_fault:
	movq	%gs:CPU_THREAD, %rsi		/* %rsi = curthread */
	xorl	%eax, %eax			/* %rax = 0 */
	movq	T_ONFAULT(%rsi), %rdi		/* %rdi = saved jump buffer */
	movq	%rax, T_ONFAULT(%rsi)		/* t_onfault = NULL */
	movq	%rax, T_LOFAULT(%rsi)		/* t_lofault = NULL */
	jmp	longjmp				/* tail-call longjmp(jumpbuf) */
	SET_SIZE(on_fault)

	/*
	 * void no_fault(void)
	 *
	 * Disarms on_fault(): zeroes t_onfault and t_lofault of curthread.
	 */
	ENTRY(no_fault)
	xorl	%eax, %eax			/* %rax = 0 */
	movq	%gs:CPU_THREAD, %rsi		/* %rsi = curthread */
	movq	%rax, T_ONFAULT(%rsi)		/* t_onfault = NULL */
	movq	%rax, T_LOFAULT(%rsi)		/* t_lofault = NULL */
	ret
	SET_SIZE(no_fault)

#elif defined(__i386)

	/*
	 * int on_fault(label_t *ljb)
	 *
	 * 32-bit flavour: ljb is read from the stack.  The argument slot is
	 * left in place so that the tail-called setjmp() finds it at 4(%esp).
	 */
	ENTRY(on_fault)
	movl	4(%esp), %eax			/* %eax = ljb */
	movl	%gs:CPU_THREAD, %edx		/* %edx = curthread */
	leal	catch_fault, %ecx
	movl	%eax, T_ONFAULT(%edx)		/* t_onfault = ljb */
	movl	%ecx, T_LOFAULT(%edx)		/* t_lofault = catch_fault */
	jmp	setjmp				/* tail-call setjmp(ljb) */

	/*
	 * Target installed in t_lofault above: clear both hooks, then
	 * longjmp() to the saved jump buffer.
	 */
catch_fault:
	movl	%gs:CPU_THREAD, %edx		/* %edx = curthread */
	xorl	%eax, %eax			/* %eax = 0 */
	movl	T_ONFAULT(%edx), %ecx		/* %ecx = saved jump buffer */
	movl	%eax, T_ONFAULT(%edx)		/* t_onfault = NULL */
	movl	%eax, T_LOFAULT(%edx)		/* t_lofault = NULL */
	pushl	%ecx				/* argument for longjmp */
	call	longjmp				/* longjmp jumps away; no return here */
	SET_SIZE(on_fault)

	/*
	 * void no_fault(void)
	 *
	 * Disarms on_fault(): zeroes t_onfault and t_lofault of curthread.
	 */
	ENTRY(no_fault)
	xorl	%eax, %eax			/* %eax = 0 */
	movl	%gs:CPU_THREAD, %edx		/* %edx = curthread */
	movl	%eax, T_ONFAULT(%edx)		/* t_onfault = NULL */
	movl	%eax, T_LOFAULT(%edx)		/* t_lofault = NULL */
	ret
	SET_SIZE(no_fault)

#endif	/* __i386 */
#endif	/* __lint */
142
143/*
144 * Default trampoline code for on_trap() (see <sys/ontrap.h>).  We just
145 * do a longjmp(&curthread->t_ontrap->ot_jmpbuf) if this is ever called.
146 */
147
#if defined(__lint)

void
on_trap_trampoline(void)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void on_trap_trampoline(void)
	 *
	 * Computes &curthread->t_ontrap->ot_jmpbuf and tail-calls longjmp()
	 * on it.
	 */
	ENTRY(on_trap_trampoline)
	movq	%gs:CPU_THREAD, %rsi		/* %rsi = curthread */
	movq	T_ONTRAP(%rsi), %rdi		/* %rdi = curthread->t_ontrap */
	addq	$OT_JMPBUF, %rdi		/* %rdi = &t_ontrap->ot_jmpbuf */
	jmp	longjmp
	SET_SIZE(on_trap_trampoline)

#elif defined(__i386)

	/*
	 * void on_trap_trampoline(void)
	 *
	 * 32-bit flavour: the jump buffer address is passed to longjmp()
	 * on the stack.
	 */
	ENTRY(on_trap_trampoline)
	movl	%gs:CPU_THREAD, %eax		/* %eax = curthread */
	movl	T_ONTRAP(%eax), %eax		/* %eax = curthread->t_ontrap */
	addl	$OT_JMPBUF, %eax		/* %eax = &t_ontrap->ot_jmpbuf */
	pushl	%eax				/* argument for longjmp */
	call	longjmp				/* longjmp jumps away; no return here */
	SET_SIZE(on_trap_trampoline)

#endif	/* __i386 */
#endif	/* __lint */
177
178/*
179 * Push a new element on to the t_ontrap stack.  Refer to <sys/ontrap.h> for
180 * more information about the on_trap() mechanism.  If the on_trap_data is the
181 * same as the topmost stack element, we just modify that element.
182 */
#if defined(__lint)

/*ARGSUSED*/
int
on_trap(on_trap_data_t *otp, uint_t prot)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * int on_trap(on_trap_data_t *otp, uint_t prot)
	 *
	 * In:	%rdi = otp, %esi = prot
	 *
	 * Initializes *otp, links it onto curthread->t_ontrap unless it is
	 * already the topmost element, and tail-calls setjmp() on
	 * &otp->ot_jmpbuf.
	 */
	ENTRY(on_trap)
	movw	%si, OT_PROT(%rdi)		/* otp->ot_prot = prot */
	movw	$0, OT_TRAP(%rdi)		/* otp->ot_trap = 0 */
	leaq	on_trap_trampoline(%rip), %rdx
	movq	%rdx, OT_TRAMPOLINE(%rdi)	/* ot_trampoline = default */
	xorl	%ecx, %ecx			/* %rcx = 0 */
	movq	%rcx, OT_HANDLE(%rdi)		/* otp->ot_handle = NULL */
	movq	%rcx, OT_PAD1(%rdi)		/* otp->ot_pad1 = NULL */

	movq	%gs:CPU_THREAD, %rdx		/* %rdx = curthread */
	movq	T_ONTRAP(%rdx), %rcx		/* %rcx = current stack top */
	cmpq	%rdi, %rcx			/* already on top? */
	je	0f				/* yes: leave the list alone */

	movq	%rcx, OT_PREV(%rdi)		/* otp->ot_prev = old top */
	movq	%rdi, T_ONTRAP(%rdx)		/* curthread->t_ontrap = otp */

0:	addq	$OT_JMPBUF, %rdi		/* %rdi = &otp->ot_jmpbuf */
	jmp	setjmp				/* tail-call setjmp */
	SET_SIZE(on_trap)

#elif defined(__i386)

	/*
	 * int on_trap(on_trap_data_t *otp, uint_t prot)
	 *
	 * 32-bit flavour: arguments come from the stack.  The first argument
	 * slot is overwritten with &otp->ot_jmpbuf so the tail-called
	 * setjmp() picks it up as its own argument.
	 */
	ENTRY(on_trap)
	movl	4(%esp), %eax			/* %eax = otp */
	movl	8(%esp), %edx			/* %edx = prot */

	movw	%dx, OT_PROT(%eax)		/* otp->ot_prot = prot */
	movw	$0, OT_TRAP(%eax)		/* otp->ot_trap = 0 */
	leal	on_trap_trampoline, %edx
	movl	%edx, OT_TRAMPOLINE(%eax)	/* ot_trampoline = default */
	movl	$0, OT_HANDLE(%eax)		/* otp->ot_handle = NULL */
	movl	$0, OT_PAD1(%eax)		/* otp->ot_pad1 = NULL */

	movl	%gs:CPU_THREAD, %edx		/* %edx = curthread */
	movl	T_ONTRAP(%edx), %ecx		/* %ecx = current stack top */
	cmpl	%eax, %ecx			/* already on top? */
	je	0f				/* yes: leave the list alone */

	movl	%ecx, OT_PREV(%eax)		/* otp->ot_prev = old top */
	movl	%eax, T_ONTRAP(%edx)		/* curthread->t_ontrap = otp */

0:	addl	$OT_JMPBUF, %eax		/* %eax = &otp->ot_jmpbuf */
	movl	%eax, 4(%esp)			/* becomes setjmp's argument */
	jmp	setjmp				/* tail-call setjmp */
	SET_SIZE(on_trap)

#endif	/* __i386 */
#endif	/* __lint */
241
242/*
243 * Setjmp and longjmp implement non-local gotos using state vectors
244 * type label_t.
245 */
246
#if defined(__lint)

/* ARGSUSED */
int
setjmp(label_t *lp)
{ return (0); }

/* ARGSUSED */
void
longjmp(label_t *lp)
{}

#else	/* __lint */

#if LABEL_PC != 0
#error LABEL_PC MUST be defined as 0 for setjmp/longjmp to work as coded
#endif	/* LABEL_PC != 0 */

#if defined(__amd64)

	/*
	 * int setjmp(label_t *lp)
	 *
	 * In:	%rdi = lp
	 * Out:	%eax = 0
	 *
	 * Saves the return address, %rsp, %rbp, %rbx and %r12-%r15 into *lp.
	 */
	ENTRY(setjmp)
	movq	(%rsp), %rdx		/* our return address ... */
	movq	%rdx, (%rdi)		/* ... stored at LABEL_PC (offset 0) */
	movq	%rsp, LABEL_SP(%rdi)
	movq	%rbx, LABEL_RBX(%rdi)
	movq	%rbp, LABEL_RBP(%rdi)
	movq	%r12, LABEL_R12(%rdi)
	movq	%r13, LABEL_R13(%rdi)
	movq	%r14, LABEL_R14(%rdi)
	movq	%r15, LABEL_R15(%rdi)
	xorl	%eax, %eax		/* direct invocation returns 0 */
	ret
	SET_SIZE(setjmp)

	/*
	 * void longjmp(label_t *lp)
	 *
	 * In:	%rdi = lp
	 *
	 * Reloads the registers saved by setjmp(), plants the saved pc in the
	 * return-address slot of the restored stack and returns through it
	 * with %eax = 1.
	 */
	ENTRY(longjmp)
	movq	LABEL_SP(%rdi), %rsp	/* switch to the saved stack */
	movq	LABEL_RBX(%rdi), %rbx
	movq	LABEL_RBP(%rdi), %rbp
	movq	LABEL_R12(%rdi), %r12
	movq	LABEL_R13(%rdi), %r13
	movq	LABEL_R14(%rdi), %r14
	movq	LABEL_R15(%rdi), %r15
	movq	(%rdi), %rdx		/* saved pc; LABEL_PC is 0 */
	movq	%rdx, (%rsp)		/* becomes the address ret pops */
	xorl	%eax, %eax
	incl	%eax			/* return value 1 */
	ret
	SET_SIZE(longjmp)

#elif defined(__i386)

	/*
	 * int setjmp(label_t *lp)
	 *
	 * Saves the return address, %esp, %ebp, %ebx, %esi and %edi into
	 * *lp and returns 0.
	 */
	ENTRY(setjmp)
	movl	4(%esp), %edx		/* %edx = lp */
	movl	(%esp), %ecx		/* our return address ... */
	movl	%ecx, (%edx)		/* ... stored at LABEL_PC (offset 0) */
	movl	%esp, 4(%edx)		/* stack pointer, at offset 4 */
	movl	%ebp, LABEL_EBP(%edx)
	movl	%ebx, LABEL_EBX(%edx)
	movl	%esi, LABEL_ESI(%edx)
	movl	%edi, LABEL_EDI(%edx)
	subl	%eax, %eax		/* direct invocation returns 0 */
	ret
	SET_SIZE(setjmp)

	/*
	 * void longjmp(label_t *lp)
	 *
	 * Reloads the registers saved by setjmp(), discards the
	 * return-address slot on the restored stack and jumps to the saved
	 * pc with %eax = 1.
	 */
	ENTRY(longjmp)
	movl	4(%esp), %edx		/* %edx = lp */
	movl	$1, %eax		/* return value 1 */
	movl	LABEL_EBP(%edx), %ebp
	movl	LABEL_EBX(%edx), %ebx
	movl	LABEL_ESI(%edx), %esi
	movl	LABEL_EDI(%edx), %edi
	movl	4(%edx), %esp		/* switch to the saved stack */
	movl	(%edx), %ecx		/* saved pc; LABEL_PC is 0 */
	addl	$4, %esp		/* drop the return-address slot */
	jmp	*%ecx			/* resume after the setjmp call */
	SET_SIZE(longjmp)

#endif	/* __i386 */
#endif	/* __lint */
326
327/*
328 * if a() calls b() calls caller(),
329 * caller() returns return address in a().
330 * (Note: We assume a() and b() are C routines which do the normal entry/exit
331 *  sequence.)
332 */
333
#if defined(__lint)

caddr_t
caller(void)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * caddr_t caller(void)
	 *
	 * Returns the word stored just above the saved frame pointer that
	 * %rbp addresses.  caller() builds no frame of its own, so %rbp is
	 * still that of the function which called it.
	 */
	ENTRY(caller)
	movq	8(%rbp), %rax		/* return pc saved above b()'s frame */
	ret
	SET_SIZE(caller)

#elif defined(__i386)

	/*
	 * caddr_t caller(void)
	 *
	 * 32-bit flavour: the return pc sits 4 bytes above the saved %ebp.
	 */
	ENTRY(caller)
	movl	4(%ebp), %eax		/* return pc saved above b()'s frame */
	ret
	SET_SIZE(caller)

#endif	/* __i386 */
#endif	/* __lint */
358
359/*
360 * if a() calls callee(), callee() returns the
361 * return address in a();
362 */
363
#if defined(__lint)

caddr_t
callee(void)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * caddr_t callee(void)
	 *
	 * Returns its own return address, i.e. the word at the top of the
	 * stack on entry.
	 */
	ENTRY(callee)
	movq	(%rsp), %rax		/* our return pc, inside a() */
	ret
	SET_SIZE(callee)

#elif defined(__i386)

	/*
	 * caddr_t callee(void)
	 *
	 * 32-bit flavour of the above.
	 */
	ENTRY(callee)
	movl	(%esp), %eax		/* our return pc, inside a() */
	ret
	SET_SIZE(callee)

#endif	/* __i386 */
#endif	/* __lint */
388
389/*
390 * return the current frame pointer
391 */
392
#if defined(__lint)

greg_t
getfp(void)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * greg_t getfp(void)
	 *
	 * Returns the current value of %rbp.  No frame is built here, so
	 * this is the %rbp the calling function was running with.
	 */
	ENTRY(getfp)
	movq	%rbp, %rax
	ret
	SET_SIZE(getfp)

#elif defined(__i386)

	/*
	 * greg_t getfp(void)
	 *
	 * Returns the current value of %ebp.
	 */
	ENTRY(getfp)
	movl	%ebp, %eax
	ret
	SET_SIZE(getfp)

#endif	/* __i386 */
#endif	/* __lint */
417
418/*
419 * Invalidate a single page table entry in the TLB
420 */
421
#if defined(__lint)

/* ARGSUSED */
void
mmu_tlbflush_entry(caddr_t m)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void mmu_tlbflush_entry(caddr_t m)
	 *
	 * In:	%rdi = m
	 *
	 * Issues invlpg on the address m.
	 */
	ENTRY(mmu_tlbflush_entry)
	invlpg	(%rdi)
	ret
	SET_SIZE(mmu_tlbflush_entry)

#elif defined(__i386)

	/*
	 * void mmu_tlbflush_entry(caddr_t m)
	 *
	 * 32-bit flavour: m is fetched from the stack first.
	 */
	ENTRY(mmu_tlbflush_entry)
	movl	4(%esp), %eax		/* %eax = m */
	invlpg	(%eax)
	ret
	SET_SIZE(mmu_tlbflush_entry)

#endif	/* __i386 */
#endif	/* __lint */
448
449
450/*
451 * Get/Set the value of various control registers
452 */
453
#if defined(__lint)

ulong_t
getcr0(void)
{ return (0); }

/* ARGSUSED */
void
setcr0(ulong_t value)
{}

ulong_t
getcr2(void)
{ return (0); }

ulong_t
getcr3(void)
{ return (0); }

#if !defined(__xpv)
/* ARGSUSED */
void
setcr3(ulong_t val)
{}

void
reload_cr3(void)
{}
#endif

ulong_t
getcr4(void)
{ return (0); }

/* ARGSUSED */
void
setcr4(ulong_t val)
{}

#if defined(__amd64)

ulong_t
getcr8(void)
{ return (0); }

/* ARGSUSED */
void
setcr8(ulong_t val)
{}

#endif	/* __amd64 */

#else	/* __lint */

#if defined(__amd64)

	/*
	 * 64-bit accessors.  Getters return the register in %rax; setters
	 * take the new value in %rdi.
	 */

	ENTRY(getcr0)
	movq	%cr0, %rax
	ret
	SET_SIZE(getcr0)

	ENTRY(setcr0)
	movq	%rdi, %cr0
	ret
	SET_SIZE(setcr0)

	/*
	 * Under __xpv, %cr2 is not read directly; the value comes from the
	 * VCPU_INFO_ARCH_CR2 field of the structure that %gs:CPU_VCPU_INFO
	 * points to.
	 */
	ENTRY(getcr2)
#if defined(__xpv)
	movq	%gs:CPU_VCPU_INFO, %rax
	movq	VCPU_INFO_ARCH_CR2(%rax), %rax
#else
	movq	%cr2, %rax
#endif
	ret
	SET_SIZE(getcr2)

	ENTRY(getcr3)
	movq	%cr3, %rax
	ret
	SET_SIZE(getcr3)

#if !defined(__xpv)

	ENTRY(setcr3)
	movq	%rdi, %cr3
	ret
	SET_SIZE(setcr3)

	/* Writes %cr3 back with the value just read from it. */
	ENTRY(reload_cr3)
	movq	%cr3, %rdi
	movq	%rdi, %cr3
	ret
	SET_SIZE(reload_cr3)

#endif	/* !__xpv */

	ENTRY(getcr4)
	movq	%cr4, %rax
	ret
	SET_SIZE(getcr4)

	ENTRY(setcr4)
	movq	%rdi, %cr4
	ret
	SET_SIZE(setcr4)

	ENTRY(getcr8)
	movq	%cr8, %rax
	ret
	SET_SIZE(getcr8)

	ENTRY(setcr8)
	movq	%rdi, %cr8
	ret
	SET_SIZE(setcr8)

#elif defined(__i386)

	/*
	 * 32-bit accessors.  Getters return the register in %eax; setters
	 * fetch the new value from 4(%esp).
	 */

	ENTRY(getcr0)
	movl	%cr0, %eax
	ret
	SET_SIZE(getcr0)

	ENTRY(setcr0)
	movl	4(%esp), %eax
	movl	%eax, %cr0
	ret
	SET_SIZE(setcr0)

	/* See the 64-bit getcr2 for the __xpv case. */
	ENTRY(getcr2)
#if defined(__xpv)
	movl	%gs:CPU_VCPU_INFO, %eax
	movl	VCPU_INFO_ARCH_CR2(%eax), %eax
#else
	movl	%cr2, %eax
#endif
	ret
	SET_SIZE(getcr2)

	ENTRY(getcr3)
	movl	%cr3, %eax
	ret
	SET_SIZE(getcr3)

#if !defined(__xpv)

	ENTRY(setcr3)
	movl	4(%esp), %eax
	movl	%eax, %cr3
	ret
	SET_SIZE(setcr3)

	/* Writes %cr3 back with the value just read from it. */
	ENTRY(reload_cr3)
	movl	%cr3, %eax
	movl	%eax, %cr3
	ret
	SET_SIZE(reload_cr3)

#endif	/* !__xpv */

	ENTRY(getcr4)
	movl	%cr4, %eax
	ret
	SET_SIZE(getcr4)

	ENTRY(setcr4)
	movl	4(%esp), %eax
	movl	%eax, %cr4
	ret
	SET_SIZE(setcr4)

#endif	/* __i386 */
#endif	/* __lint */
627
#if defined(__lint)

/*ARGSUSED*/
uint32_t
__cpuid_insn(struct cpuid_regs *regs)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * uint32_t __cpuid_insn(struct cpuid_regs *regs)
	 *
	 * In:	%rdi = regs
	 * Out:	%eax = value cpuid left in %eax (also stored to regs->cp_eax)
	 *
	 * Loads %eax/%ebx/%ecx/%edx from the four 32-bit words at regs,
	 * executes cpuid, and stores the four results back in place.
	 * %rbx, %rcx and %rdx are parked in %r8, %r9 and %r11 around the
	 * instruction and restored before returning.
	 */
	ENTRY(__cpuid_insn)
	movq	%rbx, %r8		/* park %rbx */
	movq	%rcx, %r9		/* park %rcx */
	movq	%rdx, %r11		/* park %rdx */

	movl	0xc(%rdi), %edx		/* %edx = regs->cp_edx */
	movl	0x8(%rdi), %ecx		/* %ecx = regs->cp_ecx */
	movl	0x4(%rdi), %ebx		/* %ebx = regs->cp_ebx */
	movl	(%rdi), %eax		/* %eax = regs->cp_eax */
	cpuid
	movl	%eax, (%rdi)		/* regs->cp_eax = %eax */
	movl	%ebx, 0x4(%rdi)		/* regs->cp_ebx = %ebx */
	movl	%ecx, 0x8(%rdi)		/* regs->cp_ecx = %ecx */
	movl	%edx, 0xc(%rdi)		/* regs->cp_edx = %edx */

	movq	%r11, %rdx		/* restore %rdx */
	movq	%r9, %rcx		/* restore %rcx */
	movq	%r8, %rbx		/* restore %rbx */
	ret
	SET_SIZE(__cpuid_insn)

#elif defined(__i386)

	/*
	 * uint32_t __cpuid_insn(struct cpuid_regs *regs)
	 *
	 * 32-bit flavour: %ebp holds regs for the duration; %ebp, %ebx,
	 * %ecx and %edx are saved on the stack and restored on exit.
	 * Returns the post-cpuid %eax.
	 */
	ENTRY(__cpuid_insn)
	pushl	%ebp
	movl	0x8(%esp), %ebp		/* %ebp = regs (arg, past saved %ebp) */
	pushl	%ebx
	pushl	%ecx
	pushl	%edx

	movl	0xc(%ebp), %edx		/* %edx = regs->cp_edx */
	movl	0x8(%ebp), %ecx		/* %ecx = regs->cp_ecx */
	movl	0x4(%ebp), %ebx		/* %ebx = regs->cp_ebx */
	movl	(%ebp), %eax		/* %eax = regs->cp_eax */
	cpuid
	movl	%eax, (%ebp)		/* regs->cp_eax = %eax */
	movl	%ebx, 0x4(%ebp)		/* regs->cp_ebx = %ebx */
	movl	%ecx, 0x8(%ebp)		/* regs->cp_ecx = %ecx */
	movl	%edx, 0xc(%ebp)		/* regs->cp_edx = %edx */

	popl	%edx
	popl	%ecx
	popl	%ebx
	popl	%ebp
	ret
	SET_SIZE(__cpuid_insn)

#endif	/* __i386 */
#endif	/* __lint */
684
685#if defined(__xpv)
686	/*
687	 * Defined in C
688	 */
689#else
690
#if defined(__lint)

/*ARGSUSED*/
void
i86_monitor(volatile uint32_t *addr, uint32_t extensions, uint32_t hints)
{ return; }

#else   /* __lint */

#if defined(__amd64)

	/*
	 * void i86_monitor(volatile uint32_t *addr, uint32_t extensions,
	 *     uint32_t hints)
	 *
	 * In:	%rdi = addr, %rsi = extensions, %rdx = hints
	 *
	 * Places addr in %rax and extensions in %rcx (hints is already in
	 * %rdx) and executes MONITOR, emitted as raw opcode bytes.
	 */
	ENTRY_NP(i86_monitor)
	pushq	%rbp
	movq	%rsp, %rbp
	movq	%rsi, %rcx		/* %rcx = extensions */
	movq	%rdi, %rax		/* %rax = addr */
	/* %rdx already carries the third argument, hints */
	.byte	0x0f, 0x01, 0xc8	/* monitor */
	leave
	ret
	SET_SIZE(i86_monitor)

#elif defined(__i386)

	/*
	 * 32-bit flavour: all three operands are loaded from the stack frame.
	 */
	ENTRY_NP(i86_monitor)
	pushl	%ebp
	movl	%esp, %ebp
	movl	0x10(%ebp), %edx	/* %edx = hints */
	movl	0xc(%ebp), %ecx		/* %ecx = extensions */
	movl	0x8(%ebp), %eax		/* %eax = addr */
	.byte	0x0f, 0x01, 0xc8	/* monitor */
	leave
	ret
	SET_SIZE(i86_monitor)

#endif	/* __i386 */
#endif	/* __lint */
728
#if defined(__lint)

/*ARGSUSED*/
void
i86_mwait(uint32_t data, uint32_t extensions)
{ return; }

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void i86_mwait(uint32_t data, uint32_t extensions)
	 *
	 * In:	%rdi = data, %rsi = extensions
	 *
	 * Places data in %rax and extensions in %rcx and executes MWAIT,
	 * emitted as raw opcode bytes.
	 */
	ENTRY_NP(i86_mwait)
	pushq	%rbp
	movq	%rsp, %rbp
	movq	%rsi, %rcx		/* %rcx = extensions */
	movq	%rdi, %rax		/* %rax = data */
	.byte	0x0f, 0x01, 0xc9	/* mwait */
	leave
	ret
	SET_SIZE(i86_mwait)

#elif defined(__i386)

	/*
	 * 32-bit flavour: both operands are loaded from the stack frame.
	 */
	ENTRY_NP(i86_mwait)
	pushl	%ebp
	movl	%esp, %ebp
	movl	0xc(%ebp), %ecx		/* %ecx = extensions */
	movl	0x8(%ebp), %eax		/* %eax = data */
	.byte	0x0f, 0x01, 0xc9	/* mwait */
	leave
	ret
	SET_SIZE(i86_mwait)

#endif	/* __i386 */
#endif	/* __lint */
764
765#if defined(__lint)
766
767hrtime_t
768tsc_read(void)
769{
770	return (0);
771}
772
773#else	/* __lint */
774
775#if defined(__amd64)
776
	/*
	 * hrtime_t tsc_read(void)
	 *
	 * Default body: cpuid (leaf 0) followed by rdtsc, returning
	 * (%edx << 32) | %eax in %rax.
	 *
	 * The function also contains four alternative bodies, each bracketed
	 * by a pair of exported labels: _tsc_mfence_{start,end},
	 * _tscp_{start,end}, _no_rdtsc_{start,end} and
	 * _tsc_lfence_{start,end}.
	 * NOTE(review): these look like templates that other code copies
	 * over the start of tsc_read according to CPU capabilities -- confirm
	 * against the code that references the labels before changing any
	 * instruction, its encoding, or the layout of this function.
	 */
777	ENTRY_NP(tsc_read)
778	movq	%rbx, %r11		/* cpuid overwrites %ebx; park %rbx */
779	movl	$0, %eax		/* cpuid leaf 0 */
780	cpuid				/* presumably used as a barrier -- confirm */
781	rdtsc				/* counter in %edx:%eax */
782	movq	%r11, %rbx		/* restore %rbx */
783	shlq	$32, %rdx
784	orq	%rdx, %rax		/* %rax = (%rdx << 32) | %rax */
785	ret
	/* Alternative body: mfence, then rdtsc. */
786	.globl _tsc_mfence_start
787_tsc_mfence_start:
788	mfence
789	rdtsc
790	shlq	$32, %rdx
791	orq	%rdx, %rax
792	ret
793	.globl _tsc_mfence_end
794_tsc_mfence_end:
	/* Alternative body: rdtscp, emitted as raw opcode bytes. */
795	.globl _tscp_start
796_tscp_start:
797	.byte	0x0f, 0x01, 0xf9	/* rdtscp instruction */
798	shlq	$32, %rdx
799	orq	%rdx, %rax
800	ret
801	.globl _tscp_end
802_tscp_end:
	/* Alternative body: no counter read at all; returns 0. */
803	.globl _no_rdtsc_start
804_no_rdtsc_start:
805	xorl	%edx, %edx
806	xorl	%eax, %eax
807	ret
808	.globl _no_rdtsc_end
809_no_rdtsc_end:
	/* Alternative body: lfence, then rdtsc. */
810	.globl _tsc_lfence_start
811_tsc_lfence_start:
812	lfence
813	rdtsc
814	shlq	$32, %rdx
815	orq	%rdx, %rax
816	ret
817	.globl _tsc_lfence_end
818_tsc_lfence_end:
819	SET_SIZE(tsc_read)
820
821#else /* __i386 */
822
	/*
	 * hrtime_t tsc_read(void)
	 *
	 * 32-bit flavour: the 64-bit result is left in %edx:%eax exactly as
	 * rdtsc produces it, so no shift/or is needed.  The same set of
	 * labelled alternative bodies follows the default one; see the
	 * NOTE(review) on the 64-bit version.
	 */
823	ENTRY_NP(tsc_read)
824	pushl	%ebx			/* cpuid overwrites %ebx; save it */
825	movl	$0, %eax		/* cpuid leaf 0 */
826	cpuid				/* presumably used as a barrier -- confirm */
827	rdtsc				/* counter in %edx:%eax */
828	popl	%ebx			/* restore %ebx */
829	ret
	/* Alternative body: mfence, then rdtsc. */
830	.globl _tsc_mfence_start
831_tsc_mfence_start:
832	mfence
833	rdtsc
834	ret
835	.globl _tsc_mfence_end
836_tsc_mfence_end:
	/* Alternative body: rdtscp, emitted as raw opcode bytes. */
837	.globl	_tscp_start
838_tscp_start:
839	.byte	0x0f, 0x01, 0xf9	/* rdtscp instruction */
840	ret
841	.globl _tscp_end
842_tscp_end:
	/* Alternative body: no counter read at all; returns 0. */
843	.globl _no_rdtsc_start
844_no_rdtsc_start:
845	xorl	%edx, %edx
846	xorl	%eax, %eax
847	ret
848	.globl _no_rdtsc_end
849_no_rdtsc_end:
	/* Alternative body: lfence, then rdtsc. */
850	.globl _tsc_lfence_start
851_tsc_lfence_start:
852	lfence
853	rdtsc
854	ret
855	.globl _tsc_lfence_end
856_tsc_lfence_end:
857	SET_SIZE(tsc_read)
858
859#endif	/* __i386 */
860
861#endif	/* __lint */
862
863
864#endif	/* __xpv */
865
#ifdef __lint
/*
 * Do not use this function to obtain the clock tick.  It exists for
 * callers that do not need a guaranteed-correct tick value; the proper
 * routine for everyone else is tsc_read().
 */
hrtime_t
randtick(void)
{
	return (0);
}
#else
#if defined(__amd64)
	/*
	 * hrtime_t randtick(void)
	 *
	 * Bare rdtsc; returns (%edx << 32) | %eax in %rax.
	 */
	ENTRY_NP(randtick)
	rdtsc				/* counter in %edx:%eax */
	shlq	$32, %rdx
	orq	%rdx, %rax		/* fold both halves into %rax */
	ret
	SET_SIZE(randtick)
#else
	/*
	 * hrtime_t randtick(void)
	 *
	 * Bare rdtsc; the result is returned in %edx:%eax as produced.
	 */
	ENTRY_NP(randtick)
	rdtsc
	ret
	SET_SIZE(randtick)
#endif /* __i386 */
#endif /* __lint */
892/*
893 * Insert entryp after predp in a doubly linked list.
894 */
895
#if defined(__lint)

/*ARGSUSED*/
void
_insque(caddr_t entryp, caddr_t predp)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void _insque(caddr_t entryp, caddr_t predp)
	 *
	 * In:	%rdi = entryp, %rsi = predp
	 *
	 * Each element begins with two pointers: forw at offset 0 and back
	 * at offset CPTRSIZE.  entryp is linked in between predp and
	 * predp's old successor.
	 */
	ENTRY(_insque)
	movq	(%rsi), %rax		/* %rax = old predp->forw */
	movq	%rax, (%rdi)		/* entryp->forw = old predp->forw */
	movq	%rsi, CPTRSIZE(%rdi)	/* entryp->back = predp */
	movq	%rdi, (%rsi)		/* predp->forw = entryp */
	movq	%rdi, CPTRSIZE(%rax)	/* old successor's back = entryp */
	ret
	SET_SIZE(_insque)

#elif defined(__i386)

	/*
	 * void _insque(caddr_t entryp, caddr_t predp)
	 *
	 * 32-bit flavour: %ecx = entryp and %edx = predp, both fetched from
	 * the stack.
	 */
	ENTRY(_insque)
	movl	4(%esp), %ecx		/* %ecx = entryp */
	movl	8(%esp), %edx		/* %edx = predp */
	movl	(%edx), %eax		/* %eax = old predp->forw */
	movl	%eax, (%ecx)		/* entryp->forw = old predp->forw */
	movl	%edx, CPTRSIZE(%ecx)	/* entryp->back = predp */
	movl	%ecx, (%edx)		/* predp->forw = entryp */
	movl	%ecx, CPTRSIZE(%eax)	/* old successor's back = entryp */
	ret
	SET_SIZE(_insque)

#endif	/* __i386 */
#endif	/* __lint */
931
932/*
933 * Remove entryp from a doubly linked list
934 */
935
#if defined(__lint)

/*ARGSUSED*/
void
_remque(caddr_t entryp)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void _remque(caddr_t entryp)
	 *
	 * In:	%rdi = entryp
	 *
	 * Unlinks entryp by pointing its neighbours at each other.  The
	 * forw pointer lives at offset 0, back at offset CPTRSIZE.  entryp's
	 * own two pointers are left untouched.
	 */
	ENTRY(_remque)
	movq	CPTRSIZE(%rdi), %rdx	/* %rdx = entryp->back */
	movq	(%rdi), %rax		/* %rax = entryp->forw */
	movq	%rax, (%rdx)		/* back->forw = entryp->forw */
	movq	%rdx, CPTRSIZE(%rax)	/* forw->back = entryp->back */
	ret
	SET_SIZE(_remque)

#elif defined(__i386)

	/*
	 * void _remque(caddr_t entryp)
	 *
	 * 32-bit flavour: entryp is fetched from the stack into %ecx.
	 */
	ENTRY(_remque)
	movl	4(%esp), %ecx		/* %ecx = entryp */
	movl	CPTRSIZE(%ecx), %edx	/* %edx = entryp->back */
	movl	(%ecx), %eax		/* %eax = entryp->forw */
	movl	%eax, (%edx)		/* back->forw = entryp->forw */
	movl	%edx, CPTRSIZE(%eax)	/* forw->back = entryp->back */
	ret
	SET_SIZE(_remque)

#endif	/* __i386 */
#endif	/* __lint */
968
969/*
970 * Returns the number of
971 * non-NULL bytes in string argument.
972 */
973
#if defined(__lint)

/* ARGSUSED */
size_t
strlen(const char *str)
{ return (0); }

#else	/* __lint */

#if defined(__amd64)

/*
 * The 64-bit routine is little more than a hand translation of the C
 * below.  It should either simply -be- C, or earn its place in assembler
 * by being significantly faster.
 *
 * size_t
 * strlen(const char *s)
 * {
 *	const char *s0;
 * #if defined(DEBUG)
 *	if ((uintptr_t)s < postbootkernelbase)
 *		panic(.str_panic_msg);
 * #endif
 *	for (s0 = s; *s; s++)
 *		;
 *	return (s - s0);
 * }
 */

	ENTRY(strlen)
#ifdef DEBUG
	/* DEBUG only: refuse pointers below postbootkernelbase. */
	movq	postbootkernelbase(%rip), %rax
	cmpq	%rax, %rdi
	jae	str_valid			/* s >= base: acceptable */
	pushq	%rbp
	movq	%rsp, %rbp
	leaq	.str_panic_msg(%rip), %rdi
	xorl	%eax, %eax
	call	panic
#endif	/* DEBUG */
str_valid:
	cmpb	$0, (%rdi)			/* empty string? */
	movq	%rdi, %rax			/* %rax = s0 (flags untouched) */
	je	.null_found
	.align	4
.strlen_loop:
	incq	%rdi				/* s++ */
	cmpb	$0, (%rdi)
	jne	.strlen_loop			/* until *s == 0 */
.null_found:
	subq	%rax, %rdi			/* %rdi = s - s0 */
	movq	%rdi, %rax			/* return the length */
	ret
	SET_SIZE(strlen)

#elif defined(__i386)

	/*
	 * size_t strlen(const char *str)
	 *
	 * Walks byte by byte until the pointer is 4-byte aligned, then scans
	 * a word at a time.  For a word w the expression
	 *
	 *	(((w & 0x7f7f7f7f) + 0x7f7f7f7f) | w) & 0x80808080
	 *
	 * has bit 7 set in every byte position where w is non-zero, so it
	 * equals 0x80808080 exactly when w holds no zero byte.  When a word
	 * does hold one, the byte loop is re-entered at the start of that
	 * word to find it.
	 */
	ENTRY(strlen)
#ifdef DEBUG
	/* DEBUG only: refuse pointers below postbootkernelbase. */
	movl	postbootkernelbase, %eax
	cmpl	%eax, 4(%esp)
	jae	str_valid			/* str >= base: acceptable */
	pushl	%ebp
	movl	%esp, %ebp
	pushl	$.str_panic_msg
	call	panic
#endif /* DEBUG */

str_valid:
	movl	4(%esp), %eax		/* %eax = cursor, starts at str */
	testl	$3, %eax		/* low two bits set? */
	jnz	.not_word_aligned	/* yes: go byte-wise first */
	.align	4
.word_aligned:
	movl	(%eax), %edx		/* %edx = w, the next four bytes */
	movl	$0x7f7f7f7f, %ecx
	andl	%edx, %ecx		/* %ecx = w & 0x7f7f7f7f */
	addl	$4, %eax		/* advance cursor past this word */
	addl	$0x7f7f7f7f, %ecx	/* carry into bit 7 if low 7 bits set */
	orl	%edx, %ecx		/* fold in bytes that had bit 7 set */
	andl	$0x80808080, %ecx	/* keep one flag bit per byte */
	cmpl	$0x80808080, %ecx	/* all four bytes non-zero? */
	je	.word_aligned		/* yes: keep scanning words */
	subl	$4, %eax		/* no: back up to this word's start */
.not_word_aligned:
	cmpb	$0, (%eax)		/* terminator at the cursor? */
	je	.null_found
	incl	%eax			/* step one byte */
	testl	$3, %eax		/* aligned yet? */
	jnz	.not_word_aligned	/* no: stay byte-wise */
	jmp	.word_aligned		/* yes: switch to word scan */
	.align	4
.null_found:
	subl	4(%esp), %eax		/* length = cursor - str */
	ret
	SET_SIZE(strlen)

#endif	/* __i386 */

#ifdef DEBUG
	.text
.str_panic_msg:
	.string "strlen: argument below kernelbase"
#endif /* DEBUG */

#endif	/* __lint */
1081
1082	/*
1083	 * Berkeley 4.3 introduced symbolically named interrupt levels
1084	 * as a way to deal with priority in a machine independent fashion.
1085	 * Numbered priorities are machine specific, and should be
1086	 * discouraged where possible.
1087	 *
1088	 * Note, for the machine specific priorities there are
1089	 * examples listed for devices that use a particular priority.
1090	 * It should not be construed that all devices of that
1091	 * type should be at that priority.  It is simply where
1092	 * the current devices fit into the priority scheme based
1093	 * upon time criticality.
1094	 *
1095	 * The underlying assumption of these assignments is that
1096	 * IPL 10 is the highest level from which a device
1097	 * routine can call wakeup.  Devices that interrupt from higher
1098	 * levels are restricted in what they can do.  If they need
1099	 * kernels services they should schedule a routine at a lower
1100	 * level (via software interrupt) to do the required
1101	 * processing.
1102	 *
1103	 * Examples of this higher usage:
1104	 *	Level	Usage
1105	 *	14	Profiling clock (and PROM uart polling clock)
1106	 *	12	Serial ports
1107	 *
1108	 * The serial ports request lower level processing on level 6.
1109	 *
1110	 * Also, almost all splN routines (where N is a number or a
1111	 * mnemonic) will do a RAISE(), on the assumption that they are
1112	 * never used to lower our priority.
1113	 * The exceptions are:
1114	 *	spl8()		Because you can't be above 15 to begin with!
1115	 *	splzs()		Because this is used at boot time to lower our
1116	 *			priority, to allow the PROM to poll the uart.
1117	 *	spl0()		Used to lower priority to 0.
1118	 */
1119
#if defined(__lint)

int spl0(void)		{ return (0); }
int spl6(void)		{ return (0); }
int spl7(void)		{ return (0); }
int spl8(void)		{ return (0); }
int splhigh(void)	{ return (0); }
int splhi(void)		{ return (0); }
int splzs(void)		{ return (0); }

/* ARGSUSED */
void
splx(int level)
{}

#else	/* __lint */

/*
 * SETPRI(level) hands `level' to do_splx; RAISE(level) hands it to splr.
 * On amd64 the level goes in %edi and control is transferred with a jmp
 * (tail call).  On i386 the level is pushed, the routine is called, the
 * argument is popped and the macro returns.
 */

#if defined(__amd64)

#define	SETPRI(level) \
	movl	$/**/level, %edi;	/* new priority */		\
	jmp	do_splx			/* redirect to do_splx */

#define	RAISE(level) \
	movl	$/**/level, %edi;	/* new priority */		\
	jmp	splr			/* redirect to splr */

#elif defined(__i386)

#define	SETPRI(level) \
	pushl	$/**/level;	/* new priority */			\
	call	do_splx;	/* invoke common splx code */		\
	addl	$4, %esp;	/* unstack arg */			\
	ret

#define	RAISE(level) \
	pushl	$/**/level;	/* new priority */			\
	call	splr;		/* invoke common splr code */		\
	addl	$4, %esp;	/* unstack args */			\
	ret

#endif	/* __i386 */

	/* spl8: priority 15 -- blocks every interrupt, memory errors too */
	ENTRY(spl8)
	SETPRI(15)
	SET_SIZE(spl8)

	/* spl7: priority 13 -- one step under where profiling runs */
	ENTRY(spl7)
	RAISE(13)
	SET_SIZE(spl7)

	/* splzs: priority 12 -- sun specific, onboard serial (asy) ports */
	ENTRY(splzs)
	SETPRI(12)	/* SETPRI, not RAISE: this one is also used to lower */
	SET_SIZE(splzs)

	/* splhi and its aliases all raise to DISP_LEVEL */
	ENTRY(splhi)
	ALTENTRY(splhigh)
	ALTENTRY(spl6)
	ALTENTRY(i_ddi_splhigh)

	RAISE(DISP_LEVEL)

	SET_SIZE(i_ddi_splhigh)
	SET_SIZE(spl6)
	SET_SIZE(splhigh)
	SET_SIZE(splhi)

	/* spl0: priority 0 -- every interrupt allowed */
	ENTRY(spl0)
	SETPRI(0)
	SET_SIZE(spl0)


	/* splx implementation: arguments are passed through untouched */
	ENTRY(splx)
	jmp	do_splx		/* common splx code does the work */
	SET_SIZE(splx)

#endif	/* __lint */
1201#endif	/* __lint */
1202
#if defined(__i386)

/*
 * Accessors for the %gs segment register (32-bit kernel only).
 */

#if defined(__lint)

/*ARGSUSED*/
uint16_t
getgs(void)
{ return (0); }

/*ARGSUSED*/
void
setgs(uint16_t sel)
{}

#else	/* __lint */

	/*
	 * uint16_t getgs(void)
	 *
	 * Returns the %gs selector in %ax; %eax is cleared first so its
	 * upper half is zero.
	 */
	ENTRY(getgs)
	clr	%eax
	movw	%gs, %ax
	ret
	SET_SIZE(getgs)

	/*
	 * void setgs(uint16_t sel)
	 *
	 * Loads %gs directly from the stack argument.
	 */
	ENTRY(setgs)
	movw	4(%esp), %gs
	ret
	SET_SIZE(setgs)

#endif	/* __lint */
#endif	/* __i386 */
1236
#if defined(__lint)

void
pc_reset(void)
{}

void
efi_reset(void)
{}

#else	/* __lint */

	/*
	 * wait_500ms()
	 *
	 * Calls tenmicrosec() 50000 times (50000 x 10us = 500ms, going by
	 * the helper's name).  The loop counter lives in %ebx, which is
	 * saved here and restored before returning.  The save/restore must
	 * use the full-width register on amd64, since a 32-bit push/pop is
	 * not encodable in 64-bit mode.
	 */
	ENTRY(wait_500ms)
#if defined(__amd64)
	pushq	%rbx
#elif defined(__i386)
	pushl	%ebx
#endif
	movl	$50000, %ebx		/* iterations remaining */
1:
	call	tenmicrosec
	decl	%ebx
	jnz	1b
#if defined(__amd64)
	popq	%rbx
#elif defined(__i386)
	popl	%ebx
#endif
	ret
	SET_SIZE(wait_500ms)

/* Bits of pc_reset_methods: which reset mechanisms pc_reset() may try. */
#define	RESET_METHOD_KBC	1
#define	RESET_METHOD_PORT92	2
#define RESET_METHOD_PCI	4

	DGDEF3(pc_reset_methods, 4, 8)
	.long RESET_METHOD_KBC|RESET_METHOD_PORT92|RESET_METHOD_PCI;

	/*
	 * void pc_reset(void)
	 *
	 * Tries each reset mechanism enabled in pc_reset_methods in turn
	 * (keyboard controller, port 0x92, PCI port 0xcf9), giving each
	 * 500ms to take effect, and finally falls into efi_reset(), which
	 * triple-faults the CPU.
	 */
	ENTRY(pc_reset)

#if defined(__i386)
	testl	$RESET_METHOD_KBC, pc_reset_methods
#elif defined(__amd64)
	testl	$RESET_METHOD_KBC, pc_reset_methods(%rip)
#endif
	jz	1f

	/*
	 * Try the classic keyboard controller-triggered reset.
	 */
	movw	$0x64, %dx
	movb	$0xfe, %al
	outb	(%dx)

	/*
	 * Wait up to 500 milliseconds here for the keyboard controller
	 * to pull the reset line.  On some systems where the keyboard
	 * controller is slow to pull the reset line, the next reset method
	 * may be executed (which may be bad if those systems hang when the
	 * next reset method is used, e.g. Ferrari 3400 (doesn't like port 92),
	 * and Ferrari 4000 (doesn't like the cf9 reset method))
	 */
	call	wait_500ms

1:
#if defined(__i386)
	testl	$RESET_METHOD_PORT92, pc_reset_methods
#elif defined(__amd64)
	testl	$RESET_METHOD_PORT92, pc_reset_methods(%rip)
#endif
	jz	3f

	/*
	 * Try port 0x92 fast reset
	 */
	movw	$0x92, %dx
	inb	(%dx)
	cmpb	$0xff, %al	/* If port's not there, we should get back 0xFF */
	je	1f
	testb	$1, %al		/* If bit 0 */
	jz	2f		/* is clear, jump to perform the reset */
	andb	$0xfe, %al	/* otherwise, */
	outb	(%dx)		/* clear bit 0 first, then */
2:
	orb	$1, %al		/* Set bit 0 */
	outb	(%dx)		/* and reset the system */
1:

	call	wait_500ms

3:
#if defined(__i386)
	testl	$RESET_METHOD_PCI, pc_reset_methods
#elif defined(__amd64)
	testl	$RESET_METHOD_PCI, pc_reset_methods(%rip)
#endif
	jz	4f

	/*
	 * Try the PCI (soft) reset vector (should work on all modern systems,
	 * but has been shown to cause problems on 450NX systems, and some newer
	 * systems (e.g. ATI IXP400-equipped systems))
	 * When resetting via this method, 2 writes are required.  The first
	 * targets bit 1 (0=hard reset without power cycle, 1=hard reset with
	 * power cycle).
	 * The reset occurs on the second write, during bit 2's transition from
	 * 0->1.
	 */
	movw	$0xcf9, %dx
	movb	$0x2, %al	/* Reset mode = hard, no power cycle */
	outb	(%dx)
	movb	$0x6, %al
	outb	(%dx)

	call	wait_500ms

4:
	/*
	 * port 0xcf9 failed also.  Last-ditch effort is to
	 * triple-fault the CPU.
	 * Also, use triple fault for EFI firmware
	 */
	ENTRY(efi_reset)
#if defined(__amd64)
	pushq	$0x0
	pushq	$0x0		/* IDT base of 0, limit of 0 + 2 unused bytes */
	lidt	(%rsp)
#elif defined(__i386)
	pushl	$0x0
	pushl	$0x0		/* IDT base of 0, limit of 0 + 2 unused bytes */
	lidt	(%esp)
#endif
	int	$0x0		/* Trigger interrupt, generate triple-fault */

	cli
	hlt			/* Wait forever */
	/*NOTREACHED*/
	SET_SIZE(efi_reset)
	SET_SIZE(pc_reset)

#endif	/* __lint */
1367
1368/*
1369 * C callable in and out routines
1370 */
1371
#if defined(__lint)

/* ARGSUSED */
void
outl(int port_address, uint32_t val)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void outl(int port_address, uint32_t val)
	 *
	 * In:	%edi = port_address (low 16 bits used), %esi = val
	 *
	 * Writes the 32-bit val to the I/O port.
	 */
	ENTRY(outl)
	movl	%esi, %eax		/* %eax = val */
	movw	%di, %dx		/* %dx = port */
	outl	(%dx)
	ret
	SET_SIZE(outl)

#elif defined(__i386)

	/* Stack offsets of the two arguments of the out/in routines. */
	.set	PORT, 4
	.set	VAL, 8

	/*
	 * void outl(int port_address, uint32_t val)
	 *
	 * 32-bit flavour: both operands are fetched from the stack.
	 */
	ENTRY(outl)
	movl	VAL(%esp), %eax		/* %eax = val */
	movw	PORT(%esp), %dx		/* %dx = port */
	outl	(%dx)
	ret
	SET_SIZE(outl)

#endif	/* __i386 */
#endif	/* __lint */
1404
#if defined(__lint)

/* ARGSUSED */
void
outw(int port_address, uint16_t val)
{}

#else	/* __lint */

/*
 * void outw(int port_address, uint16_t val)
 *
 * Write the 16-bit quantity val to the I/O port port_address.
 * The store is coded as "D16 outl"; D16 is not defined in this part of
 * the file and presumably supplies the 16-bit operand-size prefix
 * (TODO confirm against its definition).
 */

#if defined(__amd64)

	ENTRY(outw)
	movw	%si, %ax		/* %ax = val */
	movw	%di, %dx		/* %dx = port_address */
	D16 outl (%dx)		/* XX64 why not outw? */
	ret
	SET_SIZE(outw)

#elif defined(__i386)

	ENTRY(outw)
	movw	VAL(%esp), %ax		/* %ax = val */
	movw	PORT(%esp), %dx		/* %dx = port_address */
	D16 outl (%dx)
	ret
	SET_SIZE(outw)

#endif	/* __i386 */
#endif	/* __lint */
1434
#if defined(__lint)

/* ARGSUSED */
void
outb(int port_address, uint8_t val)
{}

#else	/* __lint */

/*
 * void outb(int port_address, uint8_t val)
 *
 * Write the byte val to the I/O port port_address.
 */

#if defined(__amd64)

	ENTRY(outb)
	movb	%sil, %al		/* %al = val */
	movw	%di, %dx		/* %dx = port_address */
	outb	(%dx)
	ret
	SET_SIZE(outb)

#elif defined(__i386)

	ENTRY(outb)
	movb	VAL(%esp), %al		/* %al = val */
	movw	PORT(%esp), %dx		/* %dx = port_address */
	outb	(%dx)
	ret
	SET_SIZE(outb)

#endif	/* __i386 */
#endif	/* __lint */
1464
#if defined(__lint)

/* ARGSUSED */
uint32_t
inl(int port_address)
{ return (0); }

#else	/* __lint */

/*
 * uint32_t inl(int port_address)
 *
 * Read a 32-bit quantity from the I/O port port_address and return it
 * in %eax.
 */

#if defined(__amd64)

	ENTRY(inl)
	movw	%di, %dx		/* %dx = port_address */
	xorl	%eax, %eax
	inl	(%dx)
	ret
	SET_SIZE(inl)

#elif defined(__i386)

	ENTRY(inl)
	movw	PORT(%esp), %dx		/* %dx = port_address */
	inl	(%dx)
	ret
	SET_SIZE(inl)

#endif	/* __i386 */
#endif	/* __lint */
1493
#if defined(__lint)

/* ARGSUSED */
uint16_t
inw(int port_address)
{ return (0); }

#else	/* __lint */

/*
 * uint16_t inw(int port_address)
 *
 * Read a 16-bit quantity from the I/O port port_address.  %eax is
 * cleared first, so the bits of %eax that the read does not write are
 * zero on return.  The read is coded as "D16 inl"; D16 is not defined
 * in this part of the file and presumably supplies the 16-bit
 * operand-size prefix (TODO confirm).
 */

#if defined(__amd64)

	ENTRY(inw)
	movw	%di, %dx		/* %dx = port_address */
	xorl	%eax, %eax
	D16 inl	(%dx)
	ret
	SET_SIZE(inw)

#elif defined(__i386)

	ENTRY(inw)
	movw	PORT(%esp), %dx		/* %dx = port_address */
	xorl	%eax, %eax
	D16 inl	(%dx)
	ret
	SET_SIZE(inw)

#endif	/* __i386 */
#endif	/* __lint */
1523
1524
#if defined(__lint)

/* ARGSUSED */
uint8_t
inb(int port_address)
{ return (0); }

#else	/* __lint */

/*
 * uint8_t inb(int port_address)
 *
 * Read one byte from the I/O port port_address.  %eax is cleared first,
 * so the byte comes back zero-extended in %eax.
 */

#if defined(__amd64)

	ENTRY(inb)
	movw	%di, %dx		/* %dx = port_address */
	xorl	%eax, %eax
	inb	(%dx)
	ret
	SET_SIZE(inb)

#elif defined(__i386)

	ENTRY(inb)
	movw	PORT(%esp), %dx		/* %dx = port_address */
	xorl	%eax, %eax
	inb	(%dx)
	ret
	SET_SIZE(inb)

#endif	/* __i386 */
#endif	/* __lint */
1554
1555
#if defined(__lint)

/* ARGSUSED */
void
repoutsw(int port, uint16_t *addr, int cnt)
{}

#else	/* __lint */

/*
 * void repoutsw(int port, uint16_t *addr, int cnt)
 *
 * Output cnt items from the buffer at addr to I/O port port, using a
 * rep-prefixed "D16 outsl".  D16 is not defined in this part of the file
 * and presumably makes each transfer 16 bits wide (TODO confirm).
 */

#if defined(__amd64)

	ENTRY(repoutsw)
	movl	%edx, %ecx		/* %ecx = cnt; must precede reuse of %dx */
	movw	%di, %dx		/* %dx = port; addr is already in %rsi */
	rep
	  D16 outsl
	ret
	SET_SIZE(repoutsw)

#elif defined(__i386)

	/*
	 * Stack layout once the routine has pushed its one saved register
	 * (%esi here; repinsw uses the same offsets after pushing %edi):
	 *      |  cnt  |  +16
	 *      | *addr |  +12
	 *      | port  |  +8
	 *      |  eip  |  +4
	 *      | saved |  <-- %esp
	 * Pushing anything more onto the stack requires adjusting the
	 * constants below to match.
	 */
	.set	PORT, 8
	.set	ADDR, 12
	.set	COUNT, 16

	ENTRY(repoutsw)
	pushl	%esi
	movl	ADDR(%esp), %esi	/* %esi = addr (outs source) */
	movl	COUNT(%esp), %ecx	/* %ecx = cnt */
	movl	PORT(%esp), %edx	/* %dx = port */
	rep
	  D16 outsl
	popl	%esi
	ret
	SET_SIZE(repoutsw)

#endif	/* __i386 */
#endif	/* __lint */
1605
1606
#if defined(__lint)

/* ARGSUSED */
void
repinsw(int port_addr, uint16_t *addr, int cnt)
{}

#else	/* __lint */

/*
 * void repinsw(int port_addr, uint16_t *addr, int cnt)
 *
 * Input cnt items from I/O port port_addr into the buffer at addr, using
 * a rep-prefixed "D16 insl".  D16 is not defined in this part of the file
 * and presumably makes each transfer 16 bits wide (TODO confirm).
 *
 * The ins instruction stores through %rdi/%edi, so the buffer address
 * must be placed there before the rep.
 */

#if defined(__amd64)

	ENTRY(repinsw)
	movl	%edx, %ecx		/* %ecx = cnt; must precede reuse of %dx */
	movw	%di, %dx		/* %dx = port_addr */
	movq	%rsi, %rdi		/* %rdi = addr; only after port is copied */
	rep
	  D16 insl
	ret
	SET_SIZE(repinsw)

#elif defined(__i386)

	/* PORT, ADDR and COUNT are the one-push stack offsets set for repoutsw */
	ENTRY(repinsw)
	pushl	%edi
	movl	PORT(%esp), %edx	/* %dx = port_addr */
	movl	ADDR(%esp), %edi	/* %edi = addr (ins destination) */
	movl	COUNT(%esp), %ecx	/* %ecx = cnt */
	rep
	  D16 insl
	popl	%edi
	ret
	SET_SIZE(repinsw)

#endif	/* __i386 */
#endif	/* __lint */
1641
1642
#if defined(__lint)

/* ARGSUSED */
void
repinsb(int port, uint8_t *addr, int count)
{}

#else	/* __lint */

/*
 * void repinsb(int port, uint8_t *addr, int count)
 *
 * Input count bytes from I/O port port into the buffer at addr.
 */

#if defined(__amd64)

	ENTRY(repinsb)
	movl	%edx, %ecx		/* %ecx = count; must precede reuse of %dx */
	movw	%di, %dx		/* %dx = port */
	movq	%rsi, %rdi		/* %rdi = addr (ins destination) */
	rep
	  insb
	ret
	SET_SIZE(repinsb)

#elif defined(__i386)

	/*
	 * Stack layout once a routine has pushed its one saved register
	 * (%edi for the input routines, %esi for the output routines):
	 *      |  cnt  |  +16
	 *      | *addr |  +12
	 *      | port  |  +8
	 *      |  eip  |  +4
	 *      | saved |  <-- %esp
	 * Pushing anything more onto the stack requires adjusting the
	 * constants below to match.
	 */
	.set	IO_PORT, 8
	.set	IO_ADDR, 12
	.set	IO_COUNT, 16

	ENTRY(repinsb)
	pushl	%edi
	movl	IO_PORT(%esp), %edx	/* %dx = port */
	movl	IO_ADDR(%esp), %edi	/* %edi = addr (ins destination) */
	movl	IO_COUNT(%esp), %ecx	/* %ecx = count */
	rep
	  insb
	popl	%edi
	ret
	SET_SIZE(repinsb)

#endif	/* __i386 */
#endif	/* __lint */
1693
1694
1695/*
1696 * Input a stream of 32-bit words.
1697 * NOTE: count is a DWORD count.
1698 */
#if defined(__lint)

/* ARGSUSED */
void
repinsd(int port, uint32_t *addr, int count)
{}

#else	/* __lint */

/*
 * void repinsd(int port, uint32_t *addr, int count)
 *
 * Input count 32-bit words from I/O port port into the buffer at addr.
 */

#if defined(__amd64)

	ENTRY(repinsd)
	movl	%edx, %ecx		/* %ecx = count; must precede reuse of %dx */
	movw	%di, %dx		/* %dx = port */
	movq	%rsi, %rdi		/* %rdi = addr (ins destination) */
	rep
	  insl
	ret
	SET_SIZE(repinsd)

#elif defined(__i386)

	ENTRY(repinsd)
	pushl	%edi
	movl	IO_PORT(%esp), %edx	/* %dx = port */
	movl	IO_ADDR(%esp), %edi	/* %edi = addr (ins destination) */
	movl	IO_COUNT(%esp), %ecx	/* %ecx = count */
	rep
	  insl
	popl	%edi
	ret
	SET_SIZE(repinsd)

#endif	/* __i386 */
#endif	/* __lint */
1734
1735/*
1736 * Output a stream of bytes
1737 * NOTE: count is a byte count
1738 */
#if defined(__lint)

/* ARGSUSED */
void
repoutsb(int port, uint8_t *addr, int count)
{}

#else	/* __lint */

/*
 * void repoutsb(int port, uint8_t *addr, int count)
 *
 * Output count bytes from the buffer at addr to I/O port port.
 */

#if defined(__amd64)

	ENTRY(repoutsb)
	movl	%edx, %ecx		/* %ecx = count; must precede reuse of %dx */
	movw	%di, %dx		/* %dx = port; addr is already in %rsi */
	rep
	  outsb
	ret
	SET_SIZE(repoutsb)

#elif defined(__i386)

	ENTRY(repoutsb)
	pushl	%esi
	movl	IO_PORT(%esp), %edx	/* %dx = port */
	movl	IO_ADDR(%esp), %esi	/* %esi = addr (outs source) */
	movl	IO_COUNT(%esp), %ecx	/* %ecx = count */
	rep
	  outsb
	popl	%esi
	ret
	SET_SIZE(repoutsb)

#endif	/* __i386 */
#endif	/* __lint */
1773
1774/*
1775 * Output a stream of 32-bit words
1776 * NOTE: count is a DWORD count
1777 */
#if defined(__lint)

/* ARGSUSED */
void
repoutsd(int port, uint32_t *addr, int count)
{}

#else	/* __lint */

/*
 * void repoutsd(int port, uint32_t *addr, int count)
 *
 * Output count 32-bit words from the buffer at addr to I/O port port.
 */

#if defined(__amd64)

	ENTRY(repoutsd)
	movl	%edx, %ecx		/* %ecx = count; must precede reuse of %dx */
	movw	%di, %dx		/* %dx = port; addr is already in %rsi */
	rep
	  outsl
	ret
	SET_SIZE(repoutsd)

#elif defined(__i386)

	ENTRY(repoutsd)
	pushl	%esi
	movl	IO_PORT(%esp), %edx	/* %dx = port */
	movl	IO_ADDR(%esp), %esi	/* %esi = addr (outs source) */
	movl	IO_COUNT(%esp), %ecx	/* %ecx = count */
	rep
	  outsl
	popl	%esi
	ret
	SET_SIZE(repoutsd)

#endif	/* __i386 */
#endif	/* __lint */
1812
1813/*
1814 * void int3(void)
1815 * void int18(void)
1816 * void int20(void)
1817 * void int_cmci(void)
1818 */
1819
#if defined(__lint)

void
int3(void)
{}

void
int18(void)
{}

void
int20(void)
{}

void
int_cmci(void)
{}

#else	/* __lint */

	/* int3: raise software interrupt T_BPTFLT, then return */
	ENTRY(int3)
	int	$T_BPTFLT
	ret
	SET_SIZE(int3)

	/* int18: raise software interrupt T_MCE, then return */
	ENTRY(int18)
	int	$T_MCE
	ret
	SET_SIZE(int18)

	/*
	 * int20: raise software interrupt T_DBGENTR, but only when the
	 * RB_DEBUG bit is set in boothowto; otherwise just return.
	 */
	ENTRY(int20)
	testl	$RB_DEBUG, boothowto
	jz	1f			/* RB_DEBUG clear: nothing to do */

	int	$T_DBGENTR
1:
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(int20)

	/* int_cmci: raise software interrupt T_ENOEXTFLT, then return */
	ENTRY(int_cmci)
	int	$T_ENOEXTFLT
	ret
	SET_SIZE(int_cmci)

#endif	/* __lint */
1867
#if defined(__lint)

/* ARGSUSED */
int
scanc(size_t size, uchar_t *cp, uchar_t *table, uchar_t mask)
{ return (0); }

#else	/* __lint */

/*
 * int scanc(size_t size, uchar_t *cp, uchar_t *table, uchar_t mask)
 *
 * Walk the size bytes starting at cp and stop at the first byte c for
 * which (table[c] & mask) is non-zero.  The return value is the number
 * of bytes from the stopping position to the end of the buffer
 * (0 when no byte matched).
 */

#if defined(__amd64)

	ENTRY(scanc)
	/* on entry: %rdi = size, %rsi = cp, %rdx = table, %cl = mask */
	addq	%rsi, %rdi		/* %rdi = end = cp + size */
.scanloop:
	cmpq	%rdi, %rsi
	jnb	.scandone		/* cp reached end: no match */
	movzbq	(%rsi), %r8		/* %r8 = *cp, used as table index */
	incq	%rsi
	testb	%cl, (%r8, %rdx)	/* table[*cp] & mask */
	jz	.scanloop
	decq	%rsi			/* matched: undo the advance */
.scandone:
	movl	%edi, %eax
	subl	%esi, %eax		/* result = end - cp */
	ret
	SET_SIZE(scanc)

#elif defined(__i386)

	ENTRY(scanc)
	pushl	%edi
	pushl	%esi
	/* two registers pushed, so the arguments start at 12(%esp) */
	movl	16(%esp), %esi		/* %esi = cp */
	movl	20(%esp), %edx		/* %edx = table */
	movb	24(%esp), %cl		/* %cl = mask */
	movl	%esi, %edi
	addl	12(%esp), %edi		/* %edi = end = cp + size */
.scanloop:
	cmpl	%edi, %esi
	jnb	.scandone		/* cp reached end: no match */
	movzbl	(%esi), %eax		/* %eax = *cp, used as table index */
	incl	%esi
	testb	%cl, (%edx, %eax)	/* table[*cp] & mask */
	jz	.scanloop
	decl	%esi			/* matched: undo the advance */
.scandone:
	movl	%edi, %eax
	subl	%esi, %eax		/* result = end - cp */
	popl	%esi
	popl	%edi
	ret
	SET_SIZE(scanc)

#endif	/* __i386 */
#endif	/* __lint */
1928
1929/*
1930 * Replacement functions for ones that are normally inlined.
1931 * In addition to the copy in i86.il, they are defined here just in case.
1932 */
1933
#if defined(__lint)

ulong_t
intr_clear(void)
{ return (0); }

ulong_t
clear_int_flag(void)
{ return (0); }

#else	/* __lint */

/*
 * ulong_t intr_clear(void)
 * ulong_t clear_int_flag(void)
 *
 * Two names for one routine: capture the flags register as the return
 * value, then disable interrupts through the CLI()/CLIRET() macros
 * (defined outside this part of the file).
 *
 * On __xpv kernels that are not panicking (xpv_panicking == 0) the PS_IE
 * bit of the returned value is rebuilt from the event mask that CLIRET()
 * hands back: PS_IE is set only when bit 0 of that mask is clear.
 */

#if defined(__amd64)

	ENTRY(intr_clear)
	ENTRY(clear_int_flag)
	pushfq
	popq	%rax			/* %rax = flags on entry */
#if defined(__xpv)
	leaq	xpv_panicking, %rdi
	movl	(%rdi), %edi
	testl	%edi, %edi
	jnz	2f			/* panicking: take the plain CLI path */
	CLIRET(%rdi, %dl)	/* returns event mask in %dl */
	/*
	 * Rebuild PS_IE in the return value from the event mask bit
	 */
	andq    $_BITNOT(PS_IE), %rax
	testb	$1, %dl
	jnz	1f
	orq	$PS_IE, %rax
1:
	ret
2:
#endif
	CLI(%rdi)
	ret
	SET_SIZE(clear_int_flag)
	SET_SIZE(intr_clear)

#elif defined(__i386)

	ENTRY(intr_clear)
	ENTRY(clear_int_flag)
	pushfl
	popl	%eax			/* %eax = flags on entry */
#if defined(__xpv)
	leal	xpv_panicking, %edx
	movl	(%edx), %edx
	testl	%edx, %edx
	jnz	2f			/* panicking: take the plain CLI path */
	CLIRET(%edx, %cl)	/* returns event mask in %cl */
	/*
	 * Rebuild PS_IE in the return value from the event mask bit
	 */
	andl    $_BITNOT(PS_IE), %eax
	testb	$1, %cl
	jnz	1f
	orl	$PS_IE, %eax
1:
	ret
2:
#endif
	CLI(%edx)
	ret
	SET_SIZE(clear_int_flag)
	SET_SIZE(intr_clear)

#endif	/* __i386 */
#endif	/* __lint */
2004
#if defined(__lint)

struct cpu *
curcpup(void)
{ return 0; }

#else	/* __lint */

/*
 * struct cpu *curcpup(void)
 *
 * Return the pointer-sized value stored at offset CPU_SELF from the
 * %gs segment base.
 */

#if defined(__amd64)

	ENTRY(curcpup)
	movq	%gs:CPU_SELF, %rax
	ret
	SET_SIZE(curcpup)

#elif defined(__i386)

	ENTRY(curcpup)
	movl	%gs:CPU_SELF, %eax
	ret
	SET_SIZE(curcpup)

#endif	/* __i386 */
#endif	/* __lint */
2029
#if defined(__lint)

/* ARGSUSED */
uint32_t
htonl(uint32_t i)
{ return (0); }

/* ARGSUSED */
uint32_t
ntohl(uint32_t i)
{ return (0); }

#else	/* __lint */

/*
 * uint32_t htonl(uint32_t i)
 * uint32_t ntohl(uint32_t i)
 *
 * Both names share one body: return i with its four bytes reversed.
 */

#if defined(__amd64)

	/* XX64 there must be shorter sequences for this */
	ENTRY(htonl)
	ALTENTRY(ntohl)
	movl	%edi, %eax
	bswap	%eax
	ret
	SET_SIZE(ntohl)
	SET_SIZE(htonl)

#elif defined(__i386)

	ENTRY(htonl)
	ALTENTRY(ntohl)
	movl	4(%esp), %eax
	bswap	%eax
	ret
	SET_SIZE(ntohl)
	SET_SIZE(htonl)

#endif	/* __i386 */
#endif	/* __lint */
2067
#if defined(__lint)

/* ARGSUSED */
uint16_t
htons(uint16_t i)
{ return (0); }

/* ARGSUSED */
uint16_t
ntohs(uint16_t i)
{ return (0); }


#else	/* __lint */

/*
 * uint16_t htons(uint16_t i)
 * uint16_t ntohs(uint16_t i)
 *
 * Both names share one body: return the low 16 bits of i with the two
 * bytes exchanged, zero-extended in %eax.
 */

#if defined(__amd64)

	ENTRY(htons)
	ALTENTRY(ntohs)
	movzwl	%di, %eax		/* %eax = low 16 bits of i */
	rolw	$8, %ax			/* exchange the two bytes */
	ret
	SET_SIZE(ntohs)
	SET_SIZE(htons)

#elif defined(__i386)

	ENTRY(htons)
	ALTENTRY(ntohs)
	movzwl	4(%esp), %eax		/* %eax = low 16 bits of i */
	rolw	$8, %ax			/* exchange the two bytes */
	ret
	SET_SIZE(ntohs)
	SET_SIZE(htons)

#endif	/* __i386 */
#endif	/* __lint */
2108
2109
#if defined(__lint)

/* ARGSUSED */
void
intr_restore(ulong_t i)
{ return; }

/* ARGSUSED */
void
restore_int_flag(ulong_t i)
{ return; }

#else	/* __lint */

/*
 * void intr_restore(ulong_t i)
 * void restore_int_flag(ulong_t i)
 *
 * Two names for one routine.  If PS_IE is clear in i, return without
 * doing anything.  Otherwise re-enable interrupts: with a plain sti on
 * non-__xpv kernels, or through IE_TO_EVENT_MASK() on __xpv kernels
 * unless xpv_panicking is non-zero, in which case nothing is done.
 */

#if defined(__amd64)

	ENTRY(intr_restore)
	ENTRY(restore_int_flag)
	testq	$PS_IE, %rdi
	jz	1f			/* PS_IE clear: leave state alone */
#if defined(__xpv)
	leaq	xpv_panicking, %rsi
	movl	(%rsi), %esi
	testl	%esi, %esi
	jnz	1f			/* panicking: skip the event mask */
	/*
	 * Since we're -really- running unprivileged, our attempt
	 * to change the state of the IF bit will be ignored.
	 * The virtual IF bit is tweaked by CLI and STI.
	 */
	IE_TO_EVENT_MASK(%rsi, %rdi)
#else
	sti
#endif
1:
	ret
	SET_SIZE(restore_int_flag)
	SET_SIZE(intr_restore)

#elif defined(__i386)

	ENTRY(intr_restore)
	ENTRY(restore_int_flag)
	testl	$PS_IE, 4(%esp)
	jz	1f			/* PS_IE clear: leave state alone */
#if defined(__xpv)
	leal	xpv_panicking, %edx
	movl	(%edx), %edx
	testl	%edx, %edx
	jnz	1f			/* panicking: skip the event mask */
	/*
	 * Since we're -really- running unprivileged, our attempt
	 * to change the state of the IF bit will be ignored.
	 * The virtual IF bit is tweaked by CLI and STI.
	 */
	IE_TO_EVENT_MASK(%edx, 4(%esp))
#else
	sti
#endif
1:
	ret
	SET_SIZE(restore_int_flag)
	SET_SIZE(intr_restore)

#endif	/* __i386 */
#endif	/* __lint */
2176
#if defined(__lint)

void
sti(void)
{}

void
cli(void)
{}

#else	/* __lint */

	/*
	 * void sti(void)
	 * C-callable wrapper around the STI macro (defined outside this
	 * part of the file).
	 */
	ENTRY(sti)
	STI
	ret
	SET_SIZE(sti)

	/*
	 * void cli(void)
	 * C-callable wrapper around the CLI() macro (defined outside this
	 * part of the file).  The macro is handed %rax/%eax as its register
	 * argument, presumably as scratch -- confirm against the macro.
	 */
	ENTRY(cli)
#if defined(__amd64)
	CLI(%rax)
#elif defined(__i386)
	CLI(%eax)
#endif	/* __i386 */
	ret
	SET_SIZE(cli)

#endif	/* __lint */
2204
#if defined(__lint)

dtrace_icookie_t
dtrace_interrupt_disable(void)
{ return (0); }

#else   /* __lint */

/*
 * dtrace_icookie_t dtrace_interrupt_disable(void)
 *
 * Capture the flags register as the returned cookie, then disable
 * interrupts through the CLI()/CLIRET() macros (defined outside this
 * part of the file).
 *
 * On __xpv kernels nothing further is done when xpv_panicking is
 * non-zero; otherwise the PS_IE bit of the cookie is rebuilt from the
 * event mask that CLIRET() hands back (set only when bit 0 of that mask
 * is clear).
 */

#if defined(__amd64)

	ENTRY(dtrace_interrupt_disable)
	pushfq
	popq	%rax			/* %rax = flags on entry */
#if defined(__xpv)
	leaq	xpv_panicking, %rdi
	movl	(%rdi), %edi
	testl	%edi, %edi
	jnz	1f			/* panicking: return flags as they are */
	CLIRET(%rdi, %dl)	/* returns event mask in %dl */
	/*
	 * Rebuild PS_IE in the cookie from the event mask bit
	 */
	andq    $_BITNOT(PS_IE), %rax
	testb	$1, %dl
	jnz	1f
	orq	$PS_IE, %rax
1:
#else
	CLI(%rdx)
#endif
	ret
	SET_SIZE(dtrace_interrupt_disable)

#elif defined(__i386)

	ENTRY(dtrace_interrupt_disable)
	pushfl
	popl	%eax			/* %eax = flags on entry */
#if defined(__xpv)
	leal	xpv_panicking, %edx
	movl	(%edx), %edx
	testl	%edx, %edx
	jnz	1f			/* panicking: return flags as they are */
	CLIRET(%edx, %cl)	/* returns event mask in %cl */
	/*
	 * Rebuild PS_IE in the cookie from the event mask bit
	 */
	andl    $_BITNOT(PS_IE), %eax
	testb	$1, %cl
	jnz	1f
	orl	$PS_IE, %eax
1:
#else
	CLI(%edx)
#endif
	ret
	SET_SIZE(dtrace_interrupt_disable)

#endif	/* __i386 */
#endif	/* __lint */
2265
#if defined(__lint)

/*ARGSUSED*/
void
dtrace_interrupt_enable(dtrace_icookie_t cookie)
{}

#else	/* __lint */

/*
 * void dtrace_interrupt_enable(dtrace_icookie_t cookie)
 *
 * Reload the flags register from cookie.  On __xpv kernels the cookie is
 * additionally passed to IE_TO_EVENT_MASK() (defined outside this part
 * of the file), unless xpv_panicking is non-zero, in which case that
 * step is skipped.
 *
 * The skip branch targets a named label local to this routine rather
 * than a numeric "1f", so it cannot bind to a "1:" that belongs to some
 * later routine in the file.
 */

#if defined(__amd64)

	ENTRY(dtrace_interrupt_enable)
	pushq	%rdi
	popfq				/* flags = cookie */
#if defined(__xpv)
	leaq	xpv_panicking, %rdx
	movl	(%rdx), %edx
	cmpl	$0, %edx
	jne	.dtrace_interrupt_enable_done
	/*
	 * Since we're -really- running unprivileged, our attempt
	 * to change the state of the IF bit will be ignored. The
	 * virtual IF bit is tweaked by CLI and STI.
	 */
	IE_TO_EVENT_MASK(%rdx, %rdi)
#endif
.dtrace_interrupt_enable_done:
	ret
	SET_SIZE(dtrace_interrupt_enable)

#elif defined(__i386)

	ENTRY(dtrace_interrupt_enable)
	movl	4(%esp), %eax		/* %eax = cookie */
	pushl	%eax
	popfl				/* flags = cookie */
#if defined(__xpv)
	leal	xpv_panicking, %edx
	movl	(%edx), %edx
	cmpl	$0, %edx
	jne	.dtrace_interrupt_enable_done
	/*
	 * Since we're -really- running unprivileged, our attempt
	 * to change the state of the IF bit will be ignored. The
	 * virtual IF bit is tweaked by CLI and STI.
	 */
	IE_TO_EVENT_MASK(%edx, %eax)
#endif
.dtrace_interrupt_enable_done:
	ret
	SET_SIZE(dtrace_interrupt_enable)

#endif	/* __i386 */
#endif	/* __lint */
2318
2319
#if defined(__lint)

void
dtrace_membar_producer(void)
{}

void
dtrace_membar_consumer(void)
{}

#else	/* __lint */

	/*
	 * void dtrace_membar_producer(void)
	 * void dtrace_membar_consumer(void)
	 *
	 * Both routines consist of a return and nothing else; no fence
	 * instruction is issued.
	 */
	ENTRY(dtrace_membar_producer)
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(dtrace_membar_producer)

	ENTRY(dtrace_membar_consumer)
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(dtrace_membar_consumer)

#endif	/* __lint */
2343
#if defined(__lint)

kthread_id_t
threadp(void)
{ return ((kthread_id_t)0); }

#else	/* __lint */

/*
 * kthread_id_t threadp(void)
 *
 * Return the pointer-sized value stored at offset CPU_THREAD from the
 * %gs segment base.
 */

#if defined(__amd64)

	ENTRY(threadp)
	movq	%gs:CPU_THREAD, %rax
	ret
	SET_SIZE(threadp)

#elif defined(__i386)

	ENTRY(threadp)
	movl	%gs:CPU_THREAD, %eax
	ret
	SET_SIZE(threadp)

#endif	/* __i386 */
#endif	/* __lint */
2368
2369/*
2370 *   Checksum routine for Internet Protocol Headers
2371 */
2372
#if defined(__lint)

/* ARGSUSED */
unsigned int
ip_ocsum(
	ushort_t *address,	/* ptr to 1st message buffer */
	int halfword_count,	/* length of data */
	unsigned int sum)	/* partial checksum */
{
	int		i;
	unsigned int	psum = 0;	/* partial sum */

	for (i = 0; i < halfword_count; i++, address++) {
		psum += *address;
	}

	while ((psum >> 16) != 0) {
		psum = (psum & 0xffff) + (psum >> 16);
	}

	psum += sum;

	while ((psum >> 16) != 0) {
		psum = (psum & 0xffff) + (psum >> 16);
	}

	return (psum);
}

#else	/* __lint */

/*
 * Assembly implementations of ip_ocsum().
 *
 * Both versions keep two 32-bit accumulators: %edx, which starts out as
 * the caller's partial sum, and %eax, which starts at zero.  The main
 * loop consumes 32 halfwords (64 bytes) per pass, adding 32-bit words
 * alternately into %edx and %eax with adcl so that the carry out of each
 * add feeds the next one; the two trailing "adcl $0" fold the last
 * carries back in.
 *
 * A buffer that does not start on a 4-byte boundary has its first
 * halfword added separately (.ip_csum_notaligned).  A final run of fewer
 * than 32 halfwords (.less_than_32) adds an odd trailing halfword by
 * itself, then backs the data pointer up so that the remaining words end
 * at offset 64 and enters the unrolled chain part-way through via
 * .ip_ocsum_jmptbl (indexed by the number of 32-bit words left).
 *
 * .ip_ocsum_done adds the two accumulators together and folds the 32-bit
 * total to 16 bits with end-around carry.
 */

#if defined(__amd64)

	ENTRY(ip_ocsum)
	pushq	%rbp
	movq	%rsp, %rbp
#ifdef DEBUG
	/* DEBUG kernels panic if address is below postbootkernelbase */
	movq	postbootkernelbase(%rip), %rax
	cmpq	%rax, %rdi
	jnb	1f
	xorl	%eax, %eax
	movq	%rdi, %rsi
	leaq	.ip_ocsum_panic_msg(%rip), %rdi
	call	panic
	/*NOTREACHED*/
.ip_ocsum_panic_msg:
	.string	"ip_ocsum: address 0x%p below kernelbase\n"
1:
#endif
	movl	%esi, %ecx	/* halfword_count */
	movq	%rdi, %rsi	/* address */
				/* partial sum in %edx */
	xorl	%eax, %eax
	testl	%ecx, %ecx
	jz	.ip_ocsum_done
	testq	$3, %rsi
	jnz	.ip_csum_notaligned
.ip_csum_aligned:	/* XX64 opportunities for 8-byte operations? */
.next_iter:
	/* XX64 opportunities for prefetch? */
	/* XX64 compute csum with 64 bit quantities? */
	subl	$32, %ecx
	jl	.less_than_32

	addl	0(%rsi), %edx
.only60:
	adcl	4(%rsi), %eax
.only56:
	adcl	8(%rsi), %edx
.only52:
	adcl	12(%rsi), %eax
.only48:
	adcl	16(%rsi), %edx
.only44:
	adcl	20(%rsi), %eax
.only40:
	adcl	24(%rsi), %edx
.only36:
	adcl	28(%rsi), %eax
.only32:
	adcl	32(%rsi), %edx
.only28:
	adcl	36(%rsi), %eax
.only24:
	adcl	40(%rsi), %edx
.only20:
	adcl	44(%rsi), %eax
.only16:
	adcl	48(%rsi), %edx
.only12:
	adcl	52(%rsi), %eax
.only8:
	adcl	56(%rsi), %edx
.only4:
	adcl	60(%rsi), %eax	/* could be adding -1 and -1 with a carry */
.only0:
	adcl	$0, %eax	/* could be adding -1 in eax with a carry */
	adcl	$0, %eax

	addq	$64, %rsi
	testl	%ecx, %ecx
	jnz	.next_iter

.ip_ocsum_done:
	addl	%eax, %edx
	adcl	$0, %edx
	movl	%edx, %eax	/* form a 16 bit checksum by */
	shrl	$16, %eax	/* adding two halves of 32 bit checksum */
	addw	%dx, %ax
	adcw	$0, %ax
	andl	$0xffff, %eax
	leave
	ret

.ip_csum_notaligned:
	/* add the leading halfword by itself, then rejoin the main loop */
	xorl	%edi, %edi
	movw	(%rsi), %di
	addl	%edi, %edx
	adcl	$0, %edx
	addq	$2, %rsi
	decl	%ecx
	jmp	.ip_csum_aligned

.less_than_32:
	addl	$32, %ecx	/* undo the subtract: halfwords remaining */
	testl	$1, %ecx
	jz	.size_aligned
	andl	$0xfe, %ecx	/* odd count: add the last halfword alone */
	movzwl	(%rsi, %rcx, 2), %edi
	addl	%edi, %edx
	adcl	$0, %edx
.size_aligned:
	movl	%ecx, %edi
	shrl	$1, %ecx	/* %ecx = 32-bit words left = table index */
	shl	$1, %edi	/* %edi = bytes left */
	subq	$64, %rdi
	addq	%rdi, %rsi	/* remaining data now ends at 64(%rsi) */
	leaq    .ip_ocsum_jmptbl(%rip), %rdi
	leaq	(%rdi, %rcx, 8), %rdi
	xorl	%ecx, %ecx	/* count is now 0, so the loop exits */
	clc			/* enter the adcl chain with no carry */
	jmp 	*(%rdi)

	.align	8
.ip_ocsum_jmptbl:
	.quad	.only0, .only4, .only8, .only12, .only16, .only20
	.quad	.only24, .only28, .only32, .only36, .only40, .only44
	.quad	.only48, .only52, .only56, .only60
	SET_SIZE(ip_ocsum)

#elif defined(__i386)

	ENTRY(ip_ocsum)
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	12(%ebp), %ecx	/* count of half words */
	movl	16(%ebp), %edx	/* partial checksum */
	movl	8(%ebp), %esi
	xorl	%eax, %eax
	testl	%ecx, %ecx
	jz	.ip_ocsum_done

	testl	$3, %esi
	jnz	.ip_csum_notaligned
.ip_csum_aligned:
.next_iter:
	subl	$32, %ecx
	jl	.less_than_32

	addl	0(%esi), %edx
.only60:
	adcl	4(%esi), %eax
.only56:
	adcl	8(%esi), %edx
.only52:
	adcl	12(%esi), %eax
.only48:
	adcl	16(%esi), %edx
.only44:
	adcl	20(%esi), %eax
.only40:
	adcl	24(%esi), %edx
.only36:
	adcl	28(%esi), %eax
.only32:
	adcl	32(%esi), %edx
.only28:
	adcl	36(%esi), %eax
.only24:
	adcl	40(%esi), %edx
.only20:
	adcl	44(%esi), %eax
.only16:
	adcl	48(%esi), %edx
.only12:
	adcl	52(%esi), %eax
.only8:
	adcl	56(%esi), %edx
.only4:
	adcl	60(%esi), %eax	/* We could be adding -1 and -1 with a carry */
.only0:
	adcl	$0, %eax	/* we could be adding -1 in eax with a carry */
	adcl	$0, %eax

	addl	$64, %esi
	andl	%ecx, %ecx
	jnz	.next_iter

.ip_ocsum_done:
	addl	%eax, %edx
	adcl	$0, %edx
	movl	%edx, %eax	/* form a 16 bit checksum by */
	shrl	$16, %eax	/* adding two halves of 32 bit checksum */
	addw	%dx, %ax
	adcw	$0, %ax
	andl	$0xffff, %eax
	popl	%edi		/* restore registers */
	popl	%esi
	popl	%ebx
	leave
	ret

.ip_csum_notaligned:
	/* add the leading halfword by itself, then rejoin the main loop */
	xorl	%edi, %edi
	movw	(%esi), %di
	addl	%edi, %edx
	adcl	$0, %edx
	addl	$2, %esi
	decl	%ecx
	jmp	.ip_csum_aligned

.less_than_32:
	addl	$32, %ecx	/* undo the subtract: halfwords remaining */
	testl	$1, %ecx
	jz	.size_aligned
	andl	$0xfe, %ecx	/* odd count: add the last halfword alone */
	movzwl	(%esi, %ecx, 2), %edi
	addl	%edi, %edx
	adcl	$0, %edx
.size_aligned:
	movl	%ecx, %edi
	shrl	$1, %ecx	/* %ecx = 32-bit words left = table index */
	shl	$1, %edi	/* %edi = bytes left */
	subl	$64, %edi
	addl	%edi, %esi	/* remaining data now ends at 64(%esi) */
	movl	$.ip_ocsum_jmptbl, %edi
	lea	(%edi, %ecx, 4), %edi
	xorl	%ecx, %ecx	/* count is now 0, so the loop exits */
	clc			/* enter the adcl chain with no carry */
	jmp 	*(%edi)
	SET_SIZE(ip_ocsum)

	.data
	.align	4

.ip_ocsum_jmptbl:
	.long	.only0, .only4, .only8, .only12, .only16, .only20
	.long	.only24, .only28, .only32, .only36, .only40, .only44
	.long	.only48, .only52, .only56, .only60


#endif	/* __i386 */
#endif	/* __lint */
2639
2640/*
2641 * multiply two long numbers and yield a u_longlong_t result, callable from C.
2642 * Provided to manipulate hrtime_t values.
2643 */
#if defined(__lint)

/* result = a * b; */

/* ARGSUSED */
unsigned long long
mul32(uint_t a, uint_t b)
{ return (0); }

#else	/* __lint */

/*
 * unsigned long long mul32(uint_t a, uint_t b)
 *
 * Unsigned 32 x 32 -> 64 bit multiply.  mull leaves the product in
 * %edx:%eax; amd64 then merges the two halves into %rax, while i386
 * returns the %edx:%eax pair as it stands.
 */

#if defined(__amd64)

	ENTRY(mul32)
	movl	%edi, %eax		/* %eax = a */
	mull	%esi			/* %edx:%eax = a * b */
	shlq	$32, %rdx
	orq	%rdx, %rax		/* %rax = (high << 32) | low */
	ret
	SET_SIZE(mul32)

#elif defined(__i386)

	ENTRY(mul32)
	movl	4(%esp), %eax		/* %eax = a */
	mull	8(%esp)			/* %edx:%eax = a * b */
	ret
	SET_SIZE(mul32)

#endif	/* __i386 */
#endif	/* __lint */
2677
#if defined(notused)
#if defined(__lint)
/* ARGSUSED */
void
load_pte64(uint64_t *pte, uint64_t pte_value)
{}
#else	/* __lint */
	/*
	 * void load_pte64(uint64_t *pte, uint64_t pte_value)
	 *
	 * 32-bit stack-argument routine, compiled only when "notused" is
	 * defined.  Stores the 64-bit pte_value to *pte as two 32-bit
	 * writes: the upper half first, the lower half second.
	 * NOTE(review): the ordering is presumably deliberate, so the low
	 * word becomes visible last -- confirm before changing it.
	 */
	.globl load_pte64
load_pte64:
	movl	4(%esp), %eax		/* %eax = pte */
	movl	8(%esp), %ecx		/* %ecx = low 32 bits of pte_value */
	movl	12(%esp), %edx		/* %edx = high 32 bits of pte_value */
	movl	%edx, 4(%eax)
	movl	%ecx, (%eax)
	ret
#endif	/* __lint */
#endif	/* notused */
2695
#if defined(__lint)

/*ARGSUSED*/
void
scan_memory(caddr_t addr, size_t size)
{}

#else	/* __lint */

/*
 * void scan_memory(caddr_t addr, size_t size)
 *
 * Read through the size bytes starting at addr with a rep-prefixed lods,
 * discarding the data.  The byte count is converted to whole 8-byte
 * (amd64) or 4-byte (i386) units, so any remainder is not read, and a
 * count that comes to zero units returns at once.
 */

#if defined(__amd64)

	ENTRY(scan_memory)
	shrq	$3, %rsi	/* convert %rsi from byte to quadword count */
	jz	.scanm_done
	movq	%rsi, %rcx	/* move count into rep control register */
	movq	%rdi, %rsi	/* move addr into lodsq control reg. */
	rep lodsq		/* scan the memory range */
.scanm_done:
	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(scan_memory)

#elif defined(__i386)

	ENTRY(scan_memory)
	pushl	%ecx
	pushl	%esi
	/* two registers pushed, so addr is at 12(%esp) and size at 16(%esp) */
	movl	16(%esp), %ecx	/* move 2nd arg into rep control register */
	shrl	$2, %ecx	/* convert from byte count to 4-byte count */
	jz	.scanm_done
	movl	12(%esp), %esi	/* move 1st arg into lodsl control register */
	.byte	0xf3		/* rep prefix.  lame assembler.  sigh. */
	lodsl
.scanm_done:
	popl	%esi
	popl	%ecx
	ret
	SET_SIZE(scan_memory)

#endif	/* __i386 */
#endif	/* __lint */
2737
2738
#if defined(__lint)

/*ARGSUSED */
int
lowbit(ulong_t i)
{ return (0); }

#else	/* __lint */

/*
 * int lowbit(ulong_t i)
 *
 * Return one more than the bit index that bsf finds for i, i.e. the
 * 1-based position of the lowest set bit.
 *
 * %eax is preloaded with -1 so that the increment yields 0 if bsf leaves
 * its destination untouched.  NOTE(review): that is the apparent intent
 * for i == 0, and it depends on bsf not modifying the destination when
 * the source is zero -- confirm against the processor manuals.
 */

#if defined(__amd64)

	ENTRY(lowbit)
	movl	$-1, %eax
	bsfq	%rdi, %rax
	incl	%eax
	ret
	SET_SIZE(lowbit)

#elif defined(__i386)

	ENTRY(lowbit)
	movl	$-1, %eax
	bsfl	4(%esp), %eax
	incl	%eax
	ret
	SET_SIZE(lowbit)

#endif	/* __i386 */
#endif	/* __lint */
2768
#if defined(__lint)

/*ARGSUSED*/
int
highbit(ulong_t i)
{ return (0); }

#else	/* __lint */

/*
 * int highbit(ulong_t i)
 *
 * Return one more than the bit index that bsr finds for i, i.e. the
 * 1-based position of the highest set bit.
 *
 * %eax is preloaded with -1 so that the increment yields 0 if bsr leaves
 * its destination untouched.  NOTE(review): that is the apparent intent
 * for i == 0, and it depends on bsr not modifying the destination when
 * the source is zero -- confirm against the processor manuals.
 */

#if defined(__amd64)

	ENTRY(highbit)
	movl	$-1, %eax
	bsrq	%rdi, %rax
	incl	%eax
	ret
	SET_SIZE(highbit)

#elif defined(__i386)

	ENTRY(highbit)
	movl	$-1, %eax
	bsrl	4(%esp), %eax
	incl	%eax
	ret
	SET_SIZE(highbit)

#endif	/* __i386 */
#endif	/* __lint */
2798
#if defined(__lint)

/*ARGSUSED*/
uint64_t
rdmsr(uint_t r)
{ return (0); }

/*ARGSUSED*/
void
wrmsr(uint_t r, const uint64_t val)
{}

/*ARGSUSED*/
uint64_t
xrdmsr(uint_t r)
{ return (0); }

/*ARGSUSED*/
void
xwrmsr(uint_t r, const uint64_t val)
{}

void
invalidate_cache(void)
{}

#else  /* __lint */

/*
 * Model-specific register access.
 *
 *	rdmsr(r)	read MSR number r and return its 64-bit value
 *	wrmsr(r, val)	write the 64-bit val to MSR number r
 *	xrdmsr(r)	as rdmsr, with XMSR_ACCESS_VAL loaded into %edi first
 *	xwrmsr(r, val)	as wrmsr, with XMSR_ACCESS_VAL loaded into %edi first
 *
 * The rdmsr and wrmsr instructions take the MSR number in %ecx and the
 * value in %edx:%eax.  On amd64 the two halves are merged into, or split
 * out of, a single 64-bit register; on i386 the %edx:%eax pair is used
 * directly and the x-variants save and restore %edi around the access.
 */

#define	XMSR_ACCESS_VAL		$0x9c5a203a

#if defined(__amd64)

	ENTRY(rdmsr)
	movl	%edi, %ecx
	rdmsr
	shlq	$32, %rdx
	orq	%rdx, %rax		/* %rax = (%edx << 32) | %eax */
	ret
	SET_SIZE(rdmsr)

	ENTRY(wrmsr)
	movq	%rsi, %rdx
	shrq	$32, %rdx		/* %edx = high half of val */
	movl	%esi, %eax		/* %eax = low half of val */
	movl	%edi, %ecx
	wrmsr
	ret
	SET_SIZE(wrmsr)

	ENTRY(xrdmsr)
	pushq	%rbp
	movq	%rsp, %rbp
	movl	%edi, %ecx		/* copy r out before %edi is reused */
	movl	XMSR_ACCESS_VAL, %edi	/* this value is needed to access MSR */
	rdmsr
	shlq	$32, %rdx
	orq	%rdx, %rax
	leave
	ret
	SET_SIZE(xrdmsr)

	ENTRY(xwrmsr)
	pushq	%rbp
	movq	%rsp, %rbp
	movl	%edi, %ecx		/* copy r out before %edi is reused */
	movl	XMSR_ACCESS_VAL, %edi	/* this value is needed to access MSR */
	movq	%rsi, %rdx
	shrq	$32, %rdx
	movl	%esi, %eax
	wrmsr
	leave
	ret
	SET_SIZE(xwrmsr)

#elif defined(__i386)

	ENTRY(rdmsr)
	movl	4(%esp), %ecx
	rdmsr
	ret
	SET_SIZE(rdmsr)

	ENTRY(wrmsr)
	movl	4(%esp), %ecx
	movl	8(%esp), %eax		/* low half of val */
	movl	12(%esp), %edx		/* high half of val */
	wrmsr
	ret
	SET_SIZE(wrmsr)

	ENTRY(xrdmsr)
	pushl	%ebp
	movl	%esp, %ebp
	movl	8(%esp), %ecx		/* r; offsets include the pushed %ebp */
	pushl	%edi
	movl	XMSR_ACCESS_VAL, %edi	/* this value is needed to access MSR */
	rdmsr
	popl	%edi
	leave
	ret
	SET_SIZE(xrdmsr)

	ENTRY(xwrmsr)
	pushl	%ebp
	movl	%esp, %ebp
	movl	8(%esp), %ecx		/* r; offsets include the pushed %ebp */
	movl	12(%esp), %eax
	movl	16(%esp), %edx
	pushl	%edi
	movl	XMSR_ACCESS_VAL, %edi	/* this value is needed to access MSR */
	wrmsr
	popl	%edi
	leave
	ret
	SET_SIZE(xwrmsr)

#endif	/* __i386 */

	/* void invalidate_cache(void): execute wbinvd and return */
	ENTRY(invalidate_cache)
	wbinvd
	ret
	SET_SIZE(invalidate_cache)

#endif	/* __lint */
2923
#if defined(__lint)

/*ARGSUSED*/
void
getcregs(struct cregs *crp)
{}

#else	/* __lint */

/*
 * void getcregs(struct cregs *crp)
 *
 * Fill *crp with a snapshot of the control registers and, on non-__xpv
 * kernels, the descriptor-table registers as well.  The CREG_* field
 * offsets and CREGSZ are defined outside this part of the file.
 */

#if defined(__amd64)

	ENTRY_NP(getcregs)
#if defined(__xpv)
	/*
	 * Only a few of the hardware control registers or descriptor tables
	 * are directly accessible to us, so just zero the structure.
	 *
	 * XXPV	Perhaps it would be helpful for the hypervisor to return
	 *	virtualized versions of these for post-mortem use.
	 *	(Need to reevaluate - perhaps it already does!)
	 */
	pushq	%rdi		/* save *crp */
	movq	$CREGSZ, %rsi
	call	bzero
	popq	%rdi

	/*
	 * Dump what limited information we can
	 */
	movq	%cr0, %rax
	movq	%rax, CREG_CR0(%rdi)	/* cr0 */
	movq	%cr2, %rax
	movq	%rax, CREG_CR2(%rdi)	/* cr2 */
	movq	%cr3, %rax
	movq	%rax, CREG_CR3(%rdi)	/* cr3 */
	movq	%cr4, %rax
	movq	%rax, CREG_CR4(%rdi)	/* cr4 */

#else	/* __xpv */

/* read MSR r and store its 64-bit value at off(d) as two 32-bit halves */
#define	GETMSR(r, off, d)	\
	movl	$r, %ecx;	\
	rdmsr;			\
	movl	%eax, off(d);	\
	movl	%edx, off+4(d)

	/*
	 * Each slot is zeroed before the store instruction that fills it,
	 * since those stores are narrower than the slot.
	 */
	xorl	%eax, %eax
	movq	%rax, CREG_GDT+8(%rdi)
	sgdt	CREG_GDT(%rdi)		/* 10 bytes */
	movq	%rax, CREG_IDT+8(%rdi)
	sidt	CREG_IDT(%rdi)		/* 10 bytes */
	movq	%rax, CREG_LDT(%rdi)
	sldt	CREG_LDT(%rdi)		/* 2 bytes */
	movq	%rax, CREG_TASKR(%rdi)
	str	CREG_TASKR(%rdi)	/* 2 bytes */
	movq	%cr0, %rax
	movq	%rax, CREG_CR0(%rdi)	/* cr0 */
	movq	%cr2, %rax
	movq	%rax, CREG_CR2(%rdi)	/* cr2 */
	movq	%cr3, %rax
	movq	%rax, CREG_CR3(%rdi)	/* cr3 */
	movq	%cr4, %rax
	movq	%rax, CREG_CR4(%rdi)	/* cr4 */
	movq	%cr8, %rax
	movq	%rax, CREG_CR8(%rdi)	/* cr8 */
	GETMSR(MSR_AMD_KGSBASE, CREG_KGSBASE, %rdi)
	GETMSR(MSR_AMD_EFER, CREG_EFER, %rdi)
#endif	/* __xpv */
	ret
	SET_SIZE(getcregs)

#undef GETMSR

#elif defined(__i386)

	ENTRY_NP(getcregs)
#if defined(__xpv)
	/*
	 * Only a few of the hardware control registers or descriptor tables
	 * are directly accessible to us, so just zero the structure.
	 *
	 * XXPV	Perhaps it would be helpful for the hypervisor to return
	 *	virtualized versions of these for post-mortem use.
	 *	(Need to reevaluate - perhaps it already does!)
	 */
	movl	4(%esp), %edx
	pushl	$CREGSZ
	pushl	%edx
	call	bzero
	addl	$8, %esp
	movl	4(%esp), %edx		/* reload crp after the call */

	/*
	 * Dump what limited information we can
	 */
	movl	%cr0, %eax
	movl	%eax, CREG_CR0(%edx)	/* cr0 */
	movl	%cr2, %eax
	movl	%eax, CREG_CR2(%edx)	/* cr2 */
	movl	%cr3, %eax
	movl	%eax, CREG_CR3(%edx)	/* cr3 */
	movl	%cr4, %eax
	movl	%eax, CREG_CR4(%edx)	/* cr4 */

#else	/* __xpv */

	movl	4(%esp), %edx
	/* clear the 16 bits at offset 6 of each slot before sgdt/sidt */
	movw	$0, CREG_GDT+6(%edx)
	movw	$0, CREG_IDT+6(%edx)
	sgdt	CREG_GDT(%edx)		/* gdt */
	sidt	CREG_IDT(%edx)		/* idt */
	sldt	CREG_LDT(%edx)		/* ldt */
	str	CREG_TASKR(%edx)	/* task */
	movl	%cr0, %eax
	movl	%eax, CREG_CR0(%edx)	/* cr0 */
	movl	%cr2, %eax
	movl	%eax, CREG_CR2(%edx)	/* cr2 */
	movl	%cr3, %eax
	movl	%eax, CREG_CR3(%edx)	/* cr3 */
	/* %cr4 is read only when X86_LARGEPAGE is set; else 0 is recorded */
	testl	$X86_LARGEPAGE, x86_feature
	jz	.nocr4
	movl	%cr4, %eax
	movl	%eax, CREG_CR4(%edx)	/* cr4 */
	jmp	.skip
.nocr4:
	movl	$0, CREG_CR4(%edx)
.skip:
#endif
	ret
	SET_SIZE(getcregs)

#endif	/* __i386 */
#endif	/* __lint */
3057
3058
3059/*
3060 * A panic trigger is a word which is updated atomically and can only be set
3061 * once.  We atomically store 0xDEFACEDD and load the old value.  If the
3062 * previous value was 0, we succeed and return 1; otherwise return 0.
3063 * This allows a partially corrupt trigger to still trigger correctly.  DTrace
3064 * has its own version of this function to allow it to panic correctly from
3065 * probe context.
3066 */
3067#if defined(__lint)
3068
/*ARGSUSED*/
int
panic_trigger(int *tp)
{
	/* lint-only stub; the real routine is the assembly version */
	return (0);
}
3073
/*ARGSUSED*/
int
dtrace_panic_trigger(int *tp)
{
	/* lint-only stub; the real routine is the assembly version */
	return (0);
}
3078
3079#else	/* __lint */
3080
3081#if defined(__amd64)
3082
3083	ENTRY_NP(panic_trigger)
3084	xorl	%eax, %eax
3085	movl	$0xdefacedd, %edx
3086	lock
3087	  xchgl	%edx, (%rdi)
3088	cmpl	$0, %edx
3089	je	0f
3090	movl	$0, %eax
3091	ret
30920:	movl	$1, %eax
3093	ret
3094	SET_SIZE(panic_trigger)
3095
3096	ENTRY_NP(dtrace_panic_trigger)
3097	xorl	%eax, %eax
3098	movl	$0xdefacedd, %edx
3099	lock
3100	  xchgl	%edx, (%rdi)
3101	cmpl	$0, %edx
3102	je	0f
3103	movl	$0, %eax
3104	ret
31050:	movl	$1, %eax
3106	ret
3107	SET_SIZE(dtrace_panic_trigger)
3108
3109#elif defined(__i386)
3110
3111	ENTRY_NP(panic_trigger)
3112	movl	4(%esp), %edx		/ %edx = address of trigger
3113	movl	$0xdefacedd, %eax	/ %eax = 0xdefacedd
3114	lock				/ assert lock
3115	xchgl %eax, (%edx)		/ exchange %eax and the trigger
3116	cmpl	$0, %eax		/ if (%eax == 0x0)
3117	je	0f			/   return (1);
3118	movl	$0, %eax		/ else
3119	ret				/   return (0);
31200:	movl	$1, %eax
3121	ret
3122	SET_SIZE(panic_trigger)
3123
3124	ENTRY_NP(dtrace_panic_trigger)
3125	movl	4(%esp), %edx		/ %edx = address of trigger
3126	movl	$0xdefacedd, %eax	/ %eax = 0xdefacedd
3127	lock				/ assert lock
3128	xchgl %eax, (%edx)		/ exchange %eax and the trigger
3129	cmpl	$0, %eax		/ if (%eax == 0x0)
3130	je	0f			/   return (1);
3131	movl	$0, %eax		/ else
3132	ret				/   return (0);
31330:	movl	$1, %eax
3134	ret
3135	SET_SIZE(dtrace_panic_trigger)
3136
3137#endif	/* __i386 */
3138#endif	/* __lint */
3139
3140/*
3141 * The panic() and cmn_err() functions invoke vpanic() as a common entry point
3142 * into the panic code implemented in panicsys().  vpanic() is responsible
3143 * for passing through the format string and arguments, and constructing a
3144 * regs structure on the stack into which it saves the current register
3145 * values.  If we are not dying due to a fatal trap, these registers will
3146 * then be preserved in panicbuf as the current processor state.  Before
3147 * invoking panicsys(), vpanic() activates the first panic trigger (see
3148 * common/os/panic.c) and switches to the panic_stack if successful.  Note that
3149 * DTrace takes a slightly different panic path if it must panic from probe
3150 * context.  Instead of calling panic, it calls into dtrace_vpanic(), which
3151 * sets up the initial stack as vpanic does, calls dtrace_panic_trigger(), and
3152 * branches back into vpanic().
3153 */
3154#if defined(__lint)
3155
/*ARGSUSED*/
void
vpanic(const char *format, va_list alist)
{
	/* lint-only stub; the real routine is the assembly version */
}
3160
/*ARGSUSED*/
void
dtrace_vpanic(const char *format, va_list alist)
{
	/* lint-only stub; the real routine is the assembly version */
}
3165
3166#else	/* __lint */
3167
3168#if defined(__amd64)
3169
3170	ENTRY_NP(vpanic)			/* Initial stack layout: */
3171
3172	pushq	%rbp				/* | %rip | 	0x60	*/
3173	movq	%rsp, %rbp			/* | %rbp |	0x58	*/
3174	pushfq					/* | rfl  |	0x50	*/
3175	pushq	%r11				/* | %r11 |	0x48	*/
3176	pushq	%r10				/* | %r10 |	0x40	*/
3177	pushq	%rbx				/* | %rbx |	0x38	*/
3178	pushq	%rax				/* | %rax |	0x30	*/
3179	pushq	%r9				/* | %r9  |	0x28	*/
3180	pushq	%r8				/* | %r8  |	0x20	*/
3181	pushq	%rcx				/* | %rcx |	0x18	*/
3182	pushq	%rdx				/* | %rdx |	0x10	*/
3183	pushq	%rsi				/* | %rsi |	0x8 alist */
3184	pushq	%rdi				/* | %rdi |	0x0 format */
3185
3186	movq	%rsp, %rbx			/* %rbx = current %rsp */
3187
3188	leaq	panic_quiesce(%rip), %rdi	/* %rdi = &panic_quiesce */
3189	call	panic_trigger			/* %eax = panic_trigger() */
3190
3191vpanic_common:
3192	/*
3193	 * The panic_trigger result is in %eax from the call above, and
3194	 * dtrace_panic places it in %eax before branching here.
3195	 * The rdmsr instructions that follow below will clobber %eax so
3196	 * we stash the panic_trigger result in %r11d.
3197	 */
3198	movl	%eax, %r11d
3199	cmpl	$0, %r11d
3200	je	0f
3201
3202	/*
3203	 * If panic_trigger() was successful, we are the first to initiate a
3204	 * panic: we now switch to the reserved panic_stack before continuing.
3205	 */
3206	leaq	panic_stack(%rip), %rsp
3207	addq	$PANICSTKSIZE, %rsp
32080:	subq	$REGSIZE, %rsp
3209	/*
3210	 * Now that we've got everything set up, store the register values as
3211	 * they were when we entered vpanic() to the designated location in
3212	 * the regs structure we allocated on the stack.
3213	 */
3214	movq	0x0(%rbx), %rcx
3215	movq	%rcx, REGOFF_RDI(%rsp)
3216	movq	0x8(%rbx), %rcx
3217	movq	%rcx, REGOFF_RSI(%rsp)
3218	movq	0x10(%rbx), %rcx
3219	movq	%rcx, REGOFF_RDX(%rsp)
3220	movq	0x18(%rbx), %rcx
3221	movq	%rcx, REGOFF_RCX(%rsp)
3222	movq	0x20(%rbx), %rcx
3223
3224	movq	%rcx, REGOFF_R8(%rsp)
3225	movq	0x28(%rbx), %rcx
3226	movq	%rcx, REGOFF_R9(%rsp)
3227	movq	0x30(%rbx), %rcx
3228	movq	%rcx, REGOFF_RAX(%rsp)
3229	movq	0x38(%rbx), %rcx
3230	movq	%rcx, REGOFF_RBX(%rsp)
3231	movq	0x58(%rbx), %rcx
3232
3233	movq	%rcx, REGOFF_RBP(%rsp)
3234	movq	0x40(%rbx), %rcx
3235	movq	%rcx, REGOFF_R10(%rsp)
3236	movq	0x48(%rbx), %rcx
3237	movq	%rcx, REGOFF_R11(%rsp)
3238	movq	%r12, REGOFF_R12(%rsp)
3239
3240	movq	%r13, REGOFF_R13(%rsp)
3241	movq	%r14, REGOFF_R14(%rsp)
3242	movq	%r15, REGOFF_R15(%rsp)
3243
3244	xorl	%ecx, %ecx
3245	movw	%ds, %cx
3246	movq	%rcx, REGOFF_DS(%rsp)
3247	movw	%es, %cx
3248	movq	%rcx, REGOFF_ES(%rsp)
3249	movw	%fs, %cx
3250	movq	%rcx, REGOFF_FS(%rsp)
3251	movw	%gs, %cx
3252	movq	%rcx, REGOFF_GS(%rsp)
3253
3254	movq	$0, REGOFF_TRAPNO(%rsp)
3255
3256	movq	$0, REGOFF_ERR(%rsp)
3257	leaq	vpanic(%rip), %rcx
3258	movq	%rcx, REGOFF_RIP(%rsp)
3259	movw	%cs, %cx
3260	movzwq	%cx, %rcx
3261	movq	%rcx, REGOFF_CS(%rsp)
3262	movq	0x50(%rbx), %rcx
3263	movq	%rcx, REGOFF_RFL(%rsp)
3264	movq	%rbx, %rcx
3265	addq	$0x60, %rcx
3266	movq	%rcx, REGOFF_RSP(%rsp)
3267	movw	%ss, %cx
3268	movzwq	%cx, %rcx
3269	movq	%rcx, REGOFF_SS(%rsp)
3270
3271	/*
3272	 * panicsys(format, alist, rp, on_panic_stack)
3273	 */
3274	movq	REGOFF_RDI(%rsp), %rdi		/* format */
3275	movq	REGOFF_RSI(%rsp), %rsi		/* alist */
3276	movq	%rsp, %rdx			/* struct regs */
3277	movl	%r11d, %ecx			/* on_panic_stack */
3278	call	panicsys
3279	addq	$REGSIZE, %rsp
3280	popq	%rdi
3281	popq	%rsi
3282	popq	%rdx
3283	popq	%rcx
3284	popq	%r8
3285	popq	%r9
3286	popq	%rax
3287	popq	%rbx
3288	popq	%r10
3289	popq	%r11
3290	popfq
3291	leave
3292	ret
3293	SET_SIZE(vpanic)
3294
3295	ENTRY_NP(dtrace_vpanic)			/* Initial stack layout: */
3296
3297	pushq	%rbp				/* | %rip | 	0x60	*/
3298	movq	%rsp, %rbp			/* | %rbp |	0x58	*/
3299	pushfq					/* | rfl  |	0x50	*/
3300	pushq	%r11				/* | %r11 |	0x48	*/
3301	pushq	%r10				/* | %r10 |	0x40	*/
3302	pushq	%rbx				/* | %rbx |	0x38	*/
3303	pushq	%rax				/* | %rax |	0x30	*/
3304	pushq	%r9				/* | %r9  |	0x28	*/
3305	pushq	%r8				/* | %r8  |	0x20	*/
3306	pushq	%rcx				/* | %rcx |	0x18	*/
3307	pushq	%rdx				/* | %rdx |	0x10	*/
3308	pushq	%rsi				/* | %rsi |	0x8 alist */
3309	pushq	%rdi				/* | %rdi |	0x0 format */
3310
3311	movq	%rsp, %rbx			/* %rbx = current %rsp */
3312
3313	leaq	panic_quiesce(%rip), %rdi	/* %rdi = &panic_quiesce */
3314	call	dtrace_panic_trigger	/* %eax = dtrace_panic_trigger() */
3315	jmp	vpanic_common
3316
3317	SET_SIZE(dtrace_vpanic)
3318
3319#elif defined(__i386)
3320
3321	ENTRY_NP(vpanic)			/ Initial stack layout:
3322
3323	pushl	%ebp				/ | %eip | 20
3324	movl	%esp, %ebp			/ | %ebp | 16
3325	pushl	%eax				/ | %eax | 12
3326	pushl	%ebx				/ | %ebx |  8
3327	pushl	%ecx				/ | %ecx |  4
3328	pushl	%edx				/ | %edx |  0
3329
3330	movl	%esp, %ebx			/ %ebx = current stack pointer
3331
3332	lea	panic_quiesce, %eax		/ %eax = &panic_quiesce
3333	pushl	%eax				/ push &panic_quiesce
3334	call	panic_trigger			/ %eax = panic_trigger()
3335	addl	$4, %esp			/ reset stack pointer
3336
3337vpanic_common:
3338	cmpl	$0, %eax			/ if (%eax == 0)
3339	je	0f				/   goto 0f;
3340
3341	/*
3342	 * If panic_trigger() was successful, we are the first to initiate a
3343	 * panic: we now switch to the reserved panic_stack before continuing.
3344	 */
3345	lea	panic_stack, %esp		/ %esp  = panic_stack
3346	addl	$PANICSTKSIZE, %esp		/ %esp += PANICSTKSIZE
3347
33480:	subl	$REGSIZE, %esp			/ allocate struct regs
3349
3350	/*
3351	 * Now that we've got everything set up, store the register values as
3352	 * they were when we entered vpanic() to the designated location in
3353	 * the regs structure we allocated on the stack.
3354	 */
3355#if !defined(__GNUC_AS__)
3356	movw	%gs, %edx
3357	movl	%edx, REGOFF_GS(%esp)
3358	movw	%fs, %edx
3359	movl	%edx, REGOFF_FS(%esp)
3360	movw	%es, %edx
3361	movl	%edx, REGOFF_ES(%esp)
3362	movw	%ds, %edx
3363	movl	%edx, REGOFF_DS(%esp)
3364#else	/* __GNUC_AS__ */
3365	mov	%gs, %edx
3366	mov	%edx, REGOFF_GS(%esp)
3367	mov	%fs, %edx
3368	mov	%edx, REGOFF_FS(%esp)
3369	mov	%es, %edx
3370	mov	%edx, REGOFF_ES(%esp)
3371	mov	%ds, %edx
3372	mov	%edx, REGOFF_DS(%esp)
3373#endif	/* __GNUC_AS__ */
3374	movl	%edi, REGOFF_EDI(%esp)
3375	movl	%esi, REGOFF_ESI(%esp)
3376	movl	16(%ebx), %ecx
3377	movl	%ecx, REGOFF_EBP(%esp)
3378	movl	%ebx, %ecx
3379	addl	$20, %ecx
3380	movl	%ecx, REGOFF_ESP(%esp)
3381	movl	8(%ebx), %ecx
3382	movl	%ecx, REGOFF_EBX(%esp)
3383	movl	0(%ebx), %ecx
3384	movl	%ecx, REGOFF_EDX(%esp)
3385	movl	4(%ebx), %ecx
3386	movl	%ecx, REGOFF_ECX(%esp)
3387	movl	12(%ebx), %ecx
3388	movl	%ecx, REGOFF_EAX(%esp)
3389	movl	$0, REGOFF_TRAPNO(%esp)
3390	movl	$0, REGOFF_ERR(%esp)
3391	lea	vpanic, %ecx
3392	movl	%ecx, REGOFF_EIP(%esp)
3393#if !defined(__GNUC_AS__)
3394	movw	%cs, %edx
3395#else	/* __GNUC_AS__ */
3396	mov	%cs, %edx
3397#endif	/* __GNUC_AS__ */
3398	movl	%edx, REGOFF_CS(%esp)
3399	pushfl
3400	popl	%ecx
3401#if defined(__xpv)
3402	/*
3403	 * Synthesize the PS_IE bit from the event mask bit
3404	 */
3405	CURTHREAD(%edx)
3406	KPREEMPT_DISABLE(%edx)
3407	EVENT_MASK_TO_IE(%edx, %ecx)
3408	CURTHREAD(%edx)
3409	KPREEMPT_ENABLE_NOKP(%edx)
3410#endif
3411	movl	%ecx, REGOFF_EFL(%esp)
3412	movl	$0, REGOFF_UESP(%esp)
3413#if !defined(__GNUC_AS__)
3414	movw	%ss, %edx
3415#else	/* __GNUC_AS__ */
3416	mov	%ss, %edx
3417#endif	/* __GNUC_AS__ */
3418	movl	%edx, REGOFF_SS(%esp)
3419
3420	movl	%esp, %ecx			/ %ecx = &regs
3421	pushl	%eax				/ push on_panic_stack
3422	pushl	%ecx				/ push &regs
3423	movl	12(%ebp), %ecx			/ %ecx = alist
3424	pushl	%ecx				/ push alist
3425	movl	8(%ebp), %ecx			/ %ecx = format
3426	pushl	%ecx				/ push format
3427	call	panicsys			/ panicsys();
3428	addl	$16, %esp			/ pop arguments
3429
3430	addl	$REGSIZE, %esp
3431	popl	%edx
3432	popl	%ecx
3433	popl	%ebx
3434	popl	%eax
3435	leave
3436	ret
3437	SET_SIZE(vpanic)
3438
3439	ENTRY_NP(dtrace_vpanic)			/ Initial stack layout:
3440
3441	pushl	%ebp				/ | %eip | 20
3442	movl	%esp, %ebp			/ | %ebp | 16
3443	pushl	%eax				/ | %eax | 12
3444	pushl	%ebx				/ | %ebx |  8
3445	pushl	%ecx				/ | %ecx |  4
3446	pushl	%edx				/ | %edx |  0
3447
3448	movl	%esp, %ebx			/ %ebx = current stack pointer
3449
3450	lea	panic_quiesce, %eax		/ %eax = &panic_quiesce
3451	pushl	%eax				/ push &panic_quiesce
3452	call	dtrace_panic_trigger		/ %eax = dtrace_panic_trigger()
3453	addl	$4, %esp			/ reset stack pointer
3454	jmp	vpanic_common			/ jump back to common code
3455
3456	SET_SIZE(dtrace_vpanic)
3457
3458#endif	/* __i386 */
3459#endif	/* __lint */
3460
3461#if defined(__lint)
3462
3463void
3464hres_tick(void)
3465{}
3466
3467int64_t timedelta;
3468hrtime_t hres_last_tick;
3469volatile timestruc_t hrestime;
3470int64_t hrestime_adj;
3471volatile int hres_lock;
3472hrtime_t hrtime_base;
3473
3474#else	/* __lint */
3475
3476	DGDEF3(hrestime, _MUL(2, CLONGSIZE), 8)
3477	.NWORD	0, 0
3478
3479	DGDEF3(hrestime_adj, 8, 8)
3480	.long	0, 0
3481
3482	DGDEF3(hres_last_tick, 8, 8)
3483	.long	0, 0
3484
3485	DGDEF3(timedelta, 8, 8)
3486	.long	0, 0
3487
3488	DGDEF3(hres_lock, 4, 8)
3489	.long	0
3490
3491	/*
3492	 * initialized to a non zero value to make pc_gethrtime()
3493	 * work correctly even before clock is initialized
3494	 */
3495	DGDEF3(hrtime_base, 8, 8)
3496	.long	_MUL(NSEC_PER_CLOCK_TICK, 6), 0
3497
3498	DGDEF3(adj_shift, 4, 4)
3499	.long	ADJ_SHIFT
3500
3501#if defined(__amd64)
3502
3503	ENTRY_NP(hres_tick)
3504	pushq	%rbp
3505	movq	%rsp, %rbp
3506
3507	/*
3508	 * We need to call *gethrtimef before picking up CLOCK_LOCK (obviously,
3509	 * hres_last_tick can only be modified while holding CLOCK_LOCK).
3510	 * At worst, performing this now instead of under CLOCK_LOCK may
3511	 * introduce some jitter in pc_gethrestime().
3512	 */
3513	call	*gethrtimef(%rip)
3514	movq	%rax, %r8
3515
3516	leaq	hres_lock(%rip), %rax
3517	movb	$-1, %dl
3518.CL1:
3519	xchgb	%dl, (%rax)
3520	testb	%dl, %dl
3521	jz	.CL3			/* got it */
3522.CL2:
3523	cmpb	$0, (%rax)		/* possible to get lock? */
3524	pause
3525	jne	.CL2
3526	jmp	.CL1			/* yes, try again */
3527.CL3:
3528	/*
3529	 * compute the interval since last time hres_tick was called
3530	 * and adjust hrtime_base and hrestime accordingly
3531	 * hrtime_base is an 8 byte value (in nsec), hrestime is
3532	 * a timestruc_t (sec, nsec)
3533	 */
3534	leaq	hres_last_tick(%rip), %rax
3535	movq	%r8, %r11
3536	subq	(%rax), %r8
3537	addq	%r8, hrtime_base(%rip)	/* add interval to hrtime_base */
3538	addq	%r8, hrestime+8(%rip)	/* add interval to hrestime.tv_nsec */
3539	/*
3540	 * Now that we have CLOCK_LOCK, we can update hres_last_tick
3541	 */
3542	movq	%r11, (%rax)
3543
3544	call	__adj_hrestime
3545
3546	/*
3547	 * release the hres_lock
3548	 */
3549	incl	hres_lock(%rip)
3550	leave
3551	ret
3552	SET_SIZE(hres_tick)
3553
3554#elif defined(__i386)
3555
3556	ENTRY_NP(hres_tick)
3557	pushl	%ebp
3558	movl	%esp, %ebp
3559	pushl	%esi
3560	pushl	%ebx
3561
3562	/*
3563	 * We need to call *gethrtimef before picking up CLOCK_LOCK (obviously,
3564	 * hres_last_tick can only be modified while holding CLOCK_LOCK).
3565	 * At worst, performing this now instead of under CLOCK_LOCK may
3566	 * introduce some jitter in pc_gethrestime().
3567	 */
3568	call	*gethrtimef
3569	movl	%eax, %ebx
3570	movl	%edx, %esi
3571
3572	movl	$hres_lock, %eax
3573	movl	$-1, %edx
3574.CL1:
3575	xchgb	%dl, (%eax)
3576	testb	%dl, %dl
3577	jz	.CL3			/ got it
3578.CL2:
3579	cmpb	$0, (%eax)		/ possible to get lock?
3580	pause
3581	jne	.CL2
3582	jmp	.CL1			/ yes, try again
3583.CL3:
3584	/*
3585	 * compute the interval since last time hres_tick was called
3586	 * and adjust hrtime_base and hrestime accordingly
3587	 * hrtime_base is an 8 byte value (in nsec), hrestime is
3588	 * timestruc_t (sec, nsec)
3589	 */
3590
3591	lea	hres_last_tick, %eax
3592
3593	movl	%ebx, %edx
3594	movl	%esi, %ecx
3595
3596	subl 	(%eax), %edx
3597	sbbl 	4(%eax), %ecx
3598
3599	addl	%edx, hrtime_base	/ add interval to hrtime_base
3600	adcl	%ecx, hrtime_base+4
3601
3602	addl 	%edx, hrestime+4	/ add interval to hrestime.tv_nsec
3603
3604	/
3605	/ Now that we have CLOCK_LOCK, we can update hres_last_tick.
3606	/
3607	movl	%ebx, (%eax)
3608	movl	%esi,  4(%eax)
3609
3610	/ get hrestime at this moment. used as base for pc_gethrestime
3611	/
3612	/ Apply adjustment, if any
3613	/
3614	/ #define HRES_ADJ	(NSEC_PER_CLOCK_TICK >> ADJ_SHIFT)
3615	/ (max_hres_adj)
3616	/
3617	/ void
3618	/ adj_hrestime()
3619	/ {
3620	/	long long adj;
3621	/
3622	/	if (hrestime_adj == 0)
3623	/		adj = 0;
3624	/	else if (hrestime_adj > 0) {
3625	/		if (hrestime_adj < HRES_ADJ)
3626	/			adj = hrestime_adj;
3627	/		else
3628	/			adj = HRES_ADJ;
3629	/	}
3630	/	else {
3631	/		if (hrestime_adj < -(HRES_ADJ))
3632	/			adj = -(HRES_ADJ);
3633	/		else
3634	/			adj = hrestime_adj;
3635	/	}
3636	/
3637	/	timedelta -= adj;
3638	/	hrestime_adj = timedelta;
3639	/	hrestime.tv_nsec += adj;
3640	/
3641	/	while (hrestime.tv_nsec >= NANOSEC) {
3642	/		one_sec++;
3643	/		hrestime.tv_sec++;
3644	/		hrestime.tv_nsec -= NANOSEC;
3645	/	}
3646	/ }
3647__adj_hrestime:
3648	movl	hrestime_adj, %esi	/ if (hrestime_adj == 0)
3649	movl	hrestime_adj+4, %edx
3650	andl	%esi, %esi
3651	jne	.CL4			/ no
3652	andl	%edx, %edx
3653	jne	.CL4			/ no
3654	subl	%ecx, %ecx		/ yes, adj = 0;
3655	subl	%edx, %edx
3656	jmp	.CL5
3657.CL4:
3658	subl	%ecx, %ecx
3659	subl	%eax, %eax
3660	subl	%esi, %ecx
3661	sbbl	%edx, %eax
3662	andl	%eax, %eax		/ if (hrestime_adj > 0)
3663	jge	.CL6
3664
3665	/ In the following comments, HRES_ADJ is used, while in the code
3666	/ max_hres_adj is used.
3667	/
3668	/ The test for "hrestime_adj < HRES_ADJ" is complicated because
3669	/ hrestime_adj is 64-bits, while HRES_ADJ is 32-bits.  We rely
3670	/ on the logical equivalence of:
3671	/
3672	/	!(hrestime_adj < HRES_ADJ)
3673	/
3674	/ and the two step sequence:
3675	/
3676	/	(HRES_ADJ - lsw(hrestime_adj)) generates a Borrow/Carry
3677	/
3678	/ which computes whether or not the least significant 32-bits
3679	/ of hrestime_adj is greater than HRES_ADJ, followed by:
3680	/
3681	/	Previous Borrow/Carry + -1 + msw(hrestime_adj) generates a Carry
3682	/
3683	/ which generates a carry whenever step 1 is true or the most
3684	/ significant long of the longlong hrestime_adj is non-zero.
3685
3686	movl	max_hres_adj, %ecx	/ hrestime_adj is positive
3687	subl	%esi, %ecx
3688	movl	%edx, %eax
3689	adcl	$-1, %eax
3690	jnc	.CL7
3691	movl	max_hres_adj, %ecx	/ adj = HRES_ADJ;
3692	subl	%edx, %edx
3693	jmp	.CL5
3694
3695	/ The following computation is similar to the one above.
3696	/
3697	/ The test for "hrestime_adj < -(HRES_ADJ)" is complicated because
3698	/ hrestime_adj is 64-bits, while HRES_ADJ is 32-bits.  We rely
3699	/ on the logical equivalence of:
3700	/
3701	/	(hrestime_adj > -HRES_ADJ)
3702	/
3703	/ and the two step sequence:
3704	/
3705	/	(HRES_ADJ + lsw(hrestime_adj)) generates a Carry
3706	/
3707	/ which means the least significant 32-bits of hrestime_adj is
3708	/ greater than -HRES_ADJ, followed by:
3709	/
3710	/	Previous Carry + 0 + msw(hrestime_adj) generates a Carry
3711	/
3712	/ which generates a carry only when step 1 is true and the most
3713	/ significant long of the longlong hrestime_adj is -1.
3714
3715.CL6:					/ hrestime_adj is negative
3716	movl	%esi, %ecx
3717	addl	max_hres_adj, %ecx
3718	movl	%edx, %eax
3719	adcl	$0, %eax
3720	jc	.CL7
3721	xor	%ecx, %ecx
3722	subl	max_hres_adj, %ecx	/ adj = -(HRES_ADJ);
3723	movl	$-1, %edx
3724	jmp	.CL5
3725.CL7:
3726	movl	%esi, %ecx		/ adj = hrestime_adj;
3727.CL5:
3728	movl	timedelta, %esi
3729	subl	%ecx, %esi
3730	movl	timedelta+4, %eax
3731	sbbl	%edx, %eax
3732	movl	%esi, timedelta
3733	movl	%eax, timedelta+4	/ timedelta -= adj;
3734	movl	%esi, hrestime_adj
3735	movl	%eax, hrestime_adj+4	/ hrestime_adj = timedelta;
3736	addl	hrestime+4, %ecx
3737
3738	movl	%ecx, %eax		/ eax = tv_nsec
37391:
3740	cmpl	$NANOSEC, %eax		/ if ((unsigned long)tv_nsec >= NANOSEC)
3741	jb	.CL8			/ no
3742	incl	one_sec			/ yes,  one_sec++;
3743	incl	hrestime		/ hrestime.tv_sec++;
3744	addl	$-NANOSEC, %eax		/ tv_nsec -= NANOSEC
3745	jmp	1b			/ check for more seconds
3746
3747.CL8:
3748	movl	%eax, hrestime+4	/ store final into hrestime.tv_nsec
3749	incl	hres_lock		/ release the hres_lock
3750
3751	popl	%ebx
3752	popl	%esi
3753	leave
3754	ret
3755	SET_SIZE(hres_tick)
3756
3757#endif	/* __i386 */
3758#endif	/* __lint */
3759
3760/*
3761 * void prefetch_smap_w(void *)
3762 *
3763 * Prefetch ahead within a linear list of smap structures.
3764 * Not implemented for ia32.  Stub for compatibility.
3765 */
3766
3767#if defined(__lint)
3768
/*ARGSUSED*/
void
prefetch_smap_w(void *smp)
{
	/* lint-only stub; the real routine is the assembly version */
}
3772
3773#else	/* __lint */
3774
3775	ENTRY(prefetch_smap_w)
3776	rep;	ret	/* use 2 byte return instruction when branch target */
3777			/* AMD Software Optimization Guide - Section 6.2 */
3778	SET_SIZE(prefetch_smap_w)
3779
3780#endif	/* __lint */
3781
/*
 * void prefetch_page_r(void *pp)
 *
 * Intended to issue prefetch instructions for a page_t.  The
 * implementation in this file is a stub that simply returns.
 */
3786#if defined(__lint)
3787
/*ARGSUSED*/
void
prefetch_page_r(void *pp)
{
	/* lint-only stub; the real routine is the assembly version */
}
3792
3793#else	/* __lint */
3794
3795	ENTRY(prefetch_page_r)
3796	rep;	ret	/* use 2 byte return instruction when branch target */
3797			/* AMD Software Optimization Guide - Section 6.2 */
3798	SET_SIZE(prefetch_page_r)
3799
3800#endif	/* __lint */
3801
3802#if defined(__lint)
3803
/*ARGSUSED*/
int
bcmp(const void *s1, const void *s2, size_t count)
{
	/* lint-only stub; the real routine is the assembly version */
	return (0);
}
3808
3809#else   /* __lint */
3810
3811#if defined(__amd64)
3812
3813	ENTRY(bcmp)
3814	pushq	%rbp
3815	movq	%rsp, %rbp
3816#ifdef DEBUG
3817	movq	postbootkernelbase(%rip), %r11
3818	cmpq	%r11, %rdi
3819	jb	0f
3820	cmpq	%r11, %rsi
3821	jnb	1f
38220:	leaq	.bcmp_panic_msg(%rip), %rdi
3823	xorl	%eax, %eax
3824	call	panic
38251:
3826#endif	/* DEBUG */
3827	call	memcmp
3828	testl	%eax, %eax
3829	setne	%dl
3830	leave
3831	movzbl	%dl, %eax
3832	ret
3833	SET_SIZE(bcmp)
3834
3835#elif defined(__i386)
3836
/* Frame offsets of the three bcmp arguments once %ebp is set up. */
#define	ARG_S1		8
#define	ARG_S2		12
#define	ARG_LENGTH	16

	/*
	 * int bcmp(const void *s1, const void *s2, size_t count)   [i386]
	 *
	 * Returns 0 when the two buffers are identical over count bytes
	 * (or are the same buffer), 1 otherwise.  Compares four bytes at a
	 * time while at least four remain, then finishes byte by byte.
	 * Registers: %eax walks s1, %ecx walks s2, %edi counts the bytes
	 * left, %edx/%dl is the comparison scratch.
	 */
	ENTRY(bcmp)
	pushl	%ebp
	movl	%esp, %ebp
#ifdef DEBUG
	/*
	 * DEBUG kernels panic unless both pointers are at or above
	 * postbootkernelbase (unsigned comparison).
	 */
	movl    postbootkernelbase, %eax
	cmpl    %eax, ARG_S1(%ebp)
	jb	0f			/* s1 too low */
	cmpl    %eax, ARG_S2(%ebp)
	jnb	1f			/* both fine */
0:	pushl   $.bcmp_panic_msg
	call    panic
1:
#endif	/* DEBUG */

	pushl	%edi			/* %edi is restored before returning */
	movl	ARG_S1(%ebp), %eax
	movl	ARG_S2(%ebp), %ecx
	cmpl	%eax, %ecx
	je	.bcmp_same		/* same address: trivially equal */
	movl	ARG_LENGTH(%ebp), %edi
	cmpl	$4, %edi
	jb	.bcmp_tail		/* fewer than 4 bytes: skip word loop */
	.align	4
.bcmp_words:
	movl	(%ecx), %edx		/* next 4 bytes of s2 */
	leal	-4(%edi), %edi		/* count them as consumed */
	cmpl	(%eax), %edx		/* against the next 4 bytes of s1 */
	jne	.bcmp_word_differs
	leal	4(%ecx), %ecx		/* advance both pointers */
	leal	4(%eax), %eax
	cmpl	$4, %edi
	jae	.bcmp_words		/* at least 4 bytes still to go */
.bcmp_tail:
	testl	%edi, %edi
	je	.bcmp_same		/* nothing left: buffers match */
	jmp	.bcmp_bytes
.bcmp_word_differs:
	leal	4(%edi), %edi		/* undo the early decrement */
	.align	4
.bcmp_bytes:
	movb	(%ecx),	%dl		/* next byte of s2 */
	cmpb	%dl, (%eax)		/* against the next byte of s1 */
	jne	.bcmp_differ
	incl	%ecx
	incl	%eax
	decl	%edi
	jnz	.bcmp_bytes		/* more bytes remain */
.bcmp_same:
	xorl	%eax, %eax		/* result 0: equal */
	popl	%edi
	leave
	ret
	.align	4
.bcmp_differ:
	movl	$1, %eax		/* result 1: a difference was found */
	popl	%edi
	leave
	ret
	SET_SIZE(bcmp)
3900
3901#endif	/* __i386 */
3902
3903#ifdef DEBUG
3904	.text
3905.bcmp_panic_msg:
3906	.string "bcmp: arguments below kernelbase"
3907#endif	/* DEBUG */
3908
3909#endif	/* __lint */
3910
3911#if defined(__lint)
3912
/*
 * C model of the bsrw wrapper, used for lint builds: returns the index
 * (0-15) of the most significant set bit of mask.
 *
 * A zero mask has no set bit.  Without the explicit check the search loop
 * would never terminate (index would wrap below zero and the shift count
 * would go out of range), so 0 is returned for that case; the assembly
 * versions likewise clear the result register before executing bsrw.
 */
uint_t
bsrw_insn(uint16_t mask)
{
	uint_t index = sizeof (mask) * NBBY - 1;

	if (mask == 0)
		return (0);

	/* mask != 0, so some bit at or below the top one stops the loop */
	while ((mask & (1U << index)) == 0)
		index--;
	return (index);
}
3922
3923#else	/* __lint */
3924
3925#if defined(__amd64)
3926
3927	ENTRY_NP(bsrw_insn)
3928	xorl	%eax, %eax
3929	bsrw	%di, %ax
3930	ret
3931	SET_SIZE(bsrw_insn)
3932
3933#elif defined(__i386)
3934
3935	ENTRY_NP(bsrw_insn)
3936	movw	4(%esp), %cx
3937	xorl	%eax, %eax
3938	bsrw	%cx, %ax
3939	ret
3940	SET_SIZE(bsrw_insn)
3941
3942#endif	/* __i386 */
3943#endif	/* __lint */
3944
3945#if defined(__lint)
3946
/*
 * C model of atomic_btr32, used for lint builds: clear bit number pil in
 * *pending and report whether that bit was set beforehand (1) or not (0).
 *
 * This mirrors the assembly routine, which clears the bit with btrl and
 * returns the resulting carry flag, rather than returning the updated
 * mask.  The model is not atomic.  The shift is done on an unsigned
 * 32-bit value so that pil == 31 is well defined.
 */
uint_t
atomic_btr32(uint32_t *pending, uint_t pil)
{
	uint32_t bit = (uint32_t)1 << pil;
	uint_t was_set = (*pending & bit) != 0;

	*pending &= ~bit;
	return (was_set);
}
3952
3953#else	/* __lint */
3954
3955#if defined(__i386)
3956
3957	ENTRY_NP(atomic_btr32)
3958	movl	4(%esp), %ecx
3959	movl	8(%esp), %edx
3960	xorl	%eax, %eax
3961	lock
3962	btrl	%edx, (%ecx)
3963	setc	%al
3964	ret
3965	SET_SIZE(atomic_btr32)
3966
3967#endif	/* __i386 */
3968#endif	/* __lint */
3969
3970#if defined(__lint)
3971
3972/*ARGSUSED*/
3973void
3974switch_sp_and_call(void *newsp, void (*func)(uint_t, uint_t), uint_t arg1,
3975	    uint_t arg2)
3976{}
3977
3978#else	/* __lint */
3979
3980#if defined(__amd64)
3981
3982	ENTRY_NP(switch_sp_and_call)
3983	pushq	%rbp
3984	movq	%rsp, %rbp		/* set up stack frame */
3985	movq	%rdi, %rsp		/* switch stack pointer */
3986	movq	%rdx, %rdi		/* pass func arg 1 */
3987	movq	%rsi, %r11		/* save function to call */
3988	movq	%rcx, %rsi		/* pass func arg 2 */
3989	call	*%r11			/* call function */
3990	leave				/* restore stack */
3991	ret
3992	SET_SIZE(switch_sp_and_call)
3993
3994#elif defined(__i386)
3995
3996	ENTRY_NP(switch_sp_and_call)
3997	pushl	%ebp
3998	mov	%esp, %ebp		/* set up stack frame */
3999	movl	8(%ebp), %esp		/* switch stack pointer */
4000	pushl	20(%ebp)		/* push func arg 2 */
4001	pushl	16(%ebp)		/* push func arg 1 */
4002	call	*12(%ebp)		/* call function */
4003	addl	$8, %esp		/* pop arguments */
4004	leave				/* restore stack */
4005	ret
4006	SET_SIZE(switch_sp_and_call)
4007
4008#endif	/* __i386 */
4009#endif	/* __lint */
4010
4011#if defined(__lint)
4012
/* lint-only stub; the real routine is the assembly version */
void
kmdb_enter(void)
{
}
4016
4017#else	/* __lint */
4018
4019#if defined(__amd64)
4020
4021	ENTRY_NP(kmdb_enter)
4022	pushq	%rbp
4023	movq	%rsp, %rbp
4024
4025	/*
4026	 * Save flags, do a 'cli' then return the saved flags
4027	 */
4028	call	intr_clear
4029
4030	int	$T_DBGENTR
4031
4032	/*
4033	 * Restore the saved flags
4034	 */
4035	movq	%rax, %rdi
4036	call	intr_restore
4037
4038	leave
4039	ret
4040	SET_SIZE(kmdb_enter)
4041
4042#elif defined(__i386)
4043
4044	ENTRY_NP(kmdb_enter)
4045	pushl	%ebp
4046	movl	%esp, %ebp
4047
4048	/*
4049	 * Save flags, do a 'cli' then return the saved flags
4050	 */
4051	call	intr_clear
4052
4053	int	$T_DBGENTR
4054
4055	/*
4056	 * Restore the saved flags
4057	 */
4058	pushl	%eax
4059	call	intr_restore
4060	addl	$4, %esp
4061
4062	leave
4063	ret
4064	SET_SIZE(kmdb_enter)
4065
4066#endif	/* __i386 */
4067#endif	/* __lint */
4068
4069#if defined(__lint)
4070
/* lint-only stub; the real routine is the assembly version */
void
return_instr(void)
{
}
4074
4075#else	/* __lint */
4076
4077	ENTRY_NP(return_instr)
4078	rep;	ret	/* use 2 byte instruction when branch target */
4079			/* AMD Software Optimization Guide - Section 6.2 */
4080	SET_SIZE(return_instr)
4081
4082#endif	/* __lint */
4083
4084#if defined(__lint)
4085
/* lint-only stub; the real routine is the assembly version */
ulong_t
getflags(void)
{ return (0); }
4091
4092#else	/* __lint */
4093
4094#if defined(__amd64)
4095
4096	ENTRY(getflags)
4097	pushfq
4098	popq	%rax
4099#if defined(__xpv)
4100	CURTHREAD(%rdi)
4101	KPREEMPT_DISABLE(%rdi)
4102	/*
4103	 * Synthesize the PS_IE bit from the event mask bit
4104	 */
4105	CURVCPU(%r11)
4106	andq    $_BITNOT(PS_IE), %rax
4107	XEN_TEST_UPCALL_MASK(%r11)
4108	jnz	1f
4109	orq	$PS_IE, %rax
41101:
4111	KPREEMPT_ENABLE_NOKP(%rdi)
4112#endif
4113	ret
4114	SET_SIZE(getflags)
4115
4116#elif defined(__i386)
4117
4118	ENTRY(getflags)
4119	pushfl
4120	popl	%eax
4121#if defined(__xpv)
4122	CURTHREAD(%ecx)
4123	KPREEMPT_DISABLE(%ecx)
4124	/*
4125	 * Synthesize the PS_IE bit from the event mask bit
4126	 */
4127	CURVCPU(%edx)
4128	andl    $_BITNOT(PS_IE), %eax
4129	XEN_TEST_UPCALL_MASK(%edx)
4130	jnz	1f
4131	orl	$PS_IE, %eax
41321:
4133	KPREEMPT_ENABLE_NOKP(%ecx)
4134#endif
4135	ret
4136	SET_SIZE(getflags)
4137
4138#endif	/* __i386 */
4139
4140#endif	/* __lint */
4141
4142#if defined(__lint)
4143
4144ftrace_icookie_t
4145ftrace_interrupt_disable(void)
4146{ return (0); }
4147
4148#else   /* __lint */
4149
4150#if defined(__amd64)
4151
4152	ENTRY(ftrace_interrupt_disable)
4153	pushfq
4154	popq	%rax
4155	CLI(%rdx)
4156	ret
4157	SET_SIZE(ftrace_interrupt_disable)
4158
4159#elif defined(__i386)
4160
4161	ENTRY(ftrace_interrupt_disable)
4162	pushfl
4163	popl	%eax
4164	CLI(%edx)
4165	ret
4166	SET_SIZE(ftrace_interrupt_disable)
4167
4168#endif	/* __i386 */
4169#endif	/* __lint */
4170
4171#if defined(__lint)
4172
4173/*ARGSUSED*/
4174void
4175ftrace_interrupt_enable(ftrace_icookie_t cookie)
4176{}
4177
4178#else	/* __lint */
4179
4180#if defined(__amd64)
4181
4182	ENTRY(ftrace_interrupt_enable)
4183	pushq	%rdi
4184	popfq
4185	ret
4186	SET_SIZE(ftrace_interrupt_enable)
4187
4188#elif defined(__i386)
4189
4190	ENTRY(ftrace_interrupt_enable)
4191	movl	4(%esp), %eax
4192	pushl	%eax
4193	popfl
4194	ret
4195	SET_SIZE(ftrace_interrupt_enable)
4196
4197#endif	/* __i386 */
4198#endif	/* __lint */
4199