xref: /titanic_44/usr/src/uts/intel/ia32/ml/ia32.il (revision aa8cf21aa2aaa2df3db469354ccc0c47f8cdaab9)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27/
28/ Inline functions for i386 kernels.
29/	Shared between all x86 platform variants.
30/
31
/
/ threadp: return the current thread pointer.
/
/ The pointer is read through %gs at the literal offset 0x10.
/
/ NOTE: "0x10" stands in for the computed value of the offset of
/	"cpu_thread" from the beginning of the struct cpu.  Pulling the
/	real value in via "assym.h" does not work, because that header
/	is PSM-specific and is only visible to the 'unix' build.
/	The current cpu pointer template has the same problem: its
/	"0xc" stands in for the computed offset of "cpu_self".
/	Neither literal is checked against the structure layout here,
/	which is an acknowledged wart of this file.
/
	.inline	threadp,0
	movl	%gs:0x10, %eax		/ %eax = word at the cpu_thread offset
	.end
46
/
/ curcpup: return the current cpu pointer.
/
/ The pointer is read through %gs at the literal offset 0xc, which
/ stands in for the offset of "cpu_self" within the struct cpu.
/
	.inline	curcpup,0
	movl	%gs:0xc, %eax		/ %eax = word at the cpu_self offset
	.end
53
/
/ caller: return the caller of the function this template is expanded in.
/
/ The value returned is the word stored at 4(%ebp).
/ NOTE(review): this presumes the enclosing function keeps a
/ conventional %ebp frame, so that 4(%ebp) is its return address --
/ confirm before using it in code built without frame pointers.
/
	.inline	caller,0
	movl	4(%ebp), %eax		/ %eax = word just above the saved %ebp
	.end
60
/
/ ipltospl: convert ipl to spl.  This is the identity function for i86:
/ the single 4-byte argument is returned unchanged in %eax.
/
/ The argument size is declared as 4 because the template reads one
/ 4-byte argument from (%esp); a declared size of 0 would disagree
/ with the body and with the other one-argument templates here.
/
	.inline	ipltospl,4
	movl	(%esp), %eax		/ return the argument as-is
	.end
67
/
/ lowbit: find the low order bit in a word.
/
/ Returns the 1-based position of the lowest set bit of the 4-byte
/ argument: 1 for bit 0, up to 32 for bit 31.
/
/ %eax is preloaded with -1 so that the final incl produces 0 when the
/ argument has no bits set.
/ NOTE(review): the zero case depends on bsfl leaving its destination
/ untouched when the source is zero -- confirm against the processor
/ documentation for every supported CPU.
/
	.inline	lowbit,4
	movl	$-1, %eax		/ value that survives if no bit is set
	bsfl	(%esp), %eax		/ %eax = 0-based index of lowest set bit
	incl	%eax			/ convert to 1-based position
	.end
76
/
/ highbit: find the high order bit in a word.
/
/ Returns the 1-based position of the highest set bit of the 4-byte
/ argument: 1 for bit 0, up to 32 for bit 31.
/
/ %eax is preloaded with -1 so that the final incl produces 0 when the
/ argument has no bits set.
/ NOTE(review): the zero case depends on bsrl leaving its destination
/ untouched when the source is zero -- confirm against the processor
/ documentation for every supported CPU.
/
	.inline	highbit,4
	movl	$-1, %eax		/ value that survives if no bit is set
	bsrl	(%esp), %eax		/ %eax = 0-based index of highest set bit
	incl	%eax			/ convert to 1-based position
	.end
85
/
/ Networking byte order functions (too bad, Intel has the wrong byte order)
/
/ htonll: reverse the byte order of a 64-bit value.
/
/ The argument occupies 8 bytes of stack, (%esp) and 4(%esp), so the
/ argument size is declared as 8 to match what the body reads.
/ The word at (%esp) is byte-swapped into %edx and the word at 4(%esp)
/ into %eax, leaving the swapped 64-bit result in %edx:%eax.
/
	.inline	htonll,8
	movl	(%esp), %edx		/ first word of arg -> %edx half of result
	movl	4(%esp), %eax		/ second word of arg -> %eax half of result
	bswap	%edx
	bswap	%eax
	.end
95
96	.inline	ntohll,4
97	movl	(%esp), %edx
98	movl	4(%esp), %eax
99	bswap	%edx
100	bswap	%eax
101	.end
102
103	.inline	htonl,4
104	movl	(%esp), %eax
105	bswap	%eax
106	.end
107
108	.inline	ntohl,4
109	movl	(%esp), %eax
110	bswap	%eax
111	.end
112
113	.inline	htons,4
114	movl	(%esp), %eax
115	bswap	%eax
116	shrl	$16, %eax
117	.end
118
119	.inline	ntohs,4
120	movl	(%esp), %eax
121	bswap	%eax
122	shrl	$16, %eax
123	.end
124
/*
 * mul32: multiply two long (4-byte) numbers and yield a u_longlong_t
 * result.  Provided to manipulate hrtime_t values.
 *
 * The multiply is unsigned (mull) and leaves the full 64-bit product
 * in %edx:%eax.
 */
	.inline	mul32, 8
	movl	(%esp), %ecx		/ first operand
	movl	4(%esp), %eax		/ second operand
	mull	%ecx			/ %edx:%eax = %eax * %ecx
	.end
134
/*
 * unlock_hres_lock: unlock hres_lock and increment the count value.
 * (See clock.h)
 *
 * Both effects come from one lock-prefixed increment of the hres_lock
 * word; the template takes no arguments and returns nothing.
 */
	.inline	unlock_hres_lock, 0
	lock				/ prefix applies to the incl below
	incl	hres_lock
	.end
142
143	.inline	atomic_orb,8
144	movl	(%esp), %eax
145	movl    4(%esp), %edx
146	lock
147	orb	%dl,(%eax)
148	.end
149
150	.inline	atomic_andb,8
151	movl	(%esp), %eax
152	movl    4(%esp), %edx
153	lock
154	andb	%dl,(%eax)
155	.end
156
/*
 * atomic inc/dec operations on 16-bit quantities; the C equivalents are:
 *	void atomic_inc16(uint16_t *addr) { ++*addr; }
 *	void atomic_dec16(uint16_t *addr) { --*addr; }
 *
 * Each template takes the address as its only argument and performs
 * the update with a lock-prefixed 16-bit instruction.
 */
	.inline	atomic_inc16,4
	movl	(%esp), %eax		/ %eax = addr
	lock
	incw	(%eax)			/ ++*addr
	.end
167
168	.inline	atomic_dec16,4
169	movl	(%esp), %eax
170	lock
171	decw	(%eax)
172	.end
173
/*
 * ht_pause: issue the pause instruction.
 *
 * To the Pentium 4 Xeon processor, pause is a hint that the surrounding
 * code sequence is a busy spin-wait loop.  Without it, the P4 Xeon may
 * suffer a severe penalty when exiting such a loop because the
 * processor detects a possible memory order violation.  Inserting pause
 * significantly reduces the likelihood of that violation, improving
 * performance.  The pause instruction is a NOP on all other IA-32
 * processors.
 *
 * The instruction is spelled as "rep; nop" (opcode bytes F3 90) because
 * our compiler doesn't support the "pause" mnemonic yet.
 */
	.inline	ht_pause, 0
	rep			/ F3 prefix ...
	nop			/ ... plus 90 encodes pause
	.end
187
/*
 * prefetch 64 bytes
 *
 * prefetch is an SSE extension which is not supported on older 32-bit
 * processors, so the prefetch templates are defined as no-ops for now.
 * The intended instruction sequence is kept, commented out, inside
 * each template.
 */

	.inline	prefetch_read_many, 4
/ Disabled body (prefetcht0 at offsets 0 and 32 of the argument address):
/	movl		(%esp), %eax
/	prefetcht0	(%eax)
/	prefetcht0	32(%eax)
	.end
200
201 	.inline	prefetch_read_once, 4
202/	movl		(%esp), %eax
203/	prefetchnta	(%eax)
204/	prefetchnta	32(%eax)
205	.end
206
207 	.inline	prefetch_write_many, 4
208/	movl		(%esp), %eax
209/	prefetcht0	(%eax)
210/	prefetcht0	32(%eax)
211	.end
212
213 	.inline	prefetch_write_once, 4
214/	movl		(%esp), %eax
215/	prefetcht0	(%eax)
216/	prefetcht0	32(%eax)
217	.end
218
219