xref: /titanic_41/usr/src/common/crypto/md5/md5_byteswap.h (revision afd1ac7b1c9a8cdf273c865aa5e9a14620341443)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #ifndef	_MD5_BYTESWAP_H
28 #define	_MD5_BYTESWAP_H
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 /*
33  * definitions for inline functions for little-endian loads.
34  *
35  * This file has special definitions for UltraSPARC architectures,
36  * which have a special address space identifier for loading 32 and 16 bit
37  * integers in little-endian byte order.
38  *
39  * This file and common/crypto/md5/sparc/sun4[uv]/byteswap.il implement the
40  * same thing and must be changed together.
41  */
42 
43 #if defined(__sparc)
44 #include <v9/sys/asi.h>
45 #endif
46 
47 #ifdef	__cplusplus
48 extern "C" {
49 #endif
50 
51 #if defined(_LITTLE_ENDIAN)
52 
53 /*
54  * Little-endian optimization:  I don't need to do any weirdness.   On
55  * some little-endian boxen, I'll have to do alignment checks, but I can do
56  * that below.
57  */
58 
59 #if !defined(__i386) && !defined(__amd64)
60 /*
61  * i386 and amd64 don't require aligned 4-byte loads.  The symbol
62  * _MD5_CHECK_ALIGNMENT indicates below whether the MD5Transform function
63  * requires alignment checking.
64  */
65 #define	_MD5_CHECK_ALIGNMENT
66 #endif /* !__i386 && !__amd64 */
67 
/* Native-order 32-bit load through a uint32_t pointer; no byte swapping. */
#define	LOAD_LITTLE_32(addr)	(*(uint32_t *)(addr))
69 
70 #else	/* !_LITTLE_ENDIAN */
71 
72 /*
73  * sparc v9/v8plus optimization:
74  *
75  * on the sparc v9/v8plus, we can load data little endian.  however, since
76  * the compiler doesn't have direct support for little endian, we
77  * link to an assembly-language routine `load_little_32' to do
78  * the magic.  note that special care must be taken to ensure the
79  * address is 32-bit aligned -- in the interest of speed, we don't
80  * check to make sure, since careful programming can guarantee this
81  * for us.
82  */
83 #if defined(sun4u)
84 
/*
 * Define alignment check because we can 4-byte load as little endian.
 * LOAD_LITTLE_32() hands the address, cast to uint32_t *, to
 * load_little_32().
 */
#define	_MD5_CHECK_ALIGNMENT
#define	LOAD_LITTLE_32(addr)    load_little_32((uint32_t *)(addr))
88 
89 #if !defined(__lint) && defined(__GNUC__)
90 
91 static __inline__ uint32_t
92 load_little_32(uint32_t *addr)
93 {
94 	uint32_t value;
95 
96 	__asm__(
97 	    "lduwa	[%1] %2, %0\n\t"
98 	: "=r" (value)
99 	: "r" (addr), "i" (ASI_PL));
100 
101 	return (value);
102 }
103 
104 static __inline__ uint16_t
105 load_little_16(uint16_t *addr)
106 {
107 	uint16_t value;
108 
109 	__asm__(
110 	    "lduha	[%1] %2, %0\n\t"
111 	: "=r" (value)
112 	: "r" (addr), "i" (ASI_PL));
113 
114 	return (value);
115 }
116 
117 #endif	/* !__lint && __GNUC__ */
118 
119 #if !defined(__GNUC__)
/*
 * Without GCC inline assembly, load_little_32() is only declared here; its
 * definition must come from elsewhere (presumably the byteswap.il inline
 * template named at the top of this file).
 */
extern	uint32_t load_little_32(uint32_t *);
121 #endif	/* !__GNUC__ */
122 
123 #if defined(sun4v)
124 
/*
 * For N1 we want to minimize the number of arithmetic operations.  This is
 * best achieved by using the %asi register to specify the ASI for the lduwa
 * operations.  We also have a separate inline template for each word, so
 * that we can use the immediate offset in lduwa without relying on the
 * compiler to do the right thing.
 *
 * Moving to 64-bit loads might also be beneficial.
 *
 * There is one macro per word index, named by a single hex digit (0-f).
 * Each casts its argument to uint32_t * and forwards it to the
 * load_little_32_<digit>() routine of the same suffix.
 */
#define	LOAD_LITTLE_32_0(addr)	load_little_32_0((uint32_t *)(addr))
#define	LOAD_LITTLE_32_1(addr)	load_little_32_1((uint32_t *)(addr))
#define	LOAD_LITTLE_32_2(addr)	load_little_32_2((uint32_t *)(addr))
#define	LOAD_LITTLE_32_3(addr)	load_little_32_3((uint32_t *)(addr))
#define	LOAD_LITTLE_32_4(addr)	load_little_32_4((uint32_t *)(addr))
#define	LOAD_LITTLE_32_5(addr)	load_little_32_5((uint32_t *)(addr))
#define	LOAD_LITTLE_32_6(addr)	load_little_32_6((uint32_t *)(addr))
#define	LOAD_LITTLE_32_7(addr)	load_little_32_7((uint32_t *)(addr))
#define	LOAD_LITTLE_32_8(addr)	load_little_32_8((uint32_t *)(addr))
#define	LOAD_LITTLE_32_9(addr)	load_little_32_9((uint32_t *)(addr))
#define	LOAD_LITTLE_32_a(addr)	load_little_32_a((uint32_t *)(addr))
#define	LOAD_LITTLE_32_b(addr)	load_little_32_b((uint32_t *)(addr))
#define	LOAD_LITTLE_32_c(addr)	load_little_32_c((uint32_t *)(addr))
#define	LOAD_LITTLE_32_d(addr)	load_little_32_d((uint32_t *)(addr))
#define	LOAD_LITTLE_32_e(addr)	load_little_32_e((uint32_t *)(addr))
#define	LOAD_LITTLE_32_f(addr)	load_little_32_f((uint32_t *)(addr))
150 
151 #if !defined(__lint) && defined(__GNUC__)
152 
/*
 * Write `asi' into the %asi register.  Despite the name, this installs
 * whatever ASI value it is given, not necessarily ASI_PL.
 */
static __inline__ void
set_little(uint8_t asi)
{
	__asm__ __volatile__(
		"wr	%%g0, %0, %%asi\n\t"
	: /* Nothing */
	: "r" (asi));
}
164 
/*
 * Read the %asi register and return its current value.
 */
static __inline__ uint8_t
get_little(void)
{
	uint8_t asi;

	__asm__ __volatile__(
		"rd	%%asi, %0\n\t"
	: "=r" (asi));

	return (asi);
}
176 
/*
 * We have 16 functions which differ only in the offset from which they
 * load.  Use this preprocessor template to simplify maintenance.  Its
 * argument is the word index in hex, without the 0x; the byte offset used
 * in the instruction is that index times four.
 *
 * Each function loads addr[index] with lduwa, taking the address space
 * identifier from the %asi register, so %asi must already hold the desired
 * ASI when the function is called.
 *
 * The "m" operand does not appear in the instruction template.  It exists
 * only to tell the compiler which word the asm reads, so that an earlier
 * store to that word cannot be moved past the load.
 */
#define	LL_TEMPLATE(__off)			\
static __inline__ uint32_t			\
load_little_32_##__off(uint32_t *addr)		\
{						\
	uint32_t value;				\
	__asm__(				\
		"lduwa	[%1 + %2]%%asi, %0\n\t"	\
	: "=r" (value)				\
	: "r" (addr), "i" ((0x##__off) << 2),	\
	    "m" (addr[0x##__off]));		\
	return (value);				\
}

LL_TEMPLATE(0)
LL_TEMPLATE(1)
LL_TEMPLATE(2)
LL_TEMPLATE(3)
LL_TEMPLATE(4)
LL_TEMPLATE(5)
LL_TEMPLATE(6)
LL_TEMPLATE(7)
LL_TEMPLATE(8)
LL_TEMPLATE(9)
LL_TEMPLATE(a)
LL_TEMPLATE(b)
LL_TEMPLATE(c)
LL_TEMPLATE(d)
LL_TEMPLATE(e)
LL_TEMPLATE(f)
#undef	LL_TEMPLATE
211 
212 #endif	/* !__lint && __GNUC__ */
213 
214 #if !defined(__GNUC__)
/*
 * Using the %asi register to achieve little endian loads - the register
 * is set using an inline template.
 *
 * Saves a few arithmetic ops, as we can now use an immediate offset with
 * the lduwa instructions.
 *
 * NOTE(review): these prototypes pass and return the ASI as uint32_t,
 * whereas the GCC inline versions of set_little()/get_little() in this file
 * use uint8_t.
 */
extern void set_little(uint32_t);
extern uint32_t get_little(void);

extern	uint32_t load_little_32_0(uint32_t *);
extern	uint32_t load_little_32_1(uint32_t *);
extern	uint32_t load_little_32_2(uint32_t *);
extern	uint32_t load_little_32_3(uint32_t *);
extern	uint32_t load_little_32_4(uint32_t *);
extern	uint32_t load_little_32_5(uint32_t *);
extern	uint32_t load_little_32_6(uint32_t *);
extern	uint32_t load_little_32_7(uint32_t *);
extern	uint32_t load_little_32_8(uint32_t *);
extern	uint32_t load_little_32_9(uint32_t *);
extern	uint32_t load_little_32_a(uint32_t *);
extern	uint32_t load_little_32_b(uint32_t *);
extern	uint32_t load_little_32_c(uint32_t *);
extern	uint32_t load_little_32_d(uint32_t *);
extern	uint32_t load_little_32_e(uint32_t *);
extern	uint32_t load_little_32_f(uint32_t *);
241 #endif	/* !__GNUC__ */
242 #endif	/* sun4v */
243 
244 /* Placate lint */
245 #if defined(__lint)
/*
 * Lint-only definition of load_little_32(): a plain load of the word at
 * `addr' in native byte order, with no byte swapping.
 */
uint32_t
load_little_32(uint32_t *addr)
{
	uint32_t word = addr[0];

	return (word);
}
251 #endif	/* __lint */
252 
253 #else	/* !sun4u */
254 
/*
 * Portable fallback: assemble the little-endian 32-bit value one byte at a
 * time.  This gives the right answer on any byte order (it will work on
 * little endian too, but slowly).  Since we do byte operations, we don't
 * have to check for alignment.
 *
 * The address is viewed as a byte pointer and each byte is widened to
 * uint32_t before it is shifted.  Without that, the bytes are promoted to
 * signed int: shifting a top byte >= 0x80 left by 24 overflows int, and the
 * resulting negative value sign-extends when used in a wider expression.
 */
#define	LOAD_LITTLE_32(addr)	\
	((uint32_t)((const uint8_t *)(addr))[0] | \
	((uint32_t)((const uint8_t *)(addr))[1] << 8) | \
	((uint32_t)((const uint8_t *)(addr))[2] << 16) | \
	((uint32_t)((const uint8_t *)(addr))[3] << 24))
259 
260 #endif	/* sun4u */
261 #endif	/* _LITTLE_ENDIAN */
262 
263 #ifdef	__cplusplus
264 }
265 #endif
266 
267 #endif	/* !_MD5_BYTESWAP_H */
268