xref: /illumos-gate/usr/src/common/crypto/md5/md5_byteswap.h (revision 13b136d3061155363c62c9f6568d25b8b27da8f6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #ifndef	_MD5_BYTESWAP_H
28 #define	_MD5_BYTESWAP_H
29 
30 /*
31  * definitions for inline functions for little-endian loads.
32  *
33  * This file has special definitions for UltraSPARC architectures,
34  * which have a special address space identifier for loading 32 and 16 bit
35  * integers in little-endian byte order.
36  *
37  * This file and common/crypto/md5/sparc/sun4[uv]/byteswap.il implement the
38  * same thing and must be changed together.
39  */
40 
41 #include <sys/types.h>
42 #if defined(__sparc)
43 #include <v9/sys/asi.h>
44 #elif defined(_LITTLE_ENDIAN)
45 #include <sys/byteorder.h>
46 #endif
47 
48 #ifdef	__cplusplus
49 extern "C" {
50 #endif
51 
52 #if defined(_LITTLE_ENDIAN)
53 
/*
 * Little-endian optimization:  no byte swapping is needed, because a plain
 * 32-bit load already yields the little-endian value.  Some little-endian
 * machines do require alignment checks; that is arranged below via
 * _MD5_CHECK_ALIGNMENT.
 */
59 
60 #if !defined(__i386) && !defined(__amd64)
61 /*
62  * i386 and amd64 don't require aligned 4-byte loads.  The symbol
63  * _MD5_CHECK_ALIGNMENT indicates below whether the MD5Transform function
64  * requires alignment checking.
65  */
66 #define	_MD5_CHECK_ALIGNMENT
67 #endif /* !__i386 && !__amd64 */
68 
/*
 * Little-endian host: the word in memory is already in the wanted byte
 * order, so view addr as a uint32_t pointer and read its first element.
 * No alignment check is performed here; see _MD5_CHECK_ALIGNMENT.
 */
#define	LOAD_LITTLE_32(addr)	(((uint32_t *)(void *)(addr))[0])
70 
71 #else	/* !_LITTLE_ENDIAN */
72 
73 /*
74  * sparc v9/v8plus optimization:
75  *
76  * on the sparc v9/v8plus, we can load data little endian.  however, since
77  * the compiler doesn't have direct support for little endian, we
78  * link to an assembly-language routine `load_little_32' to do
79  * the magic.  note that special care must be taken to ensure the
80  * address is 32-bit aligned -- in the interest of speed, we don't
81  * check to make sure, since careful programming can guarantee this
82  * for us.
83  */
84 #if defined(sun4u)
85 
/*
 * sun4u can fetch a 32-bit word in little-endian order with a single load,
 * so flag that alignment checking is required and route LOAD_LITTLE_32
 * through load_little_32().
 */
#define	_MD5_CHECK_ALIGNMENT
#define	LOAD_LITTLE_32(addr)	load_little_32((uint32_t *)(void *)(addr))
89 
90 #if !defined(__lint) && defined(__GNUC__)
91 
92 static __inline__ uint32_t
93 load_little_32(uint32_t *addr)
94 {
95 	uint32_t value;
96 
97 	__asm__(
98 	    "lduwa	[%1] %2, %0\n\t"
99 	    : "=r" (value)
100 	    : "r" (addr), "i" (ASI_PL));
101 
102 	return (value);
103 }
104 #endif	/* !__lint && __GNUC__ */
105 
106 #if !defined(__GNUC__)
/*
 * Without GCC inline asm, load_little_32() is resolved outside this header
 * (see the byteswap.il note at the top of this file).
 */
extern uint32_t	load_little_32(uint32_t *addr);
108 #endif	/* !__GNUC__ */
109 
110 /* Placate lint */
111 #if defined(__lint)
/*
 * Lint-only stand-in for load_little_32(): a plain read of the word at
 * addr, with no byte-order conversion.
 */
uint32_t
load_little_32(uint32_t *addr)
{
	uint32_t word = addr[0];

	return (word);
}
117 #endif	/* __lint */
118 
119 #elif defined(_LITTLE_ENDIAN)
/*
 * NOTE: this arm is nested inside the !_LITTLE_ENDIAN branch of the outer
 * conditional, so it cannot be selected as the file stands.  It previously
 * expanded to htonl(addr), which byte-swaps the pointer value itself rather
 * than loading through it.  It now matches the little-endian definition
 * used earlier in this file: a direct 32-bit load.
 */
#define	LOAD_LITTLE_32(addr)	(*(uint32_t *)(void *)(addr))
121 
122 #else
/*
 * big endian -- will work on little endian, but slowly.
 *
 * Assemble the value one byte at a time, least significant byte first.
 * Since we do byte operations, we don't have to check for alignment.
 *
 * Each byte is read as a uint8_t and widened to uint32_t before shifting.
 * Left to the default promotion to int, a top byte of 0x80 or more would be
 * shifted into the sign bit (undefined behavior, and sign-extended garbage
 * if the result is widened), and a plain char buffer would sign-extend
 * every byte with its high bit set.
 *
 * addr is evaluated four times; do not pass an expression with side effects.
 */
#define	LOAD_LITTLE_32(addr)	\
	((uint32_t)((const uint8_t *)(addr))[0] | \
	((uint32_t)((const uint8_t *)(addr))[1] << 8) | \
	((uint32_t)((const uint8_t *)(addr))[2] << 16) | \
	((uint32_t)((const uint8_t *)(addr))[3] << 24))
127 #endif	/* sun4u */
128 
129 #if defined(sun4v)
130 
/*
 * For N1 we want to minimize the number of arithmetic operations.  This is
 * best achieved by using the %asi register to specify the ASI for the lduwa
 * operations.  There is also a separate inline template for each word, so
 * that the immediate offset in lduwa can be used without relying on the
 * compiler to do the right thing.
 *
 * Moving to 64-bit loads might also be beneficial.
 */
/*
 * One wrapper per word index, 0 through f.  Each casts addr to a uint32_t
 * pointer and forwards it to the load_little_32_<index>() routine of the
 * same index.
 */
#define	LOAD_LITTLE_32_0(addr)	load_little_32_0((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_1(addr)	load_little_32_1((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_2(addr)	load_little_32_2((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_3(addr)	load_little_32_3((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_4(addr)	load_little_32_4((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_5(addr)	load_little_32_5((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_6(addr)	load_little_32_6((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_7(addr)	load_little_32_7((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_8(addr)	load_little_32_8((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_9(addr)	load_little_32_9((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_a(addr)	load_little_32_a((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_b(addr)	load_little_32_b((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_c(addr)	load_little_32_c((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_d(addr)	load_little_32_d((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_e(addr)	load_little_32_e((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_f(addr)	load_little_32_f((uint32_t *)(void *)(addr))
156 
157 #if !defined(__lint) && defined(__GNUC__)
158 
/*
 * Write asi into the %asi register.  Despite the name, the value is not
 * necessarily ASI_PL; whatever the caller passes is what gets installed.
 */
static __inline__ void
set_little(uint8_t asi)
{
	__asm__ __volatile__("wr	%%g0, %0, %%asi\n\t" : : "r" (asi));
}
170 
/*
 * Read the %asi register and return its current contents.
 */
static __inline__ uint8_t
get_little(void)
{
	uint8_t current_asi;

	__asm__ __volatile__("rd	%%asi, %0\n\t" : "=r" (current_asi));

	return (current_asi);
}
182 
/*
 * We have 16 functions which differ only in the offset from which they
 * load.  Use this preprocessor template to simplify maintenance.  Its
 * argument is the offset in hex, without the 0x.
 *
 * load_little_32_<off>(addr) loads the 32-bit word at word index <off> from
 * addr (byte offset <off> * 4), using lduwa with whatever ASI is currently
 * held in the %asi register.
 *
 * The trailing "m" input is never referenced by the template.  It is there
 * to tell the compiler that the instruction reads addr[<off>]; without it
 * the asm appears to depend only on the pointer value, so the compiler
 * would be free to merge repeated loads from the same address or to move a
 * load across a store into the buffer.
 */
#define	LL_TEMPLATE(__off)				\
static __inline__ uint32_t				\
load_little_32_##__off(uint32_t *addr)			\
{							\
	uint32_t value;					\
	__asm__(					\
		"lduwa	[%1 + %2]%%asi, %0\n\t"		\
	: "=r" (value)					\
	: "r" (addr), "i" ((0x##__off) << 2),		\
	    "m" (addr[0x##__off]));			\
	return (value);					\
}

LL_TEMPLATE(0)
LL_TEMPLATE(1)
LL_TEMPLATE(2)
LL_TEMPLATE(3)
LL_TEMPLATE(4)
LL_TEMPLATE(5)
LL_TEMPLATE(6)
LL_TEMPLATE(7)
LL_TEMPLATE(8)
LL_TEMPLATE(9)
LL_TEMPLATE(a)
LL_TEMPLATE(b)
LL_TEMPLATE(c)
LL_TEMPLATE(d)
LL_TEMPLATE(e)
LL_TEMPLATE(f)
#undef	LL_TEMPLATE
217 
218 #endif	/* !__lint && __GNUC__ */
219 
220 #if !defined(__GNUC__)
/*
 * Using the %asi register to achieve little-endian loads - the register
 * is set using an inline template.
 *
 * This saves a few arithmetic ops, as an immediate offset can now be used
 * with the lduwa instructions.
 */
/* Accessors for the %asi register. */
extern void	set_little(uint32_t asi);
extern uint32_t	get_little(void);

/* One loader per word index, 0 through f. */
extern uint32_t	load_little_32_0(uint32_t *addr);
extern uint32_t	load_little_32_1(uint32_t *addr);
extern uint32_t	load_little_32_2(uint32_t *addr);
extern uint32_t	load_little_32_3(uint32_t *addr);
extern uint32_t	load_little_32_4(uint32_t *addr);
extern uint32_t	load_little_32_5(uint32_t *addr);
extern uint32_t	load_little_32_6(uint32_t *addr);
extern uint32_t	load_little_32_7(uint32_t *addr);
extern uint32_t	load_little_32_8(uint32_t *addr);
extern uint32_t	load_little_32_9(uint32_t *addr);
extern uint32_t	load_little_32_a(uint32_t *addr);
extern uint32_t	load_little_32_b(uint32_t *addr);
extern uint32_t	load_little_32_c(uint32_t *addr);
extern uint32_t	load_little_32_d(uint32_t *addr);
extern uint32_t	load_little_32_e(uint32_t *addr);
extern uint32_t	load_little_32_f(uint32_t *addr);
247 #endif	/* !__GNUC__ */
248 #endif	/* sun4v */
249 
250 #endif	/* _LITTLE_ENDIAN */
251 
252 #ifdef	__cplusplus
253 }
254 #endif
255 
256 #endif	/* !_MD5_BYTESWAP_H */
257