xref: /titanic_50/usr/src/common/crypto/md5/md5_byteswap.h (revision 8de5c4f463386063e184a851437d58080c6c626c)
1afd1ac7bSwesolows /*
2afd1ac7bSwesolows  * CDDL HEADER START
3afd1ac7bSwesolows  *
4afd1ac7bSwesolows  * The contents of this file are subject to the terms of the
5afd1ac7bSwesolows  * Common Development and Distribution License (the "License").
6afd1ac7bSwesolows  * You may not use this file except in compliance with the License.
7afd1ac7bSwesolows  *
8afd1ac7bSwesolows  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9afd1ac7bSwesolows  * or http://www.opensolaris.org/os/licensing.
10afd1ac7bSwesolows  * See the License for the specific language governing permissions
11afd1ac7bSwesolows  * and limitations under the License.
12afd1ac7bSwesolows  *
13afd1ac7bSwesolows  * When distributing Covered Code, include this CDDL HEADER in each
14afd1ac7bSwesolows  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15afd1ac7bSwesolows  * If applicable, add the following below this CDDL HEADER, with the
16afd1ac7bSwesolows  * fields enclosed by brackets "[]" replaced with your own identifying
17afd1ac7bSwesolows  * information: Portions Copyright [yyyy] [name of copyright owner]
18afd1ac7bSwesolows  *
19afd1ac7bSwesolows  * CDDL HEADER END
20afd1ac7bSwesolows  */
21afd1ac7bSwesolows 
22afd1ac7bSwesolows /*
23*8de5c4f4SDan OpenSolaris Anderson  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24afd1ac7bSwesolows  * Use is subject to license terms.
25afd1ac7bSwesolows  */
26afd1ac7bSwesolows 
27afd1ac7bSwesolows #ifndef	_MD5_BYTESWAP_H
28afd1ac7bSwesolows #define	_MD5_BYTESWAP_H
29afd1ac7bSwesolows 
30afd1ac7bSwesolows /*
31afd1ac7bSwesolows  * definitions for inline functions for little-endian loads.
32afd1ac7bSwesolows  *
33afd1ac7bSwesolows  * This file has special definitions for UltraSPARC architectures,
34afd1ac7bSwesolows  * which have a special address space identifier for loading 32 and 16 bit
35afd1ac7bSwesolows  * integers in little-endian byte order.
36afd1ac7bSwesolows  *
37afd1ac7bSwesolows  * This file and common/crypto/md5/sparc/sun4[uv]/byteswap.il implement the
38afd1ac7bSwesolows  * same thing and must be changed together.
39afd1ac7bSwesolows  */
40afd1ac7bSwesolows 
414b56a003SDaniel Anderson #include <sys/types.h>
42afd1ac7bSwesolows #if defined(__sparc)
43afd1ac7bSwesolows #include <v9/sys/asi.h>
444b56a003SDaniel Anderson #elif defined(_LITTLE_ENDIAN)
454b56a003SDaniel Anderson #include <sys/byteorder.h>
46afd1ac7bSwesolows #endif
47afd1ac7bSwesolows 
48afd1ac7bSwesolows #ifdef	__cplusplus
49afd1ac7bSwesolows extern "C" {
50afd1ac7bSwesolows #endif
51afd1ac7bSwesolows 
52afd1ac7bSwesolows #if defined(_LITTLE_ENDIAN)
53afd1ac7bSwesolows 
54afd1ac7bSwesolows /*
55afd1ac7bSwesolows  * Little-endian optimization:  I don't need to do any weirdness.   On
56afd1ac7bSwesolows  * some little-endian boxen, I'll have to do alignment checks, but I can do
57afd1ac7bSwesolows  * that below.
58afd1ac7bSwesolows  */
59afd1ac7bSwesolows 
/*
 * i386 and amd64 tolerate unaligned 4-byte loads.  Every other little-endian
 * target defines _MD5_CHECK_ALIGNMENT, which tells the MD5Transform function
 * below that it has to check alignment itself.
 */
#if !(defined(__i386) || defined(__amd64))
#define	_MD5_CHECK_ALIGNMENT
#endif /* !(__i386 || __amd64) */

/* Native order is already little-endian: read the 32-bit word directly. */
#define	LOAD_LITTLE_32(addr)	(((uint32_t *)(void *)(addr))[0])
70afd1ac7bSwesolows 
71afd1ac7bSwesolows #else	/* !_LITTLE_ENDIAN */
72afd1ac7bSwesolows 
73afd1ac7bSwesolows /*
74afd1ac7bSwesolows  * sparc v9/v8plus optimization:
75afd1ac7bSwesolows  *
76afd1ac7bSwesolows  * on the sparc v9/v8plus, we can load data little endian.  however, since
77afd1ac7bSwesolows  * the compiler doesn't have direct support for little endian, we
78afd1ac7bSwesolows  * link to an assembly-language routine `load_little_32' to do
79afd1ac7bSwesolows  * the magic.  note that special care must be taken to ensure the
80afd1ac7bSwesolows  * address is 32-bit aligned -- in the interest of speed, we don't
81afd1ac7bSwesolows  * check to make sure, since careful programming can guarantee this
82afd1ac7bSwesolows  * for us.
83afd1ac7bSwesolows  */
84afd1ac7bSwesolows #if defined(sun4u)
85afd1ac7bSwesolows 
86afd1ac7bSwesolows /* Define alignment check because we can 4-byte load as little endian. */
87afd1ac7bSwesolows #define	_MD5_CHECK_ALIGNMENT
88*8de5c4f4SDan OpenSolaris Anderson #define	LOAD_LITTLE_32(addr)    load_little_32((uint32_t *)(void *)(addr))
89afd1ac7bSwesolows 
90afd1ac7bSwesolows #if !defined(__lint) && defined(__GNUC__)
91afd1ac7bSwesolows 
92afd1ac7bSwesolows static __inline__ uint32_t
93afd1ac7bSwesolows load_little_32(uint32_t *addr)
94afd1ac7bSwesolows {
95afd1ac7bSwesolows 	uint32_t value;
96afd1ac7bSwesolows 
97afd1ac7bSwesolows 	__asm__(
98afd1ac7bSwesolows 	    "lduwa	[%1] %2, %0\n\t"
99afd1ac7bSwesolows 	    : "=r" (value)
100afd1ac7bSwesolows 	    : "r" (addr), "i" (ASI_PL));
101afd1ac7bSwesolows 
102afd1ac7bSwesolows 	return (value);
103afd1ac7bSwesolows }
104afd1ac7bSwesolows #endif	/* !__lint && __GNUC__ */
105afd1ac7bSwesolows 
/*
 * Compilers other than GCC only get a declaration here; the definition is
 * supplied outside this header.
 */
#ifndef	__GNUC__
extern uint32_t load_little_32(uint32_t *addr);
#endif	/* !__GNUC__ */
109afd1ac7bSwesolows 
/*
 * Placate lint: a plain C stand-in for load_little_32(), compiled only when
 * __lint is defined.  It simply returns the word that addr points to.
 */
#ifdef	__lint
uint32_t
load_little_32(uint32_t *addr)
{
	return (addr[0]);
}
#endif	/* __lint */
118734b6a94Sdarrenm 
1194b56a003SDaniel Anderson #elif defined(_LITTLE_ENDIAN)
/*
 * NOTE(review): this arm is nested inside the !_LITTLE_ENDIAN branch, so it
 * cannot currently be selected.  Should it ever become reachable, a
 * little-endian host needs a plain 32-bit load: htonl() would byte-swap the
 * value, and it takes an integer, not a pointer.
 */
#define	LOAD_LITTLE_32(addr)	(*(uint32_t *)(void *)(addr))
121734b6a94Sdarrenm 
1224b56a003SDaniel Anderson #else
/*
 * Portable fallback: assemble the little-endian value one byte at a time.
 * This gives the right answer on either endianness (just more slowly than a
 * native load), and because only byte accesses are made no alignment check
 * is needed.
 *
 * The buffer is read as unsigned bytes and each byte is widened to uint32_t
 * before shifting, so the result is an unsigned 32-bit value.  Shifting a
 * promoted int left by 24 would overflow whenever the top byte is >= 0x80.
 * Note that addr is evaluated four times.
 */
#define	LOAD_LITTLE_32(addr)	\
	((uint32_t)((const uint8_t *)(const void *)(addr))[0] | \
	((uint32_t)((const uint8_t *)(const void *)(addr))[1] << 8) | \
	((uint32_t)((const uint8_t *)(const void *)(addr))[2] << 16) | \
	((uint32_t)((const uint8_t *)(const void *)(addr))[3] << 24))
127734b6a94Sdarrenm #endif	/* sun4u */
128734b6a94Sdarrenm 
129afd1ac7bSwesolows #if defined(sun4v)
130afd1ac7bSwesolows 
/*
 * For N1 we want to minimize the number of arithmetic operations.  This is
 * best achieved by using the %asi register to specify the ASI for the lduwa
 * operations.  There is also a separate inline template for each word, so
 * that the immediate offset in lduwa can be used without relying on the
 * compiler to do the right thing.
 *
 * Moving to 64-bit loads might also be beneficial.
 */
139afd1ac7bSwesolows  */
/*
 * One loader per 32-bit word index, 0x0 through 0xf.  The cast goes through
 * (void *), as LOAD_LITTLE_32 does, so that converting a byte pointer to
 * uint32_t * does not trigger pointer-alignment cast warnings.
 */
#define	LOAD_LITTLE_32_0(addr)	load_little_32_0((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_1(addr)	load_little_32_1((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_2(addr)	load_little_32_2((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_3(addr)	load_little_32_3((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_4(addr)	load_little_32_4((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_5(addr)	load_little_32_5((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_6(addr)	load_little_32_6((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_7(addr)	load_little_32_7((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_8(addr)	load_little_32_8((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_9(addr)	load_little_32_9((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_a(addr)	load_little_32_a((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_b(addr)	load_little_32_b((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_c(addr)	load_little_32_c((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_d(addr)	load_little_32_d((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_e(addr)	load_little_32_e((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_f(addr)	load_little_32_f((uint32_t *)(void *)(addr))
156afd1ac7bSwesolows 
157afd1ac7bSwesolows #if !defined(__lint) && defined(__GNUC__)
158afd1ac7bSwesolows 
/*
 * Write asi into the %asi register.  Despite the name, the register is set
 * to whatever value the caller passes, not necessarily ASI_PL.
 */
static __inline__ void
set_little(uint8_t asi)
{
	__asm__ __volatile__(
	    "wr	%%g0, %[new_asi], %%asi\n\t"
	    : /* no outputs */
	    : [new_asi] "r" (asi));
}
170afd1ac7bSwesolows 
/* Read the %asi register and return its current contents. */
static __inline__ uint8_t
get_little(void)
{
	uint8_t current;

	__asm__ __volatile__(
	    "rd	%%asi, %[out]\n\t"
	    : [out] "=r" (current));

	return (current);
}
182afd1ac7bSwesolows 
/*
 * Sixteen loaders that differ only in the word they fetch.  LL_TEMPLATE
 * stamps out load_little_32_<off>; its argument is the word index in hex,
 * without the 0x, and the byte offset handed to lduwa is that index times 4.
 * The load goes through whatever ASI the %asi register holds at the time.
 *
 * The "m" operand is not referenced by the instruction template.  It only
 * tells the compiler that the asm reads addr[<off>], so that earlier stores
 * to that word are neither discarded nor moved past the load.
 */
#define	LL_TEMPLATE(__off)			\
static __inline__ uint32_t			\
load_little_32_##__off(uint32_t *addr)		\
{						\
	uint32_t value;				\
	__asm__(				\
		"lduwa	[%1 + %2]%%asi, %0\n\t"	\
	: "=r" (value)				\
	: "r" (addr), "i" ((0x##__off) << 2),	\
	    "m" (addr[0x##__off]));		\
	return (value);				\
}

LL_TEMPLATE(0)
LL_TEMPLATE(1)
LL_TEMPLATE(2)
LL_TEMPLATE(3)
LL_TEMPLATE(4)
LL_TEMPLATE(5)
LL_TEMPLATE(6)
LL_TEMPLATE(7)
LL_TEMPLATE(8)
LL_TEMPLATE(9)
LL_TEMPLATE(a)
LL_TEMPLATE(b)
LL_TEMPLATE(c)
LL_TEMPLATE(d)
LL_TEMPLATE(e)
LL_TEMPLATE(f)
#undef	LL_TEMPLATE
217afd1ac7bSwesolows 
218afd1ac7bSwesolows #endif	/* !__lint && __GNUC__ */
219afd1ac7bSwesolows 
#ifndef	__GNUC__
/*
 * Little-endian loads are done through the %asi register, which is set
 * using an inline template.  With the ASI held in the register, the lduwa
 * instructions can take an immediate offset, which saves a few arithmetic
 * operations.
 */
extern void set_little(uint32_t);
extern uint32_t get_little(void);

/* One loader per 32-bit word index, 0x0 through 0xf. */
extern uint32_t load_little_32_0(uint32_t *addr);
extern uint32_t load_little_32_1(uint32_t *addr);
extern uint32_t load_little_32_2(uint32_t *addr);
extern uint32_t load_little_32_3(uint32_t *addr);
extern uint32_t load_little_32_4(uint32_t *addr);
extern uint32_t load_little_32_5(uint32_t *addr);
extern uint32_t load_little_32_6(uint32_t *addr);
extern uint32_t load_little_32_7(uint32_t *addr);
extern uint32_t load_little_32_8(uint32_t *addr);
extern uint32_t load_little_32_9(uint32_t *addr);
extern uint32_t load_little_32_a(uint32_t *addr);
extern uint32_t load_little_32_b(uint32_t *addr);
extern uint32_t load_little_32_c(uint32_t *addr);
extern uint32_t load_little_32_d(uint32_t *addr);
extern uint32_t load_little_32_e(uint32_t *addr);
extern uint32_t load_little_32_f(uint32_t *addr);
#endif	/* !__GNUC__ */
248afd1ac7bSwesolows #endif	/* sun4v */
249afd1ac7bSwesolows 
250afd1ac7bSwesolows #endif	/* _LITTLE_ENDIAN */
251afd1ac7bSwesolows 
252afd1ac7bSwesolows #ifdef	__cplusplus
253afd1ac7bSwesolows }
254afd1ac7bSwesolows #endif
255afd1ac7bSwesolows 
256afd1ac7bSwesolows #endif	/* !_MD5_BYTESWAP_H */
257