/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#ifndef	_MD5_BYTESWAP_H
#define	_MD5_BYTESWAP_H

/*
 * Definitions for inline functions for little-endian loads.
 *
 * This file has special definitions for UltraSPARC architectures,
 * which have a special address space identifier for loading 32 and 16 bit
 * integers in little-endian byte order.
 *
 * What this header provides, depending on the build:
 *
 *   LOAD_LITTLE_32(addr)	always defined; yields the 32-bit value stored
 *				in little-endian byte order at addr.
 *   _MD5_CHECK_ALIGNMENT	defined when LOAD_LITTLE_32() performs a
 *				4-byte load on a platform that needs addr to
 *				be 32-bit aligned.
 *   LOAD_LITTLE_32_0 .. _f,	sun4v only (big-endian builds); per-word
 *   set_little(), get_little()	loads through the %asi register.
 */

#include <sys/types.h>
#if defined(__sparc)
#include <v9/sys/asi.h>
#elif defined(_LITTLE_ENDIAN)
#include <sys/byteorder.h>
#endif

#ifdef	__cplusplus
extern "C" {
#endif

#if defined(_LITTLE_ENDIAN)

/*
 * Little-endian optimization:  I don't need to do any weirdness.  On
 * some little-endian boxen, I'll have to do alignment checks, but I can do
 * that below.
 */

#if !defined(__i386) && !defined(__amd64)
/*
 * i386 and amd64 don't require aligned 4-byte loads.  The symbol
 * _MD5_CHECK_ALIGNMENT indicates below whether the MD5Transform function
 * requires alignment checking.
 */
#define	_MD5_CHECK_ALIGNMENT
#endif /* !__i386 && !__amd64 */

/* Native byte order already matches: a plain 32-bit load is enough. */
#define	LOAD_LITTLE_32(addr)	(*(uint32_t *)(void *)(addr))

#else	/* !_LITTLE_ENDIAN */

/*
 * sparc v9/v8plus optimization:
 *
 * on the sparc v9/v8plus, we can load data little endian.  however, since
 * the compiler doesn't have direct support for little endian, we
 * link to an assembly-language routine `load_little_32' to do
 * the magic.  note that special care must be taken to ensure the
 * address is 32-bit aligned -- in the interest of speed, we don't
 * check to make sure, since careful programming can guarantee this
 * for us.
 */
#if defined(sun4u)

/* Define alignment check because we can 4-byte load as little endian. */
#define	_MD5_CHECK_ALIGNMENT
#define	LOAD_LITTLE_32(addr)	load_little_32((uint32_t *)(void *)(addr))

#if !defined(__lint) && defined(__GNUC__)

/*
 * Load the 32-bit word at addr with `lduwa', passing ASI_PL as the
 * address space identifier, and return the loaded value.
 */
static __inline__ uint32_t
load_little_32(uint32_t *addr)
{
	uint32_t value;

	__asm__(
	    "lduwa	[%1] %2, %0\n\t"
	    : "=r" (value)
	    : "r" (addr), "i" (ASI_PL));

	return (value);
}
#endif	/* !__lint && __GNUC__ */

#if !defined(__GNUC__)
/* Non-GNU compilers: the routine is supplied outside this header. */
extern	uint32_t load_little_32(uint32_t *);
#endif	/* !__GNUC__ */

/* Placate lint */
#if defined(__lint)
uint32_t
load_little_32(uint32_t *addr)
{
	return (*addr);
}
#endif	/* __lint */

#else
/*
 * big endian -- will work on little endian, but slowly
 *
 * Since we do byte operations, we don't have to check for alignment.
 * addr must point to unsigned bytes.  Each byte is widened to uint32_t
 * before it is shifted: shifting the promoted `int' would overflow for a
 * top byte >= 0x80 (undefined behaviour) and would give the expression a
 * signed type.  addr is evaluated four times, so it must not have side
 * effects.
 */
#define	LOAD_LITTLE_32(addr)	\
	((uint32_t)(addr)[0] | ((uint32_t)(addr)[1] << 8) | \
	((uint32_t)(addr)[2] << 16) | ((uint32_t)(addr)[3] << 24))
#endif	/* sun4u */

#if defined(sun4v)

/*
 * For N1 want to minimize number of arithmetic operations. This is best
 * achieved by using the %asi register to specify ASI for the lduwa operations.
 * Also, have a separate inline template for each word, so can utilize the
 * immediate offset in lduwa, without relying on the compiler to do the right
 * thing.
 *
 * Moving to 64-bit loads might also be beneficial.
 */
#define	LOAD_LITTLE_32_0(addr)	load_little_32_0((uint32_t *)(addr))
#define	LOAD_LITTLE_32_1(addr)	load_little_32_1((uint32_t *)(addr))
#define	LOAD_LITTLE_32_2(addr)	load_little_32_2((uint32_t *)(addr))
#define	LOAD_LITTLE_32_3(addr)	load_little_32_3((uint32_t *)(addr))
#define	LOAD_LITTLE_32_4(addr)	load_little_32_4((uint32_t *)(addr))
#define	LOAD_LITTLE_32_5(addr)	load_little_32_5((uint32_t *)(addr))
#define	LOAD_LITTLE_32_6(addr)	load_little_32_6((uint32_t *)(addr))
#define	LOAD_LITTLE_32_7(addr)	load_little_32_7((uint32_t *)(addr))
#define	LOAD_LITTLE_32_8(addr)	load_little_32_8((uint32_t *)(addr))
#define	LOAD_LITTLE_32_9(addr)	load_little_32_9((uint32_t *)(addr))
#define	LOAD_LITTLE_32_a(addr)	load_little_32_a((uint32_t *)(addr))
#define	LOAD_LITTLE_32_b(addr)	load_little_32_b((uint32_t *)(addr))
#define	LOAD_LITTLE_32_c(addr)	load_little_32_c((uint32_t *)(addr))
#define	LOAD_LITTLE_32_d(addr)	load_little_32_d((uint32_t *)(addr))
#define	LOAD_LITTLE_32_e(addr)	load_little_32_e((uint32_t *)(addr))
#define	LOAD_LITTLE_32_f(addr)	load_little_32_f((uint32_t *)(addr))

#if !defined(__lint) && defined(__GNUC__)

/*
 * This actually sets the ASI register, not necessarily to ASI_PL.
 * The value written to %asi is whatever the caller passes in `asi'.
 */
static __inline__ void
set_little(uint8_t asi)
{
	__asm__ __volatile__(
	    "wr	%%g0, %0, %%asi\n\t"
	    : /* Nothing */
	    : "r" (asi));
}

/*
 * Read and return the current contents of the %asi register.
 */
static __inline__ uint8_t
get_little(void)
{
	uint8_t asi;

	__asm__ __volatile__(
	    "rd	%%asi, %0\n\t"
	    : "=r" (asi));

	return (asi);
}

/*
 * We have 16 functions which differ only in the offset from which they
 * load.  Use this preprocessor template to simplify maintenance.  Its
 * argument is the offset in hex, without the 0x.
 *
 * Each generated load_little_32_<off>() performs `lduwa' from
 * addr + (0x<off> << 2), i.e. 32-bit word number <off> relative to addr,
 * using whatever ASI is currently held in %asi.
 *
 * NOTE(review): these asm statements are not volatile and list no memory
 * operand, so the compiler is not told that they read *addr or depend on
 * %asi -- confirm that callers cannot have them reordered unsafely.
 */
#define	LL_TEMPLATE(__off)			\
static __inline__ uint32_t			\
load_little_32_##__off(uint32_t *addr)		\
{						\
	uint32_t value;				\
	__asm__(				\
	    "lduwa	[%1 + %2]%%asi, %0\n\t"	\
	    : "=r" (value)			\
	    : "r" (addr), "i" ((0x##__off) << 2));	\
	return (value);				\
}

/* BEGIN CSTYLED */
LL_TEMPLATE(0)
LL_TEMPLATE(1)
LL_TEMPLATE(2)
LL_TEMPLATE(3)
LL_TEMPLATE(4)
LL_TEMPLATE(5)
LL_TEMPLATE(6)
LL_TEMPLATE(7)
LL_TEMPLATE(8)
LL_TEMPLATE(9)
LL_TEMPLATE(a)
LL_TEMPLATE(b)
LL_TEMPLATE(c)
LL_TEMPLATE(d)
LL_TEMPLATE(e)
LL_TEMPLATE(f)
/* END CSTYLED */
#undef	LL_TEMPLATE

#endif	/* !__lint && __GNUC__ */

#if !defined(__GNUC__)
/*
 * Using the %asi register to achieve little endian loads - register
 * is set using an inline template.
 *
 * Saves a few arithmetic ops as can now use an immediate offset with the
 * lduwa instructions.
 */
extern void set_little(uint32_t);
extern uint32_t get_little(void);

extern uint32_t load_little_32_0(uint32_t *);
extern uint32_t load_little_32_1(uint32_t *);
extern uint32_t load_little_32_2(uint32_t *);
extern uint32_t load_little_32_3(uint32_t *);
extern uint32_t load_little_32_4(uint32_t *);
extern uint32_t load_little_32_5(uint32_t *);
extern uint32_t load_little_32_6(uint32_t *);
extern uint32_t load_little_32_7(uint32_t *);
extern uint32_t load_little_32_8(uint32_t *);
extern uint32_t load_little_32_9(uint32_t *);
extern uint32_t load_little_32_a(uint32_t *);
extern uint32_t load_little_32_b(uint32_t *);
extern uint32_t load_little_32_c(uint32_t *);
extern uint32_t load_little_32_d(uint32_t *);
extern uint32_t load_little_32_e(uint32_t *);
extern uint32_t load_little_32_f(uint32_t *);
#endif	/* !__GNUC__ */
#endif	/* sun4v */

#endif	/* _LITTLE_ENDIAN */

#ifdef	__cplusplus
}
#endif

#endif	/* !_MD5_BYTESWAP_H */