/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#ifndef	_MD5_BYTESWAP_H
#define	_MD5_BYTESWAP_H

#pragma ident "%Z%%M% %I% %E% SMI"

/*
 * Definitions for inline functions for little-endian loads.
 *
 * This file has special definitions for UltraSPARC architectures,
 * which have a special address space identifier for loading 32 and 16 bit
 * integers in little-endian byte order.
 *
 * This file and common/crypto/md5/sparc/sun4[uv]/byteswap.il implement the
 * same thing and must be changed together.
 *
 * Exactly one definition of LOAD_LITTLE_32(addr) is selected below:
 *   - _LITTLE_ENDIAN:	a plain 32-bit load;
 *   - sun4u:		an lduwa load through ASI_PL;
 *   - otherwise:	a portable byte-by-byte assembly of the word.
 * _MD5_CHECK_ALIGNMENT is defined whenever the selected variant performs a
 * 4-byte load and the platform is not known to tolerate unaligned loads.
 */

#if defined(__sparc)
#include <v9/sys/asi.h>
#endif

#ifdef	__cplusplus
extern "C" {
#endif

#if defined(_LITTLE_ENDIAN)

/*
 * Little-endian optimization:  I don't need to do any weirdness.   On
 * some little-endian boxen, I'll have to do alignment checks, but I can do
 * that below.
 */

#if !defined(__i386) && !defined(__amd64)
/*
 * i386 and amd64 don't require aligned 4-byte loads.  The symbol
 * _MD5_CHECK_ALIGNMENT indicates below whether the MD5Transform function
 * requires alignment checking.
 */
#define	_MD5_CHECK_ALIGNMENT
#endif /* !__i386 && !__amd64 */

#define	LOAD_LITTLE_32(addr)	(*(uint32_t *)(addr))

#else	/* !_LITTLE_ENDIAN */

/*
 * sparc v9/v8plus optimization:
 *
 * on the sparc v9/v8plus, we can load data little endian.  however, since
 * the compiler doesn't have direct support for little endian, we
 * link to an assembly-language routine `load_little_32' to do
 * the magic.  note that special care must be taken to ensure the
 * address is 32-bit aligned -- in the interest of speed, we don't
 * check to make sure, since careful programming can guarantee this
 * for us.
 */
#if defined(sun4u)

/* Define alignment check because we can 4-byte load as little endian. */
#define	_MD5_CHECK_ALIGNMENT
#define	LOAD_LITTLE_32(addr)	load_little_32((uint32_t *)(addr))

#if !defined(__lint) && defined(__GNUC__)

/*
 * Load the 32-bit word at addr in little-endian byte order using the
 * ASI_PL address space.
 *
 * The "m" input tells the compiler that the asm reads *addr.  Without it
 * the only visible input is the pointer value, so a non-volatile asm could
 * be treated as a pure function of that pointer and merged with, or hoisted
 * above, an earlier/later store to the same buffer.
 */
static __inline__ uint32_t
load_little_32(uint32_t *addr)
{
	uint32_t value;

	__asm__(
	    "lduwa [%1] %2, %0\n\t"
	    : "=r" (value)
	    : "r" (addr), "i" (ASI_PL), "m" (*addr));

	return (value);
}

/*
 * 16-bit counterpart of load_little_32(): loads the halfword at addr in
 * little-endian byte order through ASI_PL.  The "m" input serves the same
 * purpose as above.
 */
static __inline__ uint16_t
load_little_16(uint16_t *addr)
{
	uint16_t value;

	__asm__(
	    "lduha [%1] %2, %0\n\t"
	    : "=r" (value)
	    : "r" (addr), "i" (ASI_PL), "m" (*addr));

	return (value);
}

#endif	/* !__lint && __GNUC__ */

#if !defined(__GNUC__)
extern	uint32_t load_little_32(uint32_t *);
#endif	/* !__GNUC__ */

/* Placate lint */
#if defined(__lint)
uint32_t
load_little_32(uint32_t *addr)
{
	return (*addr);
}
#endif	/* __lint */

#else	/* !sun4u */

/* big endian -- will work on little endian, but slowly */
/* Since we do byte operations, we don't have to check for alignment. */
/*
 * Each byte is widened to uint32_t before it is shifted.  Left to the
 * default integer promotions the shift would be done in (signed) int, and
 * shifting a byte >= 0x80 left by 24 overflows int, which is undefined
 * behaviour; the result would also be an int that sign-extends when it is
 * widened.  addr is evaluated four times, so it must be free of side
 * effects, and it must be indexable as individual bytes.
 */
#define	LOAD_LITTLE_32(addr)	\
	((uint32_t)(addr)[0] | ((uint32_t)(addr)[1] << 8) |	\
	((uint32_t)(addr)[2] << 16) | ((uint32_t)(addr)[3] << 24))

#endif	/* sun4u */

#if defined(sun4v)

/*
 * For N1 want to minimize number of arithmetic operations. This is best
 * achieved by using the %asi register to specify ASI for the lduwa operations.
 * Also, have a separate inline template for each word, so can utilize the
 * immediate offset in lduwa, without relying on the compiler to do the right
 * thing.
 *
 * Moving to 64-bit loads might also be beneficial.
 */
#define	LOAD_LITTLE_32_0(addr)	load_little_32_0((uint32_t *)(addr))
#define	LOAD_LITTLE_32_1(addr)	load_little_32_1((uint32_t *)(addr))
#define	LOAD_LITTLE_32_2(addr)	load_little_32_2((uint32_t *)(addr))
#define	LOAD_LITTLE_32_3(addr)	load_little_32_3((uint32_t *)(addr))
#define	LOAD_LITTLE_32_4(addr)	load_little_32_4((uint32_t *)(addr))
#define	LOAD_LITTLE_32_5(addr)	load_little_32_5((uint32_t *)(addr))
#define	LOAD_LITTLE_32_6(addr)	load_little_32_6((uint32_t *)(addr))
#define	LOAD_LITTLE_32_7(addr)	load_little_32_7((uint32_t *)(addr))
#define	LOAD_LITTLE_32_8(addr)	load_little_32_8((uint32_t *)(addr))
#define	LOAD_LITTLE_32_9(addr)	load_little_32_9((uint32_t *)(addr))
#define	LOAD_LITTLE_32_a(addr)	load_little_32_a((uint32_t *)(addr))
#define	LOAD_LITTLE_32_b(addr)	load_little_32_b((uint32_t *)(addr))
#define	LOAD_LITTLE_32_c(addr)	load_little_32_c((uint32_t *)(addr))
#define	LOAD_LITTLE_32_d(addr)	load_little_32_d((uint32_t *)(addr))
#define	LOAD_LITTLE_32_e(addr)	load_little_32_e((uint32_t *)(addr))
#define	LOAD_LITTLE_32_f(addr)	load_little_32_f((uint32_t *)(addr))

#if !defined(__lint) && defined(__GNUC__)

/*
 * This actually sets the ASI register, not necessarily to ASI_PL.
 */
static __inline__ void
set_little(uint8_t asi)
{
	__asm__ __volatile__(
	    "wr %%g0, %0, %%asi\n\t"
	    : /* Nothing */
	    : "r" (asi));
}

/*
 * Returns the current contents of the %asi register (whatever it holds,
 * not necessarily ASI_PL), so that a caller can save it before calling
 * set_little().
 */
static __inline__ uint8_t
get_little(void)
{
	uint8_t asi;

	__asm__ __volatile__(
	    "rd %%asi, %0\n\t"
	    : "=r" (asi));

	return (asi);
}

/*
 * We have 16 functions which differ only in the offset from which they
 * load.  Use this preprocessor template to simplify maintenance.  Its
 * argument is the offset in hex, without the 0x.
 *
 * Each generated function loads the 32-bit word at addr[offset] through
 * whatever ASI is currently in %asi.  Two things are spelled out for the
 * compiler because it cannot see them in the template string:
 *   - the "m" input says that the asm reads addr[offset];
 *   - __volatile__ is used because the result also depends on %asi, which
 *     is written by the volatile asm in set_little() and is not an operand
 *     here, so the load must not be treated as a pure function of its
 *     register inputs.
 */
#define	LL_TEMPLATE(__off)				\
static __inline__ uint32_t				\
load_little_32_##__off(uint32_t *addr)			\
{							\
	uint32_t value;					\
	__asm__ __volatile__(				\
	    "lduwa [%1 + %2]%%asi, %0\n\t"		\
	    : "=r" (value)				\
	    : "r" (addr), "i" ((0x##__off) << 2),	\
	    "m" (addr[0x##__off]));			\
	return (value);					\
}

LL_TEMPLATE(0)
LL_TEMPLATE(1)
LL_TEMPLATE(2)
LL_TEMPLATE(3)
LL_TEMPLATE(4)
LL_TEMPLATE(5)
LL_TEMPLATE(6)
LL_TEMPLATE(7)
LL_TEMPLATE(8)
LL_TEMPLATE(9)
LL_TEMPLATE(a)
LL_TEMPLATE(b)
LL_TEMPLATE(c)
LL_TEMPLATE(d)
LL_TEMPLATE(e)
LL_TEMPLATE(f)
#undef	LL_TEMPLATE

#endif	/* !__lint && __GNUC__ */

#if !defined(__GNUC__)
/*
 * Using the %asi register to achieve little endian loads - register
 * is set using a inline template.
 *
 * Saves a few arithmetic ops as can now use an immediate offset with the
 * lduwa instructions.
 *
 * NOTE(review): set_little()/get_little() are declared here with uint32_t
 * while the GCC inline versions above use uint8_t.  Left as is because the
 * matching .il templates are not visible from this file -- confirm before
 * harmonizing.
 */
extern void set_little(uint32_t);
extern uint32_t get_little(void);

extern	uint32_t load_little_32_0(uint32_t *);
extern	uint32_t load_little_32_1(uint32_t *);
extern	uint32_t load_little_32_2(uint32_t *);
extern	uint32_t load_little_32_3(uint32_t *);
extern	uint32_t load_little_32_4(uint32_t *);
extern	uint32_t load_little_32_5(uint32_t *);
extern	uint32_t load_little_32_6(uint32_t *);
extern	uint32_t load_little_32_7(uint32_t *);
extern	uint32_t load_little_32_8(uint32_t *);
extern	uint32_t load_little_32_9(uint32_t *);
extern	uint32_t load_little_32_a(uint32_t *);
extern	uint32_t load_little_32_b(uint32_t *);
extern	uint32_t load_little_32_c(uint32_t *);
extern	uint32_t load_little_32_d(uint32_t *);
extern	uint32_t load_little_32_e(uint32_t *);
extern	uint32_t load_little_32_f(uint32_t *);
#endif	/* !__GNUC__ */
#endif	/* sun4v */

#endif	/* _LITTLE_ENDIAN */

#ifdef	__cplusplus
}
#endif

#endif	/* !_MD5_BYTESWAP_H */