/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#ifndef	_MD5_BYTESWAP_H
#define	_MD5_BYTESWAP_H

/*
 * Definitions of inline functions and macros for little-endian loads.
 *
 * This file has special definitions for UltraSPARC architectures,
 * which have a special address space identifier for loading 32 and 16 bit
 * integers in little-endian byte order.
 *
 * This file and common/crypto/md5/sparc/sun4[uv]/byteswap.il implement the
 * same thing and must be changed together.
 *
 * Exactly one definition of LOAD_LITTLE_32(addr) is selected:
 *
 *   _LITTLE_ENDIAN		direct 32-bit load through the pointer
 *   !_LITTLE_ENDIAN && sun4u	load_little_32(), an lduwa using ASI_PL
 *   otherwise			portable byte-by-byte assembly
 *
 * In addition, on !_LITTLE_ENDIAN sun4v builds the per-word loaders
 * LOAD_LITTLE_32_0 ... LOAD_LITTLE_32_f are provided.
 */

#include <sys/types.h>
#if defined(__sparc)
#include <v9/sys/asi.h>
#elif defined(_LITTLE_ENDIAN)
#include <sys/byteorder.h>
#endif

#ifdef	__cplusplus
extern "C" {
#endif

#if defined(_LITTLE_ENDIAN)

/*
 * Little-endian optimization: no byte swapping is needed.  On some
 * little-endian machines alignment checks are required; that is signalled
 * to the consumer with _MD5_CHECK_ALIGNMENT below.
 */

/*
 * i386 and amd64 don't require aligned 4-byte loads.  The symbol
 * _MD5_CHECK_ALIGNMENT indicates whether the MD5Transform function
 * requires alignment checking; it is defined for every other
 * little-endian target.
 */
#if !defined(__i386) && !defined(__amd64)
#define	_MD5_CHECK_ALIGNMENT
#endif	/* !__i386 && !__amd64 */

/*
 * Load the 32-bit word at addr.  The pointer is converted through void *
 * and dereferenced as a uint32_t, so no byte reordering takes place.
 */
#define	LOAD_LITTLE_32(addr)	(*(uint32_t *)(void *)(addr))

#else	/* !_LITTLE_ENDIAN */

/*
 * sparc v9/v8plus optimization:
 *
 * On the sparc v9/v8plus, we can load data little endian.  However, since
 * the compiler doesn't have direct support for little endian, we
 * link to an assembly-language routine `load_little_32' to do
 * the magic.  Note that special care must be taken to ensure the
 * address is 32-bit aligned -- in the interest of speed, we don't
 * check to make sure, since careful programming can guarantee this
 * for us.
 */
#if defined(sun4u)

/* Define alignment check because we can 4-byte load as little endian. */
#define	_MD5_CHECK_ALIGNMENT
#define	LOAD_LITTLE_32(addr)	load_little_32((uint32_t *)(void *)(addr))

#if !defined(__lint) && defined(__GNUC__)

/*
 * GNU C version of load_little_32(): issue a single lduwa from addr using
 * the immediate ASI_PL and return the loaded value.
 *
 * NOTE(review): the asm statement has no memory input operand or "memory"
 * clobber, so the compiler is not told that *addr is read here -- confirm
 * that callers cannot have a pending store to the buffer reordered past
 * this load.
 */
static __inline__ uint32_t
load_little_32(uint32_t *addr)
{
	uint32_t value;

	__asm__(
	    "lduwa [%1] %2, %0\n\t"
	    : "=r" (value)
	    : "r" (addr), "i" (ASI_PL));

	return (value);
}
#endif	/* !__lint && __GNUC__ */

#if !defined(__GNUC__)
/* Non-GNU compilers get the implementation from the byteswap.il template. */
extern uint32_t load_little_32(uint32_t *);
#endif	/* !__GNUC__ */

/* Placate lint */
#if defined(__lint)
uint32_t
load_little_32(uint32_t *addr)
{
	return (*addr);
}
#endif	/* __lint */

#else	/* !sun4u */

/*
 * Generic version: assemble the value from four individual bytes, least
 * significant byte first.  This is independent of the host byte order (so
 * it would also work on little endian, but slowly), and since only byte
 * operations are used there is no alignment requirement on addr.
 *
 * addr is expected to point at unsigned bytes.  Each byte is widened to
 * uint32_t before it is shifted: shifting a promoted int left by 24 when
 * the byte is 0x80 or greater would shift into the sign bit, which is
 * undefined and in practice yields a negative value that sign-extends
 * when widened.  The macro evaluates addr four times, so the argument
 * must not have side effects.
 */
#define	LOAD_LITTLE_32(addr)	\
	((uint32_t)(addr)[0] | ((uint32_t)(addr)[1] << 8) |	\
	((uint32_t)(addr)[2] << 16) | ((uint32_t)(addr)[3] << 24))

#endif	/* sun4u */

#if defined(sun4v)

/*
 * For N1 we want to minimize the number of arithmetic operations.  This is
 * best achieved by using the %asi register to specify the ASI for the lduwa
 * operations.  Also, have a separate inline template for each word, so that
 * the immediate offset in lduwa can be used without relying on the compiler
 * to do the right thing.
 *
 * Moving to 64-bit loads might also be beneficial.
 *
 * LOAD_LITTLE_32_<x>(addr) loads the 32-bit word at byte offset (0x<x> << 2)
 * from addr, using whatever ASI is currently in %asi.
 */
#define	LOAD_LITTLE_32_0(addr)	load_little_32_0((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_1(addr)	load_little_32_1((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_2(addr)	load_little_32_2((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_3(addr)	load_little_32_3((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_4(addr)	load_little_32_4((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_5(addr)	load_little_32_5((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_6(addr)	load_little_32_6((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_7(addr)	load_little_32_7((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_8(addr)	load_little_32_8((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_9(addr)	load_little_32_9((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_a(addr)	load_little_32_a((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_b(addr)	load_little_32_b((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_c(addr)	load_little_32_c((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_d(addr)	load_little_32_d((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_e(addr)	load_little_32_e((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_f(addr)	load_little_32_f((uint32_t *)(void *)(addr))

#if !defined(__lint) && defined(__GNUC__)

/*
 * Write asi into the %asi register.  Despite the name, this sets the
 * register to whatever value is passed, not necessarily to ASI_PL.
 */
static __inline__ void
set_little(uint8_t asi)
{
	__asm__ __volatile__(
	    "wr %%g0, %0, %%asi\n\t"
	    : /* Nothing */
	    : "r" (asi));
}

/*
 * Read and return the current contents of the %asi register.
 */
static __inline__ uint8_t
get_little(void)
{
	uint8_t asi;

	__asm__ __volatile__(
	    "rd %%asi, %0\n\t"
	    : "=r" (asi));

	return (asi);
}

/*
 * We have 16 functions which differ only in the offset from which they
 * load.  Use this preprocessor template to simplify maintenance.  Its
 * argument is the offset in hex, without the 0x; the generated function
 * load_little_32_<off>() performs an lduwa from addr + (0x<off> << 2)
 * through %asi.
 *
 * NOTE(review): as with the sun4u inline, the asm names neither the memory
 * it reads nor %asi as an input -- confirm the compiler cannot move these
 * loads across stores to the buffer or across set_little().
 */
#define	LL_TEMPLATE(__off)			\
static __inline__ uint32_t			\
load_little_32_##__off(uint32_t *addr)		\
{						\
	uint32_t value;				\
	__asm__(				\
	    "lduwa [%1 + %2]%%asi, %0\n\t"	\
	    : "=r" (value)			\
	    : "r" (addr), "i" ((0x##__off) << 2));	\
	return (value);				\
}

LL_TEMPLATE(0)
LL_TEMPLATE(1)
LL_TEMPLATE(2)
LL_TEMPLATE(3)
LL_TEMPLATE(4)
LL_TEMPLATE(5)
LL_TEMPLATE(6)
LL_TEMPLATE(7)
LL_TEMPLATE(8)
LL_TEMPLATE(9)
LL_TEMPLATE(a)
LL_TEMPLATE(b)
LL_TEMPLATE(c)
LL_TEMPLATE(d)
LL_TEMPLATE(e)
LL_TEMPLATE(f)
#undef	LL_TEMPLATE

#endif	/* !__lint && __GNUC__ */

#if !defined(__GNUC__)
/*
 * Using the %asi register to achieve little endian loads - the register
 * is set using an inline template.
 *
 * This saves a few arithmetic ops, as an immediate offset can now be used
 * with the lduwa instructions.
 *
 * NOTE(review): these prototypes use uint32_t for the ASI value while the
 * GNU C inlines use uint8_t -- confirm the difference is intentional.
 */
extern void set_little(uint32_t);
extern uint32_t get_little(void);

extern uint32_t load_little_32_0(uint32_t *);
extern uint32_t load_little_32_1(uint32_t *);
extern uint32_t load_little_32_2(uint32_t *);
extern uint32_t load_little_32_3(uint32_t *);
extern uint32_t load_little_32_4(uint32_t *);
extern uint32_t load_little_32_5(uint32_t *);
extern uint32_t load_little_32_6(uint32_t *);
extern uint32_t load_little_32_7(uint32_t *);
extern uint32_t load_little_32_8(uint32_t *);
extern uint32_t load_little_32_9(uint32_t *);
extern uint32_t load_little_32_a(uint32_t *);
extern uint32_t load_little_32_b(uint32_t *);
extern uint32_t load_little_32_c(uint32_t *);
extern uint32_t load_little_32_d(uint32_t *);
extern uint32_t load_little_32_e(uint32_t *);
extern uint32_t load_little_32_f(uint32_t *);
#endif	/* !__GNUC__ */
#endif	/* sun4v */

#endif	/* _LITTLE_ENDIAN */

#ifdef	__cplusplus
}
#endif

#endif	/* !_MD5_BYTESWAP_H */