1#! /usr/bin/env perl 2# Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved. 3# 4# Licensed under the OpenSSL license (the "License"). You may not use 5# this file except in compliance with the License. You can obtain a copy 6# in the file LICENSE in the source distribution or at 7# https://www.openssl.org/source/license.html 8 9 10# ==================================================================== 11# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL 12# project. The module is, however, dual licensed under OpenSSL and 13# CRYPTOGAMS licenses depending on where you obtain it. For further 14# details see http://www.openssl.org/~appro/cryptogams/. 15# ==================================================================== 16 17# Needs more work: key setup, CBC routine... 18# 19# ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with 20# 128-bit key, which is ~40% better than 64-bit code generated by gcc 21# 4.0. But these are not the ones currently used! Their "compact" 22# counterparts are, for security reason. ppc_AES_encrypt_compact runs 23# at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact - 24# at 1/3 of ppc_AES_decrypt. 25 26# February 2010 27# 28# Rescheduling instructions to favour Power6 pipeline gave 10% 29# performance improvement on the platform in question (and marginal 30# improvement even on others). It should be noted that Power6 fails 31# to process byte in 18 cycles, only in 23, because it fails to issue 32# 4 load instructions in two cycles, only in 3. As result non-compact 33# block subroutines are 25% slower than one would expect. Compact 34# functions scale better, because they have pure computational part, 35# which scales perfectly with clock frequency. To be specific 36# ppc_AES_encrypt_compact operates at 42 cycles per byte, while 37# ppc_AES_decrypt_compact - at 55 (in 64-bit build). 
# The first command-line argument selects the target "flavour".  It must
# contain "64" or "32" (anything else is rejected below) and may end in
# "le" to request little-endian code.
$flavour = shift(@ARGV);

# Word-size dependent parameters used throughout the generated assembly:
#   $SIZE_T  - size of a general-purpose register slot in bytes
#   $LRSAVE  - offset (from the caller's frame) at which LR is saved
#   $STU     - "store with update" mnemonic used to allocate the frame
#   $POP     - register-sized load mnemonic
#   $PUSH    - register-sized store mnemonic
if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
} else { die "nonsense $flavour"; }

# True (non-zero, equal to $SIZE_T) when the flavour name ends in "le".
$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

# Directory part of the path this script was invoked with; empty when the
# script was started without a directory component.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";

# Look for the translator next to this script first, then in the shared
# perlasm directory.
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Pipe everything written to STDOUT through ppc-xlate.pl.  The next
# command-line argument (if any) is appended to the translator's command
# line.  The call is parenthesised and checked with low-precedence "or":
# with "||" the test would bind to the (always true) command string and a
# failed open would go unnoticed.
open(STDOUT, "| $^X $xlate $flavour " . shift(@ARGV))
	or die "can't call $xlate: $!";

# Stack frame size in bytes: 32 register-sized slots.
$FRAME=32*$SIZE_T;

# Append table entries to $code, each one emitted twice on a single
# ".long w,w" line.
#
# Arguments: a list of integers; processing stops at the first undefined
#            element (or when the list is exhausted).
# Effects:   appends to the global $code.
#
# Deliberately declared without a prototype: the routine consumes an
# argument list and is invoked as &_data_word(...).
sub _data_word
{
	while (defined(my $word = shift)) {
		$code .= sprintf "\t.long\t0x%08x,0x%08x\n", $word, $word;
	}
}

# Register allocation.  Several names alias the same register on purpose.
$sp="r1";
$toc="r2";
$inp="r3";
$out="r4";
$key="r5";

$Tbl0="r3";	# shares r3 with $inp
$Tbl1="r6";
$Tbl2="r7";
$Tbl3=$out;	# stay away from "r2"; $out is offloaded to stack

$s0="r8";
$s1="r9";
$s2="r10";
$s3="r11";

$t0="r12";
$t1="r0";	# stay away from "r13";
$t2="r14";
$t3="r15";

$acc00="r16";
$acc01="r17";
$acc02="r18";
$acc03="r19";

$acc04="r20";
$acc05="r21";
$acc06="r22";
$acc07="r23";

$acc08="r24";
$acc09="r25";
$acc10="r26";
$acc11="r27";

$acc12="r28";
$acc13="r29";
$acc14="r30";
$acc15="r31";

# The masks reuse table-pointer registers.
$mask80=$Tbl2;
$mask1b=$Tbl3;

# LAES_Te / LAES_Td: each helper saves LR in r0, uses "bcl 20,31,$+4" to
# load the address of the following instruction into LR, copies it to
# $Tbl0, adds the distance to the first entry of its table, restores LR
# and returns.  The .space directives pad the code in front of the data.
$code.=<<___;
.machine	"any"
.text

.align	7
LAES_Te:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$Tbl0	;    vvvvv "distance" between . and 1st data entry
	addi	$Tbl0,$Tbl0,`128-8`
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
	.space	`64-9*4`
LAES_Td:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$Tbl0	;    vvvvvvvv "distance" between . 
and 1st data entry 134 addi $Tbl0,$Tbl0,`128-64-8+2048+256` 135 mtlr r0 136 blr 137 .long 0 138 .byte 0,12,0x14,0,0,0,0,0 139 .space `128-64-9*4` 140___ 141&_data_word( 142 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, 143 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554, 144 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d, 145 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a, 146 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87, 147 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b, 148 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, 149 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b, 150 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a, 151 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f, 152 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108, 153 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f, 154 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, 155 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5, 156 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d, 157 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, 158 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e, 159 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb, 160 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, 161 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497, 162 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c, 163 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed, 164 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b, 165 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a, 166 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, 167 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594, 168 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81, 169 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, 170 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a, 171 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504, 172 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, 173 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d, 174 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f, 175 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, 176 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47, 177 
0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395, 178 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, 179 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883, 180 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c, 181 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76, 182 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e, 183 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4, 184 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, 185 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b, 186 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7, 187 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, 188 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25, 189 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818, 190 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, 191 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651, 192 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21, 193 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, 194 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa, 195 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12, 196 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, 197 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9, 198 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133, 199 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7, 200 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920, 201 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a, 202 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, 203 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8, 204 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11, 205 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a); 206$code.=<<___; 207.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 208.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 209.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 210.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 211.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc 212.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 213.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a 214.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 215.byte 0x09, 0x83, 0x2c, 
0x1a, 0x1b, 0x6e, 0x5a, 0xa0 216.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 217.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b 218.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf 219.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 220.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 221.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 222.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 223.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 224.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 225.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 226.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb 227.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c 228.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 229.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 230.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 231.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 232.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a 233.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e 234.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e 235.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 236.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf 237.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 238.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 239___ 240&_data_word( 241 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, 242 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393, 243 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25, 244 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f, 245 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1, 246 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6, 247 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da, 248 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844, 249 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd, 250 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4, 251 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45, 252 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94, 253 0x70486858, 0x8f45fd19, 
0x94de6c87, 0x527bf8b7, 254 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a, 255 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5, 256 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c, 257 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1, 258 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a, 259 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, 260 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051, 261 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46, 262 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff, 263 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77, 264 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb, 265 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, 266 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e, 267 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927, 268 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a, 269 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e, 270 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16, 271 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, 272 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8, 273 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd, 274 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34, 275 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163, 276 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120, 277 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, 278 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0, 279 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422, 280 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef, 281 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36, 282 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4, 283 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662, 284 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5, 285 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3, 286 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b, 287 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8, 288 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6, 289 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, 290 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0, 291 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815, 
292 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f, 293 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df, 294 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f, 295 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e, 296 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713, 297 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89, 298 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c, 299 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf, 300 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86, 301 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f, 302 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541, 303 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190, 304 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742); 305$code.=<<___; 306.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 307.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb 308.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 309.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb 310.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d 311.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e 312.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 313.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 314.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 315.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 316.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda 317.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 318.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a 319.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 320.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 321.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b 322.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea 323.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 324.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 325.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e 326.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 327.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b 328.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 329.byte 0x9a, 
0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 330.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 331.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f 332.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d 333.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef 334.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 335.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 336.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 337.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d 338 339 340.globl .AES_encrypt 341.align 7 342.AES_encrypt: 343 $STU $sp,-$FRAME($sp) 344 mflr r0 345 346 $PUSH $out,`$FRAME-$SIZE_T*19`($sp) 347 $PUSH r14,`$FRAME-$SIZE_T*18`($sp) 348 $PUSH r15,`$FRAME-$SIZE_T*17`($sp) 349 $PUSH r16,`$FRAME-$SIZE_T*16`($sp) 350 $PUSH r17,`$FRAME-$SIZE_T*15`($sp) 351 $PUSH r18,`$FRAME-$SIZE_T*14`($sp) 352 $PUSH r19,`$FRAME-$SIZE_T*13`($sp) 353 $PUSH r20,`$FRAME-$SIZE_T*12`($sp) 354 $PUSH r21,`$FRAME-$SIZE_T*11`($sp) 355 $PUSH r22,`$FRAME-$SIZE_T*10`($sp) 356 $PUSH r23,`$FRAME-$SIZE_T*9`($sp) 357 $PUSH r24,`$FRAME-$SIZE_T*8`($sp) 358 $PUSH r25,`$FRAME-$SIZE_T*7`($sp) 359 $PUSH r26,`$FRAME-$SIZE_T*6`($sp) 360 $PUSH r27,`$FRAME-$SIZE_T*5`($sp) 361 $PUSH r28,`$FRAME-$SIZE_T*4`($sp) 362 $PUSH r29,`$FRAME-$SIZE_T*3`($sp) 363 $PUSH r30,`$FRAME-$SIZE_T*2`($sp) 364 $PUSH r31,`$FRAME-$SIZE_T*1`($sp) 365 $PUSH r0,`$FRAME+$LRSAVE`($sp) 366 367 andi. $t0,$inp,3 368 andi. $t1,$out,3 369 or. 
$t0,$t0,$t1 370 bne Lenc_unaligned 371 372Lenc_unaligned_ok: 373___ 374$code.=<<___ if (!$LITTLE_ENDIAN); 375 lwz $s0,0($inp) 376 lwz $s1,4($inp) 377 lwz $s2,8($inp) 378 lwz $s3,12($inp) 379___ 380$code.=<<___ if ($LITTLE_ENDIAN); 381 lwz $t0,0($inp) 382 lwz $t1,4($inp) 383 lwz $t2,8($inp) 384 lwz $t3,12($inp) 385 rotlwi $s0,$t0,8 386 rotlwi $s1,$t1,8 387 rotlwi $s2,$t2,8 388 rotlwi $s3,$t3,8 389 rlwimi $s0,$t0,24,0,7 390 rlwimi $s1,$t1,24,0,7 391 rlwimi $s2,$t2,24,0,7 392 rlwimi $s3,$t3,24,0,7 393 rlwimi $s0,$t0,24,16,23 394 rlwimi $s1,$t1,24,16,23 395 rlwimi $s2,$t2,24,16,23 396 rlwimi $s3,$t3,24,16,23 397___ 398$code.=<<___; 399 bl LAES_Te 400 bl Lppc_AES_encrypt_compact 401 $POP $out,`$FRAME-$SIZE_T*19`($sp) 402___ 403$code.=<<___ if ($LITTLE_ENDIAN); 404 rotlwi $t0,$s0,8 405 rotlwi $t1,$s1,8 406 rotlwi $t2,$s2,8 407 rotlwi $t3,$s3,8 408 rlwimi $t0,$s0,24,0,7 409 rlwimi $t1,$s1,24,0,7 410 rlwimi $t2,$s2,24,0,7 411 rlwimi $t3,$s3,24,0,7 412 rlwimi $t0,$s0,24,16,23 413 rlwimi $t1,$s1,24,16,23 414 rlwimi $t2,$s2,24,16,23 415 rlwimi $t3,$s3,24,16,23 416 stw $t0,0($out) 417 stw $t1,4($out) 418 stw $t2,8($out) 419 stw $t3,12($out) 420___ 421$code.=<<___ if (!$LITTLE_ENDIAN); 422 stw $s0,0($out) 423 stw $s1,4($out) 424 stw $s2,8($out) 425 stw $s3,12($out) 426___ 427$code.=<<___; 428 b Lenc_done 429 430Lenc_unaligned: 431 subfic $t0,$inp,4096 432 subfic $t1,$out,4096 433 andi. $t0,$t0,4096-16 434 beq Lenc_xpage 435 andi. 
$t1,$t1,4096-16 436 bne Lenc_unaligned_ok 437 438Lenc_xpage: 439 lbz $acc00,0($inp) 440 lbz $acc01,1($inp) 441 lbz $acc02,2($inp) 442 lbz $s0,3($inp) 443 lbz $acc04,4($inp) 444 lbz $acc05,5($inp) 445 lbz $acc06,6($inp) 446 lbz $s1,7($inp) 447 lbz $acc08,8($inp) 448 lbz $acc09,9($inp) 449 lbz $acc10,10($inp) 450 insrwi $s0,$acc00,8,0 451 lbz $s2,11($inp) 452 insrwi $s1,$acc04,8,0 453 lbz $acc12,12($inp) 454 insrwi $s0,$acc01,8,8 455 lbz $acc13,13($inp) 456 insrwi $s1,$acc05,8,8 457 lbz $acc14,14($inp) 458 insrwi $s0,$acc02,8,16 459 lbz $s3,15($inp) 460 insrwi $s1,$acc06,8,16 461 insrwi $s2,$acc08,8,0 462 insrwi $s3,$acc12,8,0 463 insrwi $s2,$acc09,8,8 464 insrwi $s3,$acc13,8,8 465 insrwi $s2,$acc10,8,16 466 insrwi $s3,$acc14,8,16 467 468 bl LAES_Te 469 bl Lppc_AES_encrypt_compact 470 $POP $out,`$FRAME-$SIZE_T*19`($sp) 471 472 extrwi $acc00,$s0,8,0 473 extrwi $acc01,$s0,8,8 474 stb $acc00,0($out) 475 extrwi $acc02,$s0,8,16 476 stb $acc01,1($out) 477 stb $acc02,2($out) 478 extrwi $acc04,$s1,8,0 479 stb $s0,3($out) 480 extrwi $acc05,$s1,8,8 481 stb $acc04,4($out) 482 extrwi $acc06,$s1,8,16 483 stb $acc05,5($out) 484 stb $acc06,6($out) 485 extrwi $acc08,$s2,8,0 486 stb $s1,7($out) 487 extrwi $acc09,$s2,8,8 488 stb $acc08,8($out) 489 extrwi $acc10,$s2,8,16 490 stb $acc09,9($out) 491 stb $acc10,10($out) 492 extrwi $acc12,$s3,8,0 493 stb $s2,11($out) 494 extrwi $acc13,$s3,8,8 495 stb $acc12,12($out) 496 extrwi $acc14,$s3,8,16 497 stb $acc13,13($out) 498 stb $acc14,14($out) 499 stb $s3,15($out) 500 501Lenc_done: 502 $POP r0,`$FRAME+$LRSAVE`($sp) 503 $POP r14,`$FRAME-$SIZE_T*18`($sp) 504 $POP r15,`$FRAME-$SIZE_T*17`($sp) 505 $POP r16,`$FRAME-$SIZE_T*16`($sp) 506 $POP r17,`$FRAME-$SIZE_T*15`($sp) 507 $POP r18,`$FRAME-$SIZE_T*14`($sp) 508 $POP r19,`$FRAME-$SIZE_T*13`($sp) 509 $POP r20,`$FRAME-$SIZE_T*12`($sp) 510 $POP r21,`$FRAME-$SIZE_T*11`($sp) 511 $POP r22,`$FRAME-$SIZE_T*10`($sp) 512 $POP r23,`$FRAME-$SIZE_T*9`($sp) 513 $POP r24,`$FRAME-$SIZE_T*8`($sp) 514 $POP 
r25,`$FRAME-$SIZE_T*7`($sp) 515 $POP r26,`$FRAME-$SIZE_T*6`($sp) 516 $POP r27,`$FRAME-$SIZE_T*5`($sp) 517 $POP r28,`$FRAME-$SIZE_T*4`($sp) 518 $POP r29,`$FRAME-$SIZE_T*3`($sp) 519 $POP r30,`$FRAME-$SIZE_T*2`($sp) 520 $POP r31,`$FRAME-$SIZE_T*1`($sp) 521 mtlr r0 522 addi $sp,$sp,$FRAME 523 blr 524 .long 0 525 .byte 0,12,4,1,0x80,18,3,0 526 .long 0 527 528.align 5 529Lppc_AES_encrypt: 530 lwz $acc00,240($key) 531 addi $Tbl1,$Tbl0,3 532 lwz $t0,0($key) 533 addi $Tbl2,$Tbl0,2 534 lwz $t1,4($key) 535 addi $Tbl3,$Tbl0,1 536 lwz $t2,8($key) 537 addi $acc00,$acc00,-1 538 lwz $t3,12($key) 539 addi $key,$key,16 540 xor $s0,$s0,$t0 541 xor $s1,$s1,$t1 542 xor $s2,$s2,$t2 543 xor $s3,$s3,$t3 544 mtctr $acc00 545.align 4 546Lenc_loop: 547 rlwinm $acc00,$s0,`32-24+3`,21,28 548 rlwinm $acc01,$s1,`32-24+3`,21,28 549 rlwinm $acc02,$s2,`32-24+3`,21,28 550 rlwinm $acc03,$s3,`32-24+3`,21,28 551 lwz $t0,0($key) 552 rlwinm $acc04,$s1,`32-16+3`,21,28 553 lwz $t1,4($key) 554 rlwinm $acc05,$s2,`32-16+3`,21,28 555 lwz $t2,8($key) 556 rlwinm $acc06,$s3,`32-16+3`,21,28 557 lwz $t3,12($key) 558 rlwinm $acc07,$s0,`32-16+3`,21,28 559 lwzx $acc00,$Tbl0,$acc00 560 rlwinm $acc08,$s2,`32-8+3`,21,28 561 lwzx $acc01,$Tbl0,$acc01 562 rlwinm $acc09,$s3,`32-8+3`,21,28 563 lwzx $acc02,$Tbl0,$acc02 564 rlwinm $acc10,$s0,`32-8+3`,21,28 565 lwzx $acc03,$Tbl0,$acc03 566 rlwinm $acc11,$s1,`32-8+3`,21,28 567 lwzx $acc04,$Tbl1,$acc04 568 rlwinm $acc12,$s3,`0+3`,21,28 569 lwzx $acc05,$Tbl1,$acc05 570 rlwinm $acc13,$s0,`0+3`,21,28 571 lwzx $acc06,$Tbl1,$acc06 572 rlwinm $acc14,$s1,`0+3`,21,28 573 lwzx $acc07,$Tbl1,$acc07 574 rlwinm $acc15,$s2,`0+3`,21,28 575 lwzx $acc08,$Tbl2,$acc08 576 xor $t0,$t0,$acc00 577 lwzx $acc09,$Tbl2,$acc09 578 xor $t1,$t1,$acc01 579 lwzx $acc10,$Tbl2,$acc10 580 xor $t2,$t2,$acc02 581 lwzx $acc11,$Tbl2,$acc11 582 xor $t3,$t3,$acc03 583 lwzx $acc12,$Tbl3,$acc12 584 xor $t0,$t0,$acc04 585 lwzx $acc13,$Tbl3,$acc13 586 xor $t1,$t1,$acc05 587 lwzx $acc14,$Tbl3,$acc14 588 xor $t2,$t2,$acc06 
589 lwzx $acc15,$Tbl3,$acc15 590 xor $t3,$t3,$acc07 591 xor $t0,$t0,$acc08 592 xor $t1,$t1,$acc09 593 xor $t2,$t2,$acc10 594 xor $t3,$t3,$acc11 595 xor $s0,$t0,$acc12 596 xor $s1,$t1,$acc13 597 xor $s2,$t2,$acc14 598 xor $s3,$t3,$acc15 599 addi $key,$key,16 600 bdnz Lenc_loop 601 602 addi $Tbl2,$Tbl0,2048 603 nop 604 lwz $t0,0($key) 605 rlwinm $acc00,$s0,`32-24`,24,31 606 lwz $t1,4($key) 607 rlwinm $acc01,$s1,`32-24`,24,31 608 lwz $t2,8($key) 609 rlwinm $acc02,$s2,`32-24`,24,31 610 lwz $t3,12($key) 611 rlwinm $acc03,$s3,`32-24`,24,31 612 lwz $acc08,`2048+0`($Tbl0) ! prefetch Te4 613 rlwinm $acc04,$s1,`32-16`,24,31 614 lwz $acc09,`2048+32`($Tbl0) 615 rlwinm $acc05,$s2,`32-16`,24,31 616 lwz $acc10,`2048+64`($Tbl0) 617 rlwinm $acc06,$s3,`32-16`,24,31 618 lwz $acc11,`2048+96`($Tbl0) 619 rlwinm $acc07,$s0,`32-16`,24,31 620 lwz $acc12,`2048+128`($Tbl0) 621 rlwinm $acc08,$s2,`32-8`,24,31 622 lwz $acc13,`2048+160`($Tbl0) 623 rlwinm $acc09,$s3,`32-8`,24,31 624 lwz $acc14,`2048+192`($Tbl0) 625 rlwinm $acc10,$s0,`32-8`,24,31 626 lwz $acc15,`2048+224`($Tbl0) 627 rlwinm $acc11,$s1,`32-8`,24,31 628 lbzx $acc00,$Tbl2,$acc00 629 rlwinm $acc12,$s3,`0`,24,31 630 lbzx $acc01,$Tbl2,$acc01 631 rlwinm $acc13,$s0,`0`,24,31 632 lbzx $acc02,$Tbl2,$acc02 633 rlwinm $acc14,$s1,`0`,24,31 634 lbzx $acc03,$Tbl2,$acc03 635 rlwinm $acc15,$s2,`0`,24,31 636 lbzx $acc04,$Tbl2,$acc04 637 rlwinm $s0,$acc00,24,0,7 638 lbzx $acc05,$Tbl2,$acc05 639 rlwinm $s1,$acc01,24,0,7 640 lbzx $acc06,$Tbl2,$acc06 641 rlwinm $s2,$acc02,24,0,7 642 lbzx $acc07,$Tbl2,$acc07 643 rlwinm $s3,$acc03,24,0,7 644 lbzx $acc08,$Tbl2,$acc08 645 rlwimi $s0,$acc04,16,8,15 646 lbzx $acc09,$Tbl2,$acc09 647 rlwimi $s1,$acc05,16,8,15 648 lbzx $acc10,$Tbl2,$acc10 649 rlwimi $s2,$acc06,16,8,15 650 lbzx $acc11,$Tbl2,$acc11 651 rlwimi $s3,$acc07,16,8,15 652 lbzx $acc12,$Tbl2,$acc12 653 rlwimi $s0,$acc08,8,16,23 654 lbzx $acc13,$Tbl2,$acc13 655 rlwimi $s1,$acc09,8,16,23 656 lbzx $acc14,$Tbl2,$acc14 657 rlwimi $s2,$acc10,8,16,23 658 lbzx 
$acc15,$Tbl2,$acc15 659 rlwimi $s3,$acc11,8,16,23 660 or $s0,$s0,$acc12 661 or $s1,$s1,$acc13 662 or $s2,$s2,$acc14 663 or $s3,$s3,$acc15 664 xor $s0,$s0,$t0 665 xor $s1,$s1,$t1 666 xor $s2,$s2,$t2 667 xor $s3,$s3,$t3 668 blr 669 .long 0 670 .byte 0,12,0x14,0,0,0,0,0 671 672.align 4 673Lppc_AES_encrypt_compact: 674 lwz $acc00,240($key) 675 addi $Tbl1,$Tbl0,2048 676 lwz $t0,0($key) 677 lis $mask80,0x8080 678 lwz $t1,4($key) 679 lis $mask1b,0x1b1b 680 lwz $t2,8($key) 681 ori $mask80,$mask80,0x8080 682 lwz $t3,12($key) 683 ori $mask1b,$mask1b,0x1b1b 684 addi $key,$key,16 685 mtctr $acc00 686.align 4 687Lenc_compact_loop: 688 xor $s0,$s0,$t0 689 xor $s1,$s1,$t1 690 rlwinm $acc00,$s0,`32-24`,24,31 691 xor $s2,$s2,$t2 692 rlwinm $acc01,$s1,`32-24`,24,31 693 xor $s3,$s3,$t3 694 rlwinm $acc02,$s2,`32-24`,24,31 695 rlwinm $acc03,$s3,`32-24`,24,31 696 rlwinm $acc04,$s1,`32-16`,24,31 697 rlwinm $acc05,$s2,`32-16`,24,31 698 rlwinm $acc06,$s3,`32-16`,24,31 699 rlwinm $acc07,$s0,`32-16`,24,31 700 lbzx $acc00,$Tbl1,$acc00 701 rlwinm $acc08,$s2,`32-8`,24,31 702 lbzx $acc01,$Tbl1,$acc01 703 rlwinm $acc09,$s3,`32-8`,24,31 704 lbzx $acc02,$Tbl1,$acc02 705 rlwinm $acc10,$s0,`32-8`,24,31 706 lbzx $acc03,$Tbl1,$acc03 707 rlwinm $acc11,$s1,`32-8`,24,31 708 lbzx $acc04,$Tbl1,$acc04 709 rlwinm $acc12,$s3,`0`,24,31 710 lbzx $acc05,$Tbl1,$acc05 711 rlwinm $acc13,$s0,`0`,24,31 712 lbzx $acc06,$Tbl1,$acc06 713 rlwinm $acc14,$s1,`0`,24,31 714 lbzx $acc07,$Tbl1,$acc07 715 rlwinm $acc15,$s2,`0`,24,31 716 lbzx $acc08,$Tbl1,$acc08 717 rlwinm $s0,$acc00,24,0,7 718 lbzx $acc09,$Tbl1,$acc09 719 rlwinm $s1,$acc01,24,0,7 720 lbzx $acc10,$Tbl1,$acc10 721 rlwinm $s2,$acc02,24,0,7 722 lbzx $acc11,$Tbl1,$acc11 723 rlwinm $s3,$acc03,24,0,7 724 lbzx $acc12,$Tbl1,$acc12 725 rlwimi $s0,$acc04,16,8,15 726 lbzx $acc13,$Tbl1,$acc13 727 rlwimi $s1,$acc05,16,8,15 728 lbzx $acc14,$Tbl1,$acc14 729 rlwimi $s2,$acc06,16,8,15 730 lbzx $acc15,$Tbl1,$acc15 731 rlwimi $s3,$acc07,16,8,15 732 rlwimi $s0,$acc08,8,16,23 733 
rlwimi $s1,$acc09,8,16,23 734 rlwimi $s2,$acc10,8,16,23 735 rlwimi $s3,$acc11,8,16,23 736 lwz $t0,0($key) 737 or $s0,$s0,$acc12 738 lwz $t1,4($key) 739 or $s1,$s1,$acc13 740 lwz $t2,8($key) 741 or $s2,$s2,$acc14 742 lwz $t3,12($key) 743 or $s3,$s3,$acc15 744 745 addi $key,$key,16 746 bdz Lenc_compact_done 747 748 and $acc00,$s0,$mask80 # r1=r0&0x80808080 749 and $acc01,$s1,$mask80 750 and $acc02,$s2,$mask80 751 and $acc03,$s3,$mask80 752 srwi $acc04,$acc00,7 # r1>>7 753 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f 754 srwi $acc05,$acc01,7 755 andc $acc09,$s1,$mask80 756 srwi $acc06,$acc02,7 757 andc $acc10,$s2,$mask80 758 srwi $acc07,$acc03,7 759 andc $acc11,$s3,$mask80 760 sub $acc00,$acc00,$acc04 # r1-(r1>>7) 761 sub $acc01,$acc01,$acc05 762 sub $acc02,$acc02,$acc06 763 sub $acc03,$acc03,$acc07 764 add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1 765 add $acc09,$acc09,$acc09 766 add $acc10,$acc10,$acc10 767 add $acc11,$acc11,$acc11 768 and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b 769 and $acc01,$acc01,$mask1b 770 and $acc02,$acc02,$mask1b 771 and $acc03,$acc03,$mask1b 772 xor $acc00,$acc00,$acc08 # r2 773 xor $acc01,$acc01,$acc09 774 rotlwi $acc12,$s0,16 # ROTATE(r0,16) 775 xor $acc02,$acc02,$acc10 776 rotlwi $acc13,$s1,16 777 xor $acc03,$acc03,$acc11 778 rotlwi $acc14,$s2,16 779 780 xor $s0,$s0,$acc00 # r0^r2 781 rotlwi $acc15,$s3,16 782 xor $s1,$s1,$acc01 783 rotrwi $s0,$s0,24 # ROTATE(r2^r0,24) 784 xor $s2,$s2,$acc02 785 rotrwi $s1,$s1,24 786 xor $s3,$s3,$acc03 787 rotrwi $s2,$s2,24 788 xor $s0,$s0,$acc00 # ROTATE(r2^r0,24)^r2 789 rotrwi $s3,$s3,24 790 xor $s1,$s1,$acc01 791 xor $s2,$s2,$acc02 792 xor $s3,$s3,$acc03 793 rotlwi $acc08,$acc12,8 # ROTATE(r0,24) 794 xor $s0,$s0,$acc12 # 795 rotlwi $acc09,$acc13,8 796 xor $s1,$s1,$acc13 797 rotlwi $acc10,$acc14,8 798 xor $s2,$s2,$acc14 799 rotlwi $acc11,$acc15,8 800 xor $s3,$s3,$acc15 801 xor $s0,$s0,$acc08 # 802 xor $s1,$s1,$acc09 803 xor $s2,$s2,$acc10 804 xor $s3,$s3,$acc11 805 806 b Lenc_compact_loop 807.align 4 
808Lenc_compact_done: 809 xor $s0,$s0,$t0 810 xor $s1,$s1,$t1 811 xor $s2,$s2,$t2 812 xor $s3,$s3,$t3 813 blr 814 .long 0 815 .byte 0,12,0x14,0,0,0,0,0 816.size .AES_encrypt,.-.AES_encrypt 817 818.globl .AES_decrypt 819.align 7 820.AES_decrypt: 821 $STU $sp,-$FRAME($sp) 822 mflr r0 823 824 $PUSH $out,`$FRAME-$SIZE_T*19`($sp) 825 $PUSH r14,`$FRAME-$SIZE_T*18`($sp) 826 $PUSH r15,`$FRAME-$SIZE_T*17`($sp) 827 $PUSH r16,`$FRAME-$SIZE_T*16`($sp) 828 $PUSH r17,`$FRAME-$SIZE_T*15`($sp) 829 $PUSH r18,`$FRAME-$SIZE_T*14`($sp) 830 $PUSH r19,`$FRAME-$SIZE_T*13`($sp) 831 $PUSH r20,`$FRAME-$SIZE_T*12`($sp) 832 $PUSH r21,`$FRAME-$SIZE_T*11`($sp) 833 $PUSH r22,`$FRAME-$SIZE_T*10`($sp) 834 $PUSH r23,`$FRAME-$SIZE_T*9`($sp) 835 $PUSH r24,`$FRAME-$SIZE_T*8`($sp) 836 $PUSH r25,`$FRAME-$SIZE_T*7`($sp) 837 $PUSH r26,`$FRAME-$SIZE_T*6`($sp) 838 $PUSH r27,`$FRAME-$SIZE_T*5`($sp) 839 $PUSH r28,`$FRAME-$SIZE_T*4`($sp) 840 $PUSH r29,`$FRAME-$SIZE_T*3`($sp) 841 $PUSH r30,`$FRAME-$SIZE_T*2`($sp) 842 $PUSH r31,`$FRAME-$SIZE_T*1`($sp) 843 $PUSH r0,`$FRAME+$LRSAVE`($sp) 844 845 andi. $t0,$inp,3 846 andi. $t1,$out,3 847 or. 
$t0,$t0,$t1 848 bne Ldec_unaligned 849 850Ldec_unaligned_ok: 851___ 852$code.=<<___ if (!$LITTLE_ENDIAN); 853 lwz $s0,0($inp) 854 lwz $s1,4($inp) 855 lwz $s2,8($inp) 856 lwz $s3,12($inp) 857___ 858$code.=<<___ if ($LITTLE_ENDIAN); 859 lwz $t0,0($inp) 860 lwz $t1,4($inp) 861 lwz $t2,8($inp) 862 lwz $t3,12($inp) 863 rotlwi $s0,$t0,8 864 rotlwi $s1,$t1,8 865 rotlwi $s2,$t2,8 866 rotlwi $s3,$t3,8 867 rlwimi $s0,$t0,24,0,7 868 rlwimi $s1,$t1,24,0,7 869 rlwimi $s2,$t2,24,0,7 870 rlwimi $s3,$t3,24,0,7 871 rlwimi $s0,$t0,24,16,23 872 rlwimi $s1,$t1,24,16,23 873 rlwimi $s2,$t2,24,16,23 874 rlwimi $s3,$t3,24,16,23 875___ 876$code.=<<___; 877 bl LAES_Td 878 bl Lppc_AES_decrypt_compact 879 $POP $out,`$FRAME-$SIZE_T*19`($sp) 880___ 881$code.=<<___ if ($LITTLE_ENDIAN); 882 rotlwi $t0,$s0,8 883 rotlwi $t1,$s1,8 884 rotlwi $t2,$s2,8 885 rotlwi $t3,$s3,8 886 rlwimi $t0,$s0,24,0,7 887 rlwimi $t1,$s1,24,0,7 888 rlwimi $t2,$s2,24,0,7 889 rlwimi $t3,$s3,24,0,7 890 rlwimi $t0,$s0,24,16,23 891 rlwimi $t1,$s1,24,16,23 892 rlwimi $t2,$s2,24,16,23 893 rlwimi $t3,$s3,24,16,23 894 stw $t0,0($out) 895 stw $t1,4($out) 896 stw $t2,8($out) 897 stw $t3,12($out) 898___ 899$code.=<<___ if (!$LITTLE_ENDIAN); 900 stw $s0,0($out) 901 stw $s1,4($out) 902 stw $s2,8($out) 903 stw $s3,12($out) 904___ 905$code.=<<___; 906 b Ldec_done 907 908Ldec_unaligned: 909 subfic $t0,$inp,4096 910 subfic $t1,$out,4096 911 andi. $t0,$t0,4096-16 912 beq Ldec_xpage 913 andi. 
$t1,$t1,4096-16 914 bne Ldec_unaligned_ok 915 916Ldec_xpage: 917 lbz $acc00,0($inp) 918 lbz $acc01,1($inp) 919 lbz $acc02,2($inp) 920 lbz $s0,3($inp) 921 lbz $acc04,4($inp) 922 lbz $acc05,5($inp) 923 lbz $acc06,6($inp) 924 lbz $s1,7($inp) 925 lbz $acc08,8($inp) 926 lbz $acc09,9($inp) 927 lbz $acc10,10($inp) 928 insrwi $s0,$acc00,8,0 929 lbz $s2,11($inp) 930 insrwi $s1,$acc04,8,0 931 lbz $acc12,12($inp) 932 insrwi $s0,$acc01,8,8 933 lbz $acc13,13($inp) 934 insrwi $s1,$acc05,8,8 935 lbz $acc14,14($inp) 936 insrwi $s0,$acc02,8,16 937 lbz $s3,15($inp) 938 insrwi $s1,$acc06,8,16 939 insrwi $s2,$acc08,8,0 940 insrwi $s3,$acc12,8,0 941 insrwi $s2,$acc09,8,8 942 insrwi $s3,$acc13,8,8 943 insrwi $s2,$acc10,8,16 944 insrwi $s3,$acc14,8,16 945 946 bl LAES_Td 947 bl Lppc_AES_decrypt_compact 948 $POP $out,`$FRAME-$SIZE_T*19`($sp) 949 950 extrwi $acc00,$s0,8,0 951 extrwi $acc01,$s0,8,8 952 stb $acc00,0($out) 953 extrwi $acc02,$s0,8,16 954 stb $acc01,1($out) 955 stb $acc02,2($out) 956 extrwi $acc04,$s1,8,0 957 stb $s0,3($out) 958 extrwi $acc05,$s1,8,8 959 stb $acc04,4($out) 960 extrwi $acc06,$s1,8,16 961 stb $acc05,5($out) 962 stb $acc06,6($out) 963 extrwi $acc08,$s2,8,0 964 stb $s1,7($out) 965 extrwi $acc09,$s2,8,8 966 stb $acc08,8($out) 967 extrwi $acc10,$s2,8,16 968 stb $acc09,9($out) 969 stb $acc10,10($out) 970 extrwi $acc12,$s3,8,0 971 stb $s2,11($out) 972 extrwi $acc13,$s3,8,8 973 stb $acc12,12($out) 974 extrwi $acc14,$s3,8,16 975 stb $acc13,13($out) 976 stb $acc14,14($out) 977 stb $s3,15($out) 978 979Ldec_done: 980 $POP r0,`$FRAME+$LRSAVE`($sp) 981 $POP r14,`$FRAME-$SIZE_T*18`($sp) 982 $POP r15,`$FRAME-$SIZE_T*17`($sp) 983 $POP r16,`$FRAME-$SIZE_T*16`($sp) 984 $POP r17,`$FRAME-$SIZE_T*15`($sp) 985 $POP r18,`$FRAME-$SIZE_T*14`($sp) 986 $POP r19,`$FRAME-$SIZE_T*13`($sp) 987 $POP r20,`$FRAME-$SIZE_T*12`($sp) 988 $POP r21,`$FRAME-$SIZE_T*11`($sp) 989 $POP r22,`$FRAME-$SIZE_T*10`($sp) 990 $POP r23,`$FRAME-$SIZE_T*9`($sp) 991 $POP r24,`$FRAME-$SIZE_T*8`($sp) 992 $POP 
r25,`$FRAME-$SIZE_T*7`($sp)
	$POP	r26,`$FRAME-$SIZE_T*6`($sp)
	$POP	r27,`$FRAME-$SIZE_T*5`($sp)
	$POP	r28,`$FRAME-$SIZE_T*4`($sp)
	$POP	r29,`$FRAME-$SIZE_T*3`($sp)
	$POP	r30,`$FRAME-$SIZE_T*2`($sp)
	$POP	r31,`$FRAME-$SIZE_T*1`($sp)
	mtlr	r0
	addi	$sp,$sp,$FRAME
	blr
	.long	0
	.byte	0,12,4,1,0x80,18,3,0
	.long	0

___

# Lppc_AES_decrypt -- table-lookup decryption rounds.
#   in/out : state words in $s0-$s3
#   in     : $Tbl0 = lookup-table base (only read here, never set);
#            $key  = key schedule pointer, advanced past each round key used
#   scratch: $t0-$t3, $acc00-$acc15, $Tbl1-$Tbl3, CTR
# The word at 240($key) is used as the round count: CTR gets that count minus
# one and the last round is done after the loop.  $Tbl1..$Tbl3 point 3, 2 and
# 1 bytes into the same table; since _data_word() stores every word twice, a
# 4-byte load at those offsets presumably yields a byte-rotated copy of the
# entry (TODO confirm against the table layout and endianness).
$code.=<<___;
.align	5
Lppc_AES_decrypt:
	lwz	$acc00,240($key)
	addi	$Tbl1,$Tbl0,3
	lwz	$t0,0($key)
	addi	$Tbl2,$Tbl0,2
	lwz	$t1,4($key)
	addi	$Tbl3,$Tbl0,1
	lwz	$t2,8($key)
	addi	$acc00,$acc00,-1
	lwz	$t3,12($key)
	addi	$key,$key,16
	xor	$s0,$s0,$t0
	xor	$s1,$s1,$t1
	xor	$s2,$s2,$t2
	xor	$s3,$s3,$t3
	mtctr	$acc00
___

# One middle round per iteration.  rlwinm extracts each state byte already
# scaled by 8 (shift +3, mask bits 21-28) to index 8-byte table entries, the
# next round key is loaded into $t0-$t3, and the four lookups per column are
# xored into it to form the new state.  Loads and ALU operations are
# interleaved on purpose (see the February 2010 scheduling note at the top of
# the file), so the instruction order must be kept.
$code.=<<___;
.align	4
Ldec_loop:
	rlwinm	$acc00,$s0,`32-24+3`,21,28
	rlwinm	$acc01,$s1,`32-24+3`,21,28
	rlwinm	$acc02,$s2,`32-24+3`,21,28
	rlwinm	$acc03,$s3,`32-24+3`,21,28
	lwz	$t0,0($key)
	rlwinm	$acc04,$s3,`32-16+3`,21,28
	lwz	$t1,4($key)
	rlwinm	$acc05,$s0,`32-16+3`,21,28
	lwz	$t2,8($key)
	rlwinm	$acc06,$s1,`32-16+3`,21,28
	lwz	$t3,12($key)
	rlwinm	$acc07,$s2,`32-16+3`,21,28
	lwzx	$acc00,$Tbl0,$acc00
	rlwinm	$acc08,$s2,`32-8+3`,21,28
	lwzx	$acc01,$Tbl0,$acc01
	rlwinm	$acc09,$s3,`32-8+3`,21,28
	lwzx	$acc02,$Tbl0,$acc02
	rlwinm	$acc10,$s0,`32-8+3`,21,28
	lwzx	$acc03,$Tbl0,$acc03
	rlwinm	$acc11,$s1,`32-8+3`,21,28
	lwzx	$acc04,$Tbl1,$acc04
	rlwinm	$acc12,$s1,`0+3`,21,28
	lwzx	$acc05,$Tbl1,$acc05
	rlwinm	$acc13,$s2,`0+3`,21,28
	lwzx	$acc06,$Tbl1,$acc06
	rlwinm	$acc14,$s3,`0+3`,21,28
	lwzx	$acc07,$Tbl1,$acc07
	rlwinm	$acc15,$s0,`0+3`,21,28
	lwzx	$acc08,$Tbl2,$acc08
	xor	$t0,$t0,$acc00
	lwzx	$acc09,$Tbl2,$acc09
	xor	$t1,$t1,$acc01
	lwzx	$acc10,$Tbl2,$acc10
	xor	$t2,$t2,$acc02
	lwzx	$acc11,$Tbl2,$acc11
	xor	$t3,$t3,$acc03
	lwzx	$acc12,$Tbl3,$acc12
	xor	$t0,$t0,$acc04
	lwzx	$acc13,$Tbl3,$acc13
	xor	$t1,$t1,$acc05
	lwzx	$acc14,$Tbl3,$acc14
	xor	$t2,$t2,$acc06
	lwzx	$acc15,$Tbl3,$acc15
	xor	$t3,$t3,$acc07
	xor	$t0,$t0,$acc08
	xor	$t1,$t1,$acc09
	xor	$t2,$t2,$acc10
	xor	$t3,$t3,$acc11
	xor	$s0,$t0,$acc12
	xor	$s1,$t1,$acc13
	xor	$s2,$t2,$acc14
	xor	$s3,$t3,$acc15
	addi	$key,$key,16
	bdnz	Ldec_loop

___

# Last round: byte lookups (lbzx) through $Tbl2 = $Tbl0+2048, reassembled into
# words with rlwinm/rlwimi/or and xored with the final round key in $t0-$t3.
# The lwz loads from 2048+0 .. 2048+224 in 32-byte steps only touch that table
# (the inline note calls it "prefetch Td4"); every one of their destination
# registers is overwritten before it is read.
$code.=<<___;
	addi	$Tbl2,$Tbl0,2048
	nop
	lwz	$t0,0($key)
	rlwinm	$acc00,$s0,`32-24`,24,31
	lwz	$t1,4($key)
	rlwinm	$acc01,$s1,`32-24`,24,31
	lwz	$t2,8($key)
	rlwinm	$acc02,$s2,`32-24`,24,31
	lwz	$t3,12($key)
	rlwinm	$acc03,$s3,`32-24`,24,31
	lwz	$acc08,`2048+0`($Tbl0)	! prefetch Td4
	rlwinm	$acc04,$s3,`32-16`,24,31
	lwz	$acc09,`2048+32`($Tbl0)
	rlwinm	$acc05,$s0,`32-16`,24,31
	lwz	$acc10,`2048+64`($Tbl0)
	lbzx	$acc00,$Tbl2,$acc00
	lwz	$acc11,`2048+96`($Tbl0)
	lbzx	$acc01,$Tbl2,$acc01
	lwz	$acc12,`2048+128`($Tbl0)
	rlwinm	$acc06,$s1,`32-16`,24,31
	lwz	$acc13,`2048+160`($Tbl0)
	rlwinm	$acc07,$s2,`32-16`,24,31
	lwz	$acc14,`2048+192`($Tbl0)
	rlwinm	$acc08,$s2,`32-8`,24,31
	lwz	$acc15,`2048+224`($Tbl0)
	rlwinm	$acc09,$s3,`32-8`,24,31
	lbzx	$acc02,$Tbl2,$acc02
	rlwinm	$acc10,$s0,`32-8`,24,31
	lbzx	$acc03,$Tbl2,$acc03
	rlwinm	$acc11,$s1,`32-8`,24,31
	lbzx	$acc04,$Tbl2,$acc04
	rlwinm	$acc12,$s1,`0`,24,31
	lbzx	$acc05,$Tbl2,$acc05
	rlwinm	$acc13,$s2,`0`,24,31
	lbzx	$acc06,$Tbl2,$acc06
	rlwinm	$acc14,$s3,`0`,24,31
	lbzx	$acc07,$Tbl2,$acc07
	rlwinm	$acc15,$s0,`0`,24,31
	lbzx	$acc08,$Tbl2,$acc08
	rlwinm	$s0,$acc00,24,0,7
	lbzx	$acc09,$Tbl2,$acc09
	rlwinm	$s1,$acc01,24,0,7
	lbzx	$acc10,$Tbl2,$acc10
	rlwinm	$s2,$acc02,24,0,7
	lbzx	$acc11,$Tbl2,$acc11
	rlwinm	$s3,$acc03,24,0,7
	lbzx	$acc12,$Tbl2,$acc12
	rlwimi	$s0,$acc04,16,8,15
	lbzx	$acc13,$Tbl2,$acc13
	rlwimi	$s1,$acc05,16,8,15
	lbzx	$acc14,$Tbl2,$acc14
	rlwimi	$s2,$acc06,16,8,15
	lbzx	$acc15,$Tbl2,$acc15
	rlwimi	$s3,$acc07,16,8,15
	rlwimi	$s0,$acc08,8,16,23
	rlwimi	$s1,$acc09,8,16,23
	rlwimi	$s2,$acc10,8,16,23
	rlwimi	$s3,$acc11,8,16,23
	or	$s0,$s0,$acc12
	or	$s1,$s1,$acc13
	or	$s2,$s2,$acc14
	or	$s3,$s3,$acc15
	xor	$s0,$s0,$t0
	xor	$s1,$s1,$t1
	xor	$s2,$s2,$t2
	xor	$s3,$s3,$t3
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0

___

# Lppc_AES_decrypt_compact -- same register usage as Lppc_AES_decrypt, but
# every round uses only byte lookups in the table at $Tbl0+2048 (kept in
# $Tbl1 here).  $mask80/$mask1b (aliases of $Tbl2/$Tbl3, see the register
# assignments near the top of the file) are loaded with 0x80808080 and
# 0x1b1b1b1b for the arithmetic done between rounds.  CTR is loaded with the
# full count read from 240($key).
$code.=<<___;
.align	4
Lppc_AES_decrypt_compact:
	lwz	$acc00,240($key)
	addi	$Tbl1,$Tbl0,2048
	lwz	$t0,0($key)
	lis	$mask80,0x8080
	lwz	$t1,4($key)
	lis	$mask1b,0x1b1b
	lwz	$t2,8($key)
	ori	$mask80,$mask80,0x8080
	lwz	$t3,12($key)
	ori	$mask1b,$mask1b,0x1b1b
	addi	$key,$key,16
___
# 64-bit flavour only: replicate both masks into the upper 32 bits.
$code.=<<___	if ($SIZE_T==8);
	insrdi	$mask80,$mask80,32,0
	insrdi	$mask1b,$mask1b,32,0
___
# Per round: add the round key held in $t0-$t3, substitute every state byte
# through the byte table and regroup the bytes into $s0-$s3, load the next
# round key, then leave via Ldec_compact_done once CTR reaches zero (bdz).
$code.=<<___;
	mtctr	$acc00
.align	4
Ldec_compact_loop:
	xor	$s0,$s0,$t0
	xor	$s1,$s1,$t1
	rlwinm	$acc00,$s0,`32-24`,24,31
	xor	$s2,$s2,$t2
	rlwinm	$acc01,$s1,`32-24`,24,31
	xor	$s3,$s3,$t3
	rlwinm	$acc02,$s2,`32-24`,24,31
	rlwinm	$acc03,$s3,`32-24`,24,31
	rlwinm	$acc04,$s3,`32-16`,24,31
	rlwinm	$acc05,$s0,`32-16`,24,31
	rlwinm	$acc06,$s1,`32-16`,24,31
	rlwinm	$acc07,$s2,`32-16`,24,31
	lbzx	$acc00,$Tbl1,$acc00
	rlwinm	$acc08,$s2,`32-8`,24,31
	lbzx	$acc01,$Tbl1,$acc01
	rlwinm	$acc09,$s3,`32-8`,24,31
	lbzx	$acc02,$Tbl1,$acc02
	rlwinm	$acc10,$s0,`32-8`,24,31
	lbzx	$acc03,$Tbl1,$acc03
	rlwinm	$acc11,$s1,`32-8`,24,31
	lbzx	$acc04,$Tbl1,$acc04
	rlwinm	$acc12,$s1,`0`,24,31
	lbzx	$acc05,$Tbl1,$acc05
	rlwinm	$acc13,$s2,`0`,24,31
	lbzx	$acc06,$Tbl1,$acc06
	rlwinm	$acc14,$s3,`0`,24,31
	lbzx	$acc07,$Tbl1,$acc07
	rlwinm	$acc15,$s0,`0`,24,31
	lbzx	$acc08,$Tbl1,$acc08
	rlwinm	$s0,$acc00,24,0,7
	lbzx	$acc09,$Tbl1,$acc09
	rlwinm	$s1,$acc01,24,0,7
	lbzx	$acc10,$Tbl1,$acc10
	rlwinm	$s2,$acc02,24,0,7
	lbzx	$acc11,$Tbl1,$acc11
	rlwinm	$s3,$acc03,24,0,7
	lbzx	$acc12,$Tbl1,$acc12
	rlwimi	$s0,$acc04,16,8,15
	lbzx	$acc13,$Tbl1,$acc13
	rlwimi	$s1,$acc05,16,8,15
	lbzx	$acc14,$Tbl1,$acc14
	rlwimi	$s2,$acc06,16,8,15
	lbzx	$acc15,$Tbl1,$acc15
	rlwimi	$s3,$acc07,16,8,15
	rlwimi	$s0,$acc08,8,16,23
	rlwimi	$s1,$acc09,8,16,23
	rlwimi	$s2,$acc10,8,16,23
	rlwimi	$s3,$acc11,8,16,23
	lwz	$t0,0($key)
	or	$s0,$s0,$acc12
	lwz	$t1,4($key)
	or	$s1,$s1,$acc13
	lwz	$t2,8($key)
	or	$s2,$s2,$acc14
	lwz	$t3,12($key)
	or	$s3,$s3,$acc15

	addi	$key,$key,16
	bdz	Ldec_compact_done
___
# 64-bit flavour: $s1 is packed into the upper half of $s0 and $s3 into the
# upper half of $s2, so each step below handles two state words at once; the
# r2^r0, r4^r0 and r8 results are split back into 32-bit registers with
# extrdi.  r2, r4 and r8 come from applying the same per-byte step three times
# in a row: shift left by one and xor 0x1b into bytes whose top bit was set,
# as spelled out in the inline formulas.
$code.=<<___ if ($SIZE_T==8);
	# vectorized permutation improves decrypt performance by 10%
	insrdi	$s0,$s1,32,0
	insrdi	$s2,$s3,32,0

	and	$acc00,$s0,$mask80	# r1=r0&0x80808080
	and	$acc02,$s2,$mask80
	srdi	$acc04,$acc00,7		# r1>>7
	srdi	$acc06,$acc02,7
	andc	$acc08,$s0,$mask80	# r0&0x7f7f7f7f
	andc	$acc10,$s2,$mask80
	sub	$acc00,$acc00,$acc04	# r1-(r1>>7)
	sub	$acc02,$acc02,$acc06
	add	$acc08,$acc08,$acc08	# (r0&0x7f7f7f7f)<<1
	add	$acc10,$acc10,$acc10
	and	$acc00,$acc00,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
	and	$acc02,$acc02,$mask1b
	xor	$acc00,$acc00,$acc08	# r2
	xor	$acc02,$acc02,$acc10

	and	$acc04,$acc00,$mask80	# r1=r2&0x80808080
	and	$acc06,$acc02,$mask80
	srdi	$acc08,$acc04,7		# r1>>7
	srdi	$acc10,$acc06,7
	andc	$acc12,$acc00,$mask80	# r2&0x7f7f7f7f
	andc	$acc14,$acc02,$mask80
	sub	$acc04,$acc04,$acc08	# r1-(r1>>7)
	sub	$acc06,$acc06,$acc10
	add	$acc12,$acc12,$acc12	# (r2&0x7f7f7f7f)<<1
	add	$acc14,$acc14,$acc14
	and	$acc04,$acc04,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
	and	$acc06,$acc06,$mask1b
	xor	$acc04,$acc04,$acc12	# r4
	xor	$acc06,$acc06,$acc14

	and	$acc08,$acc04,$mask80	# r1=r4&0x80808080
	and	$acc10,$acc06,$mask80
	srdi	$acc12,$acc08,7		# r1>>7
	srdi	$acc14,$acc10,7
	sub	$acc08,$acc08,$acc12	# r1-(r1>>7)
	sub	$acc10,$acc10,$acc14
	andc	$acc12,$acc04,$mask80	# r4&0x7f7f7f7f
	andc	$acc14,$acc06,$mask80
	add	$acc12,$acc12,$acc12	# (r4&0x7f7f7f7f)<<1
	add	$acc14,$acc14,$acc14
	and	$acc08,$acc08,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
	and	$acc10,$acc10,$mask1b
	xor	$acc08,$acc08,$acc12	# r8
	xor	$acc10,$acc10,$acc14

	xor	$acc00,$acc00,$s0	# r2^r0
	xor	$acc02,$acc02,$s2
	xor	$acc04,$acc04,$s0	# r4^r0
	xor	$acc06,$acc06,$s2

	extrdi	$acc01,$acc00,32,0
	extrdi	$acc03,$acc02,32,0
	extrdi	$acc05,$acc04,32,0
	extrdi	$acc07,$acc06,32,0
	extrdi	$acc09,$acc08,32,0
	extrdi	$acc11,$acc10,32,0
___
# 32-bit flavour: the same r2/r4/r8 computation with one state word per
# register ($acc00-03 = r2^r0, $acc04-07 = r4^r0, $acc08-11 = r8 on exit).
$code.=<<___ if ($SIZE_T==4);
	and	$acc00,$s0,$mask80	# r1=r0&0x80808080
	and	$acc01,$s1,$mask80
	and	$acc02,$s2,$mask80
	and	$acc03,$s3,$mask80
	srwi	$acc04,$acc00,7		# r1>>7
	andc	$acc08,$s0,$mask80	# r0&0x7f7f7f7f
	srwi	$acc05,$acc01,7
	andc	$acc09,$s1,$mask80
	srwi	$acc06,$acc02,7
	andc	$acc10,$s2,$mask80
	srwi	$acc07,$acc03,7
	andc	$acc11,$s3,$mask80
	sub	$acc00,$acc00,$acc04	# r1-(r1>>7)
	sub	$acc01,$acc01,$acc05
	sub	$acc02,$acc02,$acc06
	sub	$acc03,$acc03,$acc07
	add	$acc08,$acc08,$acc08	# (r0&0x7f7f7f7f)<<1
	add	$acc09,$acc09,$acc09
	add	$acc10,$acc10,$acc10
	add	$acc11,$acc11,$acc11
	and	$acc00,$acc00,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
	and	$acc01,$acc01,$mask1b
	and	$acc02,$acc02,$mask1b
	and	$acc03,$acc03,$mask1b
	xor	$acc00,$acc00,$acc08	# r2
	xor	$acc01,$acc01,$acc09
	xor	$acc02,$acc02,$acc10
	xor	$acc03,$acc03,$acc11

	and	$acc04,$acc00,$mask80	# r1=r2&0x80808080
	and	$acc05,$acc01,$mask80
	and	$acc06,$acc02,$mask80
	and	$acc07,$acc03,$mask80
	srwi	$acc08,$acc04,7		# r1>>7
	andc	$acc12,$acc00,$mask80	# r2&0x7f7f7f7f
	srwi	$acc09,$acc05,7
	andc	$acc13,$acc01,$mask80
	srwi	$acc10,$acc06,7
	andc	$acc14,$acc02,$mask80
	srwi	$acc11,$acc07,7
	andc	$acc15,$acc03,$mask80
	sub	$acc04,$acc04,$acc08	# r1-(r1>>7)
	sub	$acc05,$acc05,$acc09
	sub	$acc06,$acc06,$acc10
	sub	$acc07,$acc07,$acc11
	add	$acc12,$acc12,$acc12	# (r2&0x7f7f7f7f)<<1
	add	$acc13,$acc13,$acc13
	add	$acc14,$acc14,$acc14
	add	$acc15,$acc15,$acc15
	and	$acc04,$acc04,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
	and	$acc05,$acc05,$mask1b
	and	$acc06,$acc06,$mask1b
	and	$acc07,$acc07,$mask1b
	xor	$acc04,$acc04,$acc12	# r4
	xor	$acc05,$acc05,$acc13
	xor	$acc06,$acc06,$acc14
	xor	$acc07,$acc07,$acc15

	and	$acc08,$acc04,$mask80	# r1=r4&0x80808080
	and	$acc09,$acc05,$mask80
	srwi	$acc12,$acc08,7		# r1>>7
	and	$acc10,$acc06,$mask80
	srwi	$acc13,$acc09,7
	and	$acc11,$acc07,$mask80
	srwi	$acc14,$acc10,7
	sub	$acc08,$acc08,$acc12	# r1-(r1>>7)
	srwi	$acc15,$acc11,7
	sub	$acc09,$acc09,$acc13
	sub	$acc10,$acc10,$acc14
	sub	$acc11,$acc11,$acc15
	andc	$acc12,$acc04,$mask80	# r4&0x7f7f7f7f
	andc	$acc13,$acc05,$mask80
	andc	$acc14,$acc06,$mask80
	andc	$acc15,$acc07,$mask80
	add	$acc12,$acc12,$acc12	# (r4&0x7f7f7f7f)<<1
	add	$acc13,$acc13,$acc13
	add	$acc14,$acc14,$acc14
	add	$acc15,$acc15,$acc15
	and	$acc08,$acc08,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
	and	$acc09,$acc09,$mask1b
	and	$acc10,$acc10,$mask1b
	and	$acc11,$acc11,$mask1b
	xor	$acc08,$acc08,$acc12	# r8
	xor	$acc09,$acc09,$acc13
	xor	$acc10,$acc10,$acc14
	xor	$acc11,$acc11,$acc15

	xor	$acc00,$acc00,$s0	# r2^r0
	xor	$acc01,$acc01,$s1
	xor	$acc02,$acc02,$s2
	xor	$acc03,$acc03,$s3
	xor	$acc04,$acc04,$s0	# r4^r0
	xor	$acc05,$acc05,$s1
	xor	$acc06,$acc06,$s2
	xor	$acc07,$acc07,$s3
___
# Common tail of the round for both flavours: r0 is combined with the r2, r4
# and r8 terms under byte rotations, as annotated line by line.
$code.=<<___;
	rotrwi	$s0,$s0,8		# = ROTATE(r0,8)
	rotrwi	$s1,$s1,8
	xor	$s0,$s0,$acc00		# ^= r2^r0
___
# The text below continues the same instruction stream of the compact
# decryption round: the remaining rotate/xor steps that fold the r2^r0, r4^r0
# and r8 terms into $s0-$s3, the branch back to Ldec_compact_loop, and the
# Ldec_compact_done exit (reached through bdz), which xors in the round key
# already held in $t0-$t3 and returns.  The closing .size directive is placed
# after the last local routine, so the size it records for .AES_decrypt runs
# up to this point.
$code.=<<___;
	rotrwi	$s2,$s2,8
	xor	$s1,$s1,$acc01
	rotrwi	$s3,$s3,8
	xor	$s2,$s2,$acc02
	xor	$s3,$s3,$acc03
	xor	$acc00,$acc00,$acc08
	xor	$acc01,$acc01,$acc09
	xor	$acc02,$acc02,$acc10
	xor	$acc03,$acc03,$acc11
	xor	$s0,$s0,$acc04		# ^= r4^r0
	rotrwi	$acc00,$acc00,24
	xor	$s1,$s1,$acc05
	rotrwi	$acc01,$acc01,24
	xor	$s2,$s2,$acc06
	rotrwi	$acc02,$acc02,24
	xor	$s3,$s3,$acc07
	rotrwi	$acc03,$acc03,24
	xor	$acc04,$acc04,$acc08
	xor	$acc05,$acc05,$acc09
	xor	$acc06,$acc06,$acc10
	xor	$acc07,$acc07,$acc11
	xor	$s0,$s0,$acc08		# ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)]
	rotrwi	$acc04,$acc04,16
	xor	$s1,$s1,$acc09
	rotrwi	$acc05,$acc05,16
	xor	$s2,$s2,$acc10
	rotrwi	$acc06,$acc06,16
	xor	$s3,$s3,$acc11
	rotrwi	$acc07,$acc07,16
	xor	$s0,$s0,$acc00		# ^= ROTATE(r8^r2^r0,24)
	rotrwi	$acc08,$acc08,8
	xor	$s1,$s1,$acc01
	rotrwi	$acc09,$acc09,8
	xor	$s2,$s2,$acc02
	rotrwi	$acc10,$acc10,8
	xor	$s3,$s3,$acc03
	rotrwi	$acc11,$acc11,8
	xor	$s0,$s0,$acc04		# ^= ROTATE(r8^r4^r0,16)
	xor	$s1,$s1,$acc05
	xor	$s2,$s2,$acc06
	xor	$s3,$s3,$acc07
	xor	$s0,$s0,$acc08		# ^= ROTATE(r8,8)
	xor	$s1,$s1,$acc09
	xor	$s2,$s2,$acc10
	xor	$s3,$s3,$acc11

	b	Ldec_compact_loop
.align	4
Ldec_compact_done:
	xor	$s0,$s0,$t0
	xor	$s1,$s1,$t1
	xor	$s2,$s2,$t2
	xor	$s3,$s3,$t3
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
.size	.AES_decrypt,.-.AES_decrypt

.asciz	"AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
.align	7
___

# Replace every back-quoted expression in the generated text (frame offsets,
# shift counts, table displacements) with its evaluated value, then send the
# result to STDOUT, which was re-opened near the top of the file as a pipe
# into ppc-xlate.pl.
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;

# Buffered write errors and a failing translator process are only reported
# when the pipe is closed, so the result of close must be checked; otherwise a
# truncated or empty assembly file could be produced without any diagnostic.
# For a piped handle $! may be unset when the child merely exited non-zero, in
# which case the wait status in $? is reported instead.
close STDOUT
    or die "error closing STDOUT: " . ($! ? $! : "child exited with status $?");