1#!/usr/bin/env perl 2 3# ==================================================================== 4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL 5# project. The module is, however, dual licensed under OpenSSL and 6# CRYPTOGAMS licenses depending on where you obtain it. For further 7# details see http://www.openssl.org/~appro/cryptogams/. 8# ==================================================================== 9 10# Needs more work: key setup, CBC routine... 11# 12# ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with 13# 128-bit key, which is ~40% better than 64-bit code generated by gcc 14# 4.0. But these are not the ones currently used! Their "compact" 15# counterparts are, for security reason. ppc_AES_encrypt_compact runs 16# at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact - 17# at 1/3 of ppc_AES_decrypt. 18 19# February 2010 20# 21# Rescheduling instructions to favour Power6 pipeline gave 10% 22# performance improvement on the platfrom in question (and marginal 23# improvement even on others). It should be noted that Power6 fails 24# to process byte in 18 cycles, only in 23, because it fails to issue 25# 4 load instructions in two cycles, only in 3. As result non-compact 26# block subroutines are 25% slower than one would expect. Compact 27# functions scale better, because they have pure computational part, 28# which scales perfectly with clock frequency. To be specific 29# ppc_AES_encrypt_compact operates at 42 cycles per byte, while 30# ppc_AES_decrypt_compact - at 55 (in 64-bit build). 31 32$flavour = shift; 33 34if ($flavour =~ /64/) { 35 $SIZE_T =8; 36 $LRSAVE =2*$SIZE_T; 37 $STU ="stdu"; 38 $POP ="ld"; 39 $PUSH ="std"; 40} elsif ($flavour =~ /32/) { 41 $SIZE_T =4; 42 $LRSAVE =$SIZE_T; 43 $STU ="stwu"; 44 $POP ="lwz"; 45 $PUSH ="stw"; 46} else { die "nonsense $flavour"; } 47 48$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0; 49 50$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; 51( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or 52( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or 53die "can't locate ppc-xlate.pl"; 54 55open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!"; 56 57$FRAME=32*$SIZE_T; 58 59sub _data_word() 60{ my $i; 61 while(defined($i=shift)) { $code.=sprintf"\t.long\t0x%08x,0x%08x\n",$i,$i; } 62} 63 64$sp="r1"; 65$toc="r2"; 66$inp="r3"; 67$out="r4"; 68$key="r5"; 69 70$Tbl0="r3"; 71$Tbl1="r6"; 72$Tbl2="r7"; 73$Tbl3=$out; # stay away from "r2"; $out is offloaded to stack 74 75$s0="r8"; 76$s1="r9"; 77$s2="r10"; 78$s3="r11"; 79 80$t0="r12"; 81$t1="r0"; # stay away from "r13"; 82$t2="r14"; 83$t3="r15"; 84 85$acc00="r16"; 86$acc01="r17"; 87$acc02="r18"; 88$acc03="r19"; 89 90$acc04="r20"; 91$acc05="r21"; 92$acc06="r22"; 93$acc07="r23"; 94 95$acc08="r24"; 96$acc09="r25"; 97$acc10="r26"; 98$acc11="r27"; 99 100$acc12="r28"; 101$acc13="r29"; 102$acc14="r30"; 103$acc15="r31"; 104 105$mask80=$Tbl2; 106$mask1b=$Tbl3; 107 108$code.=<<___; 109.machine "any" 110.text 111 112.align 7 113LAES_Te: 114 mflr r0 115 bcl 20,31,\$+4 116 mflr $Tbl0 ; vvvvv "distance" between . and 1st data entry 117 addi $Tbl0,$Tbl0,`128-8` 118 mtlr r0 119 blr 120 .long 0 121 .byte 0,12,0x14,0,0,0,0,0 122 .space `64-9*4` 123LAES_Td: 124 mflr r0 125 bcl 20,31,\$+4 126 mflr $Tbl0 ; vvvvvvvv "distance" between . and 1st data entry 127 addi $Tbl0,$Tbl0,`128-64-8+2048+256` 128 mtlr r0 129 blr 130 .long 0 131 .byte 0,12,0x14,0,0,0,0,0 132 .space `128-64-9*4` 133___ 134&_data_word( 135 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, 136 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554, 137 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d, 138 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a, 139 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87, 140 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b, 141 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, 142 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b, 143 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a, 144 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f, 145 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108, 146 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f, 147 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, 148 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5, 149 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d, 150 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, 151 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e, 152 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb, 153 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, 154 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497, 155 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c, 156 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed, 157 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b, 158 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a, 159 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, 160 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594, 161 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81, 162 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, 163 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a, 164 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504, 165 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, 166 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d, 167 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f, 168 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, 169 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47, 170 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395, 171 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, 172 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883, 173 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c, 174 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76, 175 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e, 176 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4, 177 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, 178 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b, 179 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7, 180 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, 181 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25, 182 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818, 183 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, 184 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651, 185 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21, 186 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, 187 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa, 188 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12, 189 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, 190 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9, 191 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133, 192 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7, 193 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920, 194 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a, 195 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, 196 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8, 197 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11, 198 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a); 199$code.=<<___; 200.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 201.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 202.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 203.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 204.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc 205.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 206.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a 207.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 208.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 209.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 210.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b 211.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf 212.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 213.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 214.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 215.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 216.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 217.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 218.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 219.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb 220.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c 221.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 222.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 223.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 224.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 225.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a 226.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e 227.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e 228.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 229.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf 230.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 231.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 232___ 233&_data_word( 234 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, 235 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393, 236 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25, 237 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f, 238 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1, 239 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6, 240 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da, 241 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844, 242 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd, 243 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4, 244 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45, 245 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94, 246 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7, 247 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a, 248 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5, 249 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c, 250 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1, 251 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a, 252 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, 253 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051, 254 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46, 255 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff, 256 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77, 257 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb, 258 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, 259 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e, 260 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927, 261 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a, 262 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e, 263 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16, 264 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, 265 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8, 266 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd, 267 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34, 268 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163, 269 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120, 270 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, 271 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0, 272 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422, 273 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef, 274 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36, 275 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4, 276 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662, 277 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5, 278 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3, 279 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b, 280 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8, 281 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6, 282 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, 283 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0, 284 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815, 285 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f, 286 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df, 287 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f, 288 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e, 289 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713, 290 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89, 291 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c, 292 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf, 293 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86, 294 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f, 295 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541, 296 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190, 297 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742); 298$code.=<<___; 299.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 300.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb 301.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 302.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb 303.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d 304.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e 305.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 306.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 307.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 308.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 309.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda 310.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 311.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a 312.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 313.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 314.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b 315.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea 316.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 317.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 318.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e 319.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 320.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b 321.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 322.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 323.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 324.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f 325.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d 326.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef 327.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 328.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 329.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 330.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d 331 332 333.globl .AES_encrypt 334.align 7 335.AES_encrypt: 336 $STU $sp,-$FRAME($sp) 337 mflr r0 338 339 $PUSH $out,`$FRAME-$SIZE_T*19`($sp) 340 $PUSH r14,`$FRAME-$SIZE_T*18`($sp) 341 $PUSH r15,`$FRAME-$SIZE_T*17`($sp) 342 $PUSH r16,`$FRAME-$SIZE_T*16`($sp) 343 $PUSH r17,`$FRAME-$SIZE_T*15`($sp) 344 $PUSH r18,`$FRAME-$SIZE_T*14`($sp) 345 $PUSH r19,`$FRAME-$SIZE_T*13`($sp) 346 $PUSH r20,`$FRAME-$SIZE_T*12`($sp) 347 $PUSH r21,`$FRAME-$SIZE_T*11`($sp) 348 $PUSH r22,`$FRAME-$SIZE_T*10`($sp) 349 $PUSH r23,`$FRAME-$SIZE_T*9`($sp) 350 $PUSH r24,`$FRAME-$SIZE_T*8`($sp) 351 $PUSH r25,`$FRAME-$SIZE_T*7`($sp) 352 $PUSH r26,`$FRAME-$SIZE_T*6`($sp) 353 $PUSH r27,`$FRAME-$SIZE_T*5`($sp) 354 $PUSH r28,`$FRAME-$SIZE_T*4`($sp) 355 $PUSH r29,`$FRAME-$SIZE_T*3`($sp) 356 $PUSH r30,`$FRAME-$SIZE_T*2`($sp) 357 $PUSH r31,`$FRAME-$SIZE_T*1`($sp) 358 $PUSH r0,`$FRAME+$LRSAVE`($sp) 359 360 andi. $t0,$inp,3 361 andi. $t1,$out,3 362 or. $t0,$t0,$t1 363 bne Lenc_unaligned 364 365Lenc_unaligned_ok: 366___ 367$code.=<<___ if (!$LITTLE_ENDIAN); 368 lwz $s0,0($inp) 369 lwz $s1,4($inp) 370 lwz $s2,8($inp) 371 lwz $s3,12($inp) 372___ 373$code.=<<___ if ($LITTLE_ENDIAN); 374 lwz $t0,0($inp) 375 lwz $t1,4($inp) 376 lwz $t2,8($inp) 377 lwz $t3,12($inp) 378 rotlwi $s0,$t0,8 379 rotlwi $s1,$t1,8 380 rotlwi $s2,$t2,8 381 rotlwi $s3,$t3,8 382 rlwimi $s0,$t0,24,0,7 383 rlwimi $s1,$t1,24,0,7 384 rlwimi $s2,$t2,24,0,7 385 rlwimi $s3,$t3,24,0,7 386 rlwimi $s0,$t0,24,16,23 387 rlwimi $s1,$t1,24,16,23 388 rlwimi $s2,$t2,24,16,23 389 rlwimi $s3,$t3,24,16,23 390___ 391$code.=<<___; 392 bl LAES_Te 393 bl Lppc_AES_encrypt_compact 394 $POP $out,`$FRAME-$SIZE_T*19`($sp) 395___ 396$code.=<<___ if ($LITTLE_ENDIAN); 397 rotlwi $t0,$s0,8 398 rotlwi $t1,$s1,8 399 rotlwi $t2,$s2,8 400 rotlwi $t3,$s3,8 401 rlwimi $t0,$s0,24,0,7 402 rlwimi $t1,$s1,24,0,7 403 rlwimi $t2,$s2,24,0,7 404 rlwimi $t3,$s3,24,0,7 405 rlwimi $t0,$s0,24,16,23 406 rlwimi $t1,$s1,24,16,23 407 rlwimi $t2,$s2,24,16,23 408 rlwimi $t3,$s3,24,16,23 409 stw $t0,0($out) 410 stw $t1,4($out) 411 stw $t2,8($out) 412 stw $t3,12($out) 413___ 414$code.=<<___ if (!$LITTLE_ENDIAN); 415 stw $s0,0($out) 416 stw $s1,4($out) 417 stw $s2,8($out) 418 stw $s3,12($out) 419___ 420$code.=<<___; 421 b Lenc_done 422 423Lenc_unaligned: 424 subfic $t0,$inp,4096 425 subfic $t1,$out,4096 426 andi. $t0,$t0,4096-16 427 beq Lenc_xpage 428 andi. $t1,$t1,4096-16 429 bne Lenc_unaligned_ok 430 431Lenc_xpage: 432 lbz $acc00,0($inp) 433 lbz $acc01,1($inp) 434 lbz $acc02,2($inp) 435 lbz $s0,3($inp) 436 lbz $acc04,4($inp) 437 lbz $acc05,5($inp) 438 lbz $acc06,6($inp) 439 lbz $s1,7($inp) 440 lbz $acc08,8($inp) 441 lbz $acc09,9($inp) 442 lbz $acc10,10($inp) 443 insrwi $s0,$acc00,8,0 444 lbz $s2,11($inp) 445 insrwi $s1,$acc04,8,0 446 lbz $acc12,12($inp) 447 insrwi $s0,$acc01,8,8 448 lbz $acc13,13($inp) 449 insrwi $s1,$acc05,8,8 450 lbz $acc14,14($inp) 451 insrwi $s0,$acc02,8,16 452 lbz $s3,15($inp) 453 insrwi $s1,$acc06,8,16 454 insrwi $s2,$acc08,8,0 455 insrwi $s3,$acc12,8,0 456 insrwi $s2,$acc09,8,8 457 insrwi $s3,$acc13,8,8 458 insrwi $s2,$acc10,8,16 459 insrwi $s3,$acc14,8,16 460 461 bl LAES_Te 462 bl Lppc_AES_encrypt_compact 463 $POP $out,`$FRAME-$SIZE_T*19`($sp) 464 465 extrwi $acc00,$s0,8,0 466 extrwi $acc01,$s0,8,8 467 stb $acc00,0($out) 468 extrwi $acc02,$s0,8,16 469 stb $acc01,1($out) 470 stb $acc02,2($out) 471 extrwi $acc04,$s1,8,0 472 stb $s0,3($out) 473 extrwi $acc05,$s1,8,8 474 stb $acc04,4($out) 475 extrwi $acc06,$s1,8,16 476 stb $acc05,5($out) 477 stb $acc06,6($out) 478 extrwi $acc08,$s2,8,0 479 stb $s1,7($out) 480 extrwi $acc09,$s2,8,8 481 stb $acc08,8($out) 482 extrwi $acc10,$s2,8,16 483 stb $acc09,9($out) 484 stb $acc10,10($out) 485 extrwi $acc12,$s3,8,0 486 stb $s2,11($out) 487 extrwi $acc13,$s3,8,8 488 stb $acc12,12($out) 489 extrwi $acc14,$s3,8,16 490 stb $acc13,13($out) 491 stb $acc14,14($out) 492 stb $s3,15($out) 493 494Lenc_done: 495 $POP r0,`$FRAME+$LRSAVE`($sp) 496 $POP r14,`$FRAME-$SIZE_T*18`($sp) 497 $POP r15,`$FRAME-$SIZE_T*17`($sp) 498 $POP r16,`$FRAME-$SIZE_T*16`($sp) 499 $POP r17,`$FRAME-$SIZE_T*15`($sp) 500 $POP r18,`$FRAME-$SIZE_T*14`($sp) 501 $POP r19,`$FRAME-$SIZE_T*13`($sp) 502 $POP r20,`$FRAME-$SIZE_T*12`($sp) 503 $POP r21,`$FRAME-$SIZE_T*11`($sp) 504 $POP r22,`$FRAME-$SIZE_T*10`($sp) 505 $POP r23,`$FRAME-$SIZE_T*9`($sp) 506 $POP r24,`$FRAME-$SIZE_T*8`($sp) 507 $POP r25,`$FRAME-$SIZE_T*7`($sp) 508 $POP r26,`$FRAME-$SIZE_T*6`($sp) 509 $POP r27,`$FRAME-$SIZE_T*5`($sp) 510 $POP r28,`$FRAME-$SIZE_T*4`($sp) 511 $POP r29,`$FRAME-$SIZE_T*3`($sp) 512 $POP r30,`$FRAME-$SIZE_T*2`($sp) 513 $POP r31,`$FRAME-$SIZE_T*1`($sp) 514 mtlr r0 515 addi $sp,$sp,$FRAME 516 blr 517 .long 0 518 .byte 0,12,4,1,0x80,18,3,0 519 .long 0 520 521.align 5 522Lppc_AES_encrypt: 523 lwz $acc00,240($key) 524 addi $Tbl1,$Tbl0,3 525 lwz $t0,0($key) 526 addi $Tbl2,$Tbl0,2 527 lwz $t1,4($key) 528 addi $Tbl3,$Tbl0,1 529 lwz $t2,8($key) 530 addi $acc00,$acc00,-1 531 lwz $t3,12($key) 532 addi $key,$key,16 533 xor $s0,$s0,$t0 534 xor $s1,$s1,$t1 535 xor $s2,$s2,$t2 536 xor $s3,$s3,$t3 537 mtctr $acc00 538.align 4 539Lenc_loop: 540 rlwinm $acc00,$s0,`32-24+3`,21,28 541 rlwinm $acc01,$s1,`32-24+3`,21,28 542 rlwinm $acc02,$s2,`32-24+3`,21,28 543 rlwinm $acc03,$s3,`32-24+3`,21,28 544 lwz $t0,0($key) 545 rlwinm $acc04,$s1,`32-16+3`,21,28 546 lwz $t1,4($key) 547 rlwinm $acc05,$s2,`32-16+3`,21,28 548 lwz $t2,8($key) 549 rlwinm $acc06,$s3,`32-16+3`,21,28 550 lwz $t3,12($key) 551 rlwinm $acc07,$s0,`32-16+3`,21,28 552 lwzx $acc00,$Tbl0,$acc00 553 rlwinm $acc08,$s2,`32-8+3`,21,28 554 lwzx $acc01,$Tbl0,$acc01 555 rlwinm $acc09,$s3,`32-8+3`,21,28 556 lwzx $acc02,$Tbl0,$acc02 557 rlwinm $acc10,$s0,`32-8+3`,21,28 558 lwzx $acc03,$Tbl0,$acc03 559 rlwinm $acc11,$s1,`32-8+3`,21,28 560 lwzx $acc04,$Tbl1,$acc04 561 rlwinm $acc12,$s3,`0+3`,21,28 562 lwzx $acc05,$Tbl1,$acc05 563 rlwinm $acc13,$s0,`0+3`,21,28 564 lwzx $acc06,$Tbl1,$acc06 565 rlwinm $acc14,$s1,`0+3`,21,28 566 lwzx $acc07,$Tbl1,$acc07 567 rlwinm $acc15,$s2,`0+3`,21,28 568 lwzx $acc08,$Tbl2,$acc08 569 xor $t0,$t0,$acc00 570 lwzx $acc09,$Tbl2,$acc09 571 xor $t1,$t1,$acc01 572 lwzx $acc10,$Tbl2,$acc10 573 xor $t2,$t2,$acc02 574 lwzx $acc11,$Tbl2,$acc11 575 xor $t3,$t3,$acc03 576 lwzx $acc12,$Tbl3,$acc12 577 xor $t0,$t0,$acc04 578 lwzx $acc13,$Tbl3,$acc13 579 xor $t1,$t1,$acc05 580 lwzx $acc14,$Tbl3,$acc14 581 xor $t2,$t2,$acc06 582 lwzx $acc15,$Tbl3,$acc15 583 xor $t3,$t3,$acc07 584 xor $t0,$t0,$acc08 585 xor $t1,$t1,$acc09 586 xor $t2,$t2,$acc10 587 xor $t3,$t3,$acc11 588 xor $s0,$t0,$acc12 589 xor $s1,$t1,$acc13 590 xor $s2,$t2,$acc14 591 xor $s3,$t3,$acc15 592 addi $key,$key,16 593 bdnz Lenc_loop 594 595 addi $Tbl2,$Tbl0,2048 596 nop 597 lwz $t0,0($key) 598 rlwinm $acc00,$s0,`32-24`,24,31 599 lwz $t1,4($key) 600 rlwinm $acc01,$s1,`32-24`,24,31 601 lwz $t2,8($key) 602 rlwinm $acc02,$s2,`32-24`,24,31 603 lwz $t3,12($key) 604 rlwinm $acc03,$s3,`32-24`,24,31 605 lwz $acc08,`2048+0`($Tbl0) ! prefetch Te4 606 rlwinm $acc04,$s1,`32-16`,24,31 607 lwz $acc09,`2048+32`($Tbl0) 608 rlwinm $acc05,$s2,`32-16`,24,31 609 lwz $acc10,`2048+64`($Tbl0) 610 rlwinm $acc06,$s3,`32-16`,24,31 611 lwz $acc11,`2048+96`($Tbl0) 612 rlwinm $acc07,$s0,`32-16`,24,31 613 lwz $acc12,`2048+128`($Tbl0) 614 rlwinm $acc08,$s2,`32-8`,24,31 615 lwz $acc13,`2048+160`($Tbl0) 616 rlwinm $acc09,$s3,`32-8`,24,31 617 lwz $acc14,`2048+192`($Tbl0) 618 rlwinm $acc10,$s0,`32-8`,24,31 619 lwz $acc15,`2048+224`($Tbl0) 620 rlwinm $acc11,$s1,`32-8`,24,31 621 lbzx $acc00,$Tbl2,$acc00 622 rlwinm $acc12,$s3,`0`,24,31 623 lbzx $acc01,$Tbl2,$acc01 624 rlwinm $acc13,$s0,`0`,24,31 625 lbzx $acc02,$Tbl2,$acc02 626 rlwinm $acc14,$s1,`0`,24,31 627 lbzx $acc03,$Tbl2,$acc03 628 rlwinm $acc15,$s2,`0`,24,31 629 lbzx $acc04,$Tbl2,$acc04 630 rlwinm $s0,$acc00,24,0,7 631 lbzx $acc05,$Tbl2,$acc05 632 rlwinm $s1,$acc01,24,0,7 633 lbzx $acc06,$Tbl2,$acc06 634 rlwinm $s2,$acc02,24,0,7 635 lbzx $acc07,$Tbl2,$acc07 636 rlwinm $s3,$acc03,24,0,7 637 lbzx $acc08,$Tbl2,$acc08 638 rlwimi $s0,$acc04,16,8,15 639 lbzx $acc09,$Tbl2,$acc09 640 rlwimi $s1,$acc05,16,8,15 641 lbzx $acc10,$Tbl2,$acc10 642 rlwimi $s2,$acc06,16,8,15 643 lbzx $acc11,$Tbl2,$acc11 644 rlwimi $s3,$acc07,16,8,15 645 lbzx $acc12,$Tbl2,$acc12 646 rlwimi $s0,$acc08,8,16,23 647 lbzx $acc13,$Tbl2,$acc13 648 rlwimi $s1,$acc09,8,16,23 649 lbzx $acc14,$Tbl2,$acc14 650 rlwimi $s2,$acc10,8,16,23 651 lbzx $acc15,$Tbl2,$acc15 652 rlwimi $s3,$acc11,8,16,23 653 or $s0,$s0,$acc12 654 or $s1,$s1,$acc13 655 or $s2,$s2,$acc14 656 or $s3,$s3,$acc15 657 xor $s0,$s0,$t0 658 xor $s1,$s1,$t1 659 xor $s2,$s2,$t2 660 xor $s3,$s3,$t3 661 blr 662 .long 0 663 .byte 0,12,0x14,0,0,0,0,0 664 665.align 4 666Lppc_AES_encrypt_compact: 667 lwz $acc00,240($key) 668 addi $Tbl1,$Tbl0,2048 669 lwz $t0,0($key) 670 lis $mask80,0x8080 671 lwz $t1,4($key) 672 lis $mask1b,0x1b1b 673 lwz $t2,8($key) 674 ori $mask80,$mask80,0x8080 675 lwz $t3,12($key) 676 ori $mask1b,$mask1b,0x1b1b 677 addi $key,$key,16 678 mtctr $acc00 679.align 4 680Lenc_compact_loop: 681 xor $s0,$s0,$t0 682 xor $s1,$s1,$t1 683 rlwinm $acc00,$s0,`32-24`,24,31 684 xor $s2,$s2,$t2 685 rlwinm $acc01,$s1,`32-24`,24,31 686 xor $s3,$s3,$t3 687 rlwinm $acc02,$s2,`32-24`,24,31 688 rlwinm $acc03,$s3,`32-24`,24,31 689 rlwinm $acc04,$s1,`32-16`,24,31 690 rlwinm $acc05,$s2,`32-16`,24,31 691 rlwinm $acc06,$s3,`32-16`,24,31 692 rlwinm $acc07,$s0,`32-16`,24,31 693 lbzx $acc00,$Tbl1,$acc00 694 rlwinm $acc08,$s2,`32-8`,24,31 695 lbzx $acc01,$Tbl1,$acc01 696 rlwinm $acc09,$s3,`32-8`,24,31 697 lbzx $acc02,$Tbl1,$acc02 698 rlwinm $acc10,$s0,`32-8`,24,31 699 lbzx $acc03,$Tbl1,$acc03 700 rlwinm $acc11,$s1,`32-8`,24,31 701 lbzx $acc04,$Tbl1,$acc04 702 rlwinm $acc12,$s3,`0`,24,31 703 lbzx $acc05,$Tbl1,$acc05 704 rlwinm $acc13,$s0,`0`,24,31 705 lbzx $acc06,$Tbl1,$acc06 706 rlwinm $acc14,$s1,`0`,24,31 707 lbzx $acc07,$Tbl1,$acc07 708 rlwinm $acc15,$s2,`0`,24,31 709 lbzx $acc08,$Tbl1,$acc08 710 rlwinm $s0,$acc00,24,0,7 711 lbzx $acc09,$Tbl1,$acc09 712 rlwinm $s1,$acc01,24,0,7 713 lbzx $acc10,$Tbl1,$acc10 714 rlwinm $s2,$acc02,24,0,7 715 lbzx $acc11,$Tbl1,$acc11 716 rlwinm $s3,$acc03,24,0,7 717 lbzx $acc12,$Tbl1,$acc12 718 rlwimi $s0,$acc04,16,8,15 719 lbzx $acc13,$Tbl1,$acc13 720 rlwimi $s1,$acc05,16,8,15 721 lbzx $acc14,$Tbl1,$acc14 722 rlwimi $s2,$acc06,16,8,15 723 lbzx $acc15,$Tbl1,$acc15 724 rlwimi $s3,$acc07,16,8,15 725 rlwimi $s0,$acc08,8,16,23 726 rlwimi $s1,$acc09,8,16,23 727 rlwimi $s2,$acc10,8,16,23 728 rlwimi $s3,$acc11,8,16,23 729 lwz $t0,0($key) 730 or $s0,$s0,$acc12 731 lwz $t1,4($key) 732 or $s1,$s1,$acc13 733 lwz $t2,8($key) 734 or $s2,$s2,$acc14 735 lwz $t3,12($key) 736 or $s3,$s3,$acc15 737 738 addi $key,$key,16 739 bdz Lenc_compact_done 740 741 and $acc00,$s0,$mask80 # r1=r0&0x80808080 742 and $acc01,$s1,$mask80 743 and $acc02,$s2,$mask80 744 and $acc03,$s3,$mask80 745 srwi $acc04,$acc00,7 # r1>>7 746 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f 747 srwi $acc05,$acc01,7 748 andc $acc09,$s1,$mask80 749 srwi $acc06,$acc02,7 750 andc $acc10,$s2,$mask80 751 srwi $acc07,$acc03,7 752 andc $acc11,$s3,$mask80 753 sub $acc00,$acc00,$acc04 # r1-(r1>>7) 754 sub $acc01,$acc01,$acc05 755 sub $acc02,$acc02,$acc06 756 sub $acc03,$acc03,$acc07 757 add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1 758 add $acc09,$acc09,$acc09 759 add $acc10,$acc10,$acc10 760 add $acc11,$acc11,$acc11 761 and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b 762 and $acc01,$acc01,$mask1b 763 and $acc02,$acc02,$mask1b 764 and $acc03,$acc03,$mask1b 765 xor $acc00,$acc00,$acc08 # r2 766 xor $acc01,$acc01,$acc09 767 rotlwi $acc12,$s0,16 # ROTATE(r0,16) 768 xor $acc02,$acc02,$acc10 769 rotlwi $acc13,$s1,16 770 xor $acc03,$acc03,$acc11 771 rotlwi $acc14,$s2,16 772 773 xor $s0,$s0,$acc00 # r0^r2 774 rotlwi $acc15,$s3,16 775 xor $s1,$s1,$acc01 776 rotrwi $s0,$s0,24 # ROTATE(r2^r0,24) 777 xor $s2,$s2,$acc02 778 rotrwi $s1,$s1,24 779 xor $s3,$s3,$acc03 780 rotrwi $s2,$s2,24 781 xor $s0,$s0,$acc00 # ROTATE(r2^r0,24)^r2 782 rotrwi $s3,$s3,24 783 xor $s1,$s1,$acc01 784 xor $s2,$s2,$acc02 785 xor $s3,$s3,$acc03 786 rotlwi $acc08,$acc12,8 # ROTATE(r0,24) 787 xor $s0,$s0,$acc12 # 788 rotlwi $acc09,$acc13,8 789 xor $s1,$s1,$acc13 790 rotlwi $acc10,$acc14,8 791 xor $s2,$s2,$acc14 792 rotlwi $acc11,$acc15,8 793 xor $s3,$s3,$acc15 794 xor $s0,$s0,$acc08 # 795 xor $s1,$s1,$acc09 796 xor $s2,$s2,$acc10 797 xor $s3,$s3,$acc11 798 799 b Lenc_compact_loop 800.align 4 801Lenc_compact_done: 802 xor $s0,$s0,$t0 803 xor $s1,$s1,$t1 804 xor $s2,$s2,$t2 805 xor $s3,$s3,$t3 806 blr 807 .long 0 808 .byte 0,12,0x14,0,0,0,0,0 809.size .AES_encrypt,.-.AES_encrypt 810 811.globl .AES_decrypt 812.align 7 813.AES_decrypt: 814 $STU $sp,-$FRAME($sp) 815 mflr r0 816 817 $PUSH $out,`$FRAME-$SIZE_T*19`($sp) 818 $PUSH r14,`$FRAME-$SIZE_T*18`($sp) 819 $PUSH r15,`$FRAME-$SIZE_T*17`($sp) 820 $PUSH r16,`$FRAME-$SIZE_T*16`($sp) 821 $PUSH r17,`$FRAME-$SIZE_T*15`($sp) 822 $PUSH r18,`$FRAME-$SIZE_T*14`($sp) 823 $PUSH r19,`$FRAME-$SIZE_T*13`($sp) 824 $PUSH r20,`$FRAME-$SIZE_T*12`($sp) 825 $PUSH r21,`$FRAME-$SIZE_T*11`($sp) 826 $PUSH r22,`$FRAME-$SIZE_T*10`($sp) 827 $PUSH r23,`$FRAME-$SIZE_T*9`($sp) 828 $PUSH r24,`$FRAME-$SIZE_T*8`($sp) 829 $PUSH r25,`$FRAME-$SIZE_T*7`($sp) 830 $PUSH r26,`$FRAME-$SIZE_T*6`($sp) 831 $PUSH r27,`$FRAME-$SIZE_T*5`($sp) 832 $PUSH r28,`$FRAME-$SIZE_T*4`($sp) 833 $PUSH r29,`$FRAME-$SIZE_T*3`($sp) 834 $PUSH r30,`$FRAME-$SIZE_T*2`($sp) 835 $PUSH r31,`$FRAME-$SIZE_T*1`($sp) 836 $PUSH r0,`$FRAME+$LRSAVE`($sp) 837 838 andi. $t0,$inp,3 839 andi. $t1,$out,3 840 or. $t0,$t0,$t1 841 bne Ldec_unaligned 842 843Ldec_unaligned_ok: 844___ 845$code.=<<___ if (!$LITTLE_ENDIAN); 846 lwz $s0,0($inp) 847 lwz $s1,4($inp) 848 lwz $s2,8($inp) 849 lwz $s3,12($inp) 850___ 851$code.=<<___ if ($LITTLE_ENDIAN); 852 lwz $t0,0($inp) 853 lwz $t1,4($inp) 854 lwz $t2,8($inp) 855 lwz $t3,12($inp) 856 rotlwi $s0,$t0,8 857 rotlwi $s1,$t1,8 858 rotlwi $s2,$t2,8 859 rotlwi $s3,$t3,8 860 rlwimi $s0,$t0,24,0,7 861 rlwimi $s1,$t1,24,0,7 862 rlwimi $s2,$t2,24,0,7 863 rlwimi $s3,$t3,24,0,7 864 rlwimi $s0,$t0,24,16,23 865 rlwimi $s1,$t1,24,16,23 866 rlwimi $s2,$t2,24,16,23 867 rlwimi $s3,$t3,24,16,23 868___ 869$code.=<<___; 870 bl LAES_Td 871 bl Lppc_AES_decrypt_compact 872 $POP $out,`$FRAME-$SIZE_T*19`($sp) 873___ 874$code.=<<___ if ($LITTLE_ENDIAN); 875 rotlwi $t0,$s0,8 876 rotlwi $t1,$s1,8 877 rotlwi $t2,$s2,8 878 rotlwi $t3,$s3,8 879 rlwimi $t0,$s0,24,0,7 880 rlwimi $t1,$s1,24,0,7 881 rlwimi $t2,$s2,24,0,7 882 rlwimi $t3,$s3,24,0,7 883 rlwimi $t0,$s0,24,16,23 884 rlwimi $t1,$s1,24,16,23 885 rlwimi $t2,$s2,24,16,23 886 rlwimi $t3,$s3,24,16,23 887 stw $t0,0($out) 888 stw $t1,4($out) 889 stw $t2,8($out) 890 stw $t3,12($out) 891___ 892$code.=<<___ if (!$LITTLE_ENDIAN); 893 stw $s0,0($out) 894 stw $s1,4($out) 895 stw $s2,8($out) 896 stw $s3,12($out) 897___ 898$code.=<<___; 899 b Ldec_done 900 901Ldec_unaligned: 902 subfic $t0,$inp,4096 903 subfic $t1,$out,4096 904 andi. $t0,$t0,4096-16 905 beq Ldec_xpage 906 andi. $t1,$t1,4096-16 907 bne Ldec_unaligned_ok 908 909Ldec_xpage: 910 lbz $acc00,0($inp) 911 lbz $acc01,1($inp) 912 lbz $acc02,2($inp) 913 lbz $s0,3($inp) 914 lbz $acc04,4($inp) 915 lbz $acc05,5($inp) 916 lbz $acc06,6($inp) 917 lbz $s1,7($inp) 918 lbz $acc08,8($inp) 919 lbz $acc09,9($inp) 920 lbz $acc10,10($inp) 921 insrwi $s0,$acc00,8,0 922 lbz $s2,11($inp) 923 insrwi $s1,$acc04,8,0 924 lbz $acc12,12($inp) 925 insrwi $s0,$acc01,8,8 926 lbz $acc13,13($inp) 927 insrwi $s1,$acc05,8,8 928 lbz $acc14,14($inp) 929 insrwi $s0,$acc02,8,16 930 lbz $s3,15($inp) 931 insrwi $s1,$acc06,8,16 932 insrwi $s2,$acc08,8,0 933 insrwi $s3,$acc12,8,0 934 insrwi $s2,$acc09,8,8 935 insrwi $s3,$acc13,8,8 936 insrwi $s2,$acc10,8,16 937 insrwi $s3,$acc14,8,16 938 939 bl LAES_Td 940 bl Lppc_AES_decrypt_compact 941 $POP $out,`$FRAME-$SIZE_T*19`($sp) 942 943 extrwi $acc00,$s0,8,0 944 extrwi $acc01,$s0,8,8 945 stb $acc00,0($out) 946 extrwi $acc02,$s0,8,16 947 stb $acc01,1($out) 948 stb $acc02,2($out) 949 extrwi $acc04,$s1,8,0 950 stb $s0,3($out) 951 extrwi $acc05,$s1,8,8 952 stb $acc04,4($out) 953 extrwi $acc06,$s1,8,16 954 stb $acc05,5($out) 955 stb $acc06,6($out) 956 extrwi $acc08,$s2,8,0 957 stb $s1,7($out) 958 extrwi $acc09,$s2,8,8 959 stb $acc08,8($out) 960 extrwi $acc10,$s2,8,16 961 stb $acc09,9($out) 962 stb $acc10,10($out) 963 extrwi $acc12,$s3,8,0 964 stb $s2,11($out) 965 extrwi $acc13,$s3,8,8 966 stb $acc12,12($out) 967 extrwi $acc14,$s3,8,16 968 stb $acc13,13($out) 969 stb $acc14,14($out) 970 stb $s3,15($out) 971 972Ldec_done: 973 $POP r0,`$FRAME+$LRSAVE`($sp) 974 $POP r14,`$FRAME-$SIZE_T*18`($sp) 975 $POP r15,`$FRAME-$SIZE_T*17`($sp) 976 $POP r16,`$FRAME-$SIZE_T*16`($sp) 977 $POP r17,`$FRAME-$SIZE_T*15`($sp) 978 $POP r18,`$FRAME-$SIZE_T*14`($sp) 979 $POP r19,`$FRAME-$SIZE_T*13`($sp) 980 $POP r20,`$FRAME-$SIZE_T*12`($sp) 981 $POP r21,`$FRAME-$SIZE_T*11`($sp) 982 $POP r22,`$FRAME-$SIZE_T*10`($sp) 983 $POP r23,`$FRAME-$SIZE_T*9`($sp) 984 $POP r24,`$FRAME-$SIZE_T*8`($sp) 985 $POP r25,`$FRAME-$SIZE_T*7`($sp) 986 $POP r26,`$FRAME-$SIZE_T*6`($sp) 987 $POP r27,`$FRAME-$SIZE_T*5`($sp) 988 $POP r28,`$FRAME-$SIZE_T*4`($sp) 989 $POP r29,`$FRAME-$SIZE_T*3`($sp) 990 $POP r30,`$FRAME-$SIZE_T*2`($sp) 991 $POP r31,`$FRAME-$SIZE_T*1`($sp) 992 mtlr r0 993 addi $sp,$sp,$FRAME 994 blr 995 .long 0 996 .byte 0,12,4,1,0x80,18,3,0 997 .long 0 998 999.align 5 1000Lppc_AES_decrypt: 1001 lwz $acc00,240($key) 1002 addi $Tbl1,$Tbl0,3 1003 lwz $t0,0($key) 1004 addi $Tbl2,$Tbl0,2 1005 lwz $t1,4($key) 1006 addi $Tbl3,$Tbl0,1 1007 lwz $t2,8($key) 1008 addi $acc00,$acc00,-1 1009 lwz $t3,12($key) 1010 addi $key,$key,16 1011 xor $s0,$s0,$t0 1012 xor $s1,$s1,$t1 1013 xor $s2,$s2,$t2 1014 xor $s3,$s3,$t3 1015 mtctr $acc00 1016.align 4 1017Ldec_loop: 1018 rlwinm $acc00,$s0,`32-24+3`,21,28 1019 rlwinm $acc01,$s1,`32-24+3`,21,28 1020 rlwinm $acc02,$s2,`32-24+3`,21,28 1021 rlwinm $acc03,$s3,`32-24+3`,21,28 1022 lwz $t0,0($key) 1023 rlwinm $acc04,$s3,`32-16+3`,21,28 1024 lwz $t1,4($key) 1025 rlwinm $acc05,$s0,`32-16+3`,21,28 1026 lwz $t2,8($key) 1027 rlwinm $acc06,$s1,`32-16+3`,21,28 1028 lwz $t3,12($key) 1029 rlwinm $acc07,$s2,`32-16+3`,21,28 1030 lwzx $acc00,$Tbl0,$acc00 1031 rlwinm $acc08,$s2,`32-8+3`,21,28 1032 lwzx $acc01,$Tbl0,$acc01 1033 rlwinm $acc09,$s3,`32-8+3`,21,28 1034 lwzx $acc02,$Tbl0,$acc02 1035 rlwinm $acc10,$s0,`32-8+3`,21,28 1036 lwzx $acc03,$Tbl0,$acc03 1037 rlwinm $acc11,$s1,`32-8+3`,21,28 1038 lwzx $acc04,$Tbl1,$acc04 1039 rlwinm $acc12,$s1,`0+3`,21,28 1040 lwzx $acc05,$Tbl1,$acc05 1041 rlwinm $acc13,$s2,`0+3`,21,28 1042 lwzx $acc06,$Tbl1,$acc06 1043 rlwinm $acc14,$s3,`0+3`,21,28 1044 lwzx $acc07,$Tbl1,$acc07 1045 rlwinm $acc15,$s0,`0+3`,21,28 1046 lwzx $acc08,$Tbl2,$acc08 1047 xor $t0,$t0,$acc00 1048 lwzx $acc09,$Tbl2,$acc09 1049 xor $t1,$t1,$acc01 1050 lwzx $acc10,$Tbl2,$acc10 1051 xor $t2,$t2,$acc02 1052 lwzx $acc11,$Tbl2,$acc11 1053 xor $t3,$t3,$acc03 1054 lwzx $acc12,$Tbl3,$acc12 1055 xor $t0,$t0,$acc04 1056 lwzx $acc13,$Tbl3,$acc13 1057 xor $t1,$t1,$acc05 1058 lwzx $acc14,$Tbl3,$acc14 1059 xor $t2,$t2,$acc06 1060 lwzx $acc15,$Tbl3,$acc15 1061 xor $t3,$t3,$acc07 1062 xor $t0,$t0,$acc08 1063 xor $t1,$t1,$acc09 1064 xor $t2,$t2,$acc10 1065 xor $t3,$t3,$acc11 1066 xor $s0,$t0,$acc12 1067 xor $s1,$t1,$acc13 1068 xor $s2,$t2,$acc14 1069 xor $s3,$t3,$acc15 1070 addi $key,$key,16 1071 bdnz Ldec_loop 1072 1073 addi $Tbl2,$Tbl0,2048 1074 nop 1075 lwz $t0,0($key) 1076 rlwinm $acc00,$s0,`32-24`,24,31 1077 lwz $t1,4($key) 1078 rlwinm $acc01,$s1,`32-24`,24,31 1079 lwz $t2,8($key) 1080 rlwinm $acc02,$s2,`32-24`,24,31 1081 lwz $t3,12($key) 1082 rlwinm $acc03,$s3,`32-24`,24,31 1083 lwz $acc08,`2048+0`($Tbl0) ! prefetch Td4 1084 rlwinm $acc04,$s3,`32-16`,24,31 1085 lwz $acc09,`2048+32`($Tbl0) 1086 rlwinm $acc05,$s0,`32-16`,24,31 1087 lwz $acc10,`2048+64`($Tbl0) 1088 lbzx $acc00,$Tbl2,$acc00 1089 lwz $acc11,`2048+96`($Tbl0) 1090 lbzx $acc01,$Tbl2,$acc01 1091 lwz $acc12,`2048+128`($Tbl0) 1092 rlwinm $acc06,$s1,`32-16`,24,31 1093 lwz $acc13,`2048+160`($Tbl0) 1094 rlwinm $acc07,$s2,`32-16`,24,31 1095 lwz $acc14,`2048+192`($Tbl0) 1096 rlwinm $acc08,$s2,`32-8`,24,31 1097 lwz $acc15,`2048+224`($Tbl0) 1098 rlwinm $acc09,$s3,`32-8`,24,31 1099 lbzx $acc02,$Tbl2,$acc02 1100 rlwinm $acc10,$s0,`32-8`,24,31 1101 lbzx $acc03,$Tbl2,$acc03 1102 rlwinm $acc11,$s1,`32-8`,24,31 1103 lbzx $acc04,$Tbl2,$acc04 1104 rlwinm $acc12,$s1,`0`,24,31 1105 lbzx $acc05,$Tbl2,$acc05 1106 rlwinm $acc13,$s2,`0`,24,31 1107 lbzx $acc06,$Tbl2,$acc06 1108 rlwinm $acc14,$s3,`0`,24,31 1109 lbzx $acc07,$Tbl2,$acc07 1110 rlwinm $acc15,$s0,`0`,24,31 1111 lbzx $acc08,$Tbl2,$acc08 1112 rlwinm $s0,$acc00,24,0,7 1113 lbzx $acc09,$Tbl2,$acc09 1114 rlwinm $s1,$acc01,24,0,7 1115 lbzx $acc10,$Tbl2,$acc10 1116 rlwinm $s2,$acc02,24,0,7 1117 lbzx $acc11,$Tbl2,$acc11 1118 rlwinm $s3,$acc03,24,0,7 1119 lbzx $acc12,$Tbl2,$acc12 1120 rlwimi $s0,$acc04,16,8,15 1121 lbzx $acc13,$Tbl2,$acc13 1122 rlwimi $s1,$acc05,16,8,15 1123 lbzx $acc14,$Tbl2,$acc14 1124 rlwimi $s2,$acc06,16,8,15 1125 lbzx $acc15,$Tbl2,$acc15 1126 rlwimi $s3,$acc07,16,8,15 1127 rlwimi $s0,$acc08,8,16,23 1128 rlwimi $s1,$acc09,8,16,23 1129 rlwimi $s2,$acc10,8,16,23 1130 rlwimi $s3,$acc11,8,16,23 1131 or $s0,$s0,$acc12 1132 or $s1,$s1,$acc13 1133 or $s2,$s2,$acc14 1134 or $s3,$s3,$acc15 1135 xor $s0,$s0,$t0 1136 xor $s1,$s1,$t1 1137 xor $s2,$s2,$t2 1138 xor $s3,$s3,$t3 1139 blr 1140 .long 0 1141 .byte 0,12,0x14,0,0,0,0,0 1142 1143.align 4 1144Lppc_AES_decrypt_compact: 1145 lwz $acc00,240($key) 1146 addi $Tbl1,$Tbl0,2048 1147 lwz $t0,0($key) 1148 lis $mask80,0x8080 1149 lwz $t1,4($key) 1150 lis $mask1b,0x1b1b 1151 lwz $t2,8($key) 1152 ori $mask80,$mask80,0x8080 1153 lwz $t3,12($key) 1154 ori $mask1b,$mask1b,0x1b1b 1155 addi $key,$key,16 1156___ 1157$code.=<<___ if ($SIZE_T==8); 1158 insrdi $mask80,$mask80,32,0 1159 insrdi $mask1b,$mask1b,32,0 1160___ 1161$code.=<<___; 1162 mtctr $acc00 1163.align 4 1164Ldec_compact_loop: 1165 xor $s0,$s0,$t0 1166 xor $s1,$s1,$t1 1167 rlwinm $acc00,$s0,`32-24`,24,31 1168 xor $s2,$s2,$t2 1169 rlwinm $acc01,$s1,`32-24`,24,31 1170 xor $s3,$s3,$t3 1171 rlwinm $acc02,$s2,`32-24`,24,31 1172 rlwinm $acc03,$s3,`32-24`,24,31 1173 rlwinm $acc04,$s3,`32-16`,24,31 1174 rlwinm $acc05,$s0,`32-16`,24,31 1175 rlwinm $acc06,$s1,`32-16`,24,31 1176 rlwinm $acc07,$s2,`32-16`,24,31 1177 lbzx $acc00,$Tbl1,$acc00 1178 rlwinm $acc08,$s2,`32-8`,24,31 1179 lbzx $acc01,$Tbl1,$acc01 1180 rlwinm $acc09,$s3,`32-8`,24,31 1181 lbzx $acc02,$Tbl1,$acc02 1182 rlwinm $acc10,$s0,`32-8`,24,31 1183 lbzx $acc03,$Tbl1,$acc03 1184 rlwinm $acc11,$s1,`32-8`,24,31 1185 lbzx $acc04,$Tbl1,$acc04 1186 rlwinm $acc12,$s1,`0`,24,31 1187 lbzx $acc05,$Tbl1,$acc05 1188 rlwinm $acc13,$s2,`0`,24,31 1189 lbzx $acc06,$Tbl1,$acc06 1190 rlwinm $acc14,$s3,`0`,24,31 1191 lbzx $acc07,$Tbl1,$acc07 1192 rlwinm $acc15,$s0,`0`,24,31 1193 lbzx $acc08,$Tbl1,$acc08 1194 rlwinm $s0,$acc00,24,0,7 1195 lbzx $acc09,$Tbl1,$acc09 1196 rlwinm $s1,$acc01,24,0,7 1197 lbzx $acc10,$Tbl1,$acc10 1198 rlwinm $s2,$acc02,24,0,7 1199 lbzx $acc11,$Tbl1,$acc11 1200 rlwinm $s3,$acc03,24,0,7 1201 lbzx $acc12,$Tbl1,$acc12 1202 rlwimi $s0,$acc04,16,8,15 1203 lbzx $acc13,$Tbl1,$acc13 1204 rlwimi $s1,$acc05,16,8,15 1205 lbzx $acc14,$Tbl1,$acc14 1206 rlwimi $s2,$acc06,16,8,15 1207 lbzx $acc15,$Tbl1,$acc15 1208 rlwimi $s3,$acc07,16,8,15 1209 rlwimi $s0,$acc08,8,16,23 1210 rlwimi $s1,$acc09,8,16,23 1211 rlwimi $s2,$acc10,8,16,23 1212 rlwimi $s3,$acc11,8,16,23 1213 lwz $t0,0($key) 1214 or $s0,$s0,$acc12 1215 lwz $t1,4($key) 1216 or $s1,$s1,$acc13 1217 lwz $t2,8($key) 1218 or $s2,$s2,$acc14 1219 lwz $t3,12($key) 1220 or $s3,$s3,$acc15 1221 1222 addi $key,$key,16 1223 bdz Ldec_compact_done 1224___ 1225$code.=<<___ if ($SIZE_T==8); 1226 # vectorized permutation improves decrypt performance by 10% 1227 insrdi $s0,$s1,32,0 1228 insrdi $s2,$s3,32,0 1229 1230 and $acc00,$s0,$mask80 # r1=r0&0x80808080 1231 and $acc02,$s2,$mask80 1232 srdi $acc04,$acc00,7 # r1>>7 1233 srdi $acc06,$acc02,7 1234 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f 1235 andc $acc10,$s2,$mask80 1236 sub $acc00,$acc00,$acc04 # r1-(r1>>7) 1237 sub $acc02,$acc02,$acc06 1238 add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1 1239 add $acc10,$acc10,$acc10 1240 and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b 1241 and $acc02,$acc02,$mask1b 1242 xor $acc00,$acc00,$acc08 # r2 1243 xor $acc02,$acc02,$acc10 1244 1245 and $acc04,$acc00,$mask80 # r1=r2&0x80808080 1246 and $acc06,$acc02,$mask80 1247 srdi $acc08,$acc04,7 # r1>>7 1248 srdi $acc10,$acc06,7 1249 andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f 1250 andc $acc14,$acc02,$mask80 1251 sub $acc04,$acc04,$acc08 # r1-(r1>>7) 1252 sub $acc06,$acc06,$acc10 1253 add $acc12,$acc12,$acc12 # (r2&0x7f7f7f7f)<<1 1254 add $acc14,$acc14,$acc14 1255 and $acc04,$acc04,$mask1b # (r1-(r1>>7))&0x1b1b1b1b 1256 and $acc06,$acc06,$mask1b 1257 xor $acc04,$acc04,$acc12 # r4 1258 xor $acc06,$acc06,$acc14 1259 1260 and $acc08,$acc04,$mask80 # r1=r4&0x80808080 1261 and $acc10,$acc06,$mask80 1262 srdi $acc12,$acc08,7 # r1>>7 1263 srdi $acc14,$acc10,7 1264 sub $acc08,$acc08,$acc12 # r1-(r1>>7) 1265 sub $acc10,$acc10,$acc14 1266 andc $acc12,$acc04,$mask80 # r4&0x7f7f7f7f 1267 andc $acc14,$acc06,$mask80 1268 add $acc12,$acc12,$acc12 # (r4&0x7f7f7f7f)<<1 1269 add $acc14,$acc14,$acc14 1270 and $acc08,$acc08,$mask1b # (r1-(r1>>7))&0x1b1b1b1b 1271 and $acc10,$acc10,$mask1b 1272 xor $acc08,$acc08,$acc12 # r8 1273 xor $acc10,$acc10,$acc14 1274 1275 xor $acc00,$acc00,$s0 # r2^r0 1276 xor $acc02,$acc02,$s2 1277 xor $acc04,$acc04,$s0 # r4^r0 1278 xor $acc06,$acc06,$s2 1279 1280 extrdi $acc01,$acc00,32,0 1281 extrdi $acc03,$acc02,32,0 1282 extrdi $acc05,$acc04,32,0 1283 extrdi $acc07,$acc06,32,0 1284 extrdi $acc09,$acc08,32,0 1285 extrdi $acc11,$acc10,32,0 1286___ 1287$code.=<<___ if ($SIZE_T==4); 1288 and $acc00,$s0,$mask80 # r1=r0&0x80808080 1289 and $acc01,$s1,$mask80 1290 and $acc02,$s2,$mask80 1291 and $acc03,$s3,$mask80 1292 srwi $acc04,$acc00,7 # r1>>7 1293 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f 1294 srwi $acc05,$acc01,7 1295 andc $acc09,$s1,$mask80 1296 srwi $acc06,$acc02,7 1297 andc $acc10,$s2,$mask80 1298 srwi $acc07,$acc03,7 1299 andc $acc11,$s3,$mask80 1300 sub $acc00,$acc00,$acc04 # r1-(r1>>7) 1301 sub $acc01,$acc01,$acc05 1302 sub $acc02,$acc02,$acc06 1303 sub $acc03,$acc03,$acc07 1304 add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1 1305 add $acc09,$acc09,$acc09 1306 add $acc10,$acc10,$acc10 1307 add $acc11,$acc11,$acc11 1308 and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b 1309 and $acc01,$acc01,$mask1b 1310 and $acc02,$acc02,$mask1b 1311 and $acc03,$acc03,$mask1b 1312 xor $acc00,$acc00,$acc08 # r2 1313 xor $acc01,$acc01,$acc09 1314 xor $acc02,$acc02,$acc10 1315 xor $acc03,$acc03,$acc11 1316 1317 and $acc04,$acc00,$mask80 # r1=r2&0x80808080 1318 and $acc05,$acc01,$mask80 1319 and $acc06,$acc02,$mask80 1320 and $acc07,$acc03,$mask80 1321 srwi $acc08,$acc04,7 # r1>>7 1322 andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f 1323 srwi $acc09,$acc05,7 1324 andc $acc13,$acc01,$mask80 1325 srwi $acc10,$acc06,7 1326 andc $acc14,$acc02,$mask80 1327 srwi $acc11,$acc07,7 1328 andc $acc15,$acc03,$mask80 1329 sub $acc04,$acc04,$acc08 # r1-(r1>>7) 1330 sub $acc05,$acc05,$acc09 1331 sub $acc06,$acc06,$acc10 1332 sub $acc07,$acc07,$acc11 1333 add $acc12,$acc12,$acc12 # (r2&0x7f7f7f7f)<<1 1334 add $acc13,$acc13,$acc13 1335 add $acc14,$acc14,$acc14 1336 add $acc15,$acc15,$acc15 1337 and $acc04,$acc04,$mask1b # (r1-(r1>>7))&0x1b1b1b1b 1338 and $acc05,$acc05,$mask1b 1339 and $acc06,$acc06,$mask1b 1340 and $acc07,$acc07,$mask1b 1341 xor $acc04,$acc04,$acc12 # r4 1342 xor $acc05,$acc05,$acc13 1343 xor $acc06,$acc06,$acc14 1344 xor $acc07,$acc07,$acc15 1345 1346 and $acc08,$acc04,$mask80 # r1=r4&0x80808080 1347 and $acc09,$acc05,$mask80 1348 srwi $acc12,$acc08,7 # r1>>7 1349 and $acc10,$acc06,$mask80 1350 srwi $acc13,$acc09,7 1351 and $acc11,$acc07,$mask80 1352 srwi $acc14,$acc10,7 1353 sub $acc08,$acc08,$acc12 # r1-(r1>>7) 1354 srwi $acc15,$acc11,7 1355 sub $acc09,$acc09,$acc13 1356 sub $acc10,$acc10,$acc14 1357 sub $acc11,$acc11,$acc15 1358 andc $acc12,$acc04,$mask80 # r4&0x7f7f7f7f 1359 andc $acc13,$acc05,$mask80 1360 andc $acc14,$acc06,$mask80 1361 andc $acc15,$acc07,$mask80 1362 add $acc12,$acc12,$acc12 # (r4&0x7f7f7f7f)<<1 1363 add $acc13,$acc13,$acc13 1364 add $acc14,$acc14,$acc14 1365 add $acc15,$acc15,$acc15 1366 and $acc08,$acc08,$mask1b # (r1-(r1>>7))&0x1b1b1b1b 1367 and $acc09,$acc09,$mask1b 1368 and $acc10,$acc10,$mask1b 1369 and $acc11,$acc11,$mask1b 1370 xor $acc08,$acc08,$acc12 # r8 1371 xor $acc09,$acc09,$acc13 1372 xor $acc10,$acc10,$acc14 1373 xor $acc11,$acc11,$acc15 1374 1375 xor $acc00,$acc00,$s0 # r2^r0 1376 xor $acc01,$acc01,$s1 1377 xor $acc02,$acc02,$s2 1378 xor $acc03,$acc03,$s3 1379 xor $acc04,$acc04,$s0 # r4^r0 1380 xor $acc05,$acc05,$s1 1381 xor $acc06,$acc06,$s2 1382 xor $acc07,$acc07,$s3 1383___ 1384$code.=<<___; 1385 rotrwi $s0,$s0,8 # = ROTATE(r0,8) 1386 rotrwi $s1,$s1,8 1387 xor $s0,$s0,$acc00 # ^= r2^r0 1388 rotrwi $s2,$s2,8 1389 xor $s1,$s1,$acc01 1390 rotrwi $s3,$s3,8 1391 xor $s2,$s2,$acc02 1392 xor $s3,$s3,$acc03 1393 xor $acc00,$acc00,$acc08 1394 xor $acc01,$acc01,$acc09 1395 xor $acc02,$acc02,$acc10 1396 xor $acc03,$acc03,$acc11 1397 xor $s0,$s0,$acc04 # ^= r4^r0 1398 rotrwi $acc00,$acc00,24 1399 xor $s1,$s1,$acc05 1400 rotrwi $acc01,$acc01,24 1401 xor $s2,$s2,$acc06 1402 rotrwi $acc02,$acc02,24 1403 xor $s3,$s3,$acc07 1404 rotrwi $acc03,$acc03,24 1405 xor $acc04,$acc04,$acc08 1406 xor $acc05,$acc05,$acc09 1407 xor $acc06,$acc06,$acc10 1408 xor $acc07,$acc07,$acc11 1409 xor $s0,$s0,$acc08 # ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)] 1410 rotrwi $acc04,$acc04,16 1411 xor $s1,$s1,$acc09 1412 rotrwi $acc05,$acc05,16 1413 xor $s2,$s2,$acc10 1414 rotrwi $acc06,$acc06,16 1415 xor $s3,$s3,$acc11 1416 rotrwi $acc07,$acc07,16 1417 xor $s0,$s0,$acc00 # ^= ROTATE(r8^r2^r0,24) 1418 rotrwi $acc08,$acc08,8 1419 xor $s1,$s1,$acc01 1420 rotrwi $acc09,$acc09,8 1421 xor $s2,$s2,$acc02 1422 rotrwi $acc10,$acc10,8 1423 xor $s3,$s3,$acc03 1424 rotrwi $acc11,$acc11,8 1425 xor $s0,$s0,$acc04 # ^= ROTATE(r8^r4^r0,16) 1426 xor $s1,$s1,$acc05 1427 xor $s2,$s2,$acc06 1428 xor $s3,$s3,$acc07 1429 xor $s0,$s0,$acc08 # ^= ROTATE(r8,8) 1430 xor $s1,$s1,$acc09 1431 xor $s2,$s2,$acc10 1432 xor $s3,$s3,$acc11 1433 1434 b Ldec_compact_loop 1435.align 4 1436Ldec_compact_done: 1437 xor $s0,$s0,$t0 1438 xor $s1,$s1,$t1 1439 xor $s2,$s2,$t2 1440 xor $s3,$s3,$t3 1441 blr 1442 .long 0 1443 .byte 0,12,0x14,0,0,0,0,0 1444.size .AES_decrypt,.-.AES_decrypt 1445 1446.asciz "AES for PPC, CRYPTOGAMS by <appro\@openssl.org>" 1447.align 7 1448___ 1449 1450$code =~ s/\`([^\`]*)\`/eval $1/gem; 1451print $code; 1452close STDOUT; 1453