#! /usr/bin/env perl
# Copyright 2016 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements Poly1305 hash for s390x.
#
# June 2015
#
# ~6.6/2.3 cpb on z10/z196+, >2x improvement over compiler-generated
# code. For older compiler improvement coefficient is >3x, because
# then base 2^64 and base 2^32 implementations are compared.
#
# On side note, z13 enables vector base 2^26 implementation...

# Command line: the first argument is the "flavour"; the first of the
# remaining arguments that looks like a file name (name.ext) is used as
# the output file.  When no such argument is given the generated
# assembly goes to the inherited STDOUT.

use strict;
use warnings;

my $flavour = shift;

# A flavour matching /3[12]/ selects 4-byte pointers and the plain
# (non-"g") forms of the size-dependent instructions; anything else
# selects 8-byte pointers and the "g" forms.
my ($SIZE_T, $g);
if (defined($flavour) && $flavour =~ /3[12]/) {
    $SIZE_T = 4;
    $g      = "";
} else {
    $SIZE_T = 8;
    $g      = "g";
}

# Skip arguments until one looks like a file name.
my $output;
while (($output = shift) && ($output !~ /\w[\w\-]*\.\w+$/)) {}

# Redirect STDOUT only when an output file was actually named, and fail
# loudly if it cannot be created instead of silently writing elsewhere.
if (defined($output) && length($output)) {
    open STDOUT, '>', $output
        or die "can't open $output: $!";
}

my $sp = "%r15";

# Arguments of the generated functions arrive in %r2..%r5.
my ($ctx,$inp,$len,$padbit) = map("%r$_",(2..5));

my $code;

# poly1305_init: zeroes the hash value at offsets 0/8/16 of the context
# and, if the key pointer is non-zero, stores the masked key halves at
# offsets 32/40.  Always returns 0 in %r2.
$code.=<<___;
.text

.globl	poly1305_init
.type	poly1305_init,\@function
.align	16
poly1305_init:
	lghi	%r0,0
	lghi	%r1,-1
	stg	%r0,0($ctx)		# zero hash value
	stg	%r0,8($ctx)
	stg	%r0,16($ctx)

	cl${g}r	$inp,%r0
	je	.Lno_key

	lrvg	%r4,0($inp)		# load little-endian key
	lrvg	%r5,8($inp)

	nihl	%r1,0xffc0		# 0xffffffc0ffffffff
	srlg	%r0,%r1,4		# 0x0ffffffc0fffffff
	srlg	%r1,%r1,4
	nill	%r1,0xfffc		# 0x0ffffffc0ffffffc

	ngr	%r4,%r0
	ngr	%r5,%r1

	stg	%r4,32($ctx)
	stg	%r5,40($ctx)

.Lno_key:
	lghi	%r2,0
	br	%r14
.size	poly1305_init,.-poly1305_init
___
{
# poly1305_blocks: processes $len/16 16-byte blocks from $inp.
# Working registers are %r6..%r14 (saved and restored around the loop).
# $s1 shares %r2 with $ctx, which is why $ctx is off-loaded to the
# stack before the loop and reloaded afterwards.
my ($d0hi,$d0lo,$d1hi,$d1lo,$t0,$h0,$t1,$h1,$h2) = map("%r$_",(6..14));
my ($r0,$r1,$s1) = map("%r$_",(0..2));

$code.=<<___;
.globl	poly1305_blocks
.type	poly1305_blocks,\@function
.align	16
poly1305_blocks:
	srl${g}	$len,4			# fixed-up in 64-bit build
	lghi	%r0,0
	cl${g}r	$len,%r0
	je	.Lno_data

	stm${g}	%r6,%r14,`6*$SIZE_T`($sp)

	llgfr	$padbit,$padbit		# clear upper half, much needed with
					# non-64-bit ABI
	lg	$r0,32($ctx)		# load key
	lg	$r1,40($ctx)

	lg	$h0,0($ctx)		# load hash value
	lg	$h1,8($ctx)
	lg	$h2,16($ctx)

	st$g	$ctx,`2*$SIZE_T`($sp)	# off-load $ctx
	srlg	$s1,$r1,2
	algr	$s1,$r1			# s1 = r1 + r1>>2
	j	.Loop

.align	16
.Loop:
	lrvg	$d0lo,0($inp)		# load little-endian input
	lrvg	$d1lo,8($inp)
	la	$inp,16($inp)

	algr	$d0lo,$h0		# accumulate input
	alcgr	$d1lo,$h1

	lgr	$h0,$d0lo
	mlgr	$d0hi,$r0		# h0*r0 -> $d0hi:$d0lo
	lgr	$h1,$d1lo
	mlgr	$d1hi,$s1		# h1*5*r1 -> $d1hi:$d1lo

	mlgr	$t0,$r1			# h0*r1 -> $t0:$h0
	mlgr	$t1,$r0			# h1*r0 -> $t1:$h1
	alcgr	$h2,$padbit

	algr	$d0lo,$d1lo
	lgr	$d1lo,$h2
	alcgr	$d0hi,$d1hi
	lghi	$d1hi,0

	algr	$h1,$h0
	alcgr	$t1,$t0

	msgr	$d1lo,$s1		# h2*s1
	msgr	$h2,$r0			# h2*r0

	algr	$h1,$d1lo
	alcgr	$t1,$d1hi		# $d1hi is zero

	algr	$h1,$d0hi
	alcgr	$h2,$t1

	lghi	$h0,-4			# final reduction step
	ngr	$h0,$h2
	srlg	$t0,$h2,2
	algr	$h0,$t0
	lghi	$t1,3
	ngr	$h2,$t1

	algr	$h0,$d0lo
	alcgr	$h1,$d1hi		# $d1hi is still zero
	alcgr	$h2,$d1hi		# $d1hi is still zero

	brct$g	$len,.Loop

	l$g	$ctx,`2*$SIZE_T`($sp)	# restore $ctx

	stg	$h0,0($ctx)		# store hash value
	stg	$h1,8($ctx)
	stg	$h2,16($ctx)

	lm${g}	%r6,%r14,`6*$SIZE_T`($sp)
.Lno_data:
	br	%r14
.size	poly1305_blocks,.-poly1305_blocks
___
}
{
# poly1305_emit: the second and third arguments are the MAC output
# pointer and the nonce pointer.  Uses %r5..%r9 as working registers
# (%r6..%r9 are saved and restored) and reuses $ctx as scratch for the
# second nonce word once the hash value has been loaded.
my ($mac,$nonce)=($inp,$len);
my ($h0,$h1,$h2,$d0,$d1)=map("%r$_",(5..9));

$code.=<<___;
.globl	poly1305_emit
.type	poly1305_emit,\@function
.align	16
poly1305_emit:
	stm${g}	%r6,%r9,`6*$SIZE_T`($sp)

	lg	$h0,0($ctx)
	lg	$h1,8($ctx)
	lg	$h2,16($ctx)

	lghi	%r0,5
	lghi	%r1,0
	lgr	$d0,$h0
	lgr	$d1,$h1

	algr	$h0,%r0			# compare to modulus
	alcgr	$h1,%r1
	alcgr	$h2,%r1

	srlg	$h2,$h2,2		# did it borrow/carry?
	slgr	%r1,$h2			# 0-$h2>>2
	lg	$h2,0($nonce)		# load nonce
	lghi	%r0,-1
	lg	$ctx,8($nonce)
	xgr	%r0,%r1			# ~%r1

	ngr	$h0,%r1
	ngr	$d0,%r0
	ngr	$h1,%r1
	ngr	$d1,%r0
	ogr	$h0,$d0
	rllg	$d0,$h2,32		# flip nonce words
	ogr	$h1,$d1
	rllg	$d1,$ctx,32

	algr	$h0,$d0			# accumulate nonce
	alcgr	$h1,$d1

	strvg	$h0,0($mac)		# write little-endian result
	strvg	$h1,8($mac)

	lm${g}	%r6,%r9,`6*$SIZE_T`($sp)
	br	%r14
.size	poly1305_emit,.-poly1305_emit

.string	"Poly1305 for s390x, CRYPTOGAMS by <appro\@openssl.org>"
___
}

# Evaluate the arithmetic left between backticks (e.g. the stack offsets
# built from $SIZE_T, already interpolated to plain numbers by now).
$code =~ s/\`([^\`]*)\`/eval $1/gem;
# "srl${g} reg,imm" expands to a two-operand "srlg reg,imm" in the 64-bit
# build; rewrite it into the three-operand form "srlg reg,reg,imm".
$code =~ s/\b(srlg\s+)(%r[0-9]+\s*,)\s*([0-9]+)/$1$2$2$3/gm;

print $code;
# Buffered write errors only surface at close time; do not leave a
# truncated assembly file behind without reporting it.
close STDOUT or die "error closing STDOUT: $!";