Lines Matching +full:a +full:- +full:h

2 # Implement fast SHA-256 with SSSE3 instructions. (x86_64)
11 # This software is available to you under a choice of one of two
21 # - Redistributions of source code must retain the above
25 # - Redistributions in binary form must reproduce the above
32 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
41 # This code is described in an Intel White-Paper:
42 # "Fast SHA-256 Implementations on Intel Architecture Processors"
49 #include <linux/linkage.h>
50 #include <linux/cfi_types.h>
58 # Add reg to mem using reg-mem add and store
87 SHUF_00BA = %xmm10 # shuffle xBxA -> 00BA
88 SHUF_DC00 = %xmm11 # shuffle xDxC -> DC00
100 a = %eax define
105 h = %r11d define
135 # Rotate values of symbols a...h
137 TMP_ = h
138 h = g define
144 b = a
145 a = TMP_ define
149 ## compute s0 four at a time and s1 two at a time
150 ## compute W[-16] + W[-7] 4 at a time
153 ror $(25-11), y0 # y0 = e >> (25-11)
154 mov a, y1 # y1 = a
155 palignr $4, X2, XTMP0 # XTMP0 = W[-7]
156 ror $(22-13), y1 # y1 = a >> (22-13)
157 xor e, y0 # y0 = e ^ (e >> (25-11))
159 ror $(11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
161 xor a, y1 # y1 = a ^ (a >> (22-13))
163 paddd X0, XTMP0 # XTMP0 = W[-7] + W[-16]
164 xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
166 ror $(13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
168 palignr $4, X0, XTMP1 # XTMP1 = W[-15]
169 xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
172 movdqa XTMP1, XTMP2 # XTMP2 = W[-15]
173 ror $2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
176 movdqa XTMP1, XTMP3 # XTMP3 = W[-15]
177 mov a, y0 # y0 = a
178 add y2, h # h = h + S1 + CH + k + w
179 mov a, y2 # y2 = a
180 pslld $(32-7), XTMP1 #
181 or c, y0 # y0 = a|c
182 add h, d # d = d + h + S1 + CH + k + w
183 and c, y2 # y2 = a&c
185 and b, y0 # y0 = (a|c)&b
186 add y1, h # h = h + S1 + CH + k + w + S0
187 por XTMP2, XTMP1 # XTMP1 = W[-15] ror 7
188 or y2, y0 # y0 = MAJ = ((a|c)&b)|(a&c)
189 add y0, h # h = h + S1 + CH + k + w + S0 + MAJ
192 movdqa XTMP3, XTMP2 # XTMP2 = W[-15]
194 mov a, y1 # y1 = a
195 movdqa XTMP3, XTMP4 # XTMP4 = W[-15]
196 ror $(25-11), y0 # y0 = e >> (25-11)
197 xor e, y0 # y0 = e ^ (e >> (25-11))
199 ror $(22-13), y1 # y1 = a >> (22-13)
200 pslld $(32-18), XTMP3 #
201 xor a, y1 # y1 = a ^ (a >> (22-13))
202 ror $(11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
205 ror $(13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
206 xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
210 xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
212 psrld $3, XTMP4 # XTMP4 = W[-15] >> 3
215 ror $2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
216 pxor XTMP2, XTMP1 # XTMP1 = W[-15] ror 7 ^ W[-15] ror 18
217 mov a, y0 # y0 = a
218 add y2, h # h = h + S1 + CH + k + w
219 mov a, y2 # y2 = a
221 or c, y0 # y0 = a|c
222 add h, d # d = d + h + S1 + CH + k + w
223 and c, y2 # y2 = a&c
225 pshufd $0b11111010, X3, XTMP2 # XTMP2 = W[-2] {BBAA}
226 and b, y0 # y0 = (a|c)&b
227 add y1, h # h = h + S1 + CH + k + w + S0
228 paddd XTMP1, XTMP0 # XTMP0 = W[-16] + W[-7] + s0
229 or y2, y0 # y0 = MAJ = ((a|c)&b)|(a&c)
230 add y0, h # h = h + S1 + CH + k + w + S0 + MAJ
233 movdqa XTMP2, XTMP3 # XTMP3 = W[-2] {BBAA}
235 mov a, y1 # y1 = a
236 ror $(25-11), y0 # y0 = e >> (25-11)
237 movdqa XTMP2, XTMP4 # XTMP4 = W[-2] {BBAA}
238 xor e, y0 # y0 = e ^ (e >> (25-11))
239 ror $(22-13), y1 # y1 = a >> (22-13)
241 xor a, y1 # y1 = a ^ (a >> (22-13))
242 ror $(11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
243 psrlq $17, XTMP2 # XTMP2 = W[-2] ror 17 {xBxA}
245 psrlq $19, XTMP3 # XTMP3 = W[-2] ror 19 {xBxA}
246 xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
248 psrld $10, XTMP4 # XTMP4 = W[-2] >> 10 {BBAA}
249 ror $(13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
250 xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
255 ror $2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
258 mov a, y0 # y0 = a
259 add y2, h # h = h + S1 + CH + k + w
260 mov a, y2 # y2 = a
262 or c, y0 # y0 = a|c
263 add h, d # d = d + h + S1 + CH + k + w
264 and c, y2 # y2 = a&c
266 and b, y0 # y0 = (a|c)&b
267 add y1, h # h = h + S1 + CH + k + w + S0
269 pshufd $0b01010000, XTMP0, XTMP2 # XTMP2 = W[-2] {DDCC}
270 or y2, y0 # y0 = MAJ = ((a|c)&b)|(a&c)
271 add y0, h # h = h + S1 + CH + k + w + S0 + MAJ
274 movdqa XTMP2, XTMP3 # XTMP3 = W[-2] {DDCC}
276 ror $(25-11), y0 # y0 = e >> (25-11)
277 mov a, y1 # y1 = a
278 movdqa XTMP2, X0 # X0 = W[-2] {DDCC}
279 ror $(22-13), y1 # y1 = a >> (22-13)
280 xor e, y0 # y0 = e ^ (e >> (25-11))
282 ror $(11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
283 psrlq $17, XTMP2 # XTMP2 = W[-2] ror 17 {xDxC}
284 xor a, y1 # y1 = a ^ (a >> (22-13))
286 psrlq $19, XTMP3 # XTMP3 = W[-2] ror 19 {xDxC}
287 xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
289 ror $(13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
290 psrld $10, X0 # X0 = W[-2] >> 10 {DDCC}
291 xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
295 ror $2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
299 mov a, y0 # y0 = a
300 add y2, h # h = h + S1 + CH + k + w
301 mov a, y2 # y2 = a
303 or c, y0 # y0 = a|c
304 add h, d # d = d + h + S1 + CH + k + w
305 and c, y2 # y2 = a&c
307 and b, y0 # y0 = (a|c)&b
308 add y1, h # h = h + S1 + CH + k + w + S0
309 or y2, y0 # y0 = MAJ = ((a|c)&b)|(a&c)
310 add y0, h # h = h + S1 + CH + k + w + S0 + MAJ
319 ror $(25-11), y0 # y0 = e >> (25-11)
320 mov a, y1 # y1 = a
321 xor e, y0 # y0 = e ^ (e >> (25-11))
322 ror $(22-13), y1 # y1 = a >> (22-13)
324 xor a, y1 # y1 = a ^ (a >> (22-13))
325 ror $(11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
327 xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
328 ror $(13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
330 xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
334 ror $2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
337 mov a, y0 # y0 = a
338 add y2, h # h = h + S1 + CH + k + w
339 mov a, y2 # y2 = a
340 or c, y0 # y0 = a|c
341 add h, d # d = d + h + S1 + CH + k + w
342 and c, y2 # y2 = a&c
343 and b, y0 # y0 = (a|c)&b
344 add y1, h # h = h + S1 + CH + k + w + S0
345 or y2, y0 # y0 = MAJ = ((a|c)&b)|(a&c)
346 add y0, h # h = h + S1 + CH + k + w + S0 + MAJ
377 mov 4*0(CTX), a
384 mov 4*7(CTX), h
451 addm (4*0)(CTX),a
458 addm (4*7)(CTX),h
505 # shuffle xBxA -> 00BA
511 # shuffle xDxC -> DC00