# xref: /freebsd/crypto/openssl/engines/asm/e_padlock-x86.pl (revision b077aed33b7b6aefca7b17ddb250cf521f938613)
#! /usr/bin/env perl
# Copyright 2011-2023 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# September 2011
#
# Assembler helpers for Padlock engine. Compared to original engine
# version relying on inline assembler and compiled with gcc 3.4.6 it
# was measured to provide ~100% improvement on misaligned data in ECB
# mode and ~75% in CBC mode. For aligned data improvement can be
# observed for short inputs only, e.g. 45% for 64-byte messages in
# ECB mode, 20% in CBC. Difference in performance for aligned vs.
# misaligned data depends on misalignment and is either ~1.8x or 2.9x.
# These are approximately same factors as for hardware support, so
# there is little reason to rely on the latter. On the contrary, it
# might actually hurt performance in mixture of aligned and misaligned
# buffers, because a) if you choose to flip 'align' flag in control
# word on per-buffer basis, then you'd have to reload key context,
# which incurs penalty; b) if you choose to set 'align' flag
# permanently, it limits performance even for aligned data to ~1/2.
# All above mentioned results were collected on 1.5GHz C7. Nano on the
# other hand handles unaligned data more gracefully. Depending on
# algorithm and how unaligned data is, hardware can be up to 70% more
# efficient than below software alignment procedures, nor does 'align'
# flag have effect on aligned performance [if has any meaning at all].
# Therefore suggestion is to unconditionally set 'align' flag on Nano
# for optimal performance.

# Script prologue: locate and load the perlasm x86 back end, redirect
# STDOUT to the output file and set up the names shared by the code below.

# Directory part of this script's path (including the trailing separator);
# x86asm.pl is looked up there and in ../../crypto/perlasm relative to it.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC,"${dir}","${dir}../../crypto/perlasm");
require "x86asm.pl";

# The last command-line argument, if present, names the output file.
# Three-argument open keeps odd characters in the name from being
# interpreted as a mode, and a failed open now aborts instead of silently
# discarding the generated code.
if ($output=pop(@ARGV)) {
	open(STDOUT,">",$output) or die "can't open $output: $!";
}

&asm_init($ARGV[0]);

%PADLOCK_PREFETCH=(ecb=>128, cbc=>64);	# prefetch errata
$PADLOCK_CHUNK=512;	# Must be a power of 2 larger than 16

# Register assignment used by the cipher routines below.
$ctx="edx";
$out="edi";
$inp="esi";
$len="ecx";
$chunk="ebx";

# padlock_capability: probe the processor and return a capability mask
# in eax.
#
# Returns 0 when EFLAGS bit 21 cannot be toggled (cpuid is then never
# executed), when the CPUID vendor string is neither "CentaurHauls" nor
# "  Shanghai  ", or when CPUID leaf 0xC0000000 reports a maximum leaf
# below 0xC0000001.  Otherwise returns edx of CPUID leaf 0xC0000001 with
# bit 4 replaced by the "Nano" indicator computed from leaf 1.
&function_begin_B("padlock_capability");
	&push	("ebx");		# preserved across cpuid, restored at "noluck"
	&pushf	();
	&pop	("eax");
	&mov	("ecx","eax");		# ecx = original EFLAGS
	&xor	("eax",1<<21);		# try to flip bit 21
	&push	("eax");
	&popf	();
	&pushf	();
	&pop	("eax");
	&xor	("ecx","eax");		# ecx = bits that actually changed
	&xor	("eax","eax");		# return value 0 / cpuid leaf 0
	&bt	("ecx",21);
	&jnc	(&label("noluck"));	# bit did not toggle
	&cpuid	();
	&xor	("eax","eax");
	# The literals are byte-reversed so that the resulting little-endian
	# dwords read "Cent" "aurH" "auls".
	&cmp	("ebx","0x".unpack("H*",'tneC'));
	&jne	(&label("zhaoxin"));
	&cmp	("edx","0x".unpack("H*",'Hrua'));
	&jne	(&label("noluck"));
	&cmp	("ecx","0x".unpack("H*",'slua'));
	&jne	(&label("noluck"));
	&jmp	(&label("zhaoxinEnd"));
&set_label("zhaoxin");
	# Alternative vendor string: "  Sh" "angh" "ai  ".
	&cmp	("ebx","0x".unpack("H*",'hS  '));
	&jne	(&label("noluck"));
	&cmp	("edx","0x".unpack("H*",'hgna'));
	&jne	(&label("noluck"));
	&cmp	("ecx","0x".unpack("H*",'  ia'));
	&jne	(&label("noluck"));
&set_label("zhaoxinEnd");
	&mov	("eax",0xC0000000);
	&cpuid	();
	&mov	("edx","eax");		# highest leaf in the 0xC0000000 range
	&xor	("eax","eax");
	&cmp	("edx",0xC0000001);
	&jb	(&label("noluck"));
	&mov	("eax",1);
	&cpuid	();
	&or	("eax",0x0f);		# force the low nibble to 1s before comparing
	&xor	("ebx","ebx");
	&and	("eax",0x0fff);
	&cmp	("eax",0x06ff);		# check for Nano
	&sete	("bl");
	&mov	("eax",0xC0000001);
	&push	("ebx");		# keep the Nano flag across cpuid
	&cpuid	();
	&pop	("ebx");
	&mov	("eax","edx");
	&shl	("ebx",4);		# bit#4 denotes Nano
	&and	("eax",0xffffffef);
	&or	("eax","ebx");
&set_label("noluck");
	&pop	("ebx");
	&ret	();
&function_end_B("padlock_capability");

# padlock_key_bswap(key): byte-swap 32-bit words in place, starting at the
# address passed as the first argument.  The number of words processed is
# 4*(n+1), where n is the dword stored at offset 240 of that buffer.
# NOTE(review): offset 240 is presumably the round count of an AES_KEY --
# confirm against the C caller.
&function_begin_B("padlock_key_bswap");
	&mov	("edx",&wparam(0));
	&mov	("ecx",&DWP(240,"edx"));
	&inc	("ecx");
	&shl	("ecx",2);		# ecx = 4*(n+1) words
&set_label("bswap_loop");
	&mov	("eax",&DWP(0,"edx"));
	&bswap	("eax");
	&mov	(&DWP(0,"edx"),"eax");
	&lea	("edx",&DWP(4,"edx"));	# advance without touching flags
	&sub	("ecx",1);
	&jnz	(&label("bswap_loop"));
	&ret	();
&function_end_B("padlock_key_bswap");

# This is heuristic key context tracing. At first one
# believes that one should use atomic swap instructions,
# but it's not actually necessary. Point is that if
# padlock_saved_context was changed by another thread
# after we've read it and before we compare it with ctx,
# our key *shall* be reloaded upon thread context switch
# and we are therefore set in either case...
&static_label("padlock_saved_context");

# padlock_verify_context(ctx): public wrapper around _padlock_verify_ctx.
# Loads ctx into $ctx (edx) and the address of padlock_saved_context into
# eax, pushes EFLAGS for the helper to inspect, and discards that copy
# again before returning.  On targets other than win32/coff eax holds only
# the offset of the variable from "verify_pic_point"; the helper adds its
# own return address to complete it.
&function_begin_B("padlock_verify_context");
	&mov	($ctx,&wparam(0));
	&lea	("eax",($::win32 or $::coff) ? &DWP(&label("padlock_saved_context")) :
		       &DWP(&label("padlock_saved_context")."-".&label("verify_pic_point")));
	&pushf	();
	&call	("_padlock_verify_ctx");
&set_label("verify_pic_point");
	&lea	("esp",&DWP(4,"esp"));	# drop the pushed EFLAGS
	&ret	();
&function_end_B("padlock_verify_context");

# _padlock_verify_ctx: internal helper with a register-based interface.
#   in:  $ctx (edx)  context pointer to be used next
#        eax         address of padlock_saved_context, or (non-win32/coff)
#                    its offset from the caller's return address
#        4(esp)      EFLAGS image pushed by the caller just before the call
# If bit 30 of that EFLAGS image is set and the saved context differs from
# $ctx, a pushf/popf pair is executed.  In every case $ctx is then recorded
# in padlock_saved_context.
# NOTE(review): the pushf/popf is presumably what forces the key reload
# (compare padlock_reload_key) -- confirm against the PadLock documentation.
&function_begin_B("_padlock_verify_ctx");
	&add	("eax",&DWP(0,"esp")) if(!($::win32 or $::coff));# &padlock_saved_context
	&bt	(&DWP(4,"esp"),30);		# eflags
	&jnc	(&label("verified"));
	&cmp	($ctx,&DWP(0,"eax"));
	&je	(&label("verified"));
	&pushf	();
	&popf	();
&set_label("verified");
	&mov	(&DWP(0,"eax"),$ctx);
	&ret	();
&function_end_B("_padlock_verify_ctx");

# padlock_reload_key(): takes no arguments; executes a pushf/popf pair and
# returns.
# NOTE(review): going by the function name, rewriting EFLAGS this way is
# what makes the hardware reload its key -- confirm against the PadLock
# documentation.
&function_begin_B("padlock_reload_key");
	&pushf	();
	&popf	();
	&ret	();
&function_end_B("padlock_reload_key");

# padlock_aes_block(out, inp, ctx): process exactly one block ($len = 1)
# with "rep xcryptecb".  Both out and inp must be 16-byte aligned.  The
# control word is taken from ctx+16 and the key from ctx+32.  edi, esi and
# ebx are saved and restored; no return value is set up.
&function_begin_B("padlock_aes_block");
	&push	("edi");
	&push	("esi");
	&push	("ebx");
	&mov	($out,&wparam(0));		# must be 16-byte aligned
	&mov	($inp,&wparam(1));		# must be 16-byte aligned
	&mov	($ctx,&wparam(2));
	&mov	($len,1);
	&lea	("ebx",&DWP(32,$ctx));		# key
	&lea	($ctx,&DWP(16,$ctx));		# control word
	&data_byte(0xf3,0x0f,0xa7,0xc8);	# rep xcryptecb
	&pop	("ebx");
	&pop	("esi");
	&pop	("edi");
	&ret	();
&function_end_B("padlock_aes_block");

# generate_mode($mode, $opcode): emit the function padlock_${mode}_encrypt.
#
#   $mode    mode name used in the function name and labels ("ecb", "cbc",
#            "cfb", "ofb" or "ctr32"); it also selects mode-specific code.
#   $opcode  last byte of the "rep xcrypt*" instruction to emit.
#
# Generated function:
#   int padlock_$mode_encrypt(void *out, const void *inp,
#                             struct padlock_cipher_data *ctx, size_t len);
# It returns 1 on success and 0 if ctx or len is not a multiple of 16.
# As used here, ctx+0 holds the iv/counter, ctx+16 the control word and
# ctx+32 the key.
#
# Data flow, as far as it is visible in the code below:
#  - if the control word has the align bit set, or both buffers are
#    16-byte aligned, the data is processed in place ("${mode}_aligned");
#  - otherwise it is processed in pieces of at most $PADLOCK_CHUNK bytes,
#    bounced through a 16-byte aligned scratch area on the stack which is
#    zeroed again before returning ("${mode}_bzero");
#  - for modes listed in %PADLOCK_PREFETCH, input whose end lies within
#    the prefetch distance of a page boundary has its tail copied to the
#    stack first (the "*_tail" labels);
#  - ctr32 builds counter blocks on the stack, encrypts them with the ECB
#    opcode and xors the result with the input.
sub generate_mode {
my ($mode,$opcode) = @_;
# int padlock_$mode_encrypt(void *out, const void *inp,
#		struct padlock_cipher_data *ctx, size_t len);
&function_begin("padlock_${mode}_encrypt");
	&mov	($out,&wparam(0));
	&mov	($inp,&wparam(1));
	&mov	($ctx,&wparam(2));
	&mov	($len,&wparam(3));
	# eax is not otherwise initialised before the abort branches, so clear
	# it to make the abort path return 0 instead of the caller's leftover
	# value.  It is overwritten by the lea below on the normal path.
	&xor	("eax","eax");
	&test	($ctx,15);
	&jnz	(&label("${mode}_abort"));
	&test	($len,15);
	&jnz	(&label("${mode}_abort"));
	&lea	("eax",($::win32 or $::coff) ? &DWP(&label("padlock_saved_context")) :
		       &DWP(&label("padlock_saved_context")."-".&label("${mode}_pic_point")));
	&pushf	();
	&cld	();
	&call	("_padlock_verify_ctx");
&set_label("${mode}_pic_point");
	&lea	($ctx,&DWP(16,$ctx));	# control word
	&xor	("eax","eax");
					if ($mode eq "ctr32") {
	&movq	("mm0",&QWP(-16,$ctx));	# load [upper part of] counter
					} else {
	&xor	("ebx","ebx");
	&test	(&DWP(0,$ctx),1<<5);	# align bit in control word
	&jnz	(&label("${mode}_aligned"));
	&test	($out,0x0f);
	&setz	("al");			# !out_misaligned
	&test	($inp,0x0f);
	&setz	("bl");			# !inp_misaligned
	&test	("eax","ebx");
	&jnz	(&label("${mode}_aligned"));
	&neg	("eax");
					}
	&mov	($chunk,$PADLOCK_CHUNK);
	&not	("eax");		# out_misaligned?-1:0
	&lea	("ebp",&DWP(-24,"esp"));
	&cmp	($len,$chunk);
	&cmovc	($chunk,$len);		# chunk=len>PADLOCK_CHUNK?PADLOCK_CHUNK:len
	&and	("eax",$chunk);		# out_misaligned?chunk:0
	&mov	($chunk,$len);
	&neg	("eax");
	&and	($chunk,$PADLOCK_CHUNK-1);	# chunk=len%PADLOCK_CHUNK
	&lea	("esp",&DWP(0,"eax","ebp"));	# alloca
	&mov	("eax",$PADLOCK_CHUNK);
	&cmovz	($chunk,"eax");			# chunk=chunk?:PADLOCK_CHUNK
	&mov	("eax","ebp");
	&and	("ebp",-16);
	&and	("esp",-16);
	&mov	(&DWP(16,"ebp"),"eax");		# unaligned frame pointer, reloaded at "done"
    if ($PADLOCK_PREFETCH{$mode}) {
	&cmp	($len,$chunk);
	&ja	(&label("${mode}_loop"));
	&mov	("eax",$inp);		# check if prefetch crosses page
	&cmp	("ebp","esp");
	&cmove	("eax",$out);
	&add	("eax",$len);
	&neg	("eax");
	&and	("eax",0xfff);		# distance to page boundary
	&cmp	("eax",$PADLOCK_PREFETCH{$mode});
	&mov	("eax",-$PADLOCK_PREFETCH{$mode});
	&cmovae	("eax",$chunk);		# mask=distance<prefetch?-prefetch:-1
	&and	($chunk,"eax");
	&jz	(&label("${mode}_unaligned_tail"));
    }
	&jmp	(&label("${mode}_loop"));

&set_label("${mode}_loop",16);
	&mov	(&DWP(0,"ebp"),$out);		# save parameters
	&mov	(&DWP(4,"ebp"),$inp);
	&mov	(&DWP(8,"ebp"),$len);
	&mov	($len,$chunk);
	&mov	(&DWP(12,"ebp"),$chunk);	# chunk
						if ($mode eq "ctr32") {
	&mov	("ecx",&DWP(-4,$ctx));
	&xor	($out,$out);
	&mov	("eax",&DWP(-8,$ctx));		# borrow $len
&set_label("${mode}_prepare");
	&mov	(&DWP(12,"esp",$out),"ecx");
	&bswap	("ecx");
	&movq	(&QWP(0,"esp",$out),"mm0");
	&inc	("ecx");
	&mov	(&DWP(8,"esp",$out),"eax");
	&bswap	("ecx");
	&lea	($out,&DWP(16,$out));
	&cmp	($out,$chunk);
	&jb	(&label("${mode}_prepare"));

	&mov	(&DWP(-4,$ctx),"ecx");
	&lea	($inp,&DWP(0,"esp"));
	&lea	($out,&DWP(0,"esp"));
	&mov	($len,$chunk);
						} else {
	&test	($out,0x0f);			# out_misaligned
	&cmovnz	($out,"esp");
	&test	($inp,0x0f);			# inp_misaligned
	&jz	(&label("${mode}_inp_aligned"));
	&shr	($len,2);
	&data_byte(0xf3,0xa5);			# rep movsl
	&sub	($out,$chunk);
	&mov	($len,$chunk);
	&mov	($inp,$out);
&set_label("${mode}_inp_aligned");
						}
	&lea	("eax",&DWP(-16,$ctx));		# ivp
	&lea	("ebx",&DWP(16,$ctx));		# key
	&shr	($len,4);			# len/=AES_BLOCK_SIZE
	&data_byte(0xf3,0x0f,0xa7,$opcode);	# rep xcrypt*
						if ($mode !~ /ecb|ctr/) {
	&movaps	("xmm0",&QWP(0,"eax"));
	&movaps	(&QWP(-16,$ctx),"xmm0");	# copy [or refresh] iv
						}
	&mov	($out,&DWP(0,"ebp"));		# restore parameters
	&mov	($chunk,&DWP(12,"ebp"));
						if ($mode eq "ctr32") {
	&mov	($inp,&DWP(4,"ebp"));
	&xor	($len,$len);
&set_label("${mode}_xor");
	&movups	("xmm1",&QWP(0,$inp,$len));
	&lea	($len,&DWP(16,$len));
	&pxor	("xmm1",&QWP(-16,"esp",$len));
	&movups	(&QWP(-16,$out,$len),"xmm1");
	&cmp	($len,$chunk);
	&jb	(&label("${mode}_xor"));
						} else {
	&test	($out,0x0f);
	&jz	(&label("${mode}_out_aligned"));
	&mov	($len,$chunk);
	&lea	($inp,&DWP(0,"esp"));
	&shr	($len,2);
	&data_byte(0xf3,0xa5);			# rep movsl
	&sub	($out,$chunk);
&set_label("${mode}_out_aligned");
	&mov	($inp,&DWP(4,"ebp"));
						}
	&mov	($len,&DWP(8,"ebp"));
	&add	($out,$chunk);
	&add	($inp,$chunk);
	&sub	($len,$chunk);
	&mov	($chunk,$PADLOCK_CHUNK);	# mov leaves the flags from sub intact
    if (!$PADLOCK_PREFETCH{$mode}) {
	&jnz	(&label("${mode}_loop"));
    } else {
	&jz	(&label("${mode}_break"));
	&cmp	($len,$chunk);
	&jae	(&label("${mode}_loop"));

&set_label("${mode}_unaligned_tail");
	&xor	("eax","eax");
	&cmp	("esp","ebp");
	&cmove	("eax",$len);
	&sub	("esp","eax");			# alloca
	&mov	("eax", $out);			# save parameters
	&mov	($chunk,$len);
	&shr	($len,2);
	&lea	($out,&DWP(0,"esp"));
	&data_byte(0xf3,0xa5);			# rep movsl
	&mov	($inp,"esp");
	&mov	($out,"eax");			# restore parameters
	&mov	($len,$chunk);
	&jmp	(&label("${mode}_loop"));

&set_label("${mode}_break",16);
    }
						if ($mode ne "ctr32") {
	&cmp	("esp","ebp");
	&je	(&label("${mode}_done"));	# no scratch area was allocated
						}
	# Zero the stack scratch area from esp up to ebp.
	&pxor	("xmm0","xmm0");
	&lea	("eax",&DWP(0,"esp"));
&set_label("${mode}_bzero");
	&movaps	(&QWP(0,"eax"),"xmm0");
	&lea	("eax",&DWP(16,"eax"));
	&cmp	("ebp","eax");
	&ja	(&label("${mode}_bzero"));

&set_label("${mode}_done");
	&mov	("ebp",&DWP(16,"ebp"));
	&lea	("esp",&DWP(24,"ebp"));
						if ($mode ne "ctr32") {
	&jmp	(&label("${mode}_exit"));

&set_label("${mode}_aligned",16);
    if ($PADLOCK_PREFETCH{$mode}) {
	&lea	("ebp",&DWP(0,$inp,$len));
	&neg	("ebp");
	&and	("ebp",0xfff);			# distance to page boundary
	&xor	("eax","eax");
	&cmp	("ebp",$PADLOCK_PREFETCH{$mode});
	&mov	("ebp",$PADLOCK_PREFETCH{$mode}-1);
	&cmovae	("ebp","eax");
	&and	("ebp",$len);			# remainder
	&sub	($len,"ebp");
	&jz	(&label("${mode}_aligned_tail"));
    }
	&lea	("eax",&DWP(-16,$ctx));		# ivp
	&lea	("ebx",&DWP(16,$ctx));		# key
	&shr	($len,4);			# len/=AES_BLOCK_SIZE
	&data_byte(0xf3,0x0f,0xa7,$opcode);	# rep xcrypt*
						if ($mode ne "ecb") {
	&movaps	("xmm0",&QWP(0,"eax"));
	&movaps	(&QWP(-16,$ctx),"xmm0");	# copy [or refresh] iv
						}
    if ($PADLOCK_PREFETCH{$mode}) {
	&test	("ebp","ebp");
	&jz	(&label("${mode}_exit"));

&set_label("${mode}_aligned_tail");
	&mov	($len,"ebp");
	&lea	("ebp",&DWP(-24,"esp"));
	&mov	("esp","ebp");
	&mov	("eax","ebp");
	&sub	("esp",$len);
	&and	("ebp",-16);
	&and	("esp",-16);
	&mov	(&DWP(16,"ebp"),"eax");
	&mov	("eax", $out);			# save parameters
	&mov	($chunk,$len);
	&shr	($len,2);
	&lea	($out,&DWP(0,"esp"));
	&data_byte(0xf3,0xa5);			# rep movsl
	&mov	($inp,"esp");
	&mov	($out,"eax");			# restore parameters
	&mov	($len,$chunk);
	&jmp	(&label("${mode}_loop"));
    }
&set_label("${mode}_exit");			}
	&mov	("eax",1);
	&lea	("esp",&DWP(4,"esp"));		# popf
	&emms	()				if ($mode eq "ctr32");
&set_label("${mode}_abort");
&function_end("padlock_${mode}_encrypt");
}

# Instantiate padlock_{ecb,cbc,cfb,ofb,ctr32}_encrypt.  The second argument
# is the final byte of the "rep xcrypt*" encoding used for that mode.
&generate_mode("ecb",0xc8);
&generate_mode("cbc",0xd0);
&generate_mode("cfb",0xe0);
&generate_mode("ofb",0xe8);
&generate_mode("ctr32",0xc8);	# yes, it implements own CTR with ECB opcode,
				# because hardware CTR was introduced later
				# and even has errata on certain C7 stepping.
				# own implementation *always* works, though
				# ~15% slower than dedicated hardware...

# padlock_xstore(arg0, arg1): load edi from the first argument and edx from
# the second, then execute the xstore instruction.  edi is preserved for
# the caller.  eax is not set explicitly here, so the return value is
# whatever xstore leaves in it.
# NOTE(review): presumably arg0 is the destination buffer and arg1 the
# xstore control value -- confirm against the C prototype.
&function_begin_B("padlock_xstore");
	&push	("edi");
	&mov	("edi",&wparam(0));
	&mov	("edx",&wparam(1));
	&data_byte(0x0f,0xa7,0xc0);		# xstore
	&pop	("edi");
	&ret	();
&function_end_B("padlock_xstore");

# _win32_segv_handler: exception handler installed by the *_oneshot SHA
# routines on win32/coff targets.  Returns 1 (ExceptionContinueSearch)
# unless the exception code is 0xC0000005; in that case it adds 4 to the
# dword at offset 184 of the context record, stepping over the 4-byte
# "rep xsha*" instruction, and returns 0 (ExceptionContinueExecution).
# NOTE(review): offset 184 is presumably the saved instruction pointer in
# the 32-bit CONTEXT structure -- confirm.
&function_begin_B("_win32_segv_handler");
	&mov	("eax",1);			# ExceptionContinueSearch
	&mov	("edx",&wparam(0));		# *ExceptionRecord
	&mov	("ecx",&wparam(2));		# *ContextRecord
	&cmp	(&DWP(0,"edx"),0xC0000005);	# ExceptionRecord->ExceptionCode == STATUS_ACCESS_VIOLATION
	&jne	(&label("ret"));
	&add	(&DWP(184,"ecx"),4);		# skip over rep sha*
	&mov	("eax",0);			# ExceptionContinueExecution
&set_label("ret");
	&ret	();
&function_end_B("_win32_segv_handler");
&safeseh("_win32_segv_handler")			if ($::win32);

# padlock_sha1_oneshot(ctx, inp, len): run "rep xsha1" with eax = 0.
# edi, esi and ecx are loaded from the three arguments.  The 20-byte state
# at ctx is copied to a 16-byte aligned scratch area on the stack, the
# instruction operates on that copy, and the result is copied back to ctx.
# On win32/coff an exception frame pointing at _win32_segv_handler is
# linked in at %fs:0 around the instruction and removed afterwards.
&function_begin_B("padlock_sha1_oneshot");
	&push	("edi");
	&push	("esi");
	&xor	("eax","eax");
	&mov	("edi",&wparam(0));
	&mov	("esi",&wparam(1));
	&mov	("ecx",&wparam(2));
    if ($::win32 or $::coff) {
	&push	(&::islabel("_win32_segv_handler"));
	&data_byte(0x64,0xff,0x30);		# push	%fs:(%eax)
	&data_byte(0x64,0x89,0x20);		# mov	%esp,%fs:(%eax)
    }
	&mov	("edx","esp");			# put aside %esp
	&add	("esp",-128);			# 32 is enough but spec says 128
	&movups	("xmm0",&QWP(0,"edi"));		# copy-in context
	&and	("esp",-16);
	&mov	("eax",&DWP(16,"edi"));
	&movaps	(&QWP(0,"esp"),"xmm0");
	&mov	("edi","esp");
	&mov	(&DWP(16,"esp"),"eax");
	&xor	("eax","eax");
	&data_byte(0xf3,0x0f,0xa6,0xc8);	# rep xsha1
	&movaps	("xmm0",&QWP(0,"esp"));
	&mov	("eax",&DWP(16,"esp"));
	&mov	("esp","edx");			# restore %esp
    if ($::win32 or $::coff) {
	&data_byte(0x64,0x8f,0x05,0,0,0,0);	# pop	%fs:0
	&lea	("esp",&DWP(4,"esp"));		# drop the handler address
    }
	&mov	("edi",&wparam(0));
	&movups	(&QWP(0,"edi"),"xmm0");		# copy-out context
	&mov	(&DWP(16,"edi"),"eax");
	&pop	("esi");
	&pop	("edi");
	&ret	();
&function_end_B("padlock_sha1_oneshot");

# padlock_sha1_blocks(ctx, inp, len): run "rep xsha1" with eax = -1.
# edi, esi and ecx are loaded from the three arguments.  The 20-byte state
# at ctx is copied to a 16-byte aligned scratch area on the stack, the
# instruction operates on that copy, and the result is copied back to ctx.
# NOTE(review): the unit of the third argument (blocks vs. bytes) is not
# visible here -- confirm against the C caller.
&function_begin_B("padlock_sha1_blocks");
	&push	("edi");
	&push	("esi");
	&mov	("edi",&wparam(0));
	&mov	("esi",&wparam(1));
	&mov	("edx","esp");			# put aside %esp
	&mov	("ecx",&wparam(2));
	&add	("esp",-128);
	&movups	("xmm0",&QWP(0,"edi"));		# copy-in context
	&and	("esp",-16);
	&mov	("eax",&DWP(16,"edi"));
	&movaps	(&QWP(0,"esp"),"xmm0");
	&mov	("edi","esp");
	&mov	(&DWP(16,"esp"),"eax");
	&mov	("eax",-1);
	&data_byte(0xf3,0x0f,0xa6,0xc8);	# rep xsha1
	&movaps	("xmm0",&QWP(0,"esp"));
	&mov	("eax",&DWP(16,"esp"));
	&mov	("esp","edx");			# restore %esp
	&mov	("edi",&wparam(0));
	&movups	(&QWP(0,"edi"),"xmm0");		# copy-out context
	&mov	(&DWP(16,"edi"),"eax");
	&pop	("esi");
	&pop	("edi");
	&ret	();
&function_end_B("padlock_sha1_blocks");

# padlock_sha256_oneshot(ctx, inp, len): run "rep xsha256" with eax = 0.
# edi, esi and ecx are loaded from the three arguments.  The 32-byte state
# at ctx is copied to a 16-byte aligned scratch area on the stack, the
# instruction operates on that copy, and the result is copied back to ctx.
# On win32/coff an exception frame pointing at _win32_segv_handler is
# linked in at %fs:0 around the instruction and removed afterwards.
&function_begin_B("padlock_sha256_oneshot");
	&push	("edi");
	&push	("esi");
	&xor	("eax","eax");
	&mov	("edi",&wparam(0));
	&mov	("esi",&wparam(1));
	&mov	("ecx",&wparam(2));
    if ($::win32 or $::coff) {
	&push	(&::islabel("_win32_segv_handler"));
	&data_byte(0x64,0xff,0x30);		# push	%fs:(%eax)
	&data_byte(0x64,0x89,0x20);		# mov	%esp,%fs:(%eax)
    }
	&mov	("edx","esp");			# put aside %esp
	&add	("esp",-128);
	&movups	("xmm0",&QWP(0,"edi"));		# copy-in context
	&and	("esp",-16);
	&movups	("xmm1",&QWP(16,"edi"));
	&movaps	(&QWP(0,"esp"),"xmm0");
	&mov	("edi","esp");
	&movaps	(&QWP(16,"esp"),"xmm1");
	&xor	("eax","eax");
	&data_byte(0xf3,0x0f,0xa6,0xd0);	# rep xsha256
	&movaps	("xmm0",&QWP(0,"esp"));
	&movaps	("xmm1",&QWP(16,"esp"));
	&mov	("esp","edx");			# restore %esp
    if ($::win32 or $::coff) {
	&data_byte(0x64,0x8f,0x05,0,0,0,0);	# pop	%fs:0
	&lea	("esp",&DWP(4,"esp"));		# drop the handler address
    }
	&mov	("edi",&wparam(0));
	&movups	(&QWP(0,"edi"),"xmm0");		# copy-out context
	&movups	(&QWP(16,"edi"),"xmm1");
	&pop	("esi");
	&pop	("edi");
	&ret	();
&function_end_B("padlock_sha256_oneshot");

# padlock_sha256_blocks(ctx, inp, len): run "rep xsha256" with eax = -1.
# edi, esi and ecx are loaded from the three arguments.  The 32-byte state
# at ctx is copied to a 16-byte aligned scratch area on the stack, the
# instruction operates on that copy, and the result is copied back to ctx.
# NOTE(review): the unit of the third argument (blocks vs. bytes) is not
# visible here -- confirm against the C caller.
&function_begin_B("padlock_sha256_blocks");
	&push	("edi");
	&push	("esi");
	&mov	("edi",&wparam(0));
	&mov	("esi",&wparam(1));
	&mov	("ecx",&wparam(2));
	&mov	("edx","esp");			# put aside %esp
	&add	("esp",-128);
	&movups	("xmm0",&QWP(0,"edi"));		# copy-in context
	&and	("esp",-16);
	&movups	("xmm1",&QWP(16,"edi"));
	&movaps	(&QWP(0,"esp"),"xmm0");
	&mov	("edi","esp");
	&movaps	(&QWP(16,"esp"),"xmm1");
	&mov	("eax",-1);
	&data_byte(0xf3,0x0f,0xa6,0xd0);	# rep xsha256
	&movaps	("xmm0",&QWP(0,"esp"));
	&movaps	("xmm1",&QWP(16,"esp"));
	&mov	("esp","edx");			# restore %esp
	&mov	("edi",&wparam(0));
	&movups	(&QWP(0,"edi"),"xmm0");		# copy-out context
	&movups	(&QWP(16,"edi"),"xmm1");
	&pop	("esi");
	&pop	("edi");
	&ret	();
&function_end_B("padlock_sha256_blocks");

# padlock_sha512_blocks(ctx, inp, len): run "rep xsha512".
# edi, esi and ecx are loaded from the three arguments.  The 64-byte state
# at ctx is copied to a 16-byte aligned scratch area on the stack, the
# instruction operates on that copy, and the result is copied back to ctx.
# Unlike the SHA-1/SHA-256 routines, eax is not loaded before the
# instruction.
# NOTE(review): the unit of the third argument (blocks vs. bytes) is not
# visible here -- confirm against the C caller.
&function_begin_B("padlock_sha512_blocks");
	&push	("edi");
	&push	("esi");
	&mov	("edi",&wparam(0));
	&mov	("esi",&wparam(1));
	&mov	("ecx",&wparam(2));
	&mov	("edx","esp");			# put aside %esp
	&add	("esp",-128);
	&movups	("xmm0",&QWP(0,"edi"));		# copy-in context
	&and	("esp",-16);
	&movups	("xmm1",&QWP(16,"edi"));
	&movups	("xmm2",&QWP(32,"edi"));
	&movups	("xmm3",&QWP(48,"edi"));
	&movaps	(&QWP(0,"esp"),"xmm0");
	&mov	("edi","esp");
	&movaps	(&QWP(16,"esp"),"xmm1");
	&movaps	(&QWP(32,"esp"),"xmm2");
	&movaps	(&QWP(48,"esp"),"xmm3");
	&data_byte(0xf3,0x0f,0xa6,0xe0);	# rep xsha512
	&movaps	("xmm0",&QWP(0,"esp"));
	&movaps	("xmm1",&QWP(16,"esp"));
	&movaps	("xmm2",&QWP(32,"esp"));
	&movaps	("xmm3",&QWP(48,"esp"));
	&mov	("esp","edx");			# restore %esp
	&mov	("edi",&wparam(0));
	&movups	(&QWP(0,"edi"),"xmm0");		# copy-out context
	&movups	(&QWP(16,"edi"),"xmm1");
	&movups	(&QWP(32,"edi"),"xmm2");
	&movups	(&QWP(48,"edi"),"xmm3");
	&pop	("esi");
	&pop	("edi");
	&ret	();
&function_end_B("padlock_sha512_blocks");

# Identification string, then the data section holding the one shared
# variable used by the key-context tracing in _padlock_verify_ctx.
&asciz	("VIA Padlock x86 module, CRYPTOGAMS by <appro\@openssl.org>");
&align	(16);

&dataseg();
# Essentially this variable belongs in thread local storage.
# Having this variable global on the other hand can only cause
# few bogus key reloads [if any at all on single-CPU system],
# so we accept the penalty...
&set_label("padlock_saved_context",4);
&data_word(0);

&asm_finish();

# Buffered output is flushed on close; report a failure (e.g. a full disk)
# instead of leaving a truncated assembler file behind.
close STDOUT or die "error closing STDOUT: $!";