#if defined(lint) || defined(__lint)

#include "arcfour.h"

/* ARGSUSED */
void
arcfour_crypt_asm(ARCFour_key *key, uchar_t *in, uchar_t *out, size_t len)
{}

/* ARGSUSED */
void
arcfour_key_init(ARCFour_key *key, uchar_t *keyval, int keyvallen)
{}

#else
#include <sys/asm_linkage.h>

/*
 * void arcfour_crypt_asm(ARCFour_key *key, uchar_t *in, uchar_t *out,
 *     size_t len)
 *
 * XOR len bytes from in with the ARCFour keystream and store them to out,
 * updating the key schedule as it goes.
 *
 * Arguments on entry:
 *	%rdi	key
 *	%rsi	in
 *	%rdx	out
 *	%rcx	len
 *
 * Key layout as used by this code:
 *	offset 0	i (4 bytes)
 *	offset 4	j (4 bytes)
 *	offset 8	arr[], indexed with a scale of 4 (one byte value per
 *			4-byte element)
 *
 * Working registers after setup:
 *	%rdi		&key->arr[0]
 *	%r8, %r10	index i (the two alternate from round to round)
 *	%r9, %r11	arr[i] for the matching index register
 *	%r12		index j
 *	%r13		scratch: arr[j], then the keystream lookup index
 *	%rax		eight keystream bytes collected by .Lloop8
 *
 * %r12 and %r13 are saved and restored; nothing is returned.
 */
ENTRY_NP(arcfour_crypt_asm)
	or	%rcx,%rcx		# If (len == 0) return
	jne	.Lentry
	ret
.Lentry:
	push	%r12
	push	%r13

	/ Set %rdi to beginning of array, key->arr[0]
	add	$8,%rdi
	/ Get key->i
	movl	-8(%rdi),%r8d
	/ Get key->j
	movl	-4(%rdi),%r12d

	/
	/ Use a 4-byte key schedule element array
	/
	inc	%r8b			# ++i (undone at .Lexit)
	movl	(%rdi,%r8,4),%r9d	# preload arr[i]
	test	$-8,%rcx		# fewer than 8 bytes to process?
	jz	.Lloop1
	jmp	.Lloop8			# jump over the alignment padding

	/
	/ Main loop: eight rounds per pass, one keystream byte per round.
	/ Each round rotates %rax by one byte and loads the new keystream
	/ byte into %al.  Together with the closing rotate this leaves the
	/ eight bytes in memory order, ready for one 8-byte XOR with the input.
	/ The index for the following round is computed early in each round;
	/ the cmove covers the case where that index equals j, whose element
	/ is overwritten by the swap in the same round.
	/
.align	16
.Lloop8:
	/ Round 0
	add	%r9b,%r12b		# j += arr[i]
	mov	%r8,%r10
	movl	(%rdi,%r12,4),%r13d	# arr[j]
	ror	$8,%rax			# redundant in round 0: no bytes collected yet
	inc	%r10b			# index for the next round
	movl	(%rdi,%r10,4),%r11d	# preload arr[] for the next round
	cmp	%r10,%r12
	movl	%r9d,(%rdi,%r12,4)	# arr[j] = old arr[i]
	cmove	%r9,%r11		# next index == j: use the value just stored
	movl	%r13d,(%rdi,%r8,4)	# arr[i] = old arr[j]
	add	%r9b,%r13b		# arr[i] + arr[j]
	movb	(%rdi,%r13,4),%al	# keystream byte

	/ Round 1
	add	%r11b,%r12b
	mov	%r10,%r8
	movl	(%rdi,%r12,4),%r13d
	ror	$8,%rax			# make room for the next keystream byte
	inc	%r8b
	movl	(%rdi,%r8,4),%r9d
	cmp	%r8,%r12
	movl	%r11d,(%rdi,%r12,4)
	cmove	%r11,%r9
	movl	%r13d,(%rdi,%r10,4)
	add	%r11b,%r13b
	movb	(%rdi,%r13,4),%al

	/ Round 2
	add	%r9b,%r12b
	mov	%r8,%r10
	movl	(%rdi,%r12,4),%r13d
	ror	$8,%rax			# make room for the next keystream byte
	inc	%r10b
	movl	(%rdi,%r10,4),%r11d
	cmp	%r10,%r12
	movl	%r9d,(%rdi,%r12,4)
	cmove	%r9,%r11
	movl	%r13d,(%rdi,%r8,4)
	add	%r9b,%r13b
	movb	(%rdi,%r13,4),%al

	/ Round 3
	add	%r11b,%r12b
	mov	%r10,%r8
	movl	(%rdi,%r12,4),%r13d
	ror	$8,%rax			# make room for the next keystream byte
	inc	%r8b
	movl	(%rdi,%r8,4),%r9d
	cmp	%r8,%r12
	movl	%r11d,(%rdi,%r12,4)
	cmove	%r11,%r9
	movl	%r13d,(%rdi,%r10,4)
	add	%r11b,%r13b
	movb	(%rdi,%r13,4),%al

	/ Round 4
	add	%r9b,%r12b
	mov	%r8,%r10
	movl	(%rdi,%r12,4),%r13d
	ror	$8,%rax			# make room for the next keystream byte
	inc	%r10b
	movl	(%rdi,%r10,4),%r11d
	cmp	%r10,%r12
	movl	%r9d,(%rdi,%r12,4)
	cmove	%r9,%r11
	movl	%r13d,(%rdi,%r8,4)
	add	%r9b,%r13b
	movb	(%rdi,%r13,4),%al

	/ Round 5
	add	%r11b,%r12b
	mov	%r10,%r8
	movl	(%rdi,%r12,4),%r13d
	ror	$8,%rax			# make room for the next keystream byte
	inc	%r8b
	movl	(%rdi,%r8,4),%r9d
	cmp	%r8,%r12
	movl	%r11d,(%rdi,%r12,4)
	cmove	%r11,%r9
	movl	%r13d,(%rdi,%r10,4)
	add	%r11b,%r13b
	movb	(%rdi,%r13,4),%al

	/ Round 6
	add	%r9b,%r12b
	mov	%r8,%r10
	movl	(%rdi,%r12,4),%r13d
	ror	$8,%rax			# make room for the next keystream byte
	inc	%r10b
	movl	(%rdi,%r10,4),%r11d
	cmp	%r10,%r12
	movl	%r9d,(%rdi,%r12,4)
	cmove	%r9,%r11
	movl	%r13d,(%rdi,%r8,4)
	add	%r9b,%r13b
	movb	(%rdi,%r13,4),%al

	/ Round 7
	add	%r11b,%r12b
	mov	%r10,%r8
	movl	(%rdi,%r12,4),%r13d
	ror	$8,%rax			# make room for the next keystream byte
	inc	%r8b
	movl	(%rdi,%r8,4),%r9d
	cmp	%r8,%r12
	movl	%r11d,(%rdi,%r12,4)
	cmove	%r11,%r9
	movl	%r13d,(%rdi,%r10,4)
	add	%r11b,%r13b
	movb	(%rdi,%r13,4),%al

	ror	$8,%rax			# final rotate: bytes now in memory order
	sub	$8,%rcx			# len -= 8

	xor	(%rsi),%rax		# keystream ^ 8 input bytes
	add	$8,%rsi
	mov	%rax,(%rdx)		# store 8 output bytes
	add	$8,%rdx

	test	$-8,%rcx		# at least 8 bytes left?
	jnz	.Lloop8
	cmp	$0,%rcx			# 1 to 7 trailing bytes?
	jne	.Lloop1

.Lexit:
	/
	/ Cleanup and exit code
	/
	/ --i to undo ++i done at entry
	sub	$1,%r8b
	/ set key->i
	movl	%r8d,-8(%rdi)
	/ set key->j
	movl	%r12d,-4(%rdi)

	pop	%r13
	pop	%r12
	ret

	/
	/ Byte-at-a-time loop, used when fewer than 8 bytes remain.
	/
.align	16
.Lloop1:
	add	%r9b,%r12b		# j += arr[i]
	movl	(%rdi,%r12,4),%r13d	# arr[j]
	movl	%r9d,(%rdi,%r12,4)	# arr[j] = old arr[i]
	movl	%r13d,(%rdi,%r8,4)	# arr[i] = old arr[j]
	add	%r13b,%r9b		# arr[i] + arr[j]
	inc	%r8b			# ++i
	movl	(%rdi,%r9,4),%r13d	# keystream byte
	movl	(%rdi,%r8,4),%r9d	# preload arr[i] for the next byte
	xorb	(%rsi),%r13b		# keystream ^ input byte
	inc	%rsi
	movb	%r13b,(%rdx)		# store output byte
	inc	%rdx
	dec	%rcx
	jnz	.Lloop1
	jmp	.Lexit
SET_SIZE(arcfour_crypt_asm)


/*
 * void arcfour_key_init(ARCFour_key *key, uchar_t *keyval, int keyvallen)
 *
 * Build the key schedule in key->arr[] from the keyvallen bytes at keyval,
 * then reset key->i and key->j to zero.  The value returned by
 * arcfour_crypt_on_intel() is stored in the key at offset 1032, directly
 * after the 256 4-byte array elements.
 *
 * Arguments on entry:
 *	%rdi	key
 *	%rsi	keyval
 *	%edx	keyvallen
 */
	/ int arcfour_crypt_on_intel(void);
.extern	arcfour_crypt_on_intel

ENTRY_NP(arcfour_key_init)
	/ Find out if this is an Intel processor or something else
	/ (e.g., AMD64).  This sets %eax to 1 for Intel, otherwise 0.
	push	%rdi		/ Save arg1
	push	%rsi		/ Save arg2
	push	%rdx		/ Save arg3
	call	arcfour_crypt_on_intel
	pop	%rdx		/ Restore arg3
	pop	%rsi		/ Restore arg2
	pop	%rdi		/ Restore arg1
	/ Save return value in key->flag (1=Intel, 0=AMD)
	movl	%eax,1032(%rdi)

	/ Set %rdi to beginning of array, key->arr[0]
	lea	8(%rdi),%rdi
	/ keyvallen is an int, so only %edx is defined by the calling
	/ convention; sign-extend it before %rdx is used as a 64-bit value.
	movslq	%edx,%rdx
	lea	(%rsi,%rdx),%rsi	# %rsi = one past the end of keyval
	neg	%rdx			# %rdx = -keyvallen, counts up to 0
	mov	%rdx,%rcx		# keep -keyvallen to restart the key
	xor	%eax,%eax
	xor	%r9,%r9
	xor	%r10,%r10
	xor	%r11,%r11

	/ Use a 4-byte data array
	jmp	.Lw1stloop		# jump over the alignment padding

	/ First loop: arr[n] = n for n = 0..255.
.align	16
.Lw1stloop:
	/ AMD64 (4-byte array)
	mov	%eax,(%rdi,%rax,4)
	add	$1,%al
	jnc	.Lw1stloop		# stop when %al wraps past 255
	xor	%r9,%r9			# array index for the second loop
	xor	%r8,%r8			# running swap index

	/ Second loop: mix the key bytes into arr[] by swapping elements.
.align	16
.Lw2ndloop:
	mov	(%rdi,%r9,4),%r10d	# arr[%r9]
	add	(%rsi,%rdx,1),%r8b	# %r8b += next key byte
	add	%r10b,%r8b		# %r8b += arr[%r9]
	add	$1,%rdx			# advance; sets ZF at the end of the key
	mov	(%rdi,%r8,4),%r11d	# arr[%r8] (mov leaves the flags alone)
	cmovz	%rcx,%rdx		# end of key: start over at its first byte
	mov	%r10d,(%rdi,%r8,4)	# swap arr[%r9] and arr[%r8]
	mov	%r11d,(%rdi,%r9,4)
	add	$1,%r9b
	jnc	.Lw2ndloop		# stop when %r9b wraps past 255

	/ Exit code
	xor	%eax,%eax
	mov	%eax,-8(%rdi)		# key->i = 0
	mov	%eax,-4(%rdi)		# key->j = 0
	ret
SET_SIZE(arcfour_key_init)
.asciz	"RC4 for x86_64, CRYPTOGAMS by "
#endif /* !lint && !__lint */