/*
 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright (c) 2002 Advanced Micro Devices, Inc.
 * 
 * All rights reserved.
 * 
 * Redistribution and  use in source and binary  forms, with or
 * without  modification,  are   permitted  provided  that  the
 * following conditions are met:
 * 
 * + Redistributions  of source  code  must  retain  the  above
 *   copyright  notice,   this  list  of   conditions  and  the
 *   following disclaimer.
 * 
 * + Redistributions  in binary  form must reproduce  the above
 *   copyright  notice,   this  list  of   conditions  and  the
 *   following  disclaimer in  the  documentation and/or  other
 *   materials provided with the distribution.
 * 
 * + Neither the  name of Advanced Micro Devices,  Inc. nor the
 *   names  of  its contributors  may  be  used  to endorse  or
 *   promote  products  derived   from  this  software  without
 *   specific prior written permission.
 * 
 * THIS  SOFTWARE  IS PROVIDED  BY  THE  COPYRIGHT HOLDERS  AND
 * CONTRIBUTORS AS IS AND  ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING,  BUT NOT  LIMITED TO,  THE IMPLIED  WARRANTIES OF
 * MERCHANTABILITY  AND FITNESS  FOR A  PARTICULAR  PURPOSE ARE
 * DISCLAIMED.  IN  NO  EVENT  SHALL  ADVANCED  MICRO  DEVICES,
 * INC.  OR CONTRIBUTORS  BE LIABLE  FOR ANY  DIRECT, INDIRECT,
 * INCIDENTAL,  SPECIAL,  EXEMPLARY,  OR CONSEQUENTIAL  DAMAGES
 * (INCLUDING,  BUT NOT LIMITED  TO, PROCUREMENT  OF SUBSTITUTE
 * GOODS  OR  SERVICES;  LOSS  OF  USE, DATA,  OR  PROFITS;  OR
 * BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON  ANY THEORY OF
 * LIABILITY,  WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
 * (INCLUDING NEGLIGENCE  OR OTHERWISE) ARISING IN  ANY WAY OUT
 * OF THE  USE  OF  THIS  SOFTWARE, EVEN  IF  ADVISED  OF  THE
 * POSSIBILITY OF SUCH DAMAGE.
 * 
 * It is  licensee's responsibility  to comply with  any export
 * regulations applicable in licensee's jurisdiction.
 */

	.ident	"%Z%%M%	%I%	%E% SMI"

	.file	"%M%"

#include "SYS.h"
#include "cache.h"

/*
 * LABEL(s) forms the name of a label private to this routine by prefixing
 * ".strcpy" to s.  The empty comment between the two pieces is the
 * traditional (pre-ANSI) cpp idiom for token pasting: it relies on the
 * preprocessor deleting the comment without inserting white space.
 */
#define LABEL(s) .strcpy/**/s

/*
 * strcpy(dst, src)        -- default build
 * strncpy(dst, src, n)    -- when USE_AS_STRNCPY is defined
 *
 * Arguments arrive in %rdi (dst), %rsi (src) and, for strncpy, %rdx (n).
 *
 * Register roles used by the rest of the routine:
 *   %rdi        dst base (only advanced by the strncpy zero-fill code)
 *   %rsi        src base
 *   %rdx        byte offset applied to both src and dst
 *   %rax        source byte / 8-byte source word being examined
 *   %rcx        constant 0xfefefefefefefeff used by the zero-byte test
 *   %r8, %r10   scratch
 *   %r9         offset threshold at which the copy strategy changes
 *   %r11        strncpy only: number of bytes of n not yet consumed
 */
#ifdef USE_AS_STRNCPY
	ENTRY(strncpy)
#else
	ENTRY(strcpy)                        /* (char *, const char *) */
#endif

#ifdef USE_AS_STRNCPY
	test	%rdx, %rdx		/* (char *, const char *, size_t) */
	mov	%rdx, %r11		/* keep n in %r11; mov leaves the flags from test intact */
	jz	LABEL(exitn)		/* early exit */
#endif

        /* %rdx is reused as the running byte offset, starting at 0 */
        xor     %edx, %edx

/*
 * Copy single bytes until src + %rdx is 8-byte aligned, so that the word
 * loops that follow only issue aligned 8-byte loads from the source.
 * Skipped entirely when src is already aligned.
 */
LABEL(aligntry):
        mov     %rsi, %r8		/* align by source */
        and     $7, %r8			/* %r8 = src mod 8 */
	jz	LABEL(alignafter)	/* already aligned */

LABEL(align):				/* 8-byte align */
        sub     $8, %r8			/* %r8 = -(bytes to copy); counted up to 0 below */

	.p2align 4

LABEL(alignloop):
#ifdef USE_AS_STRNCPY
	dec	%r11			/* consume one byte of n */
	jl	LABEL(exitn)		/* n exhausted: return without padding */
#endif

        mov     (%rsi, %rdx), %al       /* load the next source byte */
        test    %al, %al                /* check if character a NUL */
        mov     %al, (%rdi, %rdx)       /* store it (NUL included); mov keeps the flags */
        jz      LABEL(exit)             /* terminator copied */

        inc     %edx                    /* next offset */
        inc     %r8                     /* one byte closer to alignment */
        jnz     LABEL(alignloop)

	.p2align 4

/*
 * First 64 bytes after alignment: eight straight-line word steps.
 *
 * Each step works on the source word already loaded into %rax:
 *   1. (strncpy) take 8 bytes out of the budget in %r11; if fewer than 8
 *      remain, finish byte-wise in tail.
 *   2. Zero-byte test.  With %rcx = 0xfefefefefefefeff (that is,
 *      -0x0101010101010101), compute
 *          %r8  = %rax + %rcx          (carry-out captured in %r10 as 0 / -1)
 *          %r8  = (%r8 ^ %rax) | %rcx
 *          %r8 -= %r10
 *      The result is 0 exactly when no byte of %rax is zero; otherwise
 *      the word holds the terminator and tail copies it byte by byte.
 *   3. Store the word, load the next source word, advance the offset by 8.
 *
 * The offset is advanced with a 32-bit add; it is at most 7 + 64 here.
 */
LABEL(alignafter):

LABEL(8try):
        mov     $0xfefefefefefefeff, %rcx

LABEL(8):                               /* 8-byte */
        mov     (%rsi, %rdx), %rax      /* prime %rax with the first aligned word */

LABEL(8loop):
        /* word 1 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jl	LABEL(tail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(tail)

        mov     %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %edx

        /* word 2 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jl	LABEL(tail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(tail)

        mov     %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %edx

        /* word 3 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jl	LABEL(tail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(tail)

        mov     %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %edx

        /* word 4 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jl	LABEL(tail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(tail)

        mov     %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %edx

        /* word 5 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jl	LABEL(tail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(tail)

        mov     %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %edx

        /* word 6 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jl	LABEL(tail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(tail)

        mov     %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %edx

        /* word 7 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jl	LABEL(tail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(tail)

        mov     %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %edx

        /* word 8 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jl	LABEL(tail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(tail)

        mov     %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %edx

/*
 * Main loop: 64 bytes (eight word steps) per iteration, plain loads and
 * stores.  Every step is the same budget check / zero-byte test / store /
 * reload sequence used by the preceding straight-line steps, with
 * %rcx still holding 0xfefefefefefefeff.
 *
 * The loop repeats while the offset, sampled just before the last step's
 * increment, is <= the value loaded from .amd64cache1half (unsigned
 * compare).  NOTE(review): that symbol is defined outside this file;
 * presumably half the size of the first-level data cache -- confirm
 * against cache.h.
 */
LABEL(8after):

LABEL(64try):
        mov     _sref_(.amd64cache1half), %r9

LABEL(64):				/* 64-byte */

        .p2align 4

LABEL(64loop):
        /* word 1 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jl	LABEL(tail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(tail)

        mov     %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %edx

        /* word 2 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jl	LABEL(tail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(tail)

        mov     %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %edx

        /* word 3 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jl	LABEL(tail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(tail)

        mov     %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %edx

        /* word 4 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jl	LABEL(tail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(tail)

        mov     %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %edx

        /* word 5 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jl	LABEL(tail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(tail)

        mov     %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %edx

        /* word 6 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jl	LABEL(tail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(tail)

        mov     %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %edx

        /* word 7 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jl	LABEL(tail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(tail)

        mov     %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %edx

        /* word 8 of 8, combined with the loop test */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jl	LABEL(tail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(tail)

        cmp     %r9, %rdx               /* flags consumed by jbe below */

        /* mov and lea leave the flags from cmp untouched */
        mov     %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        lea     8 (%rdx), %rdx

        jbe     LABEL(64loop)

/*
 * Prefetching loop: same 64-bytes-per-iteration word copy as the loop
 * before it, but the last step of every iteration also issues
 * prefetchnta for the addresses 512 + 8 bytes ahead of the current
 * offset in both the destination and the source.
 *
 * The loop repeats while the offset, sampled just before the last step's
 * increment, is < the value loaded from .amd64cache2half (unsigned
 * compare).  NOTE(review): that symbol is defined outside this file;
 * presumably half the size of the second-level cache -- confirm against
 * cache.h.
 */
LABEL(64after):

LABEL(pretry):
        mov     _sref_(.amd64cache2half), %r9

LABEL(pre):                              /* 64-byte prefetch */

        .p2align 4

LABEL(preloop):
        /* word 1 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jl	LABEL(tail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(tail)

        mov     %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %edx

        /* word 2 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jl	LABEL(tail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(tail)

        mov     %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %edx

        /* word 3 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jl	LABEL(tail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(tail)

        mov     %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %edx

        /* word 4 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jl	LABEL(tail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(tail)

        mov     %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %edx

        /* word 5 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jl	LABEL(tail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(tail)

        mov     %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %edx

        /* word 6 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jl	LABEL(tail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(tail)

        mov     %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %edx

        /* word 7 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jl	LABEL(tail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(tail)

        mov     %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %edx

        /* word 8 of 8, combined with the prefetches and the loop test */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jl	LABEL(tail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(tail)

        cmp     %r9, %rdx               /* flags consumed by jb below */

        /* mov, prefetchnta and lea leave the flags from cmp untouched */
        mov     %rax, (%rdi, %rdx)
        prefetchnta 512 + 8 (%rdi, %rdx)	/* 3DNow: use prefetchw */
        mov     8 (%rsi, %rdx), %rax
        prefetchnta 512 + 8 (%rsi, %rdx)	/* 3DNow: use prefetch */
        lea     8 (%rdx), %rdx

        jb	LABEL(preloop)

        .p2align 4

/*
 * Non-temporal loop, entered once the offset has passed the threshold
 * used by the prefetching loop.  64 bytes (eight word steps) per
 * iteration; the destination is written with movnti and the source is
 * prefetched 768 + 8 bytes ahead once per iteration.  The offset is
 * advanced with full 64-bit adds because it is unbounded here.
 *
 * Each step works on the source word already loaded into %rax:
 *   1. (strncpy) take 8 bytes out of the budget in %r11; leave the loop
 *      if fewer than 8 remain.
 *   2. Zero-byte test with %rcx = 0xfefefefefefefeff: %r8 ends up
 *      non-zero exactly when some byte of %rax is zero.
 *   3. movnti the word, load the next source word, advance the offset.
 *
 * The loop has no bottom test; it is left only through the exits in
 * steps 1 and 2.  Both kinds of exit go to NTtail, which issues an
 * mfence before the byte-wise tail runs: movnti stores are weakly
 * ordered, so they have to be fenced before the routine returns.  NTtail
 * falls straight into tail without touching registers, so the strncpy
 * budget fix-up performed there is unaffected.
 */
LABEL(preafter):

LABEL(NTtry):
	mfence

LABEL(NT):				/* 64-byte NT */

        .p2align 4

LABEL(NTloop):
        /* word 1 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jl	LABEL(NTtail)		/* budget short: fence, then byte-wise tail */
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(NTtail)

        movnti  %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %rdx

        /* word 2 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jl	LABEL(NTtail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(NTtail)

        movnti  %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %rdx

        /* word 3 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jl	LABEL(NTtail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(NTtail)

        movnti  %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %rdx

        /* word 4 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jl	LABEL(NTtail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(NTtail)

        movnti  %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %rdx

        /* word 5 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jl	LABEL(NTtail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(NTtail)

        movnti  %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %rdx

        /* word 6 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jl	LABEL(NTtail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(NTtail)

        movnti  %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %rdx

        /* word 7 of 8 */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jl	LABEL(NTtail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(NTtail)

        movnti  %rax, (%rdi, %rdx)
        mov     8 (%rsi, %rdx), %rax
        add     $8, %rdx

        /* word 8 of 8, combined with the source prefetch */
#ifdef USE_AS_STRNCPY
	sub	$8, %r11
	jl	LABEL(NTtail)
#endif

        mov     %rcx, %r8
        add     %rax, %r8
        sbb     %r10, %r10

        xor     %rax, %r8
        or      %rcx, %r8
        sub     %r10, %r8
        jnz     LABEL(NTtail)

        movnti  %rax, (%rdi, %rdx)
	mov     8 (%rsi, %rdx), %rax
	prefetchnta 768 + 8 (%rsi, %rdx)
        add     $8, %rdx

        jmp     LABEL(NTloop)

        .p2align 4

/*
 * NTtail: exit from the non-temporal loop; fence the movnti stores and
 * fall into the common tail.
 */
LABEL(NTtail):
	mfence

        .p2align 4

LABEL(NTafter):

LABEL(tailtry):

/*
 * Byte-wise tail.  On entry %rax holds the source word that could not be
 * stored whole (it contains the terminator, or fewer than 8 bytes of the
 * strncpy budget remain) and %rdx is its offset.  The bytes are peeled
 * off %rax from the low end, two per 16-bit shift, and stored one at a
 * time until a NUL has been written or the budget runs out.
 */
LABEL(tail):                             /* 1-byte tail */
#ifdef USE_AS_STRNCPY
	add	$8, %r11		/* undo the "sub $8" done for the word that was not stored */
#endif

        .p2align 4

LABEL(tailloop):
#ifdef USE_AS_STRNCPY
	dec	%r11
	jl	LABEL(exitn)		/* budget exhausted before a NUL */
#endif

        test    %al, %al
        mov     %al, (%rdi, %rdx)       /* store byte 0; mov keeps the flags from test */
        jz      LABEL(exit)

        inc     %rdx

#ifdef USE_AS_STRNCPY
	dec	%r11
	jl	LABEL(exitn)

	/*
	 * NOTE(review): looks redundant -- the instructions below still read
	 * %ah and %al is replaced by the next shr.  Confirm before removing.
	 */
	mov	%ah, %al
#endif

        test    %ah, %ah
        mov     %ah, (%rdi, %rdx)       /* store byte 1 */
        jz      LABEL(exit)

        inc     %rdx

#ifdef USE_AS_STRNCPY
	dec	%r11
	jl	LABEL(exitn)
#endif

        shr     $16, %rax               /* bring the next two bytes into %al / %ah */

        test    %al, %al
        mov     %al, (%rdi, %rdx)       /* store byte 2 */
        jz      LABEL(exit)

        inc     %rdx

#ifdef USE_AS_STRNCPY
	dec	%r11
	jl	LABEL(exitn)

	/* NOTE(review): same apparently redundant copy as above */
	mov	%ah, %al
#endif

        test    %ah, %ah
        mov     %ah, (%rdi, %rdx)       /* store byte 3 */
        jz      LABEL(exit)

        shr     $16, %rax               /* next two bytes for the following pass */
        inc     %rdx

        jmp     LABEL(tailloop)

        .p2align 4

LABEL(tailafter):

/*
 * Reached immediately after the terminating NUL has been stored at
 * dst + %rdx.
 *
 * strcpy build: nothing to do here; control falls through to the return
 * sequence that follows.
 *
 * strncpy build: %r11 is the number of bytes of n left after the NUL.
 * Those bytes are zero-filled, first in 8-byte units with rep stosq and
 * then one byte at a time, and the function returns from here.
 */
LABEL(exit):
#ifdef USE_AS_STRNCPY
	test	%r11, %r11		/* ZF if nothing is left to pad; consumed by "jz 2f" */
	mov	%r11, %rcx

	/* return value is staged in %r8 because %rdi and %rax are used by the fill */
#ifdef USE_AS_STPCPY
        lea     (%rdi, %rdx), %r8	/* address of the NUL just written */
#else
        mov     %rdi, %r8		/* dst */
#endif

	jz	2f			/* mov/lea above did not disturb the flags from test */

	xor	%eax, %eax		/* bzero () would do too, but usually there are only a handful of bytes left */
	shr	$3, %rcx		/* %rcx = number of whole 8-byte units; ZF if none */
        lea     1 (%rdi, %rdx), %rdi	/* first byte after the NUL; lea keeps the flags from shr */
	jz	1f

	/* NOTE(review): relies on the direction flag being clear on entry -- confirm against the ABI */
	rep	stosq

1:
	mov	%r11d, %ecx
	and	$7, %ecx		/* bytes left over after the 8-byte units */
	jz	2f

        .p2align 4,, 3

3:
	dec	%ecx			/* fill from the highest leftover byte down to offset 0 */
	mov	%al, (%rdi, %rcx)	/* mov keeps the flags from dec */
	jnz	3b

        .p2align 4,, 3

2:
	mov	%r8, %rax
        ret

#endif

        .p2align 4

/*
 * Common return sequence.  Reached by fall-through after the NUL has been
 * copied in the strcpy build, and by branch when the strncpy count is
 * zero or runs out before a NUL is found (no padding is needed then).
 * Returns dst, or dst + %rdx when USE_AS_STPCPY is defined.
 */
LABEL(exitn):
#ifdef USE_AS_STPCPY
        lea     (%rdi, %rdx), %rax
#else
        mov     %rdi, %rax
#endif

        ret

#ifdef USE_AS_STRNCPY
	SET_SIZE(strncpy)
#else
	SET_SIZE(strcpy)                        /* (char *, const char *) */
#endif