/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

	.file	"strncat.s"

/
/ strncat(s1, s2, n)
/
/ Concatenates s2 on the end of s1.  s1's space must be large enough.
/ At most n characters are moved.
/ Returns s1.
/
/ Fast assembly language version of the following C-program strncat
/ which represents the `standard' for the C-library.
/
/	char *
/	strncat(char *s1, const char *s2, size_t n)
/	{
/		char	*os1 = s1;
/
/		n++;
/		while (*s1++)
/			;
/		--s1;
/		while (*s1++ = *s2++)
/			if (--n == 0) {
/				s1[-1] = '\0';
/				break;
/			}
/		return (os1);
/	}
/
/ In this assembly language version, the following expression is used
/ to check if a 32-bit word data contains a null byte or not:
/	(((A & 0x7f7f7f7f) + 0x7f7f7f7f) | A) & 0x80808080
/ If the above expression generates a value other than 0x80808080,
/ that means the 32-bit word data contains a null byte.
/
/ The above has been extended for 64-bit support: the masks used below
/ are 0x7f7f7f7f7f7f7f7f (%r8) and its complement 0x8080808080808080 (%r9),
/ and the data is examined one quadword (8 bytes) at a time.
/
/ Register usage in this routine:
/	%rdi	s1 on entry, then the current destination pointer
/	%rsi	s2 on entry, then the current source pointer
/	%rdx	n, the number of bytes that may still be copied
/	%rax	the original s1 (return value)
/	%r8	0x7f7f7f7f7f7f7f7f
/	%r9	0x8080808080808080
/	%r11	quadword or byte currently being examined or copied
/	%rcx	scratch for the null-byte test
/

#include "SYS.h"

	ENTRY(strncat)			/* (char *, char *, size_t) */
	movq	%rdi, %rax		/ save return value
	movabsq	$0x7f7f7f7f7f7f7f7f, %r8	/ %r8 = 0x7f...7f
	movq	%r8, %r9
	notq	%r9			/ %r9 = 0x80...80

	/
	/ Step 1: find the terminating null byte of s1.
	/ Quadwords are scanned while %rdi is 8-byte aligned (.L2);
	/ single bytes are scanned otherwise (.L1).
	/
	testq	$7, %rdi		/ if %rdi not quadword aligned
	jnz	.L1			/ goto .L1
	.align	4
.L2:
	movq	(%rdi), %r11		/ move 1 quadword from (%rdi) to %r11
	movq	%r8, %rcx
	andq	%r11, %rcx		/ %rcx = %r11 & 0x7f...7f
	addq	$8, %rdi		/ next quadword
	addq	%r8, %rcx		/ %rcx += 0x7f...7f
	orq	%r11, %rcx		/ %rcx |= %r11
	andq	%r9, %rcx		/ %rcx &= 0x80...80
	cmpq	%r9, %rcx		/ if no null byte in this quadword
	je	.L2			/ goto .L2
	subq	$8, %rdi		/ undo the post-increment; the null
					/ byte is located bytewise below
.L1:
	cmpb	$0, (%rdi)		/ if a byte in (%rdi) is null
	je	.L3			/ goto .L3
	incq	%rdi			/ next byte
	testq	$7, %rdi		/ if %rdi not quadword aligned
	jnz	.L1			/ goto .L1
	jmp	.L2			/ goto .L2 (%rdi quadword aligned)
	.align	4
.L3:
	/ %rdi points to a null byte in destination string

	/
	/ Step 2: append at most %rdx bytes of s2.
	/ Whole quadwords are copied while %rsi is 8-byte aligned, at
	/ least 8 bytes may still be copied, and the quadword holds no
	/ null byte (.L5); otherwise bytes are copied one at a time.
	/
	testq	$7, %rsi		/ if %rsi not quadword aligned
	jnz	.L4			/ goto .L4
	cmpq	$8, %rdx		/ if number of bytes < 8
	jb	.L7			/ goto .L7
	.align	4
.L5:
	movq	(%rsi), %r11		/ move 1 quadword from (%rsi) to %r11
	movq	%r8, %rcx
	andq	%r11, %rcx		/ %rcx = %r11 & 0x7f...7f
	addq	$8, %rsi		/ next quadword
	addq	%r8, %rcx		/ %rcx += 0x7f...7f
	orq	%r11, %rcx		/ %rcx |= %r11
	andq	%r9, %rcx		/ %rcx &= 0x80...80
	cmpq	%r9, %rcx		/ if null byte in this quadword
	jne	.L6			/ goto .L6
	movq	%r11, (%rdi)		/ copy this quadword to (%rdi)
	subq	$8, %rdx		/ decrement number of bytes by 8
	addq	$8, %rdi		/ next quadword
	cmpq	$8, %rdx		/ if number of bytes >= 8
	jae	.L5			/ goto .L5
	jmp	.L7			/ goto .L7
.L6:
	subq	$8, %rsi		/ undo the post-increment; the quadword
					/ with the null byte is copied bytewise
	.align	4
.L7:
	/ number of bytes < 8 or a null byte found in the quadword
	cmpq	$0, %rdx		/ if number of bytes == 0
	jz	.L8			/ goto .L8 (finished)
	movb	(%rsi), %r11b		/ %r11b = a byte in (%rsi)
	decq	%rdx			/ decrement number of bytes by 1
	movb	%r11b, (%rdi)		/ copy %r11b to (%rdi)
	incq	%rsi			/ next byte
	incq	%rdi			/ next byte
	cmpb	$0, %r11b		/ compare %r11b with a null byte
	je	.L9			/ if %r11b is a null, goto .L9
	jmp	.L7			/ goto .L7
	.align	4

.L4:
	/ %rsi not aligned
	cmpq	$0, %rdx		/ if number of bytes == 0
	jz	.L8			/ goto .L8 (finished)
	movb	(%rsi), %r11b		/ %r11b = a byte in (%rsi)
	decq	%rdx			/ decrement number of bytes by 1
	movb	%r11b, (%rdi)		/ copy %r11b to (%rdi)
	incq	%rdi			/ next byte
	incq	%rsi			/ next byte
	cmpb	$0, %r11b		/ compare %r11b with a null byte
	je	.L9			/ if %r11b is a null, goto .L9
	/
	/ Go back to .L3 rather than looping on .L4 so that the source
	/ alignment is re-tested after every byte.  Once %rsi becomes
	/ quadword aligned the copy continues in the quadword loop at .L5
	/ instead of staying bytewise for the rest of the string.
	/
	jmp	.L3			/ goto .L3
	.align	4
.L8:
	/ n bytes were copied without reaching the end of s2
	movb	$0, (%rdi)		/ null termination
.L9:
	/ the null byte copied from s2 already terminates the result
	ret
	SET_SIZE(strncat)