/*
 * strlen.S (c) 1995 David Mosberger (davidm@cs.arizona.edu)
 *
 * Finds length of a 0-terminated string.  Optimized for the
 * Alpha architecture:
 *
 *	- memory accessed as aligned quadwords only
 *	- uses cmpbge to compare 8 bytes in parallel
 *	- does binary search to find 0 byte in last
 *	  quadword (HAKMEM needed 12 instructions to
 *	  do this instead of the 9 instructions that
 *	  binary search needs).
 *
 * Register usage:
 *
 *	In:	$16 = address of the string (any alignment)
 *	Out:	$0  = number of bytes preceding the first 0 byte
 *	Temps:	$1, $2, $3 are overwritten
 *	Return:	via $26; the stack is not touched
 *
 * Since only whole aligned quadwords are loaded, bytes that share a
 * quadword with the string (before its start or after its terminator)
 * are read as well, but no quadword past the one holding the
 * terminator is ever loaded.
 *
 * Branch targets inside the routine use '$'-prefixed names so that the
 * assembler treats them as local labels rather than emitting generic
 * symbols such as "loop" or "done" alongside strlen.
 */

	.set noreorder
	.set noat

	.align 3

	.globl	strlen
	.ent	strlen

strlen:
	/*
	 * First quadword.  The bytes that lie below the start of the
	 * string are forced to 0xff so they cannot be mistaken for the
	 * terminator: insqh turns the all-ones value into a mask with
	 * 0xff in the low ($16 & 7) bytes and 0 elsewhere.
	 */
	ldq_u	$1, 0($16)	# load first quadword ($16 may be misaligned)
	lda	$2, -1($31)	# $2 <- all ones
	insqh	$2, $16, $2	# $2 <- 0xff in each byte preceding the string
	andnot	$16, 7, $0	# $0 <- address of the aligned quadword
	or	$2, $1, $1	# hide the bytes preceding the string
	cmpbge	$31, $1, $2	# $2 <- bitmask: bit i == 1 <==> i-th byte == 0
	bne	$2, $found

	/*
	 * Main loop: one aligned quadword per iteration.  On exit $0
	 * is the address of the quadword containing the first 0 byte
	 * and $2 holds its cmpbge bitmask.
	 */
$loop:	ldq	$1, 8($0)
	addq	$0, 8, $0	# addr += 8
	nop			# helps dual issue last two insns
	cmpbge	$31, $1, $2
	beq	$2, $loop

	/*
	 * $2 != 0 here.  If bit 0 is set the 0 byte is the first byte
	 * of the quadword and $0 already points at it.
	 */
$found:	blbs	$2, $done	# make aligned case fast
	negq	$2, $3
	and	$2, $3, $2	# $2 <- $2 & -$2: keep only the lowest set bit

	/*
	 * Binary search for the index of the single bit left in $2;
	 * each step adds to $0 when the bit is not in the tested half.
	 */
	and	$2, 0x0f, $1	# bit within bits 0..3?
	addq	$0, 4, $3
	cmoveq	$1, $3, $0	# no: the 0 byte is at offset 4..7

	and	$2, 0x33, $1	# bit within the low pair of its nibble?
	addq	$0, 2, $3
	cmoveq	$1, $3, $0	# no: skip two more bytes

	and	$2, 0x55, $1	# bit at an even position?
	addq	$0, 1, $3
	cmoveq	$1, $3, $0	# no: skip one more byte

$done:	subq	$0, $16, $0	# length = address of 0 byte - start address
	ret	$31, ($26)

	.end	strlen