/*
 * strlen.S (c) 1995 David Mosberger (davidm@cs.arizona.edu)
 *
 * Finds length of a 0-terminated string.  Optimized for the
 * Alpha architecture:
 *
 *	- memory accessed as aligned quadwords only
 *	- uses cmpbge to compare 8 bytes in parallel
 *	- does binary search to find 0 byte in last
 *	  quadword (HAKMEM needed 12 instructions to
 *	  do this instead of the 9 instructions that
 *	  binary search needs).
 *
 * Register usage:
 *	$16	in:  address of the string (any alignment), never modified
 *	$0	out: number of bytes in front of the terminating 0;
 *		     used as the aligned scan pointer until then
 *	$1	scratch: current quadword / binary search test value
 *	$2	scratch: cmpbge result, one bit per byte of the quadword
 *	$3	scratch: candidate pointer during the binary search
 *	$26	return address
 *
 * The stack is not touched.
 */

	.set noreorder
	.set noat

	.align 3

	.globl	strlen
	.ent	strlen

strlen:
	/*
	 * The string may begin in the middle of its first quadword.
	 * The bytes in front of it are forced to 0xff so that they can
	 * never be taken for the terminator.
	 */
	ldq_u	$1, 0($16)	# load first quadword ($16 may be misaligned)
	lda	$2, -1($31)	# $2 <- all ones
	insqh	$2, $16, $2	# $2 <- 0xff in the ($16 & 7) low bytes, 0 above
	bic	$16, 7, $0	# $0 <- $16 rounded down to a quadword boundary
	bis	$2, $1, $1	# overwrite the bytes that precede the string
	cmpbge	$31, $1, $2	# $2 <- bitmask: bit i == 1 <==> i-th byte == 0
	bne	$2, $found

	/* Scan one aligned quadword per iteration until one holds a 0 byte. */
$scan:
	ldq	$1, 8($0)
	addq	$0, 8, $0	# addr += 8
	nop			# helps dual issue last two insns
	cmpbge	$31, $1, $2
	beq	$2, $scan

	/*
	 * $0 is the address of the quadword holding the terminator and
	 * $2 has a bit set for every 0 byte in that quadword.
	 */
$found:
	blbs	$2, $done	# make aligned case fast: byte 0 is the 0 byte
	negq	$2, $3
	and	$2, $3, $2	# $2 & -$2: keep only the lowest set bit

	/*
	 * Binary search for the position of the single bit left in $2.
	 * Each step advances $0 by 4, 2 or 1 when the bit is not among
	 * the positions selected by the constant.
	 */
	and	$2, 0x0f, $1	# bit in positions 0..3 ?
	addq	$0, 4, $3
	cmoveq	$1, $3, $0	# no: the 0 byte is in the upper half

	and	$2, 0x33, $1	# bit in positions 0,1,4,5 ?
	addq	$0, 2, $3
	cmoveq	$1, $3, $0	# no: skip two more bytes

	and	$2, 0x55, $1	# bit in an even position ?
	addq	$0, 1, $3
	cmoveq	$1, $3, $0	# no: skip one more byte

$done:
	subq	$0, $16, $0	# length <- address of the 0 byte - start
	ret	$31, ($26)

	.end	strlen