diff options
author | 2023-10-10 14:33:42 +0000 | |
---|---|---|
committer | 2023-10-10 14:33:42 +0000 | |
commit | af1a266670d040d2f4083ff309d732d648afba2a (patch) | |
tree | 2fc46203448ddcc6f81546d379abfaeb323575e9 /roms/qemu-palcode/strlen.S | |
parent | e02cda008591317b1625707ff8e115a4841aa889 (diff) |
Change-Id: Iaf8d18082d3991dec7c0ebbea540f092188eb4ec
Diffstat (limited to 'roms/qemu-palcode/strlen.S')
-rw-r--r-- | roms/qemu-palcode/strlen.S | 59 |
1 files changed, 59 insertions, 0 deletions
diff --git a/roms/qemu-palcode/strlen.S b/roms/qemu-palcode/strlen.S new file mode 100644 index 000000000..5a77d7293 --- /dev/null +++ b/roms/qemu-palcode/strlen.S @@ -0,0 +1,59 @@ +/* + * strlen.S (c) 1995 David Mosberger (davidm@cs.arizona.edu) + * + * Finds length of a 0-terminated string. Optimized for the + * Alpha architecture: + * + * - memory accessed as aligned quadwords only + * - uses bcmpge to compare 8 bytes in parallel + * - does binary search to find 0 byte in last + * quadword (HAKMEM needed 12 instructions to + * do this instead of the 9 instructions that + * binary search needs). + */ + + .set noreorder + .set noat + + .align 3 + + .globl strlen + .ent strlen +strlen: + .frame $sp, 0, $26, 0 + .prologue 0 + + ldq_u $1, 0($16) # load first quadword ($16 may be misaligned) + lda $2, -1($31) + insqh $2, $16, $2 + andnot $16, 7, $0 + or $2, $1, $1 + cmpbge $31, $1, $2 # $2 <- bitmask: bit i == 1 <==> i-th byte == 0 + bne $2, found + +loop: ldq $1, 8($0) + addq $0, 8, $0 # addr += 8 + nop # helps dual issue last two insns + cmpbge $31, $1, $2 + beq $2, loop + +found: blbs $2, done # make aligned case fast + negq $2, $3 + and $2, $3, $2 + + and $2, 0x0f, $1 + addq $0, 4, $3 + cmoveq $1, $3, $0 + + and $2, 0x33, $1 + addq $0, 2, $3 + cmoveq $1, $3, $0 + + and $2, 0x55, $1 + addq $0, 1, $3 + cmoveq $1, $3, $0 + +done: subq $0, $16, $0 + ret $31, ($26) + + .end strlen |