aboutsummaryrefslogtreecommitdiffstats
path: root/roms/edk2/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CompareMem.asm
diff options
context:
space:
mode:
Diffstat (limited to 'roms/edk2/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CompareMem.asm')
-rw-r--r--roms/edk2/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CompareMem.asm118
1 files changed, 118 insertions, 0 deletions
diff --git a/roms/edk2/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CompareMem.asm b/roms/edk2/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CompareMem.asm
new file mode 100644
index 000000000..27a1efc8c
--- /dev/null
+++ b/roms/edk2/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CompareMem.asm
@@ -0,0 +1,118 @@
+;
+; Copyright (c) 2013 - 2016, Linaro Limited
+; All rights reserved.
+; SPDX-License-Identifier: BSD-2-Clause-Patent
+;
+
+; Parameters and result.
+#define src1 r0
+#define src2 r1
+#define limit r2
+#define result r0
+
+; Internal variables.
+#define data1 r3
+#define data2 r4
+#define limit_wd r5
+#define diff r6
+#define tmp1 r7
+#define tmp2 r12
+#define pos r8
+#define mask r14
+
+ EXPORT InternalMemCompareMem
+ THUMB
+ AREA CompareMem, CODE, READONLY
+
+InternalMemCompareMem
+ push {r4-r8, lr}
+ eor tmp1, src1, src2
+ tst tmp1, #3
+ bne Lmisaligned4
+ ands tmp1, src1, #3
+ bne Lmutual_align
+ add limit_wd, limit, #3
+ nop.w
+ lsr limit_wd, limit_wd, #2
+
+ ; Start of performance-critical section -- one 32B cache line.
+Lloop_aligned
+ ldr data1, [src1], #4
+ ldr data2, [src2], #4
+Lstart_realigned
+ subs limit_wd, limit_wd, #1
+ eor diff, data1, data2 ; Non-zero if differences found.
+ cbnz diff, L0
+ bne Lloop_aligned
+ ; End of performance-critical section -- one 32B cache line.
+
+ ; Not reached the limit, must have found a diff.
+L0
+ cbnz limit_wd, Lnot_limit
+
+ // Limit % 4 == 0 => all bytes significant.
+ ands limit, limit, #3
+ beq Lnot_limit
+
+ lsl limit, limit, #3 // Bits -> bytes.
+ mov mask, #~0
+ lsl mask, mask, limit
+ bic data1, data1, mask
+ bic data2, data2, mask
+
+ orr diff, diff, mask
+
+Lnot_limit
+ rev diff, diff
+ rev data1, data1
+ rev data2, data2
+
+ ; The MS-non-zero bit of DIFF marks either the first bit
+ ; that is different, or the end of the significant data.
+ ; Shifting left now will bring the critical information into the
+ ; top bits.
+ clz pos, diff
+ lsl data1, data1, pos
+ lsl data2, data2, pos
+
+ ; But we need to zero-extend (char is unsigned) the value and then
+ ; perform a signed 32-bit subtraction.
+ lsr data1, data1, #28
+ sub result, data1, data2, lsr #28
+ pop {r4-r8, pc}
+
+Lmutual_align
+ ; Sources are mutually aligned, but are not currently at an
+ ; alignment boundary. Round down the addresses and then mask off
+ ; the bytes that precede the start point.
+ bic src1, src1, #3
+ bic src2, src2, #3
+ add limit, limit, tmp1 ; Adjust the limit for the extra.
+ lsl tmp1, tmp1, #2 ; Bytes beyond alignment -> bits.
+ ldr data1, [src1], #4
+ neg tmp1, tmp1 ; Bits to alignment -32.
+ ldr data2, [src2], #4
+ mov tmp2, #~0
+
+ ; Little-endian. Early bytes are at LSB.
+ lsr tmp2, tmp2, tmp1 ; Shift (tmp1 & 31).
+ add limit_wd, limit, #3
+ orr data1, data1, tmp2
+ orr data2, data2, tmp2
+ lsr limit_wd, limit_wd, #2
+ b Lstart_realigned
+
+Lmisaligned4
+ sub limit, limit, #1
+L1
+ // Perhaps we can do better than this.
+ ldrb data1, [src1], #1
+ ldrb data2, [src2], #1
+ subs limit, limit, #1
+ it cs
+ cmpcs data1, data2
+ beq L1
+ sub result, data1, data2
+ pop {r4-r8, pc}
+
+ END