aboutsummaryrefslogtreecommitdiffstats
path: root/roms/edk2/MdePkg/Library/BaseMemoryLibOptDxe/Arm/CopyMem.asm
blob: 3ebcfd5b4cfd9f29f864079488d4f660f21ad67d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
;------------------------------------------------------------------------------
;
; CopyMem() worker for ARM
;
; This file started out as C code that did 64 bit moves if the buffer was
; 32-bit aligned, else it does a byte copy. It also does a byte copy for
; any trailing bytes. It was updated to do 32-byte copies using stm/ldm.
;
; Copyright (c) 2008 - 2010, Apple Inc. All rights reserved.<BR>
; Copyright (c) 2016, Linaro Ltd. All rights reserved.<BR>
; SPDX-License-Identifier: BSD-2-Clause-Patent
;
;------------------------------------------------------------------------------

    EXPORT  InternalMemCopyMem
    AREA    SetMem, CODE, READONLY
    THUMB

InternalMemCopyMem
  // Copy r2 bytes from the buffer at r1 to the buffer at r0. The two buffers
  // may overlap: the copy runs backward (from the buffer ends) whenever the
  // destination starts inside or immediately after the source range, and
  // forward otherwise.
  //
  // In:   r0 = destination, r1 = source, r2 = length in bytes
  // Out:  r0 = destination (the value passed in)
  // Regs: r4-r11 are saved on entry and restored on exit (36 bytes of stack
  //       together with lr). r2, r3, r12, lr and the condition flags are
  //       clobbered.
  //
  // Fast path: 32 bytes per iteration through ldm/stm of r2-r9. It is only
  // selected when at least 32 bytes remain and both cursors are 16-byte
  // aligned (tst #0xF); everything else is copied one byte at a time.
  stmfd  sp!, {r4-r11, lr}
  // Working registers: r11 = destination (kept for the return value),
  // r10 = destination cursor, r14 = source cursor, r12 = bytes remaining
  mov  r11, r0
  mov  r10, r0
  mov  r12, r2
  mov  r14, r1

  // Nothing to copy. This has to be tested before the overlap check: with
  // length == 0 and destination == source the backward byte loop would be
  // entered, and that loop decrements the count before testing it, so the
  // count would wrap from 0 to 0xFFFFFFFF and the copy would run far outside
  // the buffers.
  cmp  r2, #0
  beq  memcopy_end

memcopy_check_overlapped
  cmp  r11, r1
  // If (dest < source) a forward copy is always safe (unsigned compare)
  bcc  memcopy_check_optim_default

  // Here dest >= source. r3 = dest - source.
  // If (length < dest - source) the destination starts past the end of the
  // source, so a forward copy is safe as well.
  rsb  r3, r1, r11
  cmp  r12, r3
  bcc  memcopy_check_optim_default
  // Otherwise copy backward, starting from the end of both buffers
  b     memcopy_check_optim_overlap

memcopy_check_optim_default
  // Decide whether the forward copy can use the 32-byte fast path:
  // (length >= 32) && destination 16-byte aligned && source 16-byte aligned.
  // From here on r0 is reused as the fast path flag (0 = byte copy, 1 = block copy).
  tst  r0, #0xF
  movne  r0, #0
  // mov does not change the flags, so this still tests the destination alignment
  bne   memcopy_default
  // r3 = 1 if the source is 16-byte aligned, else 0
  tst  r1, #0xF
  movne  r3, #0
  moveq  r3, #1
  // r0 = (length > 31) ? r3 : 0
  cmp  r2, #31
  movls  r0, #0
  andhi  r0, r3, #1
  b     memcopy_default

memcopy_check_optim_overlap
  // r10 = dest_end, r14 = source_end (one past the last byte of each buffer)
  add  r10, r11, r12
  add  r14, r12, r1

  // Decide whether the backward copy can use the 32-byte fast path:
  // (length >= 32) && dest_end 16-byte aligned && source_end 16-byte aligned.
  // r0 is reused as the fast path flag (0 = byte copy, 1 = block copy).
  cmp  r2, #31
  movls  r0, #0
  movhi  r0, #1
  tst  r10, #0xF
  movne  r0, #0
  tst  r14, #0xF
  movne  r0, #0
  b  memcopy_overlapped

memcopy_overlapped_non_optim
  // Backward byte copy. Requires r12 != 0 on entry because the count is
  // decremented before it is tested.
  // We read 1 byte from the end of the source buffer
  sub  r3, r14, #1
  sub  r12, r12, #1
  ldrb  r3, [r3, #0]
  sub  r2, r10, #1
  // Flags set here are consumed by the bne below; the sub/strb instructions
  // in between do not update them
  cmp  r12, #0
  // We write 1 byte at the end of the dest buffer
  sub  r10, r10, #1
  sub  r14, r14, #1
  strb  r3, [r2, #0]
  bne  memcopy_overlapped_non_optim
  b   memcopy_end

// r10 = dest_end, r14 = source_end
memcopy_overlapped
  // Are we in the optimized case ?
  cmp  r0, #0
  beq  memcopy_overlapped_non_optim

  // Optimized Overlapped - Read 32 bytes
  // The whole block is loaded into r2-r9 before any byte of it is stored
  sub  r14, r14, #32
  sub  r12, r12, #32
  cmp  r12, #31
  ldmia  r14, {r2-r9}

  // If fewer than 32 bytes remain, finish with the byte loop
  // (ldmia does not change the flags set by the cmp above)
  movls  r0, #0

  cmp  r12, #0

  // Optimized Overlapped - Write 32 bytes
  sub  r10, r10, #32
  stmia  r10, {r2-r9}

  // while (length != 0)
  bne  memcopy_overlapped
  b   memcopy_end

memcopy_default_non_optim
  // Forward byte copy with post-incremented cursors
  ldrb  r3, [r14], #1
  sub  r12, r12, #1
  strb  r3, [r10], #1

memcopy_default
  // Stop once every byte has been copied
  cmp  r12, #0
  beq  memcopy_end

// r10 = dest, r14 = source
memcopy_default_loop
  cmp  r0, #0
  beq  memcopy_default_non_optim

  // Optimized memcopy - Read 32 Bytes
  sub  r12, r12, #32
  cmp  r12, #31
  ldmia  r14!, {r2-r9}

  // If fewer than 32 bytes remain, finish with the byte loop
  // (ldmia does not change the flags set by the cmp above)
  movls  r0, #0

  cmp  r12, #0

  // Optimized memcopy - Write 32 Bytes
  stmia  r10!, {r2-r9}

  // while (length != 0)
  bne  memcopy_default_loop

memcopy_end
  // Return the original destination pointer and restore the saved registers;
  // loading pc from the saved lr performs the return
  mov  r0, r11
  ldmfd  sp!, {r4-r11, pc}

  END