; IAR assembler module: word-optimised memory copy / fill routines (ARM mode).
; Directives are indented because the first column is reserved for labels.
        MODULE  ARM_MEMORY

        PUBLIC  ARM_MEMCPY
        PUBLIC  ARM_MEMSET
        PUBLIC  ARM_MEMSET8
        PUBLIC  ARM_MEMSET16
        PUBLIC  ARM_MEMSET32

        SECTION .text:CODE:NOROOT(2)            ; 2^2 = 4-byte alignment for ARM code
        CODE32

;-------------------------------------------------------------------------------
; void ARM_MEMCPY(void* pDest, void* pSrc, U32 NumBytes)
;
; Function description
;   Copy NumBytes bytes from pSrc to pDest.
;   1. Fewer than 4 bytes: copied byte by byte.
;   2. Otherwise the destination is brought to word alignment with 1..3
;      byte transfers.
;   3. If the source is then word aligned as well, data is moved in blocks
;      of 32 bytes (LDM/STM), followed by 16/8/4 byte remainders.
;   4. If the source is not word aligned, aligned source words are read and
;      merged with shifts into complete destination words.
;      NOTE: the shift directions assume little-endian byte order.
;   5. The last 0..3 bytes are copied byte by byte.
;   Overlapping buffers are not handled (forward copy only).
;
; Register usage:
;
;   R0    pDest (advanced while copying)
;   R1    pSrc  (advanced while copying)
;   R2    NumBytes / remaining byte count
;
;   R3    Used for data transfers
;   R4    Used for data transfers (saved on stack in the bulk path)
;   R12   Used for data transfers
;   R14   Used for data transfers (saved on stack in the bulk path)
;
;   R13   SP
;   R14   LR (contains return address)
;   R15   PC
;
;-------------------------------------------------------------------------------
ARM_MEMCPY:
;-------------------------------------------------------------------------------
        cmp     R2, #+3                         ; R2 = NumBytes
        bls     ARM_MEMCPY_HandleTrailingBytes  ; Less than one complete word: byte transfers only
        ands    R12, R0, #+3                    ; R12 = mis-alignment of destination (0..3)
        beq     ARM_MEMCPY_DestIsDWordAligned   ; Destination already word aligned
;-------------------------------------------------------------------------------
; Copy (4 - R12) bytes so that the destination becomes word aligned.
; CMP/ADD/SUB are interleaved with the transfers; ADD and SUB do not touch
; the flags, so LS/CC still refer to the CMP below.
;
        ldrb    R3, [R1], #+1                   ; At least one byte is always needed
        cmp     R12, #+2                        ; LS: R12 = 1 or 2 (>= 2 bytes), CC: R12 = 1 (3 bytes)
        add     R2, R2, R12                     ; NumBytes += R12 ...
        ldrbls  R12, [R1], #+1                  ; Second byte
        strb    R3, [R0], #+1
        ldrbcc  R3, [R1], #+1                   ; Third byte
        strbls  R12, [R0], #+1
        sub     R2, R2, #+4                     ; ... NumBytes -= 4  => NumBytes -= (4 - R12)
        strbcc  R3, [R0], #+1                   ; Destination is word aligned now
;-------------------------------------------------------------------------------
; Choose best way to transfer data
;
ARM_MEMCPY_DestIsDWordAligned:
        ands    R3, R1, #+3                     ; R3 = mis-alignment of source (0..3)
        beq     ARM_MEMCPY_HandleBulkWordData   ; Both aligned: use bulk word transfer
        subs    R2, R2, #+4                     ; Pre-decrement by one word for the shift loops
        bcc     ARM_MEMCPY_HandleTrailingBytes  ; Less than one word left (low 2 bits of R2 still valid)
        ldr     R12, [R1, -R3]!                 ; Word align source address and read first word
        cmp     R3, #+2
        beq     ARM_MEMCPY_Loop16BitShift       ; Source off by 2
        bhi     ARM_MEMCPY_Loop24BitShift       ; Source off by 3
;-------------------------------------------------------------------------------
; Handle data in units of words
;
; Each loop combines the upper part of the previously read aligned source
; word with the lower part of the next one and stores one complete word.
; On exit R1 is moved back from the aligned address to the next unread byte.
;
ARM_MEMCPY_Loop8BitShift:                       ; Source off by 1
        mov     R3, R12, LSR #+8                ; Remaining 3 bytes of previous word
        ldr     R12, [R1, #+4]!                 ; Load next aligned source word
        subs    R2, R2, #+4                     ; Decrement NumBytes
        orr     R3, R3, R12, LSL #+24           ; Add 1 byte of the new word
        str     R3, [R0], #+4                   ; Store complete word
        bcs     ARM_MEMCPY_Loop8BitShift
        add     R1, R1, #+1                     ; Adjust source address
        b       ARM_MEMCPY_HandleTrailingBytes

ARM_MEMCPY_Loop16BitShift:                      ; Source off by 2
        mov     R3, R12, LSR #+16               ; Remaining 2 bytes of previous word
        ldr     R12, [R1, #+4]!                 ; Load next aligned source word
        subs    R2, R2, #+4                     ; Decrement NumBytes
        orr     R3, R3, R12, LSL #+16           ; Add 2 bytes of the new word
        str     R3, [R0], #+4                   ; Store complete word
        bcs     ARM_MEMCPY_Loop16BitShift
        add     R1, R1, #+2                     ; Adjust source address
        b       ARM_MEMCPY_HandleTrailingBytes

ARM_MEMCPY_Loop24BitShift:                      ; Source off by 3
        mov     R3, R12, LSR #+24               ; Remaining 1 byte of previous word
        ldr     R12, [R1, #+4]!                 ; Load next aligned source word
        subs    R2, R2, #+4                     ; Decrement NumBytes
        orr     R3, R3, R12, LSL #+8            ; Add 3 bytes of the new word
        str     R3, [R0], #+4                   ; Store complete word
        bcs     ARM_MEMCPY_Loop24BitShift
        add     R1, R1, #+3                     ; Adjust source address
        b       ARM_MEMCPY_HandleTrailingBytes
;-------------------------------------------------------------------------------
; Handle large bulk data in blocks of 8 words (32 bytes)
;
ARM_MEMCPY_HandleBulkWordData:
        subs    R2, R2, #+0x20                  ; Pre-decrement by one block
        stmdb   SP!, {R4, LR}                   ; R4 and LR are used as transfer registers
        bcc     ARM_MEMCPY_HandleTrailingWords  ; Less than 32 bytes
ARM_MEMCPY_LoopHandleBulkWord:
        ldm     R1!, {R3, R4, R12, LR}          ; Transfer 16 bytes at once
        stm     R0!, {R3, R4, R12, LR}
        ldm     R1!, {R3, R4, R12, LR}          ; Transfer 16 bytes at once
        stm     R0!, {R3, R4, R12, LR}
        subs    R2, R2, #+0x20
        bcs     ARM_MEMCPY_LoopHandleBulkWord
;-------------------------------------------------------------------------------
; Handle trailing 7 words
;
; R2 is negative here, but its low 5 bits still equal the remaining byte count.
;
ARM_MEMCPY_HandleTrailingWords:
        movs    R12, R2, LSL #+28               ; C = bit 4, N = bit 3 of NumBytes
        ldmcs   R1!, {R3, R4, R12, LR}          ; Bit 4 set: transfer 16 bytes
        stmcs   R0!, {R3, R4, R12, LR}
        ldmmi   R1!, {R3, R4}                   ; Bit 3 set: transfer 8 bytes
        stmmi   R0!, {R3, R4}
        movs    R12, R2, LSL #+30               ; C = bit 2, Z = (bits 1..0 == 0)
        ldmia   SP!, {R4, LR}                   ; Restore registers (flags unaffected)
        ldrcs   R3, [R1], #+4                   ; Bit 2 set: transfer 4 bytes
        strcs   R3, [R0], #+4
        bxeq    LR                              ; No trailing bytes left
;-------------------------------------------------------------------------------
; Handle trailing 3 bytes
;
; MOVS with LSL #31 moves bit 0 of NumBytes into N and bit 1 into C:
;   xxxxxxxxxxxxxxxxxxxx10 << 31 : N=0, C=1
;   xxxxxxxxxxxxxxxxxxxx01 << 31 : N=1, C=0
;   MI : N=1
;   CS : C=1
;
ARM_MEMCPY_HandleTrailingBytes:
        movs    R2, R2, LSL #+31                ; N = bit 0, C = bit 1 of NumBytes
        ldrbmi  R2, [R1], #+1                   ; Bit 0 set: transfer 1 byte
        ldrbcs  R3, [R1], #+1                   ; Bit 1 set: transfer 2 bytes
        ldrbcs  R12, [R1], #+1
        strbmi  R2, [R0], #+1
        strbcs  R3, [R0], #+1
        strbcs  R12, [R0], #+1
        bx      LR
;-------------------------------------------------------------------------------
; void ARM_MEMSET(void* pDest, U32 c, U32 NumBytes)
;
; Function description
;   Fill NumBytes bytes starting at pDest with the byte value (c & 0xFF).
;   The destination is word aligned with byte stores, then filled in blocks
;   of 32 bytes, followed by 16/8/4 byte remainders and up to 3 single bytes.
;
; Register usage:
;
;   R0    pDest (advanced while filling)
;   R1    c, replicated into all four byte lanes
;   R2    NumBytes / remaining byte count
;
;   R3    Used for data transfers
;   R4    Used for data transfers (saved on stack)
;   R5    Used for data transfers (saved on stack)
;   R6    Scratch for flag generation (saved on stack)
;
;   R13   SP
;   R14   LR (contains return address)
;   R15   PC
;
;-------------------------------------------------------------------------------
ARM_MEMSET:
;-------------------------------------------------------------------------------
        and     R1, R1, #+0xFF                  ; Only the low byte of c is the fill value
        orr     R1, R1, R1, LSL #+8             ; R1 = 0x0000cccc
        orr     R1, R1, R1, LSL #+16            ; R1 = 0xcccccccc
        cmp     R2, #+3                         ; R2 = NumBytes
        bls     ARM_MEMSET_HandleTrailingBytes  ; Less than one complete word: byte stores only
        ands    R3, R0, #+3                     ; R3 = mis-alignment of destination (0..3)
        beq     ARM_MEMSET_DestIsAligned        ; Destination already word aligned
;
; Store (4 - R3) bytes so that the destination becomes word aligned.
; ADD and SUB do not touch the flags set by CMP.
;
        strb    R1, [R0], #+1                   ; At least one byte is always needed
        cmp     R3, #+2                         ; LS: R3 = 1 or 2 (>= 2 bytes), CC: R3 = 1 (3 bytes)
        add     R2, R2, R3                      ; NumBytes += R3 ...
        strbls  R1, [R0], #+1                   ; Second byte
        sub     R2, R2, #+4                     ; ... NumBytes -= 4  => NumBytes -= (4 - R3)
        strbcc  R1, [R0], #+1                   ; Third byte
;
; Destination is aligned: handle large bulk data in blocks of 8 words (32 bytes)
;
ARM_MEMSET_DestIsAligned:
ARM_MEMSET_HandleBulkWordData:
        stmdb   SP!, {R4, R5, R6}
        mov     R3, R1                          ; Four registers => 16 bytes per STM
        mov     R4, R1
        mov     R5, R1
        subs    R2, R2, #+0x20                  ; 32 bytes = 8 words
        bcc     ARM_MEMSET_HandleTrailingWords  ; Less than 32 bytes
ARM_MEMSET_LoopHandleBulkWord:
        stm     R0!, {R1, R3, R4, R5}
        stm     R0!, {R1, R3, R4, R5}
        subs    R2, R2, #+0x20
        bcs     ARM_MEMSET_LoopHandleBulkWord
;
; Handle trailing 7 words.
; R2 is negative here, but its low 5 bits still equal the remaining byte count.
;
ARM_MEMSET_HandleTrailingWords:
        movs    R6, R2, LSL #+28                ; C = bit 4, N = bit 3 of NumBytes
        stmcs   R0!, {R1, R3, R4, R5}           ; Bit 4 set: store 16 bytes
        stmmi   R0!, {R1, R3}                   ; Bit 3 set: store 8 bytes
        movs    R6, R2, LSL #+30                ; C = bit 2, Z = (bits 1..0 == 0)
        strcs   R1, [R0], #+4                   ; Bit 2 set: store 4 bytes
        ldmia   SP!, {R4, R5, R6}               ; Restore registers (flags unaffected)
        bxeq    LR                              ; No trailing bytes left
;
; Handle trailing 3 bytes
;
ARM_MEMSET_HandleTrailingBytes:
        movs    R2, R2, LSL #+31                ; N = bit 0, C = bit 1 of NumBytes
        strbmi  R1, [R0], #+1                   ; Bit 0 set: store 1 byte
        strbcs  R1, [R0], #+1                   ; Bit 1 set: store 2 bytes
        strbcs  R1, [R0], #+1
        bx      LR
; int ARM_MEMSET8(void* pDest, U32 c, U32 NumBytes);
;-------------------------------------------------------------------------------
; Fill NumBytes bytes at pDest with the byte value (c & 0xFF).
;
;   R0    pDest (advanced while filling; no dedicated return value is set up)
;   R1    c, widened step by step to 16 and 32 bit
;   R2    NumBytes, treated as a SIGNED count (blt/bge): values >= 0x80000000
;         are handled like a negative count and nothing is written
;   R3    Copy of the fill word
;   R4,R5 Copies of the fill word (saved on stack)
;
; The destination is aligned in steps (8 -> 16 -> 32 -> 64 -> 128 bit), then
; filled 16 bytes per iteration, followed by 8/4 byte and single byte tails.
;-------------------------------------------------------------------------------
ARM_MEMSET8:
;-------------------------------------------------------------------------------
        stmdb   SP!, {R4, R5}
        and     R1, R1, #0xFF                   ; Only the low byte of c is the fill value
        cmp     R2, #4
        blt     ARM_MEMSET8_loop3               ; 0..3 bytes: byte stores only
; Alignment is unknown
        tst     R0, #1
        strneb  R1, [R0], #1
        subne   R2, R2, #1
; Now we are 16-bit aligned (need to upgrade 'c' to 16-bit)
        orr     R1, R1, R1, LSL #8
        tst     R0, #2
        strneh  R1, [R0], #2
        subne   R2, R2, #2
; Now we are 32-bit aligned (need to upgrade 'c' to 32-bit)
        orr     R1, R1, R1, LSL #16
        mov     R3, R1
        cmp     R2, #16
        blt     ARM_MEMSET8_loop2               ; Too short for the 128-bit alignment steps
        tst     R0, #4
        strne   R1, [R0], #4
        subne   R2, R2, #4
; Now we are 64-bit aligned
        tst     R0, #8
        stmneia R0!, {R1, R3}
        subne   R2, R2, #8
; Now we are 128-bit aligned
        mov     R4, R1
        mov     R5, R1
ARM_MEMSET8_loop1:
; Store 4 32-bit values per loop iteration
        subs    R2, R2, #16
        stmgeia R0!, {R1, R3, R4, R5}
        bge     ARM_MEMSET8_loop1
        add     R2, R2, #16                     ; Undo the last (failed) decrement
ARM_MEMSET8_loop2:
; Store up to 3 remaining 32-bit values
        tst     R2, #8
        stmneia R0!, {R1, R3}
        tst     R2, #4
        strne   R1, [R0], #4
        and     R2, R2, #3
ARM_MEMSET8_loop3:
; Store up to 3 remaining bytes
        subs    R2, R2, #1
        strgeb  R1, [R0], #1
        subs    R2, R2, #1
        strgeb  R1, [R0], #1
        subs    R2, R2, #1
        strgeb  R1, [R0], #1
        ldmia   SP!, {R4, R5}
        bx      LR
; int ARM_MEMSET16(void* pDest, U32 c, U32 NumHalfWords);
;-------------------------------------------------------------------------------
; Fill NumHalfWords 16-bit items at pDest with the value (c & 0xFFFF).
;
;   R0    pDest, must be at least 16-bit aligned (not checked here)
;   R1    c, widened to 32 bit after the first alignment step
;   R2    NumHalfWords, treated as a SIGNED count (blt/bge)
;   R3    Copy of the fill word
;   R4,R5 Copies of the fill word (saved on stack)
;-------------------------------------------------------------------------------
ARM_MEMSET16:
;-------------------------------------------------------------------------------
        stmdb   SP!, {R4, R5}
        cmp     R2, #2
        blt     ARM_MEMSET16_HandleTrailingHalfWord ; 1 or 0
; Alignment is known to be at least 16-bit
        tst     R0, #2
        strneh  R1, [R0], #2                    ; xxxx-xx10 --->
        subne   R2, R2, #1                      ; xxxx-xx00
; Now we are 32-bit aligned (need to upgrade 'c' to 32-bit).
; LSL/LSR pair replicates the low half word and discards upper bits of c.
        mov     R1, R1, LSL #16
        orr     R1, R1, R1, LSR #16
        mov     R4, R1
        cmp     R2, #8
        blt     ARM_MEMSET16_HandleTrailingWords ; 7, 6, ... 0
        tst     R0, #4
        strne   R1, [R0], #4                    ; xxxx-x100 --->
        subne   R2, R2, #2                      ; xxxx-x000
; Now we are 64-bit aligned
        tst     R0, #8
        stmneia R0!, {R1, R4}                   ; xxxx-1000 --->
        subne   R2, R2, #4                      ; xxxx-0000
ARM_MEMSET16_HandleBulkWordData:
; Now we are 128-bit aligned
        mov     R5, R1
        mov     R3, R1
ARM_MEMSET16_LoopHandleBulkWord:
; Store 4 32-bit values (8 half words) per loop iteration
        subs    R2, R2, #8
        stmgeia R0!, {R1, R3, R4, R5}
        bge     ARM_MEMSET16_LoopHandleBulkWord
        add     R2, R2, #8                      ; Undo the last (failed) decrement
ARM_MEMSET16_HandleTrailingWords:
; Store up to 3 remaining 32-bit values
        tst     R2, #4
        stmneia R0!, {R1, R4}
        tst     R2, #2
        strne   R1, [R0], #4
        and     R2, R2, #1
ARM_MEMSET16_HandleTrailingHalfWord:
; Store up to 1 remaining 16-bit value
        subs    R2, R2, #1
        strgeh  R1, [R0], #2
        ldmia   SP!, {R4, R5}
        bx      LR
; int ARM_MEMSET32(void* pDest, U32 c, U32 NumWords);
;-------------------------------------------------------------------------------
; Fill NumWords 32-bit items at pDest with the value c.
;
;   R0    pDest, must be at least 32-bit aligned (not checked here)
;   R1    c
;   R2    NumWords, treated as a SIGNED count (blt/bge)
;   R3    Copy of c
;   R4,R5 Copies of c (saved on stack)
;-------------------------------------------------------------------------------
ARM_MEMSET32:
;-------------------------------------------------------------------------------
        stmdb   SP!, {R4, R5}
        cmp     R2, #4
        blt     ARM_MEMSET32_loop2              ; 0..3 words: single word stores only
; Alignment is known to be at least 32-bit
        mov     R3, R1
        tst     R0, #4
        strne   R1, [R0], #4
        subne   R2, R2, #1
; Now we are 64-bit aligned
        tst     R0, #8
        stmneia R0!, {R1, R3}
        subne   R2, R2, #2
; Now we are 128-bit aligned
        mov     R4, R1
        mov     R5, R1
ARM_MEMSET32_loop1:
; Store 4 32-bit values per loop iteration
        subs    R2, R2, #4
        stmgeia R0!, {R1, R3, R4, R5}
        bge     ARM_MEMSET32_loop1
        add     R2, R2, #4                      ; Undo the last (failed) decrement
ARM_MEMSET32_loop2:
; Store up to 3 remaining 32-bit values
        subs    R2, R2, #1
        strge   R1, [R0], #4
        subs    R2, R2, #1
        strge   R1, [R0], #4
        subs    R2, R2, #1
        strge   R1, [R0], #4
        ldmia   SP!, {R4, R5}
        bx      LR
;-__arm void ARM_memxor(void* pDest, U32 c, U32 NumBytes);
;                            r0           r1     r2
;
; XOR NumBytes bytes starting at pDest in place with the byte value (c & 0xFF).
; Same structure as ARM_MEMSET: align destination with byte accesses, process
; blocks of 32 bytes, then 16/8/4 byte remainders and up to 3 single bytes.
;
;   R0     pDest (advanced while processing)
;   R1     c, replicated into all four byte lanes
;   R2     NumBytes / remaining byte count
;   R3,R12 Data
;   R4-R6  Data (saved on stack)
;   R7     Scratch for flag generation (saved on stack)
;
; NOTE(review): no PUBLIC directive for this label is visible in this file and
; the prototype comment spells the name "ARM_memxor" - confirm how it is exported.
;-------------------------------------------------------------------------------
arm_memxor:
;-------------------------------------------------------------------------------
        and     R1, R1, #+0xFF                  ; Only the low byte of c is the pattern
        orr     R1, R1, R1, LSL #+8
        orr     R1, R1, R1, LSL #+16            ; R1 = 0xcccccccc
        cmp     R2, #+3                         ; R2 = NumBytes
        bls     arm_memxor_HandleTrailingBytes  ; Less than one complete word: byte accesses only
        ands    R3, R0, #+3                     ; R3 = mis-alignment of destination (0..3)
        beq     arm_memxor_DestIsAligned
;
; Process (4 - R3) bytes so that the destination becomes word aligned.
; Loads do not advance R0; only the store post-increments it, so each byte
; is written back to the address it was read from.
;
        ldrb    R12, [R0]                       ; At least one byte is always needed
        eor     R12, R12, R1
        strb    R12, [R0], #+1
        cmp     R3, #+2                         ; LS: R3 = 1 or 2 (>= 2 bytes), CC: R3 = 1 (3 bytes)
        add     R2, R2, R3                      ; NumBytes += R3 ... (flags unaffected)
        ldrbls  R3, [R0]                        ; Second byte
        eorls   R3, R3, R1
        strbls  R3, [R0], #+1
        sub     R2, R2, #+4                     ; ... NumBytes -= 4  => NumBytes -= (4 - mis-alignment)
        ldrbcc  R3, [R0]                        ; Third byte
        eorcc   R3, R3, R1
        strbcc  R3, [R0], #+1
;
; Destination is aligned: handle large bulk data in blocks of 8 words (32 bytes)
;
arm_memxor_DestIsAligned:
arm_memxor_HandleBulkWordData:
        stmdb   SP!, {R4, R5, R6, R7}
        subs    R2, R2, #+0x20                  ; 32 bytes = 8 words
        bcc     arm_memxor_HandleTrailingWords  ; Less than 32 bytes
arm_memxor_LoopHandleBulkWord:
        ldm     R0, {R3, R4, R5, R6}            ; First 16 bytes
        eor     R3, R3, R1
        eor     R4, R4, R1
        eor     R5, R5, R1
        eor     R6, R6, R1
        stm     R0!, {R3, R4, R5, R6}
        ldm     R0, {R3, R4, R5, R6}            ; Second 16 bytes
        eor     R3, R3, R1
        eor     R4, R4, R1
        eor     R5, R5, R1
        eor     R6, R6, R1
        stm     R0!, {R3, R4, R5, R6}
        subs    R2, R2, #+0x20
        bcs     arm_memxor_LoopHandleBulkWord
;
; Handle trailing 7 words.
; R2 is negative here, but its low 5 bits still equal the remaining byte count.
; EOR without S suffix keeps the flags produced by MOVS.
;
arm_memxor_HandleTrailingWords:
        movs    R7, R2, LSL #+28                ; C = bit 4, N = bit 3 of NumBytes
        ldmcs   R0, {R3, R4, R5, R6}            ; Bit 4 set: process 16 bytes
        eorcs   R3, R3, R1
        eorcs   R4, R4, R1
        eorcs   R5, R5, R1
        eorcs   R6, R6, R1
        stmcs   R0!, {R3, R4, R5, R6}
        ldmmi   R0, {R3, R4}                    ; Bit 3 set: process 8 bytes
        eormi   R3, R3, R1
        eormi   R4, R4, R1
        stmmi   R0!, {R3, R4}
        movs    R7, R2, LSL #+30                ; C = bit 2, Z = (bits 1..0 == 0)
        ldrcs   R3, [R0]                        ; Bit 2 set: process 4 bytes
        eorcs   R3, R3, R1
        strcs   R3, [R0], #+4
        ldmia   SP!, {R4, R5, R6, R7}           ; Restore registers (flags unaffected)
        bxeq    LR                              ; No trailing bytes left
;
; Handle trailing 3 bytes (byte loads: the address may be unaligned here)
;
arm_memxor_HandleTrailingBytes:
        movs    R2, R2, LSL #+31                ; N = bit 0, C = bit 1 of NumBytes
        ldrbmi  R2, [R0]                        ; Bit 0 set: process 1 byte
        eormi   R2, R2, R1
        strbmi  R2, [R0], #+1
        ldrbcs  R2, [R0]                        ; Bit 1 set: process 2 bytes
        eorcs   R2, R2, R1
        strbcs  R2, [R0], #+1
        ldrbcs  R2, [R0]
        eorcs   R2, R2, R1
        strbcs  R2, [R0], #+1
        bx      LR
;-__arm int arm_memxor8(void* pDest, U32 c, U32 NumBytes);
;                            r0           r1     r2
;
; XOR NumBytes bytes starting at pDest in place with the byte value (c & 0xFF).
;
;   R0    pDest (advanced while processing; no dedicated return value is set up)
;   R1    c, replicated into all four byte lanes
;   R2    NumBytes, treated as a SIGNED count (blt/bge)
;   R3    Data
;   R4-R6 Data (saved on stack)
;
; All conditional EORs are without S suffix, so the flags of the preceding
; TST/SUBS stay valid for the whole load / xor / store group.
;
; NOTE(review): no PUBLIC directive for this label is visible in this file.
;-------------------------------------------------------------------------------
arm_memxor8:
;-------------------------------------------------------------------------------
        stmdb   SP!, {R4, R5, R6}
        and     R1, R1, #+0xFF                  ; Only the low byte of c is the pattern
        orr     R1, R1, R1, LSL #+8
        orr     R1, R1, R1, LSL #+16            ; R1 = 0xcccccccc
        cmp     R2, #4
        blt     arm_memxor8_loop3               ; 0..3 bytes: byte accesses only
; Alignment is unknown
        tst     R0, #1
        ldrneb  R6, [R0]
        eorne   R6, R6, R1
        strneb  R6, [R0], #1
        subne   R2, R2, #1
; Now we are 16-bit aligned
        tst     R0, #2
        ldrneh  R6, [R0]
        eorne   R6, R6, R1
        strneh  R6, [R0], #2
        subne   R2, R2, #2
; Now we are 32-bit aligned
        cmp     R2, #16
        blt     arm_memxor8_loop2               ; Too short for the 128-bit alignment steps
        tst     R0, #4
        ldrne   R6, [R0]
        eorne   R6, R6, R1
        strne   R6, [R0], #4
        subne   R2, R2, #4
; Now we are 64-bit aligned
        tst     R0, #8
        ldmneia R0, {R3, R6}
        eorne   R3, R3, R1
        eorne   R6, R6, R1
        stmneia R0!, {R3, R6}
        subne   R2, R2, #8
; Now we are 128-bit aligned
arm_memxor8_loop1:
; Process 4 32-bit values per loop iteration
        subs    R2, R2, #16
        ldmgeia R0, {R3, R4, R5, R6}
        eorge   R3, R3, R1
        eorge   R4, R4, R1
        eorge   R5, R5, R1
        eorge   R6, R6, R1
        stmgeia R0!, {R3, R4, R5, R6}
        bge     arm_memxor8_loop1
        add     R2, R2, #16                     ; Undo the last (failed) decrement
arm_memxor8_loop2:
; Process up to 3 remaining 32-bit values
        tst     R2, #8
        ldmneia R0, {R3, R4}
        eorne   R3, R3, R1
        eorne   R4, R4, R1
        stmneia R0!, {R3, R4}
        tst     R2, #4
        ldrne   R3, [R0]
        eorne   R3, R3, R1
        strne   R3, [R0], #4
        and     R2, R2, #3
arm_memxor8_loop3:
; Process up to 3 remaining bytes; the XOR result (R3) is stored, not the pattern
        subs    R2, R2, #1
        ldrgeb  R3, [R0]
        eorge   R3, R3, R1
        strgeb  R3, [R0], #1
        subs    R2, R2, #1
        ldrgeb  R3, [R0]
        eorge   R3, R3, R1
        strgeb  R3, [R0], #1
        subs    R2, R2, #1
        ldrgeb  R3, [R0]
        eorge   R3, R3, R1
        strgeb  R3, [R0], #1
        ldmia   SP!, {R4, R5, R6}
        bx      LR
;-__arm int arm_memxor16(void* pDest, U32 c, U32 NumHalfWords);
;                             r0           r1     r2
;
; XOR NumHalfWords 16-bit items starting at pDest in place with (c & 0xFFFF).
;
;   R0    pDest, must be at least 16-bit aligned (not checked here)
;   R1    c, replicated into both half words
;   R2    NumHalfWords, treated as a SIGNED count (blt/bge)
;   R3    Data
;   R4-R6 Data (saved on stack)
;
; NOTE(review): no PUBLIC directive for this label is visible in this file.
;-------------------------------------------------------------------------------
arm_memxor16:
;-------------------------------------------------------------------------------
        stmdb   SP!, {R4, R5, R6}
        mov     R1, R1, LSL #+16                ; Replicate the low half word of c,
        orr     R1, R1, R1, LSR #+16            ; discarding any upper bits
        cmp     R2, #2
        blt     arm_memxor16_HandleTrailingHalfWord ; 1 or 0
; Alignment is known to be at least 16-bit
        tst     R0, #2
        ldrneh  R6, [R0]
        eorne   R6, R6, R1
        strneh  R6, [R0], #2                    ; xxxx-xx10 --->
        subne   R2, R2, #1                      ; xxxx-xx00
; Now we are 32-bit aligned
        cmp     R2, #8
        blt     arm_memxor16_HandleTrailingWords ; 7, 6, ... 0
        tst     R0, #4
        ldrne   R3, [R0]
        eorne   R3, R3, R1
        strne   R3, [R0], #4                    ; xxxx-x100 --->
        subne   R2, R2, #2                      ; xxxx-x000
; Now we are 64-bit aligned
        tst     R0, #8
        ldmneia R0, {R3, R4}
        eorne   R3, R3, R1
        eorne   R4, R4, R1
        stmneia R0!, {R3, R4}                   ; xxxx-1000 --->
        subne   R2, R2, #4                      ; xxxx-0000
arm_memxor16_HandleBulkWordData:
; Now we are 128-bit aligned
arm_memxor16_LoopHandleBulkWord:
; Process 4 32-bit values (8 half words) per loop iteration
        subs    R2, R2, #8
        ldmgeia R0, {R3, R4, R5, R6}
        eorge   R3, R3, R1
        eorge   R4, R4, R1
        eorge   R5, R5, R1
        eorge   R6, R6, R1
        stmgeia R0!, {R3, R4, R5, R6}
        bge     arm_memxor16_LoopHandleBulkWord
        add     R2, R2, #8                      ; Undo the last (failed) decrement
arm_memxor16_HandleTrailingWords:
; Process up to 3 remaining 32-bit values
        tst     R2, #4
        ldmneia R0, {R3, R4}
        eorne   R3, R3, R1
        eorne   R4, R4, R1
        stmneia R0!, {R3, R4}
        tst     R2, #2
        ldrne   R3, [R0]
        eorne   R3, R3, R1
        strne   R3, [R0], #4
        and     R2, R2, #1
arm_memxor16_HandleTrailingHalfWord:
; Process up to 1 remaining 16-bit value
        subs    R2, R2, #1
        ldrgeh  R3, [R0]
        eorge   R3, R3, R1
        strgeh  R3, [R0], #2
        ldmia   SP!, {R4, R5, R6}
        bx      LR
;-__arm int arm_memxor32(void* pDest, U32 c, U32 NumWords);
;                             r0           r1     r2
;
; XOR NumWords 32-bit items starting at pDest in place with the value c.
;
;   R0    pDest, must be at least 32-bit aligned (not checked here)
;   R1    c
;   R2    NumWords, treated as a SIGNED count (blt/bge)
;   R3    Data
;   R4-R6 Data (saved on stack)
;
; NOTE(review): no PUBLIC directive for this label is visible in this file.
;-------------------------------------------------------------------------------
arm_memxor32:
;-------------------------------------------------------------------------------
        stmdb   SP!, {R4, R5, R6}
        cmp     R2, #4
        blt     arm_memxor32_loop2              ; 0..3 words: single word accesses only
; Alignment is known to be at least 32-bit, is it 64-bit aligned ?
        tst     R0, #4
; No, it is 32-bit aligned
        ldrne   R3, [R0]
        eorne   R3, R3, R1
        strne   R3, [R0], #4
        subne   R2, R2, #1
; Now we are 64-bit aligned, is it 128-bit aligned ?
        tst     R0, #8
; No, it is 64-bit aligned
        ldmneia R0, {R3, R4}
        eorne   R3, R3, R1
        eorne   R4, R4, R1
        stmneia R0!, {R3, R4}                   ; xxxx-1000 --->
        subne   R2, R2, #2
; Now we are 128-bit aligned
arm_memxor32_loop1:
; Process 4 32-bit values per loop iteration
        subs    R2, R2, #4
        ldmgeia R0, {R3, R4, R5, R6}
        eorge   R3, R3, R1
        eorge   R4, R4, R1
        eorge   R5, R5, R1
        eorge   R6, R6, R1
        stmgeia R0!, {R3, R4, R5, R6}
        bge     arm_memxor32_loop1
        add     R2, R2, #4                      ; Undo the last (failed) decrement
arm_memxor32_loop2:
; Process up to 3 remaining 32-bit values
        subs    R2, R2, #1
        ldrge   R3, [R0]
        eorge   R3, R3, R1
        strge   R3, [R0], #4
        subs    R2, R2, #1
        ldrge   R3, [R0]
        eorge   R3, R3, R1
        strge   R3, [R0], #4
        subs    R2, R2, #1
        ldrge   R3, [R0]
        eorge   R3, R3, R1
        strge   R3, [R0], #4
        ldmia   SP!, {R4, R5, R6}
        bx      LR

        END

ARM Memory Copy的更多相关文章

  1. Android笔记:C memory copy

    socket通讯问题之一: 在c中按字节发送数据  比如设备1状态(1字节)值(1字节)设备2状态(1字节)值(1字节)....这种格式拆分的问题 在c中可以利用struct的 memory copy ...

  2. 【ARM-Linux开发】Linux内存管理:ARM Memory Layout以及mmu配置

    原文:Linux内存管理:ARM Memory Layout以及mmu配置 在内核进行page初始化以及mmu配置之前,首先需要知道整个memory map. 1. ARM Memory Layout ...

  3. 阅读ARM Memory(L1/L2/MMU)笔记

    <ARM Architecture Reference Manual ARMv8-A>里面有Memory层级框架图,从中可以看出L1.L2.DRAM.Disk.MMU之间的关系,以及他们在 ...

  4. [转]Whirlwind Tour of ARM Assembly

    ref:http://www.coranac.com/tonc/text/asm.htm 23.1. Introduction Very broadly speaking, you can divid ...

  5. 附录:ARM 手册 词汇表

    来自:<DDI0406C_C_arm_architecture_reference_manual.pdf>p2723 能够查询到:“RAZ RAO WI 等的意思” RAZ:Read-As ...

  6. Windows And Video Memory

    MSDN Blogs > Zemblanity > Windows And Video Memory   Windows And Video Memory Tom_Mulcahy 11 F ...

  7. Off-heap Memory in Apache Flink and the curious JIT compiler

    https://flink.apache.org/news/2015/09/16/off-heap-memory.html   Running data-intensive code in the J ...

  8. 如何展开Linux Memory Management学习?

    Linux的进程和内存是两座大山,没有翻过这两座大山对于内核的理解始终是不完整的. 关于Linux内存管理,在开始之前做些准备工作. 首先bing到了Quora的<How can one rea ...

  9. ARM架构相关学习归纳总结

    ARM作为一个生态不仅提供了CPU Core,还提供了一系列相关的IP,比如GIC.MMU.AMBA.CoreLink.CoreSight.Mali等等. 其他还包括Debug工具.开发工具.IDE等 ...

随机推荐

  1. Android改进版CoverFlow效果控件

    最近研究了一下如何在Android上实现CoverFlow效果的控件,其实早在2010年,就有Neil Davies开发并开源出了这个控件,Neil大神的这篇博客地址http://www.inter- ...

  2. shell 循环数组

    循环数组 ;i<${#o[*]};i++)) do echo ${o[$i]} done

  3. zabbix user parameters和Loadable modules的使用方法介绍

    目录 需求 实现 原理 前端配置 后端配置 shell实现 python实现 C实现 需求: 采集主机的-/+ buffers/cache  free的数据 实现: 采集/proc/meminfo中的 ...

  4. 第12月第25天 ImagePickerSheetController

    1.ImagePickerSheetController open class ImagePickerSheetController: UIViewController, UITableViewDat ...

  5. MVC js动态生成form提交数据然后生成文件下载

    前台: 点击触发下面事件 var turnForm = document.createElement("form"); //一定要加入到body中!! document.body. ...

  6. Project Euler Problem4

    Largest palindrome product Problem 4 A palindromic number reads the same both ways. The largest pali ...

  7. android调节声音大小

    android调节声音大小 1.背景音乐的一些知识 网上好多关于背景音乐添加用到的类: MediaPlayer,SoundPool,AudioManager的资料,可是有时候解决不了我们在开发中遇到的 ...

  8. Ibatis.Net 数据库操作学习(四)

    一.查询select 还记得第一篇示例中是如何读出数据库里3条数据的吗?就是调用了一个QueryForList方法,从方法名就知道,查询返回列表. 1.QueryForList  返回List< ...

  9. java 重新抛出异常

    一.有时希望把刚捕获的异常重新抛出,尤其是在使用Exception捕获所有异常的时候,既然已经得到了对当前异常对象的引用,可以重新把它抛出: catch(Exception e){ System.ou ...

  10. JS实现集合和ECMA6集合

    集合类似于数组,但是集合中的元素是唯一的,没有重复值的.就像你学高中数学的概念一样,集合还可以做很多比如,并集,交集,差集的计算.在ECMA6之前,JavaScript没有提供原生的Set类,所以只能 ...