diff options
Diffstat (limited to 'nuttx/arch/arm/src/armv7-m')
-rw-r--r-- | nuttx/arch/arm/src/armv7-m/Kconfig | 51 | ||||
-rw-r--r-- | nuttx/arch/arm/src/armv7-m/Toolchain.defs | 266 | ||||
-rw-r--r-- | nuttx/arch/arm/src/armv7-m/memcpy.S | 351 | ||||
-rw-r--r-- | nuttx/arch/arm/src/armv7-m/up_elf.c | 450 | ||||
-rw-r--r-- | nuttx/arch/arm/src/armv7-m/up_exception.S | 4 | ||||
-rw-r--r-- | nuttx/arch/arm/src/armv7-m/up_hardfault.c | 4 | ||||
-rw-r--r-- | nuttx/arch/arm/src/armv7-m/up_memcpy.S | 416 |
7 files changed, 1186 insertions, 356 deletions
diff --git a/nuttx/arch/arm/src/armv7-m/Kconfig b/nuttx/arch/arm/src/armv7-m/Kconfig new file mode 100644 index 000000000..dc5aa3915 --- /dev/null +++ b/nuttx/arch/arm/src/armv7-m/Kconfig @@ -0,0 +1,51 @@ +# +# For a description of the syntax of this configuration file, +# see misc/tools/kconfig-language.txt. +# + +comment "ARMV7M Configuration Options" + +choice + prompt "Toolchain Selection" + default ARMV7M_TOOLCHAIN_CODESOURCERYW if HOST_WINDOWS + default ARMV7M_TOOLCHAIN_GNU_EABI if !HOST_WINDOWS + +config ARMV7M_TOOLCHAIN_ATOLLIC + bool "Atollic Lite/Pro for Windows" + depends on HOST_WINDOWS + +config ARMV7M_TOOLCHAIN_BUILDROOT + bool "Buildroot (Cygwin or Linux)" + depends on !WINDOWS_NATIVE + +config ARMV7M_TOOLCHAIN_CODEREDL + bool "CodeRed for Linux" + depends on HOST_LINUX + +config ARMV7M_TOOLCHAIN_CODEREDW + bool "CodeRed for Windows" + depends on HOST_WINDOWS + +config ARMV7M_TOOLCHAIN_CODESOURCERYL + bool "CodeSourcery GNU toolchain under Linux" + depends on HOST_LINUX + +config ARMV7M_TOOLCHAIN_CODESOURCERYW + bool "CodeSourcery GNU toolchain under Windows" + depends on HOST_WINDOWS + +config ARMV7M_TOOLCHAIN_DEVKITARM + bool "devkitARM GNU toolchain" + depends on HOST_WINDOWS + +config ARMV7M_TOOLCHAIN_GNU_EABI + bool "Generic GNU EABI toolchain" + ---help--- + This option should work for any modern GNU toolchain (GCC 4.5 or newer) + configured for arm-none-eabi. + +config ARMV7M_TOOLCHAIN_RAISONANCE + bool "STMicro Raisonance for Windows" + depends on HOST_WINDOWS + +endchoice diff --git a/nuttx/arch/arm/src/armv7-m/Toolchain.defs b/nuttx/arch/arm/src/armv7-m/Toolchain.defs new file mode 100644 index 000000000..e214ce8bd --- /dev/null +++ b/nuttx/arch/arm/src/armv7-m/Toolchain.defs @@ -0,0 +1,266 @@ +############################################################################ +# arch/arm/src/armv7-m/Toolchain.defs +# +# Copyright (C) 2012 Gregory Nutt. All rights reserved. 
+# Author: Gregory Nutt <gnutt@nuttx.org> +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# 3. Neither the name NuttX nor the names of its contributors may be +# used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS +# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# +############################################################################ + +# Setup for the selected toolchain + +# +# Handle old-style chip-specific toolchain names in the absence of +# a new-style toolchain specification, force the selection of a single +# toolchain and allow the selected toolchain to be overridden by a +# command-line selection. 
+# + +ifeq ($(filter y, \ + $(CONFIG_LPC43_ATOLLIC_LITE) \ + $(CONFIG_STM32_ATOLLIC_LITE) \ + $(CONFIG_LPC43_ATOLLIC_PRO) \ + $(CONFIG_STM32_ATOLLIC_PRO) \ + $(CONFIG_ARMV7M_TOOLCHAIN_ATOLLIC) \ + ),y) + CONFIG_ARMV7M_TOOLCHAIN ?= ATOLLIC +endif +ifeq ($(filter y, \ + $(CONFIG_KINETIS_BUILDROOT) \ + $(CONFIG_LM3S_BUILDROOT) \ + $(CONFIG_LPC17_BUILDROOT) \ + $(CONFIG_LPC43_BUILDROOT) \ + $(CONFIG_SAM3U_BUILDROOT) \ + $(CONFIG_STM32_BUILDROOT) \ + $(CONFIG_ARMV7M_TOOLCHAIN_BUILDROOT) \ + ),y) + CONFIG_ARMV7M_TOOLCHAIN ?= BUILDROOT +endif +ifeq ($(filter y, \ + $(CONFIG_LPC17_CODEREDL) \ + $(CONFIG_ARMV7M_TOOLCHAIN_CODEREDL) \ + ),y) + CONFIG_ARMV7M_TOOLCHAIN ?= CODEREDL +endif +ifeq ($(filter y, \ + $(CONFIG_LPC17_CODEREDW) \ + $(CONFIG_LPC43_CODEREDW) \ + $(CONFIG_ARMV7M_TOOLCHAIN_CODEREDW) \ + ),y) + CONFIG_ARMV7M_TOOLCHAIN ?= CODEREDW +endif +ifeq ($(filter y, \ + $(CONFIG_KINETIS_CODESOURCERYL) \ + $(CONFIG_LM3S_CODESOURCERYL) \ + $(CONFIG_LPC17_CODESOURCERYL) \ + $(CONFIG_LPC43_CODESOURCERYL) \ + $(CONFIG_SAM3U_CODESOURCERYL) \ + $(CONFIG_STM32_CODESOURCERYL) \ + $(CONFIG_ARMV7M_TOOLCHAIN_CODESOURCERYL) \ + ),y) + CONFIG_ARMV7M_TOOLCHAIN ?= CODESOURCERYL +endif +ifeq ($(filter y, \ + $(CONFIG_KINETIS_CODESOURCERYW) \ + $(CONFIG_LM3S_CODESOURCERYW) \ + $(CONFIG_LPC17_CODESOURCERYW) \ + $(CONFIG_LPC43_CODESOURCERYW) \ + $(CONFIG_SAM3U_CODESOURCERYW) \ + $(CONFIG_STM32_CODESOURCERYW) \ + $(CONFIG_ARMV7M_TOOLCHAIN_CODESOURCERYW) \ + ),y) + CONFIG_ARMV7M_TOOLCHAIN ?= CODESOURCERYW +endif +ifeq ($(filter y, \ + $(CONFIG_KINETIS_DEVKITARM) \ + $(CONFIG_LM3S_DEVKITARM) \ + $(CONFIG_LPC17_DEVKITARM) \ + $(CONFIG_LPC43_DEVKITARM) \ + $(CONFIG_SAM3U_DEVKITARM) \ + $(CONFIG_STM32_DEVKITARM) \ + $(CONFIG_ARMV7M_TOOLCHAIN_DEVKITARM) \ + ),y) + CONFIG_ARMV7M_TOOLCHAIN ?= DEVKITARM +endif +ifeq ($(filter y, \ + $(CONFIG_ARMV7M_TOOLCHAIN_GNU_EABI) \ + ),y) + CONFIG_ARMV7M_TOOLCHAIN ?= GNU_EABI +endif +ifeq ($(filter y, \ + $(CONFIG_STM32_RAISONANCE) \ + 
$(CONFIG_ARMV7M_TOOLCHAIN_RAISONANCE) \ + ),y) + CONFIG_ARMV7M_TOOLCHAIN ?= RAISONANCE +endif + +# +# Supported toolchains +# +# TODO - It's likely that all of these toolchains now support the +# CortexM4. Since they are all GCC-based, we could almost +# certainly simplify this further. +# +# Each toolchain definition should set: +# +# CROSSDEV The GNU toolchain triple (command prefix) +# ARCROSSDEV If required, an alternative prefix used when +# invoking ar and nm. +# ARCHCPUFLAGS CPU-specific flags selecting the instruction set +# FPU options, etc. +# MAXOPTIMIZATION The maximum optimization level that results in +# reliable code generation. +# + +# Atollic toolchain under Windows + +ifeq ($(CONFIG_ARMV7M_TOOLCHAIN),ATOLLIC) + CROSSDEV = arm-atollic-eabi- + ARCROSSDEV = arm-atollic-eabi- + ifneq ($(CONFIG_WINDOWS_NATIVE),y) + WINTOOL = y + endif + ifeq ($(CONFIG_ARCH_CORTEXM4),y) + ifeq ($(CONFIG_ARCH_FPU),y) + ARCHCPUFLAGS = -mcpu=cortex-m4 -mthumb -march=armv7e-m -mfpu=fpv4-sp-d16 -mfloat-abi=hard + else + ARCHCPUFLAGS = -mcpu=cortex-m4 -mthumb -march=armv7e-m -mfloat-abi=soft + endif + else ifeq ($(CONFIG_ARCH_CORTEXM3),y) + ARCHCPUFLAGS = -mcpu=cortex-m3 -mthumb -mfloat-abi=soft + endif +endif + +# NuttX buildroot under Linux or Cygwin + +ifeq ($(CONFIG_ARMV7M_TOOLCHAIN),BUILDROOT) + # OABI + # CROSSDEV = arm-nuttx-elf- + # ARCROSSDEV = arm-nuttx-elf- + # ARCHCPUFLAGS = -mtune=cortex-m3 -march=armv7-m -mfloat-abi=soft + # EABI + CROSSDEV = arm-nuttx-eabi- + ARCROSSDEV = arm-nuttx-eabi- + ARCHCPUFLAGS = -mcpu=cortex-m3 -mthumb -mfloat-abi=soft + MAXOPTIMIZATION = -Os +endif + +# Code Red RedSuite under Linux + +ifeq ($(CONFIG_ARMV7M_TOOLCHAIN),CODEREDL) + CROSSDEV = arm-none-eabi- + ARCROSSDEV = arm-none-eabi- + ifeq ($(CONFIG_ARCH_CORTEXM4),y) + ifeq ($(CONFIG_ARCH_FPU),y) + ARCHCPUFLAGS = -mcpu=cortex-m4 -mthumb -march=armv7e-m -mfpu=fpv4-sp-d16 -mfloat-abi=hard + else + ARCHCPUFLAGS = -mcpu=cortex-m4 -mthumb -march=armv7e-m -mfloat-abi=soft + endif + else 
ifeq ($(CONFIG_ARCH_CORTEXM3),y) + ARCHCPUFLAGS = -mcpu=cortex-m3 -mthumb -mfloat-abi=soft + endif +endif + +# Code Red RedSuite under Windows + +ifeq ($(CONFIG_ARMV7M_TOOLCHAIN),CODEREDW) + CROSSDEV = arm-none-eabi- + ARCROSSDEV = arm-none-eabi- + ifneq ($(CONFIG_WINDOWS_NATIVE),y) + WINTOOL = y + endif + ifeq ($(CONFIG_ARCH_CORTEXM4),y) + ifeq ($(CONFIG_ARCH_FPU),y) + ARCHCPUFLAGS = -mcpu=cortex-m4 -mthumb -march=armv7e-m -mfpu=fpv4-sp-d16 -mfloat-abi=hard + else + ARCHCPUFLAGS = -mcpu=cortex-m4 -mthumb -march=armv7e-m -mfloat-abi=soft + endif + else ifeq ($(CONFIG_ARCH_CORTEXM3),y) + ARCHCPUFLAGS = -mcpu=cortex-m3 -mthumb -mfloat-abi=soft + endif +endif + +# CodeSourcery under Linux + +ifeq ($(CONFIG_ARMV7M_TOOLCHAIN),CODESOURCERYL) + CROSSDEV = arm-none-eabi- + ARCROSSDEV = arm-none-eabi- + ARCHCPUFLAGS = -mcpu=cortex-m3 -mthumb -mfloat-abi=soft + MAXOPTIMIZATION = -O2 +endif + +# CodeSourcery under Windows + +ifeq ($(CONFIG_ARMV7M_TOOLCHAIN),CODESOURCERYW) + CROSSDEV = arm-none-eabi- + ARCROSSDEV = arm-none-eabi- + ifneq ($(CONFIG_WINDOWS_NATIVE),y) + WINTOOL = y + endif + ARCHCPUFLAGS = -mcpu=cortex-m3 -mthumb -mfloat-abi=soft +endif + +# devkitARM under Windows + +ifeq ($(CONFIG_ARMV7M_TOOLCHAIN),DEVKITARM) + CROSSDEV = arm-eabi- + ARCROSSDEV = arm-eabi- + ifneq ($(CONFIG_WINDOWS_NATIVE),y) + WINTOOL = y + endif + ARCHCPUFLAGS = -mcpu=cortex-m3 -mthumb -mfloat-abi=soft +endif + +# Generic GNU EABI toolchain on OS X, Linux or any typical Posix system + +ifeq ($(CONFIG_ARMV7M_TOOLCHAIN),GNU_EABI) + CROSSDEV = arm-none-eabi- + ARCROSSDEV = arm-none-eabi- + MAXOPTIMIZATION = -O3 + ifeq ($(CONFIG_ARCH_CORTEXM4),y) + ifeq ($(CONFIG_ARCH_FPU),y) + ARCHCPUFLAGS = -mcpu=cortex-m4 -mthumb -march=armv7e-m -mfpu=fpv4-sp-d16 -mfloat-abi=hard + else + ARCHCPUFLAGS = -mcpu=cortex-m4 -mthumb -march=armv7e-m -mfloat-abi=soft + endif + else ifeq ($(CONFIG_ARCH_CORTEXM3),y) + ARCHCPUFLAGS = -mcpu=cortex-m3 -mthumb -mfloat-abi=soft + endif +endif + +# Raisonance RIDE7 under 
Windows + +ifeq ($(CONFIG_ARMV7M_TOOLCHAIN),RAISONANCE) + CROSSDEV = arm-none-eabi- + ARCROSSDEV = arm-none-eabi- + ifneq ($(CONFIG_WINDOWS_NATIVE),y) + WINTOOL = y + endif + ARCHCPUFLAGS = -mcpu=cortex-m3 -mthumb -mfloat-abi=soft +endif diff --git a/nuttx/arch/arm/src/armv7-m/memcpy.S b/nuttx/arch/arm/src/armv7-m/memcpy.S deleted file mode 100644 index c6d3ff649..000000000 --- a/nuttx/arch/arm/src/armv7-m/memcpy.S +++ /dev/null @@ -1,351 +0,0 @@ -@ -@ armv7m-optimised memcpy, apparently in the public domain -@ -@ Obtained via a posting on the Stellaris forum: -@ http://e2e.ti.com/support/microcontrollers/stellaris_arm_cortex-m3_microcontroller/f/473/t/44360.aspx -@ -@ Posted by rocksoft on Jul 24, 2008 10:19 AM -@ -@ Hi, -@ -@ I recently finished a "memcpy" replacement and thought it might be useful for others... -@ -@ I've put some instructions and the code here: -@ -@ http://www.rock-software.net/downloads/memcpy/ -@ -@ Hope it works for you as well as it did for me. -@ -@ Liam. -@ @ -@ ---------------------------------------------------------------------------- - -.syntax unified - -.thumb - -.cpu cortex-m3 - -@ ---------------------------------------------------------------------------- - - .global memcpy - - -@ ---------------------------------------------------------------------------- -@ Optimised "general" copy routine - -.text - -@ We have 16 possible alignment combinations of src and dst, this jump table directs the copy operation -@ Bits: Src=00, Dst=00 - Long to Long copy -@ Bits: Src=00, Dst=01 - Long to Byte before half word -@ Bits: Src=00, Dst=10 - Long to Half word -@ Bits: Src=00, Dst=11 - Long to Byte before long word -@ Bits: Src=01, Dst=00 - Byte before half word to long -@ Bits: Src=01, Dst=01 - Byte before half word to byte before half word - Same alignment -@ Bits: Src=01, Dst=10 - Byte before half word to half word -@ Bits: Src=01, Dst=11 - Byte before half word to byte before long word -@ Bits: Src=10, Dst=00 - Half word to long word -@ 
Bits: Src=10, Dst=01 - Half word to byte before half word -@ Bits: Src=10, Dst=10 - Half word to half word - Same Alignment -@ Bits: Src=10, Dst=11 - Half word to byte before long word -@ Bits: Src=11, Dst=00 - Byte before long word to long word -@ Bits: Src=11, Dst=01 - Byte before long word to byte before half word -@ Bits: Src=11, Dst=11 - Byte before long word to half word -@ Bits: Src=11, Dst=11 - Byte before long word to Byte before long word - Same alignment - -MEM_DataCopyTable: - .byte (MEM_DataCopy0 - MEM_DataCopyJump) >> 1 - .byte (MEM_DataCopy1 - MEM_DataCopyJump) >> 1 - .byte (MEM_DataCopy2 - MEM_DataCopyJump) >> 1 - .byte (MEM_DataCopy3 - MEM_DataCopyJump) >> 1 - .byte (MEM_DataCopy4 - MEM_DataCopyJump) >> 1 - .byte (MEM_DataCopy5 - MEM_DataCopyJump) >> 1 - .byte (MEM_DataCopy6 - MEM_DataCopyJump) >> 1 - .byte (MEM_DataCopy7 - MEM_DataCopyJump) >> 1 - .byte (MEM_DataCopy8 - MEM_DataCopyJump) >> 1 - .byte (MEM_DataCopy9 - MEM_DataCopyJump) >> 1 - .byte (MEM_DataCopy10 - MEM_DataCopyJump) >> 1 - .byte (MEM_DataCopy11 - MEM_DataCopyJump) >> 1 - .byte (MEM_DataCopy12 - MEM_DataCopyJump) >> 1 - .byte (MEM_DataCopy13 - MEM_DataCopyJump) >> 1 - .byte (MEM_DataCopy14 - MEM_DataCopyJump) >> 1 - .byte (MEM_DataCopy15 - MEM_DataCopyJump) >> 1 - - .align 2 - -@ ---------------------------------------------------------------------------- - -//#define 10 10 - -MEM_LongCopyTable: - .byte (MEM_LongCopyEnd - MEM_LongCopyJump) >> 1 @ 0 bytes left - .byte 0 @ 4 bytes left - .byte (1 * 10) >> 1 @ 8 bytes left - .byte (2 * 10) >> 1 @ 12 bytes left - .byte (3 * 10) >> 1 @ 16 bytes left - .byte (4 * 10) >> 1 @ 20 bytes left - .byte (5 * 10) >> 1 @ 24 bytes left - .byte (6 * 10) >> 1 @ 28 bytes left - .byte (7 * 10) >> 1 @ 32 bytes left - .byte (8 * 10) >> 1 @ 36 bytes left - - .align 2 - -@ ---------------------------------------------------------------------------- -@ r0 = destination, r1 = source, r2 = length - -.thumb_func - -memcpy: - push {r14} - - @ This allows the 
inner workings to "assume" a minimum amount of bytes - cmp r2, #4 - blt MEM_DataCopyBytes - - and r14, r0, #3 @ Get destination alignment bits - bfi r14, r1, #2, #2 @ Get source alignment bits - ldr r3, =MEM_DataCopyTable @ Jump table base - tbb [r3, r14] @ Perform jump on src/dst alignment bits -MEM_DataCopyJump: - - .align 4 - -@ ---------------------------------------------------------------------------- -@ Bits: Src=01, Dst=01 - Byte before half word to byte before half word - Same alignment -@ 3 bytes to read for long word aligning - -MEM_DataCopy5: - ldrb r3, [r1], #0x01 - strb r3, [r0], #0x01 - sub r2, r2, #0x01 - -@ ---------------------------------------------------------------------------- -@ Bits: Src=10, Dst=10 - Half word to half word - Same Alignment -@ 2 bytes to read for long word aligning - -MEM_DataCopy10: - ldrb r3, [r1], #0x01 - strb r3, [r0], #0x01 - sub r2, r2, #0x01 - -@ ---------------------------------------------------------------------------- -@ Bits: Src=11, Dst=11 - Byte before long word to Byte before long word - Same alignment -@ 1 bytes to read for long word aligning - -MEM_DataCopy15: - ldrb r3, [r1], #0x01 - strb r3, [r0], #0x01 - sub r2, r2, #0x01 - -@ ---------------------------------------------------------------------------- -@ Bits: Src=00, Dst=00 - Long to Long copy - -MEM_DataCopy0: - @ Save regs - push {r4-r12} - - cmp r2, #0x28 - blt MEM_DataCopy0_2 - -MEM_DataCopy0_1: - ldmia r1!, {r3-r12} - stmia r0!, {r3-r12} - sub r2, r2, #0x28 - cmp r2, #0x28 - bge MEM_DataCopy0_1 - -MEM_DataCopy0_2: - @ Copy remaining long words - ldr r14, =MEM_LongCopyTable - lsr r11, r2, #0x02 - tbb [r14, r11] - -MEM_LongCopyJump: - ldr.w r3, [r1], #0x04 @ 4 bytes remain - str.w r3, [r0], #0x04 - b MEM_LongCopyEnd - ldmia.w r1!, {r3-r4} @ 8 bytes remain - stmia.w r0!, {r3-r4} - b MEM_LongCopyEnd - ldmia.w r1!, {r3-r5} @ 12 bytes remain - stmia.w r0!, {r3-r5} - b MEM_LongCopyEnd - ldmia.w r1!, {r3-r6} @ 16 bytes remain - stmia.w r0!, {r3-r6} - b 
MEM_LongCopyEnd - ldmia.w r1!, {r3-r7} @ 20 bytes remain - stmia.w r0!, {r3-r7} - b MEM_LongCopyEnd - ldmia.w r1!, {r3-r8} @ 24 bytes remain - stmia.w r0!, {r3-r8} - b MEM_LongCopyEnd - ldmia.w r1!, {r3-r9} @ 28 bytes remain - stmia.w r0!, {r3-r9} - b MEM_LongCopyEnd - ldmia.w r1!, {r3-r10} @ 32 bytes remain - stmia.w r0!, {r3-r10} - b MEM_LongCopyEnd - ldmia.w r1!, {r3-r11} @ 36 bytes remain - stmia.w r0!, {r3-r11} - -MEM_LongCopyEnd: - pop {r4-r12} - and r2, r2, #0x03 @ All the longs have been copied - -@ ---------------------------------------------------------------------------- - -MEM_DataCopyBytes: - @ Deal with up to 3 remaining bytes - cmp r2, #0x00 - it eq - popeq {pc} - ldrb r3, [r1], #0x01 - strb r3, [r0], #0x01 - subs r2, r2, #0x01 - it eq - popeq {pc} - ldrb r3, [r1], #0x01 - strb r3, [r0], #0x01 - subs r2, r2, #0x01 - it eq - popeq {pc} - ldrb r3, [r1], #0x01 - strb r3, [r0], #0x01 - pop {pc} - - .align 4 - -@ ---------------------------------------------------------------------------- -@ Bits: Src=01, Dst=11 - Byte before half word to byte before long word -@ 3 bytes to read for long word aligning the source - -MEM_DataCopy7: - ldrb r3, [r1], #0x01 - strb r3, [r0], #0x01 - sub r2, r2, #0x01 - -@ ---------------------------------------------------------------------------- -@ Bits: Src=10, Dst=00 - Half word to long word -@ 2 bytes to read for long word aligning the source - -MEM_DataCopy8: - ldrb r3, [r1], #0x01 - strb r3, [r0], #0x01 - sub r2, r2, #0x01 - -@ ---------------------------------------------------------------------------- -@ Bits: Src=11, Dst=01 - Byte before long word to byte before half word -@ 1 byte to read for long word aligning the source - -MEM_DataCopy13: - ldrb r3, [r1], #0x01 - strb r3, [r0], #0x01 - sub r2, r2, #0x01 - -@ ---------------------------------------------------------------------------- -@ Bits: Src=00, Dst=10 - Long to Half word - -MEM_DataCopy2: - cmp r2, #0x28 - blt MEM_DataCopy2_1 - - @ Save regs - push {r4-r12} 
-MEM_DataCopy2_2: - ldmia r1!, {r3-r12} - - strh r3, [r0], #0x02 - - lsr r3, r3, #0x10 - bfi r3, r4, #0x10, #0x10 - lsr r4, r4, #0x10 - bfi r4, r5, #0x10, #0x10 - lsr r5, r5, #0x10 - bfi r5, r6, #0x10, #0x10 - lsr r6, r6, #0x10 - bfi r6, r7, #0x10, #0x10 - lsr r7, r7, #0x10 - bfi r7, r8, #0x10, #0x10 - lsr r8, r8, #0x10 - bfi r8, r9, #0x10, #0x10 - lsr r9, r9, #0x10 - bfi r9, r10, #0x10, #0x10 - lsr r10, r10, #0x10 - bfi r10, r11, #0x10, #0x10 - lsr r11, r11, #0x10 - bfi r11, r12, #0x10, #0x10 - stmia r0!, {r3-r11} - lsr r12, r12, #0x10 - strh r12, [r0], #0x02 - - sub r2, r2, #0x28 - cmp r2, #0x28 - bge MEM_DataCopy2_2 - pop {r4-r12} - -MEM_DataCopy2_1: @ Read longs and write 2 x half words - cmp r2, #4 - blt MEM_DataCopyBytes - ldr r3, [r1], #0x04 - strh r3, [r0], #0x02 - lsr r3, r3, #0x10 - strh r3, [r0], #0x02 - sub r2, r2, #0x04 - b MEM_DataCopy2 - -@ ---------------------------------------------------------------------------- -@ Bits: Src=01, Dst=00 - Byte before half word to long -@ Bits: Src=01, Dst=10 - Byte before half word to half word -@ 3 bytes to read for long word aligning the source - -MEM_DataCopy4: -MEM_DataCopy6: - @ Read B and write B - ldrb r3, [r1], #0x01 - strb r3, [r0], #0x01 - sub r2, r2, #0x01 - -@ ---------------------------------------------------------------------------- -@ Bits: Src=10, Dst=01 - Half word to byte before half word -@ Bits: Src=10, Dst=11 - Half word to byte before long word -@ 2 bytes to read for long word aligning the source - -MEM_DataCopy9: -MEM_DataCopy11: - ldrb r3, [r1], #0x01 - strb r3, [r0], #0x01 - sub r2, r2, #0x01 - -@ ---------------------------------------------------------------------------- -@ Bits: Src=11, Dst=00 - Byte before long word to long word -@ Bits: Src=11, Dst=11 - Byte before long word to half word -@ 1 byte to read for long word aligning the source - -MEM_DataCopy12: -MEM_DataCopy14: - @ Read B and write B - ldrb r3, [r1], #0x01 - strb r3, [r0], #0x01 - sub r2, r2, #0x01 - -@ 
---------------------------------------------------------------------------- -@ Bits: Src=00, Dst=01 - Long to Byte before half word -@ Bits: Src=00, Dst=11 - Long to Byte before long word - -MEM_DataCopy1: @ Read longs, write B->H->B -MEM_DataCopy3: - cmp r2, #4 - blt MEM_DataCopyBytes - ldr r3, [r1], #0x04 - strb r3, [r0], #0x01 - lsr r3, r3, #0x08 - strh r3, [r0], #0x02 - lsr r3, r3, #0x10 - strb r3, [r0], #0x01 - sub r2, r2, #0x04 - b MEM_DataCopy3 - -@ ---------------------------------------------------------------------------- - diff --git a/nuttx/arch/arm/src/armv7-m/up_elf.c b/nuttx/arch/arm/src/armv7-m/up_elf.c new file mode 100644 index 000000000..b838a6905 --- /dev/null +++ b/nuttx/arch/arm/src/armv7-m/up_elf.c @@ -0,0 +1,450 @@ +/**************************************************************************** + * arch/arm/src/armv7-m/up_elf.c + * + * Copyright (C) 2012 Gregory Nutt. All rights reserved. + * Author: Gregory Nutt <gnutt@nuttx.org> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name NuttX nor the names of its contributors may be + * used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + ****************************************************************************/ + +/**************************************************************************** + * Included Files + ****************************************************************************/ + +#include <nuttx/config.h> + +#include <stdlib.h> +#include <elf32.h> +#include <errno.h> +#include <debug.h> + +#include <arch/elf.h> +#include <nuttx/arch.h> +#include <nuttx/binfmt/elf.h> + +/**************************************************************************** + * Pre-processor Definitions + ****************************************************************************/ + +/**************************************************************************** + * Private Data + ****************************************************************************/ + +/**************************************************************************** + * Private Functions + ****************************************************************************/ + +/**************************************************************************** + * Public Functions + ****************************************************************************/ + +/**************************************************************************** + * Name: arch_checkarch + * + * Description: + * Given the ELF header in 'hdr', verify that the ELF file is appropriate + * for the current, configured architecture. 
Every architecture that uses + * the ELF loader must provide this function. + * + * Input Parameters: + * hdr - The ELF header read from the ELF file. + * + * Returned Value: + * True if the architecture supports this ELF file. + * + ****************************************************************************/ + +bool arch_checkarch(FAR const Elf32_Ehdr *ehdr) +{ + /* Make sure it's an ARM executable */ + + if (ehdr->e_machine != EM_ARM) + { + bdbg("Not for ARM: e_machine=%04x\n", ehdr->e_machine); + return -ENOEXEC; + } + + /* Make sure that 32-bit objects are supported */ + + if (ehdr->e_ident[EI_CLASS] != ELFCLASS32) + { + bdbg("Need 32-bit objects: e_ident[EI_CLASS]=%02x\n", ehdr->e_ident[EI_CLASS]); + return -ENOEXEC; + } + + /* Verify endian-ness */ + +#ifdef CONFIG_ENDIAN_BIG + if (ehdr->e_ident[EI_DATA] != ELFDATA2MSB) +#else + if (ehdr->e_ident[EI_DATA] != ELFDATA2LSB) +#endif + { + bdbg("Wrong endian-ness: e_ident[EI_DATA]=%02x\n", ehdr->e_ident[EI_DATA]); + return -ENOEXEC; + } + + /* TODO: Check ABI here. */ + return OK; +} + +/**************************************************************************** + * Name: arch_relocate and arch_relocateadd + * + * Description: + * Perform on architecture-specific ELF relocation. Every architecture + * that uses the ELF loader must provide this function. + * + * Input Parameters: + * rel - The relocation type + * sym - The ELF symbol structure containing the fully resolved value. + * addr - The address that requires the relocation. + * + * Returned Value: + * Zero (OK) if the relocation was successful. Otherwise, a negated errno + * value indicating the cause of the relocation failure. 
+ * + ****************************************************************************/ + +int arch_relocate(FAR const Elf32_Rel *rel, FAR const Elf32_Sym *sym, + uintptr_t addr) +{ + int32_t offset; + uint32_t upper_insn; + uint32_t lower_insn; + + switch (ELF32_R_TYPE(rel->r_info)) + { + case R_ARM_NONE: + { + /* No relocation */ + } + break; + + case R_ARM_PC24: + case R_ARM_CALL: + case R_ARM_JUMP24: + { + bvdbg("Performing PC24 [%d] link at addr %08lx [%08lx] to sym '%s' st_value=%08lx\n", + ELF32_R_TYPE(rel->r_info), (long)addr, (long)(*(uint32_t*)addr), + sym, (long)sym->st_value); + + offset = (*(uint32_t*)addr & 0x00ffffff) << 2; + if (offset & 0x02000000) + { + offset -= 0x04000000; + } + + offset += sym->st_value - addr; + if (offset & 3 || offset <= (int32_t) 0xfe000000 || offset >= (int32_t) 0x02000000) + { + bdbg(" ERROR: PC24 [%d] relocation out of range, offset=%08lx\n", + ELF32_R_TYPE(rel->r_info), offset); + + return -EINVAL; + } + + offset >>= 2; + + *(uint32_t*)addr &= 0xff000000; + *(uint32_t*)addr |= offset & 0x00ffffff; + } + break; + + case R_ARM_ABS32: + case R_ARM_TARGET1: /* New ABI: TARGET1 always treated as ABS32 */ + { + bvdbg("Performing ABS32 link at addr=%08lx [%08lx] to sym=%p st_value=%08lx\n", + (long)addr, (long)(*(uint32_t*)addr), sym, (long)sym->st_value); + + *(uint32_t*)addr += sym->st_value; + } + break; + + case R_ARM_THM_CALL: + case R_ARM_THM_JUMP24: + { + uint32_t S; + uint32_t J1; + uint32_t J2; + + /* Thumb BL and B.W instructions. 
Encoding: + * + * upper_insn: + * + * 1 1 1 1 1 1 + * 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +----------+---+-------------------------------+--------------+ + * |1 1 1 |OP1| OP2 | | 32-Bit Instructions + * +----------+---+--+-----+----------------------+--------------+ + * |1 1 1 | 1 0| S | imm10 | BL Instruction + * +----------+------+-----+-------------------------------------+ + * + * lower_insn: + * + * 1 1 1 1 1 1 + * 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +---+---------------------------------------------------------+ + * |OP | | 32-Bit Instructions + * +---+--+---+---+---+------------------------------------------+ + * |1 1 |J1 | 1 |J2 | imm11 | BL Instruction + * +------+---+---+---+------------------------------------------+ + * + * The branch target is encoded in these bits: + * + * S = upper_insn[10] + * imm10 = upper_insn[0:9] + * imm11 = lower_insn[0:10] + * J1 = lower_insn[13] + * J2 = lower_insn[11] + */ + + upper_insn = (uint32_t)(*(uint16_t*)addr); + lower_insn = (uint32_t)(*(uint16_t*)(addr + 2)); + + bvdbg("Performing THM_JUMP24 [%d] link at addr=%08lx [%04x %04x] to sym=%p st_value=%08lx\n", + ELF32_R_TYPE(rel->r_info), (long)addr, (int)upper_insn, (int)lower_insn, + sym, (long)sym->st_value); + + /* Extract the 25-bit offset from the 32-bit instruction: + * + * offset[24] = S + * offset[23] = ~(J1 ^ S) + * offset[22] = ~(J2 ^ S)] + * offset[12:21] = imm10 + * offset[1:11] = imm11 + * offset[0] = 0 + */ + + S = (upper_insn >> 10) & 1; + J1 = (lower_insn >> 13) & 1; + J2 = (lower_insn >> 11) & 1; + + offset = (S << 24) | /* S - > offset[24] */ + ((~(J1 ^ S) & 1) << 23) | /* J1 -> offset[23] */ + ((~(J2 ^ S) & 1) << 22) | /* J2 -> offset[22] */ + ((upper_insn & 0x03ff) << 12) | /* imm10 -> offset[12:21] */ + ((lower_insn & 0x07ff) << 1); /* imm11 -> offset[1:11] */ + /* 0 -> offset[0] */ + + /* Sign extend */ + + if (offset & 0x01000000) + { + offset -= 0x02000000; + } + + /* And perform the relocation */ + + bvdbg(" S=%d J1=%d J2=%d offset=%08lx branch 
target=%08lx\n", + S, J1, J2, (long)offset, offset + sym->st_value - addr); + + offset += sym->st_value - addr; + + /* Is this a function symbol? If so, then the branch target must be + * an odd Thumb address + */ + + if (ELF32_ST_TYPE(sym->st_info) == STT_FUNC && (offset & 1) == 0) + { + bdbg(" ERROR: JUMP24 [%d] requires odd offset, offset=%08lx\n", + ELF32_R_TYPE(rel->r_info), offset); + + return -EINVAL; + } + + /* Check the range of the offset */ + + if (offset <= (int32_t)0xff000000 || offset >= (int32_t)0x01000000) + { + bdbg(" ERROR: JUMP24 [%d] relocation out of range, branch taget=%08lx\n", + ELF32_R_TYPE(rel->r_info), offset); + + return -EINVAL; + } + + /* Now, reconstruct the 32-bit instruction using the new, relocated + * branch target. + */ + + S = (offset >> 24) & 1; + J1 = S ^ (~(offset >> 23) & 1); + J2 = S ^ (~(offset >> 22) & 1); + + upper_insn = ((upper_insn & 0xf800) | (S << 10) | ((offset >> 12) & 0x03ff)); + *(uint16_t*)addr = (uint16_t)upper_insn; + + lower_insn = ((lower_insn & 0xd000) | (J1 << 13) | (J2 << 11) | ((offset >> 1) & 0x07ff)); + *(uint16_t*)(addr + 2) = (uint16_t)lower_insn; + + bvdbg(" S=%d J1=%d J2=%d insn [%04x %04x]\n", + S, J1, J2, (int)upper_insn, (int)lower_insn); + } + break; + + case R_ARM_V4BX: + { + bvdbg("Performing V4BX link at addr=%08lx [%08lx]\n", + (long)addr, (long)(*(uint32_t*)addr)); + + /* Preserve only Rm and the condition code */ + + *(uint32_t*)addr &= 0xf000000f; + + /* Change instruction to 'mov pc, Rm' */ + + *(uint32_t*)addr |= 0x01a0f000; + } + break; + + case R_ARM_PREL31: + { + bvdbg("Performing PREL31 link at addr=%08lx [%08lx] to sym=%p st_value=%08lx\n", + (long)addr, (long)(*(uint32_t*)addr), sym, (long)sym->st_value); + + offset = *(uint32_t*)addr + sym->st_value - addr; + *(uint32_t*)addr = offset & 0x7fffffff; + } + break; + + case R_ARM_MOVW_ABS_NC: + case R_ARM_MOVT_ABS: + { + bvdbg("Performing MOVx_ABS [%d] link at addr=%08lx [%08lx] to sym=%p st_value=%08lx\n", + 
ELF32_R_TYPE(rel->r_info), (long)addr, (long)(*(uint32_t*)addr), + sym, (long)sym->st_value); + + offset = *(uint32_t*)addr; + offset = ((offset & 0xf0000) >> 4) | (offset & 0xfff); + offset = (offset ^ 0x8000) - 0x8000; + + offset += sym->st_value; + if (ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_ABS) + { + offset >>= 16; + } + + *(uint32_t*)addr &= 0xfff0f000; + *(uint32_t*)addr |= ((offset & 0xf000) << 4) | (offset & 0x0fff); + } + break; + + case R_ARM_THM_MOVW_ABS_NC: + case R_ARM_THM_MOVT_ABS: + { + /* Thumb BL and B.W instructions. Encoding: + * + * upper_insn: + * + * 1 1 1 1 1 1 + * 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +----------+---+-------------------------------+--------------+ + * |1 1 1 |OP1| OP2 | | 32-Bit Instructions + * +----------+---+--+-----+----------------------+--------------+ + * |1 1 1 | 1 0| i | 1 0 1 1 0 0 | imm4 | MOVT Instruction + * +----------+------+-----+----------------------+--------------+ + * + * lower_insn: + * + * 1 1 1 1 1 1 + * 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +---+---------------------------------------------------------+ + * |OP | | 32-Bit Instructions + * +---+----------+--------------+-------------------------------+ + * |0 | imm3 | Rd | imm8 | MOVT Instruction + * +---+----------+--------------+-------------------------------+ + * + * The 16-bit immediate value is encoded in these bits: + * + * i = imm16[11] = upper_insn[10] + * imm4 = imm16[12:15] = upper_insn[3:0] + * imm3 = imm16[8:10] = lower_insn[14:12] + * imm8 = imm16[0:7] = lower_insn[7:0] + */ + + upper_insn = (uint32_t)(*(uint16_t*)addr); + lower_insn = (uint32_t)(*(uint16_t*)(addr + 2)); + + bvdbg("Performing THM_MOVx [%d] link at addr=%08lx [%04x %04x] to sym=%p st_value=%08lx\n", + ELF32_R_TYPE(rel->r_info), (long)addr, (int)upper_insn, (int)lower_insn, + sym, (long)sym->st_value); + + /* Extract the 16-bit offset from the 32-bit instruction */ + + offset = ((upper_insn & 0x000f) << 12) | /* imm4 -> imm16[8:10] */ + ((upper_insn & 0x0400) << 1) | /* i -> 
imm16[11] */ + ((lower_insn & 0x7000) >> 4) | /* imm3 -> imm16[8:10] */ + (lower_insn & 0x00ff); /* imm8 -> imm16[0:7] */ + + /* Sign extend */ + + offset = (offset ^ 0x8000) - 0x8000; + + /* And perform the relocation */ + + bvdbg(" offset=%08lx branch target=%08lx\n", + (long)offset, offset + sym->st_value); + + offset += sym->st_value; + + /* Update the immediate value in the instruction. For MOVW we want the bottom + * 16-bits; for MOVT we want the top 16-bits. + */ + + if (ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_ABS) + { + offset >>= 16; + } + + upper_insn = ((upper_insn & 0xfbf0) | ((offset & 0xf000) >> 12) | ((offset & 0x0800) >> 1)); + *(uint16_t*)addr = (uint16_t)upper_insn; + + lower_insn = ((lower_insn & 0x8f00) | ((offset & 0x0700) << 4) | (offset & 0x00ff)); + *(uint16_t*)(addr + 2) = (uint16_t)lower_insn; + + bvdbg(" insn [%04x %04x]\n", + (int)upper_insn, (int)lower_insn); + } + break; + + default: + bdbg("Unsupported relocation: %d\n", ELF32_R_TYPE(rel->r_info)); + return -EINVAL; + } + + return OK; +} + +/* arch_relocateadd: stub for relocations described by an Elf32_Rela entry. + * It performs no relocation: it logs "RELA relocation not supported" through + * bdbg() and returns -ENOSYS. The rel, sym and addr arguments are not used. + */ + +int arch_relocateadd(FAR const Elf32_Rela *rel, FAR const Elf32_Sym *sym, + uintptr_t addr) +{ + bdbg("RELA relocation not supported\n"); + return -ENOSYS; +} + diff --git a/nuttx/arch/arm/src/armv7-m/up_exception.S b/nuttx/arch/arm/src/armv7-m/up_exception.S index 31d8dbb0c..c9f216027 100644 --- a/nuttx/arch/arm/src/armv7-m/up_exception.S +++ b/nuttx/arch/arm/src/armv7-m/up_exception.S @@ -134,9 +134,9 @@ exception_common: #if CONFIG_ARCH_INTERRUPTSTACK > 3 ldr sp, =g_intstackbase - push r1 /* Save the MSP on the interrupt stack */ + push {r1} /* Save the MSP on the interrupt stack */ bl up_doirq /* R0=IRQ, R1=register save area on stack */ - pop r1 /* Recover R1=main stack pointer */ + pop {r1} /* Recover R1=main stack pointer */ #else msr msp, r1 /* We are using the main stack pointer */ bl up_doirq /* R0=IRQ, R1=register save area on stack */ diff --git a/nuttx/arch/arm/src/armv7-m/up_hardfault.c b/nuttx/arch/arm/src/armv7-m/up_hardfault.c 
index cb3ce9e8a..c30015ad2 100644 --- a/nuttx/arch/arm/src/armv7-m/up_hardfault.c +++ b/nuttx/arch/arm/src/armv7-m/up_hardfault.c @@ -57,9 +57,7 @@ /* Debug output from this file may interfere with context switching! */ -#undef DEBUG_HARDFAULTS /* Define to debug hard faults */ - -#ifdef DEBUG_HARDFAULTS +#ifdef CONFIG_DEBUG_HARDFAULT # define hfdbg(format, arg...) lldbg(format, ##arg) #else # define hfdbg(x...) diff --git a/nuttx/arch/arm/src/armv7-m/up_memcpy.S b/nuttx/arch/arm/src/armv7-m/up_memcpy.S new file mode 100644 index 000000000..a154cab61 --- /dev/null +++ b/nuttx/arch/arm/src/armv7-m/up_memcpy.S @@ -0,0 +1,416 @@ +/************************************************************************************ + * nuttx/arch/arm/src/armv7-m/up_memcpy.S + * + * armv7m-optimised memcpy, contributed by Mike Smith. Apparently in the public + * domain and is re-released here under the modified BSD license: + * + * Obtained via a posting on the Stellaris forum: + * http://e2e.ti.com/support/microcontrollers/\ + * stellaris_arm_cortex-m3_microcontroller/f/473/t/44360.aspx + * + * Posted by rocksoft on Jul 24, 2008 10:19 AM + * + * Hi, + * + * I recently finished a "memcpy" replacement and thought it might be useful for + * others... + * + * I've put some instructions and the code here: + * + * http://www.rock-software.net/downloads/memcpy/ + * + * Hope it works for you as well as it did for me. + * + * Liam. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. 
Neither the name NuttX nor the names of its contributors may be + * used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + ************************************************************************************/ + +/************************************************************************************ + * Global Symbols + ************************************************************************************/ + + .global memcpy + + .syntax unified + .thumb + .cpu cortex-m3 + .file "up_memcpy.S" + +/************************************************************************************ + * .text + ************************************************************************************/ + + .text + +/************************************************************************************ + * Private Constant Data + ************************************************************************************/ + +/* We have 16 possible alignment combinations of src and dst, this jump table + * directs the copy operation + * + * Bits: Src=00, Dst=00 - Long to Long copy + * Bits: Src=00, Dst=01 - Long to Byte before half word + * Bits: Src=00, 
Dst=10 - Long to Half word + * Bits: Src=00, Dst=11 - Long to Byte before long word + * Bits: Src=01, Dst=00 - Byte before half word to long + * Bits: Src=01, Dst=01 - Byte before half word to byte before half word - + * Same alignment + * Bits: Src=01, Dst=10 - Byte before half word to half word + * Bits: Src=01, Dst=11 - Byte before half word to byte before long word + * Bits: Src=10, Dst=00 - Half word to long word + * Bits: Src=10, Dst=01 - Half word to byte before half word + * Bits: Src=10, Dst=10 - Half word to half word - Same Alignment + * Bits: Src=10, Dst=11 - Half word to byte before long word + * Bits: Src=11, Dst=00 - Byte before long word to long word + * Bits: Src=11, Dst=01 - Byte before long word to byte before half word + * Bits: Src=11, Dst=10 - Byte before long word to half word + * Bits: Src=11, Dst=11 - Byte before long word to Byte before long word - + * Same alignment + * + * The table is indexed by (source bits << 2) | destination bits. Each entry + * is half the byte distance from MEM_DataCopyJump to the handler, which is + * the form the tbb instruction in memcpy consumes. + */ + +MEM_DataCopyTable: + .byte (MEM_DataCopy0 - MEM_DataCopyJump) >> 1 + .byte (MEM_DataCopy1 - MEM_DataCopyJump) >> 1 + .byte (MEM_DataCopy2 - MEM_DataCopyJump) >> 1 + .byte (MEM_DataCopy3 - MEM_DataCopyJump) >> 1 + .byte (MEM_DataCopy4 - MEM_DataCopyJump) >> 1 + .byte (MEM_DataCopy5 - MEM_DataCopyJump) >> 1 + .byte (MEM_DataCopy6 - MEM_DataCopyJump) >> 1 + .byte (MEM_DataCopy7 - MEM_DataCopyJump) >> 1 + .byte (MEM_DataCopy8 - MEM_DataCopyJump) >> 1 + .byte (MEM_DataCopy9 - MEM_DataCopyJump) >> 1 + .byte (MEM_DataCopy10 - MEM_DataCopyJump) >> 1 + .byte (MEM_DataCopy11 - MEM_DataCopyJump) >> 1 + .byte (MEM_DataCopy12 - MEM_DataCopyJump) >> 1 + .byte (MEM_DataCopy13 - MEM_DataCopyJump) >> 1 + .byte (MEM_DataCopy14 - MEM_DataCopyJump) >> 1 + .byte (MEM_DataCopy15 - MEM_DataCopyJump) >> 1 + + .align 2 + +/* Indexed by the number of whole long words still to copy (0-9). Entry 0 + * goes straight to MEM_LongCopyEnd. The other entries are hard-coded + * multiples of 10 bytes, so they are only valid while every copy stanza that + * follows MEM_LongCopyJump assembles to exactly 10 bytes. + */ + +MEM_LongCopyTable: + .byte (MEM_LongCopyEnd - MEM_LongCopyJump) >> 1 /* 0 bytes left */ + .byte 0 /* 4 bytes left */ + .byte (1 * 10) >> 1 /* 8 bytes left */ + .byte (2 * 10) >> 1 /* 12 bytes left */ + .byte (3 * 10) >> 1 /* 16 bytes left */ + .byte (4 * 10) >> 1 /* 20 bytes 
left */
+    .byte   (5 * 10) >> 1               /* 24 bytes left */
+    .byte   (6 * 10) >> 1               /* 28 bytes left */
+    .byte   (7 * 10) >> 1               /* 32 bytes left */
+    .byte   (8 * 10) >> 1               /* 36 bytes left */
+
+    .align  2
+
+/************************************************************************************
+ * Public Functions
+ ************************************************************************************/
+/************************************************************************************
+ * Name: memcpy
+ *
+ * Description:
+ *   Optimised "general" copy routine.  The low two bits of the source and
+ *   destination addresses select one of 16 entry points through
+ *   MEM_DataCopyTable.  Each entry point first copies single bytes until the
+ *   source is long word aligned, then copies long words using the widest store
+ *   the destination alignment allows, and finally copies the 0-3 trailing bytes
+ *   at MEM_DataCopyBytes.
+ *
+ * Input Parameters:
+ *   r0 = destination, r1 = source, r2 = length (unsigned byte count)
+ *
+ * Returned Value:
+ *   r0 = destination, exactly as passed in
+ *
+ * Register Usage:
+ *   r1, r2, r3 and r14 are used as scratch.  r4-r12 are pushed before and
+ *   popped after the multi-register transfers that use them.  The condition
+ *   flags are modified.
+ *
+ ************************************************************************************/
+
+    .thumb_func
+memcpy:
+    /* Save the caller's destination pointer together with the return address.
+     * r0 is advanced by every store below, but memcpy() must return the
+     * original destination, so every exit pops {r0, pc}.
+     */
+
+    push    {r0, r14}
+
+    /* This allows the inner workings to "assume" a minimum amount of bytes.
+     * Quickly check for very short copies.  The length is unsigned, so the
+     * unsigned condition codes (lo/hs) are used throughout.
+     */
+
+    cmp     r2, #4
+    blo     MEM_DataCopyBytes
+
+    and     r14, r0, #3                 /* Get destination alignment bits */
+    bfi     r14, r1, #2, #2             /* Get source alignment bits */
+    ldr     r3, =MEM_DataCopyTable      /* Jump table base */
+    tbb     [r3, r14]                   /* Perform jump on src/dst alignment bits */
+MEM_DataCopyJump:
+
+    .align  4
+
+/* Bits: Src=01, Dst=01 - Byte before half word to byte before half word - Same alignment
+ * 3 bytes to read for long word aligning
+ */
+
+MEM_DataCopy5:
+    ldrb    r3, [r1], #0x01
+    strb    r3, [r0], #0x01
+    sub     r2, r2, #0x01
+
+/* Bits: Src=10, Dst=10 - Half word to half word - Same Alignment
+ * 2 bytes to read for long word aligning
+ */
+
+MEM_DataCopy10:
+    ldrb    r3, [r1], #0x01
+    strb    r3, [r0], #0x01
+    sub     r2, r2, #0x01
+
+/* Bits: Src=11, Dst=11 - Byte before long word to Byte before long word - Same alignment
+ * 1 byte to read for long word aligning
+ */
+
+MEM_DataCopy15:
+    ldrb    r3, [r1], #0x01
+    strb    r3, [r0], #0x01
+    sub     r2, r2, #0x01
+
+/* Bits: Src=00, Dst=00 - Long to Long copy */
+
+MEM_DataCopy0:
+    /* Save regs that may be used by memcpy */
+
+    push    {r4-r12}
+
+    /* Check for short word-aligned copy */
+
+    cmp     r2, #0x28
+    blo     MEM_DataCopy0_2
+
+    /* Bulk copy loop: 10 long words (0x28 bytes) per pass */
+
+MEM_DataCopy0_1:
+    ldmia   r1!, {r3-r12}
+    stmia   r0!, {r3-r12}
+    sub     r2, r2, #0x28
+    cmp     r2, #0x28
+    bhs     MEM_DataCopy0_1
+
+    /* Copy remaining long words: fewer than 0x28 bytes are left here, so the
+     * long word count in r11 is 0-9 and indexes MEM_LongCopyTable.
+     */
+
+MEM_DataCopy0_2:
+    ldr     r14, =MEM_LongCopyTable
+    lsr     r11, r2, #0x02
+    tbb     [r14, r11]
+
+    /* Longword copy branch table anchor.  MEM_LongCopyTable hard-codes a
+     * stride of 10 bytes per stanza: two 32-bit (.w) instructions plus one
+     * 16-bit branch.  The branches are written as b.n so that the assembler
+     * reports an error instead of silently emitting a wider encoding.
+     */
+
+MEM_LongCopyJump:
+    ldr.w   r3, [r1], #0x04             /* 4 bytes remain */
+    str.w   r3, [r0], #0x04
+    b.n     MEM_LongCopyEnd
+    ldmia.w r1!, {r3-r4}                /* 8 bytes remain */
+    stmia.w r0!, {r3-r4}
+    b.n     MEM_LongCopyEnd
+    ldmia.w r1!, {r3-r5}                /* 12 bytes remain */
+    stmia.w r0!, {r3-r5}
+    b.n     MEM_LongCopyEnd
+    ldmia.w r1!, {r3-r6}                /* 16 bytes remain */
+    stmia.w r0!, {r3-r6}
+    b.n     MEM_LongCopyEnd
+    ldmia.w r1!, {r3-r7}                /* 20 bytes remain */
+    stmia.w r0!, {r3-r7}
+    b.n     MEM_LongCopyEnd
+    ldmia.w r1!, {r3-r8}                /* 24 bytes remain */
+    stmia.w r0!, {r3-r8}
+    b.n     MEM_LongCopyEnd
+    ldmia.w r1!, {r3-r9}                /* 28 bytes remain */
+    stmia.w r0!, {r3-r9}
+    b.n     MEM_LongCopyEnd
+    ldmia.w r1!, {r3-r10}               /* 32 bytes remain */
+    stmia.w r0!, {r3-r10}
+    b.n     MEM_LongCopyEnd
+    ldmia.w r1!, {r3-r11}               /* 36 bytes remain */
+    stmia.w r0!, {r3-r11}
+
+MEM_LongCopyEnd:
+    pop     {r4-r12}
+    and     r2, r2, #0x03               /* All the longs have been copied */
+
+    /* Deal with up to 3 remaining bytes.  Every exit restores the original
+     * destination into r0 and returns by popping the saved r14 into pc.
+     */
+
+MEM_DataCopyBytes:
+    cmp     r2, #0x00
+    it      eq
+    popeq   {r0, pc}
+    ldrb    r3, [r1], #0x01
+    strb    r3, [r0], #0x01
+    subs    r2, r2, #0x01
+    it      eq
+    popeq   {r0, pc}
+    ldrb    r3, [r1], #0x01
+    strb    r3, [r0], #0x01
+    subs    r2, r2, #0x01
+    it      eq
+    popeq   {r0, pc}
+    ldrb    r3, [r1], #0x01
+    strb    r3, [r0], #0x01
+    pop     {r0, pc}
+
+    .align  4
+
+/* Bits: Src=01, Dst=11 - Byte before half word to byte before long word
+ * 3 bytes to read for long word aligning the source
+ */
+
+MEM_DataCopy7:
+    ldrb    r3, [r1], #0x01
+    strb    r3, [r0], #0x01
+    sub     r2, r2, #0x01
+
+/* Bits: Src=10, Dst=00 - Half word to long word
+ * 2 bytes to read for long word aligning the source
+ */
+
+MEM_DataCopy8:
+    ldrb    r3, [r1], #0x01
+    strb    r3, [r0], #0x01
+    sub     r2, r2, #0x01
+
+/* Bits: Src=11, Dst=01 - Byte before long word to byte before half word
+ * 1 byte to read for long word aligning the source
+ */
+
+MEM_DataCopy13:
+    ldrb    r3, [r1], #0x01
+    strb    r3, [r0], #0x01
+    sub     r2, r2, #0x01
+
+/* Bits: Src=00, Dst=10 - Long to Half word */
+
+MEM_DataCopy2:
+    cmp     r2, #0x28
+    blo     MEM_DataCopy2_1
+
+    /* Save regs */
+
+    push    {r4-r12}
+
+    /* Bulk copy loop: read 10 long words, write one half word, nine long
+     * words re-packed from adjacent halves, then the final half word.
+     */
+
+MEM_DataCopy2_2:
+    ldmia   r1!, {r3-r12}
+
+    strh    r3, [r0], #0x02
+
+    lsr     r3, r3, #0x10
+    bfi     r3, r4, #0x10, #0x10
+    lsr     r4, r4, #0x10
+    bfi     r4, r5, #0x10, #0x10
+    lsr     r5, r5, #0x10
+    bfi     r5, r6, #0x10, #0x10
+    lsr     r6, r6, #0x10
+    bfi     r6, r7, #0x10, #0x10
+    lsr     r7, r7, #0x10
+    bfi     r7, r8, #0x10, #0x10
+    lsr     r8, r8, #0x10
+    bfi     r8, r9, #0x10, #0x10
+    lsr     r9, r9, #0x10
+    bfi     r9, r10, #0x10, #0x10
+    lsr     r10, r10, #0x10
+    bfi     r10, r11, #0x10, #0x10
+    lsr     r11, r11, #0x10
+    bfi     r11, r12, #0x10, #0x10
+    stmia   r0!, {r3-r11}
+    lsr     r12, r12, #0x10
+    strh    r12, [r0], #0x02
+
+    sub     r2, r2, #0x28
+    cmp     r2, #0x28
+    bhs     MEM_DataCopy2_2
+    pop     {r4-r12}
+
+MEM_DataCopy2_1:                        /* Read longs and write 2 x half words */
+    cmp     r2, #4
+    blo     MEM_DataCopyBytes
+    ldr     r3, [r1], #0x04
+    strh    r3, [r0], #0x02
+    lsr     r3, r3, #0x10
+    strh    r3, [r0], #0x02
+    sub     r2, r2, #0x04
+    b       MEM_DataCopy2
+
+/* Bits: Src=01, Dst=00 - Byte before half word to long
+ * Bits: Src=01, Dst=10 - Byte before half word to half word
+ * 3 bytes to read for long word aligning the source
+ */
+
+MEM_DataCopy4:
+MEM_DataCopy6:
+    /* Read B and write B */
+
+    ldrb    r3, [r1], #0x01
+    strb    r3, [r0], #0x01
+    sub     r2, r2, #0x01
+
+/* Bits: Src=10, Dst=01 - Half word to byte before half word
+ * Bits: Src=10, Dst=11 - Half word to byte before long word
+ * 2 bytes to read for long word aligning the source
+ */
+
+MEM_DataCopy9:
+MEM_DataCopy11:
+    ldrb    r3, [r1], #0x01
+    strb    r3, [r0], #0x01
+    sub     r2, r2, #0x01
+
+/* Bits: Src=11, Dst=00 - Byte before long word to long word
+ * Bits: Src=11, Dst=10 - Byte before long word to half word
+ * 1 byte to read for long word aligning the source
+ */
+
+MEM_DataCopy12:
+MEM_DataCopy14:
+    /* Read B and write B */
+
+    ldrb    r3, [r1], #0x01
+    strb    r3, [r0], #0x01
+    sub     r2, r2, #0x01
+
+/* Bits: Src=00, Dst=01 - Long to Byte before half word
+ * Bits: Src=00, Dst=11 - Long to Byte before long word
+ */
+
+MEM_DataCopy1:                          /* Read longs, write B->H->B */
+MEM_DataCopy3:
+    cmp     r2, #4
+    blo     MEM_DataCopyBytes
+    ldr     r3, [r1], #0x04
+    strb    r3, [r0], #0x01
+    lsr     r3, r3, #0x08
+    strh    r3, [r0], #0x02
+    lsr     r3, r3, #0x10
+    strb    r3, [r0], #0x01
+    sub     r2, r2, #0x04
+    b       MEM_DataCopy3
+
+    .size   memcpy, .-memcpy
+    .end |