7 files changed, 1186 insertions, 356 deletions
diff --git a/nuttx/arch/arm/src/armv7-m/Kconfig b/nuttx/arch/arm/src/armv7-m/Kconfig
new file mode 100644
index 000000000..dc5aa3915
--- /dev/null
+++ b/nuttx/arch/arm/src/armv7-m/Kconfig
@@ -0,0 +1,51 @@
+#
+# For a description of the syntax of this configuration file,
+# see misc/tools/kconfig-language.txt.
+#
+
+comment "ARMV7M Configuration Options"
+# Exactly one toolchain below is selected; the default depends on the host.
+choice
+	prompt "Toolchain Selection"
+	default ARMV7M_TOOLCHAIN_CODESOURCERYW if HOST_WINDOWS
+	default ARMV7M_TOOLCHAIN_GNU_EABI if !HOST_WINDOWS
+
+config ARMV7M_TOOLCHAIN_ATOLLIC
+	bool "Atollic Lite/Pro for Windows"
+	depends on HOST_WINDOWS
+
+config ARMV7M_TOOLCHAIN_BUILDROOT
+	bool "Buildroot (Cygwin or Linux)"
+	depends on !WINDOWS_NATIVE
+
+config ARMV7M_TOOLCHAIN_CODEREDL
+	bool "CodeRed for Linux"
+	depends on HOST_LINUX
+
+config ARMV7M_TOOLCHAIN_CODEREDW
+	bool "CodeRed for Windows"
+	depends on HOST_WINDOWS
+
+config ARMV7M_TOOLCHAIN_CODESOURCERYL
+	bool "CodeSourcery GNU toolchain under Linux"
+	depends on HOST_LINUX
+
+config ARMV7M_TOOLCHAIN_CODESOURCERYW
+	bool "CodeSourcery GNU toolchain under Windows"
+	depends on HOST_WINDOWS
+
+config ARMV7M_TOOLCHAIN_DEVKITARM
+	bool "devkitARM GNU toolchain"
+	depends on HOST_WINDOWS
+
+config ARMV7M_TOOLCHAIN_GNU_EABI
+	bool "Generic GNU EABI toolchain"
+	---help---
+		This option should work for any modern GNU toolchain (GCC 4.5 or newer)
+		configured for arm-none-eabi.
+
+config ARMV7M_TOOLCHAIN_RAISONANCE
+	bool "STMicro Raisonance for Windows"
+	depends on HOST_WINDOWS
+
+endchoice
diff --git a/nuttx/arch/arm/src/armv7-m/Toolchain.defs b/nuttx/arch/arm/src/armv7-m/Toolchain.defs
new file mode 100644
index 000000000..e214ce8bd
--- /dev/null
+++ b/nuttx/arch/arm/src/armv7-m/Toolchain.defs
@@ -0,0 +1,266 @@
+############################################################################
+# arch/arm/src/armv7-m/Toolchain.defs
+#
+#   Copyright (C) 2012 Gregory Nutt. All rights reserved.
+#   Author: Gregory Nutt <gnutt@nuttx.org>
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in
+#    the documentation and/or other materials provided with the
+#    distribution.
+# 3. Neither the name NuttX nor the names of its contributors may be
+#    used to endorse or promote products derived from this software
+#    without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+############################################################################
+
+# Setup for the selected toolchain
+
+#
+# Map old-style chip-specific toolchain names and the new-style
+# CONFIG_ARMV7M_TOOLCHAIN_* options onto one CONFIG_ARMV7M_TOOLCHAIN value.
+# A test fires if ANY listed option is 'y' (several may be set at once);
+# the first match wins ('?=') and a command-line value overrides them all.
+#
+
+ifneq ($(filter y, \
+      $(CONFIG_LPC43_ATOLLIC_LITE) \
+      $(CONFIG_STM32_ATOLLIC_LITE) \
+      $(CONFIG_LPC43_ATOLLIC_PRO) \
+      $(CONFIG_STM32_ATOLLIC_PRO) \
+      $(CONFIG_ARMV7M_TOOLCHAIN_ATOLLIC) \
+    ),)
+  CONFIG_ARMV7M_TOOLCHAIN ?= ATOLLIC
+endif
+ifneq ($(filter y, \
+      $(CONFIG_KINETIS_BUILDROOT) \
+      $(CONFIG_LM3S_BUILDROOT) \
+      $(CONFIG_LPC17_BUILDROOT) \
+      $(CONFIG_LPC43_BUILDROOT) \
+      $(CONFIG_SAM3U_BUILDROOT) \
+      $(CONFIG_STM32_BUILDROOT) \
+      $(CONFIG_ARMV7M_TOOLCHAIN_BUILDROOT) \
+    ),)
+  CONFIG_ARMV7M_TOOLCHAIN ?= BUILDROOT
+endif
+ifneq ($(filter y, \
+      $(CONFIG_LPC17_CODEREDL) \
+      $(CONFIG_ARMV7M_TOOLCHAIN_CODEREDL) \
+    ),)
+  CONFIG_ARMV7M_TOOLCHAIN ?= CODEREDL
+endif
+ifneq ($(filter y, \
+      $(CONFIG_LPC17_CODEREDW) \
+      $(CONFIG_LPC43_CODEREDW) \
+      $(CONFIG_ARMV7M_TOOLCHAIN_CODEREDW) \
+    ),)
+  CONFIG_ARMV7M_TOOLCHAIN ?= CODEREDW
+endif
+ifneq ($(filter y, \
+      $(CONFIG_KINETIS_CODESOURCERYL) \
+      $(CONFIG_LM3S_CODESOURCERYL) \
+      $(CONFIG_LPC17_CODESOURCERYL) \
+      $(CONFIG_LPC43_CODESOURCERYL) \
+      $(CONFIG_SAM3U_CODESOURCERYL) \
+      $(CONFIG_STM32_CODESOURCERYL) \
+      $(CONFIG_ARMV7M_TOOLCHAIN_CODESOURCERYL) \
+    ),)
+  CONFIG_ARMV7M_TOOLCHAIN ?= CODESOURCERYL
+endif
+ifneq ($(filter y, \
+      $(CONFIG_KINETIS_CODESOURCERYW) \
+      $(CONFIG_LM3S_CODESOURCERYW) \
+      $(CONFIG_LPC17_CODESOURCERYW) \
+      $(CONFIG_LPC43_CODESOURCERYW) \
+      $(CONFIG_SAM3U_CODESOURCERYW) \
+      $(CONFIG_STM32_CODESOURCERYW) \
+      $(CONFIG_ARMV7M_TOOLCHAIN_CODESOURCERYW) \
+    ),)
+  CONFIG_ARMV7M_TOOLCHAIN ?= CODESOURCERYW
+endif
+ifneq ($(filter y, \
+      $(CONFIG_KINETIS_DEVKITARM) \
+      $(CONFIG_LM3S_DEVKITARM) \
+      $(CONFIG_LPC17_DEVKITARM) \
+      $(CONFIG_LPC43_DEVKITARM) \
+      $(CONFIG_SAM3U_DEVKITARM) \
+      $(CONFIG_STM32_DEVKITARM) \
+      $(CONFIG_ARMV7M_TOOLCHAIN_DEVKITARM) \
+    ),)
+  CONFIG_ARMV7M_TOOLCHAIN ?= DEVKITARM
+endif
+ifneq ($(filter y, \
+      $(CONFIG_ARMV7M_TOOLCHAIN_GNU_EABI) \
+    ),)
+  CONFIG_ARMV7M_TOOLCHAIN ?= GNU_EABI
+endif
+ifneq ($(filter y, \
+      $(CONFIG_STM32_RAISONANCE) \
+      $(CONFIG_ARMV7M_TOOLCHAIN_RAISONANCE) \
+    ),)
+  CONFIG_ARMV7M_TOOLCHAIN ?= RAISONANCE
+endif
+
+#
+# Supported toolchains
+#
+# TODO - It's likely that all of these toolchains now support the
+#        CortexM4.  Since they are all GCC-based, we could almost
+#        certainly simplify this further.
+#
+# Each toolchain definition should set:
+#
+#  CROSSDEV         The GNU toolchain triple (command prefix)
+#  ARCROSSDEV       If required, an alternative prefix used when
+#                   invoking ar and nm.
+#  ARCHCPUFLAGS     CPU-specific flags selecting the instruction set
+#                   FPU options, etc.
+#  MAXOPTIMIZATION  The maximum optimization level that results in
+#                   reliable code generation.
+#
+
+# Atollic toolchain under Windows.  WINTOOL is set unless the build is
+# Windows-native; CPU flags follow CONFIG_ARCH_CORTEXM3/M4 and CONFIG_ARCH_FPU.
+ifeq ($(CONFIG_ARMV7M_TOOLCHAIN),ATOLLIC)
+  CROSSDEV = arm-atollic-eabi-
+  ARCROSSDEV = arm-atollic-eabi-
+  ifneq ($(CONFIG_WINDOWS_NATIVE),y)
+    WINTOOL = y
+  endif
+  ifeq ($(CONFIG_ARCH_CORTEXM4),y)
+    ifeq ($(CONFIG_ARCH_FPU),y)
+      ARCHCPUFLAGS = -mcpu=cortex-m4 -mthumb -march=armv7e-m -mfpu=fpv4-sp-d16 -mfloat-abi=hard
+    else
+      ARCHCPUFLAGS = -mcpu=cortex-m4 -mthumb -march=armv7e-m -mfloat-abi=soft
+    endif
+  else ifeq ($(CONFIG_ARCH_CORTEXM3),y)
+    ARCHCPUFLAGS = -mcpu=cortex-m3 -mthumb -mfloat-abi=soft
+  endif
+endif
+
+# NuttX buildroot under Linux or Cygwin.  Uses the EABI prefix with fixed
+# Cortex-M3 soft-float flags; the maximum optimization level is -Os.
+ifeq ($(CONFIG_ARMV7M_TOOLCHAIN),BUILDROOT)
+  # OABI
+  # CROSSDEV = arm-nuttx-elf-
+  # ARCROSSDEV = arm-nuttx-elf-
+  # ARCHCPUFLAGS = -mtune=cortex-m3 -march=armv7-m -mfloat-abi=soft
+  # EABI
+  CROSSDEV = arm-nuttx-eabi-
+  ARCROSSDEV = arm-nuttx-eabi-
+  ARCHCPUFLAGS = -mcpu=cortex-m3 -mthumb -mfloat-abi=soft
+  MAXOPTIMIZATION = -Os
+endif
+
+# Code Red RedSuite under Linux.  CPU flags follow CONFIG_ARCH_CORTEXM3/M4
+# and CONFIG_ARCH_FPU (hard-float ABI only for Cortex-M4 with the FPU).
+ifeq ($(CONFIG_ARMV7M_TOOLCHAIN),CODEREDL)
+  CROSSDEV = arm-none-eabi-
+  ARCROSSDEV = arm-none-eabi-
+  ifeq ($(CONFIG_ARCH_CORTEXM4),y)
+    ifeq ($(CONFIG_ARCH_FPU),y)
+      ARCHCPUFLAGS = -mcpu=cortex-m4 -mthumb -march=armv7e-m -mfpu=fpv4-sp-d16 -mfloat-abi=hard
+    else
+      ARCHCPUFLAGS = -mcpu=cortex-m4 -mthumb -march=armv7e-m -mfloat-abi=soft
+    endif
+  else ifeq ($(CONFIG_ARCH_CORTEXM3),y)
+    ARCHCPUFLAGS = -mcpu=cortex-m3 -mthumb -mfloat-abi=soft
+  endif
+endif
+
+# Code Red RedSuite under Windows.  WINTOOL is set unless the build is
+# Windows-native; CPU flags follow CONFIG_ARCH_CORTEXM3/M4 and CONFIG_ARCH_FPU.
+ifeq ($(CONFIG_ARMV7M_TOOLCHAIN),CODEREDW)
+  CROSSDEV = arm-none-eabi-
+  ARCROSSDEV = arm-none-eabi-
+  ifneq ($(CONFIG_WINDOWS_NATIVE),y)
+    WINTOOL = y
+  endif
+  ifeq ($(CONFIG_ARCH_CORTEXM4),y)
+    ifeq ($(CONFIG_ARCH_FPU),y)
+      ARCHCPUFLAGS = -mcpu=cortex-m4 -mthumb -march=armv7e-m -mfpu=fpv4-sp-d16 -mfloat-abi=hard
+    else
+      ARCHCPUFLAGS = -mcpu=cortex-m4 -mthumb -march=armv7e-m -mfloat-abi=soft
+    endif
+  else ifeq ($(CONFIG_ARCH_CORTEXM3),y)
+    ARCHCPUFLAGS = -mcpu=cortex-m3 -mthumb -mfloat-abi=soft
+  endif
+endif
+
+# CodeSourcery under Linux.  Always uses Cortex-M3 soft-float flags,
+# regardless of CONFIG_ARCH_CORTEXM4; the maximum optimization level is -O2.
+ifeq ($(CONFIG_ARMV7M_TOOLCHAIN),CODESOURCERYL)
+  CROSSDEV = arm-none-eabi-
+  ARCROSSDEV = arm-none-eabi-
+  ARCHCPUFLAGS = -mcpu=cortex-m3 -mthumb -mfloat-abi=soft
+  MAXOPTIMIZATION = -O2
+endif
+
+# CodeSourcery under Windows.  WINTOOL is set unless the build is
+# Windows-native; always uses Cortex-M3 soft-float flags.
+ifeq ($(CONFIG_ARMV7M_TOOLCHAIN),CODESOURCERYW)
+  CROSSDEV = arm-none-eabi-
+  ARCROSSDEV = arm-none-eabi-
+  ifneq ($(CONFIG_WINDOWS_NATIVE),y)
+    WINTOOL = y
+  endif
+  ARCHCPUFLAGS = -mcpu=cortex-m3 -mthumb -mfloat-abi=soft
+endif
+
+# devkitARM under Windows (arm-eabi- prefix).  WINTOOL is set unless the
+# build is Windows-native; always uses Cortex-M3 soft-float flags.
+ifeq ($(CONFIG_ARMV7M_TOOLCHAIN),DEVKITARM)
+  CROSSDEV = arm-eabi-
+  ARCROSSDEV = arm-eabi-
+  ifneq ($(CONFIG_WINDOWS_NATIVE),y)
+    WINTOOL = y
+  endif
+  ARCHCPUFLAGS = -mcpu=cortex-m3 -mthumb -mfloat-abi=soft
+endif
+
+# Generic GNU EABI toolchain on OS X, Linux or any typical Posix system.
+# Maximum optimization is -O3; CPU flags follow CONFIG_ARCH_CORTEXM3/M4/FPU.
+ifeq ($(CONFIG_ARMV7M_TOOLCHAIN),GNU_EABI)
+  CROSSDEV = arm-none-eabi-
+  ARCROSSDEV = arm-none-eabi-
+  MAXOPTIMIZATION = -O3
+  ifeq ($(CONFIG_ARCH_CORTEXM4),y)
+    ifeq ($(CONFIG_ARCH_FPU),y)
+      ARCHCPUFLAGS = -mcpu=cortex-m4 -mthumb -march=armv7e-m -mfpu=fpv4-sp-d16 -mfloat-abi=hard
+    else
+      ARCHCPUFLAGS = -mcpu=cortex-m4 -mthumb -march=armv7e-m -mfloat-abi=soft
+    endif
+  else ifeq ($(CONFIG_ARCH_CORTEXM3),y)
+    ARCHCPUFLAGS = -mcpu=cortex-m3 -mthumb -mfloat-abi=soft
+  endif
+endif  
+
+# Raisonance RIDE7 under Windows.  WINTOOL is set unless the build is
+# Windows-native; always uses Cortex-M3 soft-float flags.
+ifeq ($(CONFIG_ARMV7M_TOOLCHAIN),RAISONANCE)
+  CROSSDEV = arm-none-eabi-
+  ARCROSSDEV = arm-none-eabi-
+  ifneq ($(CONFIG_WINDOWS_NATIVE),y)
+    WINTOOL = y
+  endif
+  ARCHCPUFLAGS = -mcpu=cortex-m3 -mthumb -mfloat-abi=soft
+endif
diff --git a/nuttx/arch/arm/src/armv7-m/memcpy.S b/nuttx/arch/arm/src/armv7-m/memcpy.S
deleted file mode 100644
index c6d3ff649..000000000
--- a/nuttx/arch/arm/src/armv7-m/memcpy.S
+++ /dev/null
@@ -1,351 +0,0 @@
-@
-@ armv7m-optimised memcpy, apparently in the public domain
-@
-@ Obtained via a posting on the Stellaris forum:
-@	http://e2e.ti.com/support/microcontrollers/stellaris_arm_cortex-m3_microcontroller/f/473/t/44360.aspx
-@
-@ Posted by rocksoft on Jul 24, 2008 10:19 AM
-@ 
-@ Hi, 
-@ 
-@ I recently finished a "memcpy" replacement and thought it might be useful for others...
-@ 
-@ I've put some instructions and the code here:
-@ 
-@ http://www.rock-software.net/downloads/memcpy/
-@ 
-@ Hope it works for you as well as it did for me.
-@ 
-@ Liam.
-@ @	
-@ ---------------------------------------------------------------------------- 
-
-.syntax unified
-
-.thumb
-
-.cpu cortex-m3
-
-@ ---------------------------------------------------------------------------- 
-
- .global memcpy
- 
-
-@ ---------------------------------------------------------------------------- 
-@ Optimised "general" copy routine
-
-.text
-
-@ We have 16 possible alignment combinations of src and dst, this jump table directs the copy operation
-@ Bits:  Src=00, Dst=00 - Long to Long copy
-@ Bits:  Src=00, Dst=01 - Long to Byte before half word
-@ Bits:  Src=00, Dst=10 - Long to Half word
-@ Bits:  Src=00, Dst=11 - Long to Byte before long word
-@ Bits:  Src=01, Dst=00 - Byte before half word to long
-@ Bits:  Src=01, Dst=01 - Byte before half word to byte before half word - Same alignment
-@ Bits:  Src=01, Dst=10 - Byte before half word to half word
-@ Bits:  Src=01, Dst=11 - Byte before half word to byte before long word
-@ Bits:  Src=10, Dst=00 - Half word to long word
-@ Bits:  Src=10, Dst=01 - Half word to byte before half word
-@ Bits:  Src=10, Dst=10 - Half word to half word - Same Alignment
-@ Bits:  Src=10, Dst=11 - Half word to byte before long word
-@ Bits:  Src=11, Dst=00 - Byte before long word to long word
-@ Bits:  Src=11, Dst=01 - Byte before long word to byte before half word
-@ Bits:  Src=11, Dst=11 - Byte before long word to half word
-@ Bits:  Src=11, Dst=11 - Byte before long word to Byte before long word - Same alignment
-
-MEM_DataCopyTable:
-  .byte (MEM_DataCopy0 - MEM_DataCopyJump) >> 1      
-  .byte (MEM_DataCopy1 - MEM_DataCopyJump) >> 1      
-  .byte (MEM_DataCopy2 - MEM_DataCopyJump) >> 1
-  .byte (MEM_DataCopy3 - MEM_DataCopyJump) >> 1
-  .byte (MEM_DataCopy4 - MEM_DataCopyJump) >> 1
-  .byte (MEM_DataCopy5 - MEM_DataCopyJump) >> 1
-  .byte (MEM_DataCopy6 - MEM_DataCopyJump) >> 1
-  .byte (MEM_DataCopy7 - MEM_DataCopyJump) >> 1
-  .byte (MEM_DataCopy8 - MEM_DataCopyJump) >> 1
-  .byte (MEM_DataCopy9 - MEM_DataCopyJump) >> 1
-  .byte (MEM_DataCopy10 - MEM_DataCopyJump) >> 1
-  .byte (MEM_DataCopy11 - MEM_DataCopyJump) >> 1
-  .byte (MEM_DataCopy12 - MEM_DataCopyJump) >> 1
-  .byte (MEM_DataCopy13 - MEM_DataCopyJump) >> 1
-  .byte (MEM_DataCopy14 - MEM_DataCopyJump) >> 1
-  .byte (MEM_DataCopy15 - MEM_DataCopyJump) >> 1
-
-  .align 2
-
-@ ---------------------------------------------------------------------------- 
-
-//#define 10 10
-
-MEM_LongCopyTable:
-  .byte (MEM_LongCopyEnd - MEM_LongCopyJump) >> 1 @ 0 bytes left
-  .byte 0                    @ 4 bytes left  
-  .byte (1 * 10) >> 1   @ 8 bytes left
-  .byte (2 * 10) >> 1   @ 12 bytes left
-  .byte (3 * 10) >> 1   @ 16 bytes left
-  .byte (4 * 10) >> 1   @ 20 bytes left
-  .byte (5 * 10) >> 1   @ 24 bytes left  
-  .byte (6 * 10) >> 1   @ 28 bytes left
-  .byte (7 * 10) >> 1   @ 32 bytes left
-  .byte (8 * 10) >> 1   @ 36 bytes left
-
-  .align 2
-
-@ ---------------------------------------------------------------------------- 
-@ r0 = destination, r1 = source, r2 = length
-
-.thumb_func
-
-memcpy:
-  push {r14}
-  
-  @ This allows the inner workings to "assume" a minimum amount of bytes
-  cmp r2, #4
-  blt MEM_DataCopyBytes
-    
-  and r14, r0, #3             @ Get destination alignment bits
-  bfi r14, r1, #2, #2         @ Get source alignment bits
-  ldr r3, =MEM_DataCopyTable  @ Jump table base  
-  tbb [r3, r14]               @ Perform jump on src/dst alignment bits
-MEM_DataCopyJump:
-
- .align 4
-
-@ ---------------------------------------------------------------------------- 
-@ Bits:  Src=01, Dst=01 - Byte before half word to byte before half word - Same alignment
-@ 3 bytes to read for long word aligning
-
-MEM_DataCopy5:
-  ldrb r3, [r1], #0x01
-  strb r3, [r0], #0x01
-  sub r2, r2, #0x01
-
-@ ---------------------------------------------------------------------------- 
-@ Bits:  Src=10, Dst=10 - Half word to half word - Same Alignment
-@ 2 bytes to read for long word aligning
-
-MEM_DataCopy10:
-  ldrb r3, [r1], #0x01
-  strb r3, [r0], #0x01
-  sub r2, r2, #0x01
-
-@ ---------------------------------------------------------------------------- 
-@ Bits:  Src=11, Dst=11 - Byte before long word to Byte before long word - Same alignment
-@ 1 bytes to read for long word aligning
-
-MEM_DataCopy15:
-  ldrb r3, [r1], #0x01
-  strb r3, [r0], #0x01
-  sub r2, r2, #0x01
-
-@ ---------------------------------------------------------------------------- 
-@ Bits:  Src=00, Dst=00 - Long to Long copy
-
-MEM_DataCopy0:
-  @ Save regs
-  push {r4-r12}
-
-  cmp r2, #0x28
-  blt MEM_DataCopy0_2
-  
-MEM_DataCopy0_1:
-  ldmia r1!, {r3-r12}
-  stmia r0!, {r3-r12}
-  sub r2, r2, #0x28    
-  cmp r2, #0x28
-  bge MEM_DataCopy0_1
-
-MEM_DataCopy0_2:
-  @ Copy remaining long words
-  ldr r14, =MEM_LongCopyTable
-  lsr r11, r2, #0x02
-  tbb [r14, r11]
-
-MEM_LongCopyJump:
-  ldr.w r3, [r1], #0x04   @ 4 bytes remain 
-  str.w r3, [r0], #0x04
-  b MEM_LongCopyEnd
-  ldmia.w r1!, {r3-r4}    @ 8 bytes remain 
-  stmia.w r0!, {r3-r4}
-  b MEM_LongCopyEnd
-  ldmia.w r1!, {r3-r5}    @ 12 bytes remain
-  stmia.w r0!, {r3-r5}
-  b MEM_LongCopyEnd
-  ldmia.w r1!, {r3-r6}    @ 16 bytes remain
-  stmia.w r0!, {r3-r6}
-  b MEM_LongCopyEnd
-  ldmia.w r1!, {r3-r7}    @ 20 bytes remain
-  stmia.w r0!, {r3-r7}
-  b MEM_LongCopyEnd
-  ldmia.w r1!, {r3-r8}    @ 24 bytes remain
-  stmia.w r0!, {r3-r8}
-  b MEM_LongCopyEnd
-  ldmia.w r1!, {r3-r9}    @ 28 bytes remain
-  stmia.w r0!, {r3-r9}
-  b MEM_LongCopyEnd
-  ldmia.w r1!, {r3-r10}   @ 32 bytes remain
-  stmia.w r0!, {r3-r10}
-  b MEM_LongCopyEnd
-  ldmia.w r1!, {r3-r11}   @ 36 bytes remain
-  stmia.w r0!, {r3-r11}
-  
-MEM_LongCopyEnd:  
-  pop {r4-r12}
-  and r2, r2, #0x03    @ All the longs have been copied
-  
-@ ----------------------------------------------------------------------------   
-
-MEM_DataCopyBytes:
-  @ Deal with up to 3 remaining bytes
-  cmp r2, #0x00
-  it eq
-  popeq {pc}
-  ldrb r3, [r1], #0x01
-  strb r3, [r0], #0x01
-  subs r2, r2, #0x01
-  it eq
-  popeq {pc}
-  ldrb r3, [r1], #0x01
-  strb r3, [r0], #0x01
-  subs r2, r2, #0x01
-  it eq
-  popeq {pc}
-  ldrb r3, [r1], #0x01
-  strb r3, [r0], #0x01
-  pop {pc}
-
- .align 4
-
-@ ---------------------------------------------------------------------------- 
-@ Bits:  Src=01, Dst=11 - Byte before half word to byte before long word
-@ 3 bytes to read for long word aligning the source
-
-MEM_DataCopy7:
-  ldrb r3, [r1], #0x01
-  strb r3, [r0], #0x01
-  sub r2, r2, #0x01
-  
-@ ----------------------------------------------------------------------------   
-@ Bits:  Src=10, Dst=00 - Half word to long word
-@ 2 bytes to read for long word aligning the source
-
-MEM_DataCopy8: 
-  ldrb r3, [r1], #0x01
-  strb r3, [r0], #0x01
-  sub r2, r2, #0x01
-
-@ ---------------------------------------------------------------------------- 
-@ Bits:  Src=11, Dst=01 - Byte before long word to byte before half word
-@ 1 byte to read for long word aligning the source
-
-MEM_DataCopy13:
-  ldrb r3, [r1], #0x01
-  strb r3, [r0], #0x01
-  sub r2, r2, #0x01
-
-@ ---------------------------------------------------------------------------- 
-@ Bits:  Src=00, Dst=10 - Long to Half word
-
-MEM_DataCopy2: 
-  cmp r2, #0x28
-  blt MEM_DataCopy2_1
-
-  @ Save regs
-  push {r4-r12}
-MEM_DataCopy2_2:
-  ldmia r1!, {r3-r12}
-
-  strh r3, [r0], #0x02
-
-  lsr r3, r3, #0x10  
-  bfi r3, r4, #0x10, #0x10  
-  lsr r4, r4, #0x10  
-  bfi r4, r5, #0x10, #0x10  
-  lsr r5, r5, #0x10  
-  bfi r5, r6, #0x10, #0x10  
-  lsr r6, r6, #0x10  
-  bfi r6, r7, #0x10, #0x10  
-  lsr r7, r7, #0x10  
-  bfi r7, r8, #0x10, #0x10  
-  lsr r8, r8, #0x10  
-  bfi r8, r9, #0x10, #0x10  
-  lsr r9, r9, #0x10  
-  bfi r9, r10, #0x10, #0x10  
-  lsr r10, r10, #0x10  
-  bfi r10, r11, #0x10, #0x10  
-  lsr r11, r11, #0x10  
-  bfi r11, r12, #0x10, #0x10  
-  stmia r0!, {r3-r11}
-  lsr r12, r12, #0x10  
-  strh r12, [r0], #0x02
-
-  sub r2, r2, #0x28    
-  cmp r2, #0x28
-  bge MEM_DataCopy2_2
-  pop {r4-r12}
-
-MEM_DataCopy2_1: @ Read longs and write 2 x half words
-  cmp r2, #4
-  blt MEM_DataCopyBytes
-  ldr r3, [r1], #0x04
-  strh r3, [r0], #0x02
-  lsr r3, r3, #0x10
-  strh r3, [r0], #0x02
-  sub r2, r2, #0x04
-  b MEM_DataCopy2
-
-@ ---------------------------------------------------------------------------- 
-@ Bits:  Src=01, Dst=00 - Byte before half word to long
-@ Bits:  Src=01, Dst=10 - Byte before half word to half word
-@ 3 bytes to read for long word aligning the source
-
-MEM_DataCopy4: 
-MEM_DataCopy6:  
-  @ Read B and write B
-  ldrb r3, [r1], #0x01
-  strb r3, [r0], #0x01
-  sub r2, r2, #0x01
-
-@ ---------------------------------------------------------------------------- 
-@ Bits:  Src=10, Dst=01 - Half word to byte before half word
-@ Bits:  Src=10, Dst=11 - Half word to byte before long word
-@ 2 bytes to read for long word aligning the source
-
-MEM_DataCopy9:
-MEM_DataCopy11:
-  ldrb r3, [r1], #0x01
-  strb r3, [r0], #0x01
-  sub r2, r2, #0x01
-
-@ ---------------------------------------------------------------------------- 
-@ Bits:  Src=11, Dst=00 - Byte before long word to long word
-@ Bits:  Src=11, Dst=11 - Byte before long word to half word
-@ 1 byte to read for long word aligning the source
-
-MEM_DataCopy12:
-MEM_DataCopy14:
-  @ Read B and write B
-  ldrb r3, [r1], #0x01
-  strb r3, [r0], #0x01
-  sub r2, r2, #0x01
-
-@ ---------------------------------------------------------------------------- 
-@ Bits:  Src=00, Dst=01 - Long to Byte before half word
-@ Bits:  Src=00, Dst=11 - Long to Byte before long word
-
-MEM_DataCopy1: @ Read longs, write B->H->B
-MEM_DataCopy3: 
-  cmp r2, #4
-  blt MEM_DataCopyBytes
-  ldr r3, [r1], #0x04
-  strb r3, [r0], #0x01
-  lsr r3, r3, #0x08
-  strh r3, [r0], #0x02
-  lsr r3, r3, #0x10
-  strb r3, [r0], #0x01
-  sub r2, r2, #0x04  
-  b MEM_DataCopy3
-
-@ ---------------------------------------------------------------------------- 
-
diff --git a/nuttx/arch/arm/src/armv7-m/up_elf.c b/nuttx/arch/arm/src/armv7-m/up_elf.c
new file mode 100644
index 000000000..b838a6905
--- /dev/null
+++ b/nuttx/arch/arm/src/armv7-m/up_elf.c
@@ -0,0 +1,450 @@
+/****************************************************************************
+ * arch/arm/src/armv7-m/up_elf.c
+ *
+ *   Copyright (C) 2012 Gregory Nutt. All rights reserved.
+ *   Author: Gregory Nutt <gnutt@nuttx.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ * 3. Neither the name NuttX nor the names of its contributors may be
+ *    used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ ****************************************************************************/
+
+/****************************************************************************
+ * Included Files
+ ****************************************************************************/
+
+#include <nuttx/config.h>
+
+#include <stdlib.h>
+#include <elf32.h>
+#include <errno.h>
+#include <debug.h>
+
+#include <arch/elf.h>
+#include <nuttx/arch.h>
+#include <nuttx/binfmt/elf.h>
+
+/****************************************************************************
+ * Pre-processor Definitions
+ ****************************************************************************/
+
+/****************************************************************************
+ * Private Data
+ ****************************************************************************/
+
+/****************************************************************************
+ * Private Functions
+ ****************************************************************************/
+
+/****************************************************************************
+ * Public Functions
+ ****************************************************************************/
+
+/****************************************************************************
+ * Name: arch_checkarch
+ *
+ * Description:
+ *   Given the ELF header in 'ehdr', verify that the ELF file is appropriate
+ *   for the current, configured architecture.  Every architecture that uses
+ *   the ELF loader must provide this function.
+ *
+ * Input Parameters:
+ *   ehdr - The ELF header read from the ELF file.
+ *
+ * Returned Value:
+ *   True if the architecture supports this ELF file; false otherwise.
+ *
+ ****************************************************************************/
+
+bool arch_checkarch(FAR const Elf32_Ehdr *ehdr)
+{
+  /* Make sure it's an ARM executable */
+
+  if (ehdr->e_machine != EM_ARM)
+    {
+      bdbg("Not for ARM: e_machine=%04x\n", ehdr->e_machine);
+      return false;
+    }
+
+  /* Make sure that 32-bit objects are supported */
+
+  if (ehdr->e_ident[EI_CLASS] != ELFCLASS32)
+    {
+      bdbg("Need 32-bit objects: e_ident[EI_CLASS]=%02x\n", ehdr->e_ident[EI_CLASS]);
+      return false;
+    }
+
+  /* Verify endian-ness */
+
+#ifdef CONFIG_ENDIAN_BIG
+  if (ehdr->e_ident[EI_DATA] != ELFDATA2MSB)
+#else
+  if (ehdr->e_ident[EI_DATA] != ELFDATA2LSB)
+#endif
+    {
+      bdbg("Wrong endian-ness: e_ident[EI_DATA]=%02x\n", ehdr->e_ident[EI_DATA]);
+      return false;
+    }
+
+  /* TODO:  Check ABI here. */
+  return true;
+}
+
+/****************************************************************************
+ * Name: arch_relocate and arch_relocateadd
+ *
+ * Description:
+ *   Perform an architecture-specific ELF relocation.  Every architecture
+ *   that uses the ELF loader must provide this function.
+ *
+ * Input Parameters:
+ *   rel - The relocation type
+ *   sym - The ELF symbol structure containing the fully resolved value.
+ *   addr - The address that requires the relocation.
+ *
+ * Returned Value:
+ *   Zero (OK) if the relocation was successful.  Otherwise, a negated errno
+ *   value indicating the cause of the relocation failure.
+ *
+ ****************************************************************************/
+
+int arch_relocate(FAR const Elf32_Rel *rel, FAR const Elf32_Sym *sym,
+                  uintptr_t addr)
+{
+  int32_t offset;
+  uint32_t upper_insn;
+  uint32_t lower_insn;
+
+  switch (ELF32_R_TYPE(rel->r_info))
+    {
+    case R_ARM_NONE:
+      {
+        /* No relocation */
+      }
+      break;
+
+    case R_ARM_PC24:
+    case R_ARM_CALL:
+    case R_ARM_JUMP24:
+      {
+        bvdbg("Performing PC24 [%d] link at addr %08lx [%08lx] to sym '%s' st_value=%08lx\n",
+              ELF32_R_TYPE(rel->r_info), (long)addr, (long)(*(uint32_t*)addr),
+              sym, (long)sym->st_value);
+
+        offset = (*(uint32_t*)addr & 0x00ffffff) << 2;
+        if (offset & 0x02000000)
+          {
+            offset -= 0x04000000;
+          }
+
+        offset += sym->st_value - addr;
+        if (offset & 3 || offset <= (int32_t) 0xfe000000 || offset >= (int32_t) 0x02000000)
+          {
+            bdbg("  ERROR: PC24 [%d] relocation out of range, offset=%08lx\n",
+                 ELF32_R_TYPE(rel->r_info), offset);
+
+            return -EINVAL;
+          }
+
+        offset >>= 2;
+
+        *(uint32_t*)addr &= 0xff000000;
+        *(uint32_t*)addr |= offset & 0x00ffffff;
+      }
+      break;
+
+    case R_ARM_ABS32:
+    case R_ARM_TARGET1:  /* New ABI:  TARGET1 always treated as ABS32 */
+      {
+        bvdbg("Performing ABS32 link at addr=%08lx [%08lx] to sym=%p st_value=%08lx\n",
+              (long)addr, (long)(*(uint32_t*)addr), sym, (long)sym->st_value);
+
+        *(uint32_t*)addr += sym->st_value;
+      }
+      break;
+
+    case R_ARM_THM_CALL:
+    case R_ARM_THM_JUMP24:
+      {
+        uint32_t S;
+        uint32_t J1;
+        uint32_t J2;
+
+        /* Thumb BL and B.W instructions. Encoding:
+         *
+         * upper_insn:
+         *
+         *  1   1   1   1   1   1
+         *  5   4   3   2   1   0   9   8   7   6   5   4   3   2   1   0
+         * +----------+---+-------------------------------+--------------+
+         * |1   1   1 |OP1|     OP2                       |              | 32-Bit Instructions
+         * +----------+---+--+-----+----------------------+--------------+
+         * |1   1   1 | 1   0|  S  |              imm10                  | BL Instruction
+         * +----------+------+-----+-------------------------------------+
+         *
+         * lower_insn:
+         *
+         *  1   1   1   1   1   1
+         *  5   4   3   2   1   0   9   8   7   6   5   4   3   2   1   0
+         * +---+---------------------------------------------------------+
+         * |OP |                                                         | 32-Bit Instructions
+         * +---+--+---+---+---+------------------------------------------+
+         * |1   1 |J1 | 1 |J2 |                 imm11                    | BL Instruction
+         * +------+---+---+---+------------------------------------------+
+         *
+         * The branch target is encoded in these bits:
+         *
+         *   S     = upper_insn[10]
+         *   imm10 = upper_insn[0:9]
+         *   imm11 = lower_insn[0:10]
+         *   J1    = lower_insn[13]
+         *   J2    = lower_insn[11]
+         */
+
+        upper_insn = (uint32_t)(*(uint16_t*)addr);
+        lower_insn = (uint32_t)(*(uint16_t*)(addr + 2));
+
+        bvdbg("Performing THM_JUMP24 [%d] link at addr=%08lx [%04x %04x] to sym=%p st_value=%08lx\n",
+              ELF32_R_TYPE(rel->r_info), (long)addr, (int)upper_insn, (int)lower_insn,
+              sym, (long)sym->st_value);
+
+        /* Extract the 25-bit offset from the 32-bit instruction:
+         *
+         *   offset[24]    = S
+         *   offset[23]    = ~(J1 ^ S)
+         *   offset[22]    = ~(J2 ^ S)]
+         *   offset[12:21] = imm10
+         *   offset[1:11]  = imm11
+         *   offset[0]     = 0
+         */
+
+        S   = (upper_insn >> 10) & 1;
+        J1  = (lower_insn >> 13) & 1;
+        J2  = (lower_insn >> 11) & 1;
+
+        offset = (S << 24) |                       /* S -   > offset[24] */
+                 ((~(J1 ^ S) & 1) << 23) |         /* J1    -> offset[23] */
+                 ((~(J2 ^ S) & 1) << 22) |         /* J2    -> offset[22] */
+                 ((upper_insn & 0x03ff) << 12) |   /* imm10 -> offset[12:21] */
+                 ((lower_insn & 0x07ff) << 1);     /* imm11 -> offset[1:11] */
+                                                   /* 0     -> offset[0] */
+
+        /* Sign extend */
+
+        if (offset & 0x01000000)
+          {
+            offset -= 0x02000000;
+          }
+
+        /* And perform the relocation */
+
+        bvdbg("  S=%d J1=%d J2=%d offset=%08lx branch target=%08lx\n",
+              S, J1, J2, (long)offset, offset + sym->st_value - addr);
+
+        offset += sym->st_value - addr;
+
+        /* Is this a function symbol?  If so, then the branch target must be
+         * an odd Thumb address
+         */
+
+        if (ELF32_ST_TYPE(sym->st_info) == STT_FUNC && (offset & 1) == 0)
+          {
+            bdbg("  ERROR: JUMP24 [%d] requires odd offset, offset=%08lx\n",
+                 ELF32_R_TYPE(rel->r_info), offset);
+
+            return -EINVAL;
+          }
+
+        /* Check the range of the offset */
+
+        if (offset <= (int32_t)0xff000000 || offset >= (int32_t)0x01000000)
+          {
+            bdbg("  ERROR: JUMP24 [%d] relocation out of range, branch taget=%08lx\n",
+                 ELF32_R_TYPE(rel->r_info), offset);
+
+            return -EINVAL;
+          }
+
+        /* Now, reconstruct the 32-bit instruction using the new, relocated
+         * branch target.
+         */
+
+        S  = (offset >> 24) & 1;
+        J1 = S ^ (~(offset >> 23) & 1);
+        J2 = S ^ (~(offset >> 22) & 1);
+ 
+        upper_insn = ((upper_insn & 0xf800) | (S << 10) | ((offset >> 12) & 0x03ff));
+        *(uint16_t*)addr = (uint16_t)upper_insn;
+
+        lower_insn = ((lower_insn & 0xd000) | (J1 << 13) | (J2 << 11) | ((offset >> 1) & 0x07ff));
+        *(uint16_t*)(addr + 2) = (uint16_t)lower_insn;
+
+        bvdbg("  S=%d J1=%d J2=%d insn [%04x %04x]\n",
+              S, J1, J2, (int)upper_insn, (int)lower_insn);
+      }
+      break;
+
+    case R_ARM_V4BX:
+      {
+        bvdbg("Performing V4BX link at addr=%08lx [%08lx]\n",
+              (long)addr, (long)(*(uint32_t*)addr));
+
+         /* Preserve only Rm and the condition code */
+
+        *(uint32_t*)addr &= 0xf000000f;
+
+        /* Change instruction to 'mov pc, Rm' */
+
+        *(uint32_t*)addr |= 0x01a0f000;
+      }
+      break;
+
+    case R_ARM_PREL31:
+      {
+        bvdbg("Performing PREL31 link at addr=%08lx [%08lx] to sym=%p st_value=%08lx\n",
+              (long)addr, (long)(*(uint32_t*)addr), sym, (long)sym->st_value);
+
+        offset           = *(uint32_t*)addr + sym->st_value - addr;
+        *(uint32_t*)addr = offset & 0x7fffffff;
+      }
+      break;
+
+    case R_ARM_MOVW_ABS_NC:
+    case R_ARM_MOVT_ABS:
+      {
+        bvdbg("Performing MOVx_ABS [%d] link at addr=%08lx [%08lx] to sym=%p st_value=%08lx\n",
+              ELF32_R_TYPE(rel->r_info), (long)addr, (long)(*(uint32_t*)addr),
+              sym, (long)sym->st_value);
+
+        offset = *(uint32_t*)addr;
+        offset = ((offset & 0xf0000) >> 4) | (offset & 0xfff);
+        offset = (offset ^ 0x8000) - 0x8000;
+
+        offset += sym->st_value;
+        if (ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_ABS)
+          {
+            offset >>= 16;
+          }
+
+        *(uint32_t*)addr &= 0xfff0f000;
+        *(uint32_t*)addr |= ((offset & 0xf000) << 4) | (offset & 0x0fff);
+      }
+      break;
+
+    case R_ARM_THM_MOVW_ABS_NC:
+    case R_ARM_THM_MOVT_ABS:
+      {
+        /* Thumb MOVW and MOVT instructions (same immediate fields).  Encoding:
+         *
+         * upper_insn:
+         *
+         *  1   1   1   1   1   1
+         *  5   4   3   2   1   0   9   8   7   6   5   4   3   2   1   0
+         * +----------+---+-------------------------------+--------------+
+         * |1   1   1 |OP1|     OP2                       |              | 32-Bit Instructions
+         * +----------+---+--+-----+----------------------+--------------+
+         * |1   1   1 | 1   0|  i  | 1  0   1   1   0   0 |    imm4      | MOVT Instruction
+         * +----------+------+-----+----------------------+--------------+
+         *
+         * lower_insn:
+         *
+         *  1   1   1   1   1   1
+         *  5   4   3   2   1   0   9   8   7   6   5   4   3   2   1   0
+         * +---+---------------------------------------------------------+
+         * |OP |                                                         | 32-Bit Instructions
+         * +---+----------+--------------+-------------------------------+
+         * |0  |   imm3   |      Rd      |            imm8               | MOVT Instruction
+         * +---+----------+--------------+-------------------------------+
+         *
+         * The 16-bit immediate value is encoded in these bits:
+         *
+         *   i    = imm16[11]    = upper_insn[10]
+         *   imm4 = imm16[12:15] = upper_insn[3:0]
+         *   imm3 = imm16[8:10]  = lower_insn[14:12]
+         *   imm8 = imm16[0:7]   = lower_insn[7:0]
+         */
+
+        upper_insn = (uint32_t)(*(uint16_t*)addr);
+        lower_insn = (uint32_t)(*(uint16_t*)(addr + 2));
+
+        bvdbg("Performing THM_MOVx [%d] link at addr=%08lx [%04x %04x] to sym=%p st_value=%08lx\n",
+              ELF32_R_TYPE(rel->r_info), (long)addr, (int)upper_insn, (int)lower_insn,
+              sym, (long)sym->st_value);
+
+        /* Extract the 16-bit offset from the 32-bit instruction */
+
+        offset = ((upper_insn & 0x000f) << 12) | /* imm4 -> imm16[12:15] */
+                 ((upper_insn & 0x0400) << 1) |  /* i    -> imm16[11] */
+                 ((lower_insn & 0x7000) >> 4) |  /* imm3 -> imm16[8:10] */
+                  (lower_insn & 0x00ff);         /* imm8 -> imm16[0:7] */
+
+        /* Sign extend */
+
+        offset = (offset ^ 0x8000) - 0x8000;
+
+        /* And perform the relocation */
+
+        bvdbg("  offset=%08lx branch target=%08lx\n",
+              (long)offset, offset + sym->st_value);
+
+        offset += sym->st_value;
+
+        /* Update the immediate value in the instruction.  For MOVW we want the bottom
+         * 16-bits; for MOVT we want the top 16-bits.
+         */
+
+        if (ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_ABS)
+          {
+            offset >>= 16;
+          }
+
+        upper_insn = ((upper_insn & 0xfbf0) | ((offset & 0xf000) >> 12) | ((offset & 0x0800) >> 1));
+        *(uint16_t*)addr = (uint16_t)upper_insn;
+
+        lower_insn = ((lower_insn & 0x8f00) | ((offset & 0x0700) << 4) | (offset & 0x00ff));
+        *(uint16_t*)(addr + 2) = (uint16_t)lower_insn;
+
+        bvdbg("  insn [%04x %04x]\n",
+             (int)upper_insn, (int)lower_insn);
+      }
+      break;
+
+    default:
+      bdbg("Unsupported relocation: %d\n", ELF32_R_TYPE(rel->r_info));
+      return -EINVAL;
+    }
+
+  return OK;
+}
+
+int arch_relocateadd(FAR const Elf32_Rela *rel, FAR const Elf32_Sym *sym,
+                     uintptr_t addr)
+{
+  bdbg("RELA relocation not supported\n"); /* Stub: no RELA type is handled here */
+  return -ENOSYS;                          /* Always fails; rel, sym, addr unused */
+}
+
diff --git a/nuttx/arch/arm/src/armv7-m/up_exception.S b/nuttx/arch/arm/src/armv7-m/up_exception.S
index 31d8dbb0c..c9f216027 100644
--- a/nuttx/arch/arm/src/armv7-m/up_exception.S
+++ b/nuttx/arch/arm/src/armv7-m/up_exception.S
@@ -134,9 +134,9 @@ exception_common:
 
 #if CONFIG_ARCH_INTERRUPTSTACK > 3
 	ldr		sp, =g_intstackbase
-	push	r1						/* Save the MSP on the interrupt stack */
+	push	{r1}					/* Save the MSP on the interrupt stack */
 	bl		up_doirq				/* R0=IRQ, R1=register save area on stack */
-	pop		r1						/* Recover R1=main stack pointer */
+	pop		{r1}					/* Recover R1=main stack pointer */
 #else
 	msr	 msp, r1					/* We are using the main stack pointer */
 	bl		up_doirq				/* R0=IRQ, R1=register save area on stack */
diff --git a/nuttx/arch/arm/src/armv7-m/up_hardfault.c b/nuttx/arch/arm/src/armv7-m/up_hardfault.c
index cb3ce9e8a..c30015ad2 100644
--- a/nuttx/arch/arm/src/armv7-m/up_hardfault.c
+++ b/nuttx/arch/arm/src/armv7-m/up_hardfault.c
@@ -57,9 +57,7 @@
 
 /* Debug output from this file may interfere with context switching! */
 
-#undef DEBUG_HARDFAULTS         /* Define to debug hard faults */
-
-#ifdef DEBUG_HARDFAULTS
+#ifdef CONFIG_DEBUG_HARDFAULT
 # define hfdbg(format, arg...) lldbg(format, ##arg)
 #else
 # define hfdbg(x...)
diff --git a/nuttx/arch/arm/src/armv7-m/up_memcpy.S b/nuttx/arch/arm/src/armv7-m/up_memcpy.S
new file mode 100644
index 000000000..a154cab61
--- /dev/null
+++ b/nuttx/arch/arm/src/armv7-m/up_memcpy.S
@@ -0,0 +1,416 @@
+/************************************************************************************
+ * nuttx/arch/arm/src/armv7-m/up_memcpy.S
+ *
+ * armv7m-optimised memcpy, contributed by Mike Smith.  Apparently in the public
+ * domain and is re-released here under the modified BSD license:
+ *
+ * Obtained via a posting on the Stellaris forum:
+ *  http://e2e.ti.com/support/microcontrollers/\
+ *       stellaris_arm_cortex-m3_microcontroller/f/473/t/44360.aspx
+ *
+ * Posted by rocksoft on Jul 24, 2008 10:19 AM
+ *
+ *   Hi,
+ *
+ *   I recently finished a "memcpy" replacement and thought it might be useful for
+ *   others...
+ *
+ *   I've put some instructions and the code here:
+ *
+ *   http://www.rock-software.net/downloads/memcpy/
+ *
+ *   Hope it works for you as well as it did for me.
+ *
+ *   Liam.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ * 3. Neither the name NuttX nor the names of its contributors may be
+ *    used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ ************************************************************************************/
+
+/************************************************************************************
+ * Global Symbols
+ ************************************************************************************/
+
+	.global		memcpy
+
+	.syntax		unified
+	.thumb
+	.cpu		cortex-m3
+	.file		"up_memcpy.S"
+
+/************************************************************************************
+ * .text
+ ************************************************************************************/
+
+	.text
+
+/************************************************************************************
+ * Private Constant Data
+ ************************************************************************************/
+
+/* We have 16 possible alignment combinations of src and dst; this TBB jump table,
+ * indexed by (src & 3) << 2 | (dst & 3), directs the copy operation
+ *
+ * Bits:  Src=00, Dst=00 - Long to Long copy
+ * Bits:  Src=00, Dst=01 - Long to Byte before half word
+ * Bits:  Src=00, Dst=10 - Long to Half word
+ * Bits:  Src=00, Dst=11 - Long to Byte before long word
+ * Bits:  Src=01, Dst=00 - Byte before half word to long
+ * Bits:  Src=01, Dst=01 - Byte before half word to byte before half word -
+ *                         Same alignment
+ * Bits:  Src=01, Dst=10 - Byte before half word to half word
+ * Bits:  Src=01, Dst=11 - Byte before half word to byte before long word
+ * Bits:  Src=10, Dst=00 - Half word to long word
+ * Bits:  Src=10, Dst=01 - Half word to byte before half word
+ * Bits:  Src=10, Dst=10 - Half word to half word - Same Alignment
+ * Bits:  Src=10, Dst=11 - Half word to byte before long word
+ * Bits:  Src=11, Dst=00 - Byte before long word to long word
+ * Bits:  Src=11, Dst=01 - Byte before long word to byte before half word
+ * Bits:  Src=11, Dst=10 - Byte before long word to half word
+ * Bits:  Src=11, Dst=11 - Byte before long word to Byte before long word -
+ *                         Same alignment
+ */
+
+MEM_DataCopyTable:	/* Entries are halfword offsets from MEM_DataCopyJump */
+	.byte	(MEM_DataCopy0 - MEM_DataCopyJump) >> 1	/* [0]  Src=00, Dst=00 */
+	.byte	(MEM_DataCopy1 - MEM_DataCopyJump) >> 1	/* [1]  Src=00, Dst=01 */
+	.byte	(MEM_DataCopy2 - MEM_DataCopyJump) >> 1	/* [2]  Src=00, Dst=10 */
+	.byte	(MEM_DataCopy3 - MEM_DataCopyJump) >> 1	/* [3]  Src=00, Dst=11 */
+	.byte	(MEM_DataCopy4 - MEM_DataCopyJump) >> 1	/* [4]  Src=01, Dst=00 */
+	.byte	(MEM_DataCopy5 - MEM_DataCopyJump) >> 1	/* [5]  Src=01, Dst=01 */
+	.byte	(MEM_DataCopy6 - MEM_DataCopyJump) >> 1	/* [6]  Src=01, Dst=10 */
+	.byte	(MEM_DataCopy7 - MEM_DataCopyJump) >> 1	/* [7]  Src=01, Dst=11 */
+	.byte	(MEM_DataCopy8 - MEM_DataCopyJump) >> 1	/* [8]  Src=10, Dst=00 */
+	.byte	(MEM_DataCopy9 - MEM_DataCopyJump) >> 1	/* [9]  Src=10, Dst=01 */
+	.byte	(MEM_DataCopy10 - MEM_DataCopyJump) >> 1	/* [10] Src=10, Dst=10 */
+	.byte	(MEM_DataCopy11 - MEM_DataCopyJump) >> 1	/* [11] Src=10, Dst=11 */
+	.byte	(MEM_DataCopy12 - MEM_DataCopyJump) >> 1	/* [12] Src=11, Dst=00 */
+	.byte	(MEM_DataCopy13 - MEM_DataCopyJump) >> 1	/* [13] Src=11, Dst=01 */
+	.byte	(MEM_DataCopy14 - MEM_DataCopyJump) >> 1	/* [14] Src=11, Dst=10 */
+	.byte	(MEM_DataCopy15 - MEM_DataCopyJump) >> 1	/* [15] Src=11, Dst=11 */
+
+	.align 2
+
+MEM_LongCopyTable:	/* TBB halfword offsets from MEM_LongCopyJump, indexed by words left */
+	.byte	(MEM_LongCopyEnd - MEM_LongCopyJump) >> 1	/* 0 bytes left: skip the copy */
+	.byte	0					/* 4 bytes left: first entry at the anchor */
+	.byte	(1 * 10) >> 1		/* 8 bytes left: each code entry is 10 bytes long */
+	.byte	(2 * 10) >> 1		/* 12 bytes left */
+	.byte	(3 * 10) >> 1		/* 16 bytes left */
+	.byte	(4 * 10) >> 1		/* 20 bytes left */
+	.byte	(5 * 10) >> 1		/* 24 bytes left */
+	.byte	(6 * 10) >> 1		/* 28 bytes left */
+	.byte	(7 * 10) >> 1		/* 32 bytes left */
+	.byte	(8 * 10) >> 1		/* 36 bytes left */
+
+	.align 2
+
+/************************************************************************************
+ * Public Functions
+ ************************************************************************************/
+/************************************************************************************
+ * Name: memcpy
+ *
+ * Description:
+ *   Optimised "general" copy routine.  Dispatches on the low two address bits of
+ *   src and dst, byte-copies until src is word aligned, then copies in bulk.
+ *
+ * Input Parameters:
+ *   r0 = destination, r1 = source, r2 = length
+ *
+ * Returned Value:
+ *   r0 = destination, exactly as passed in (as the C memcpy() contract requires)
+ *
+ ************************************************************************************/
+
+	.thumb_func
+memcpy:
+	push	{r0, r14}				/* Save dst (the return value) and LR */
+
+	/* This allows the inner workings to "assume" a minimum amount of bytes */
+	/* Quickly check for very short copies */
+
+	cmp		r2, #4
+	blt		MEM_DataCopyBytes
+
+	and		r14, r0, #3				/* Get destination alignment bits */
+	bfi		r14, r1, #2, #2			/* Get source alignment bits */
+	ldr		r3, =MEM_DataCopyTable	/* Jump table base */
+	tbb		[r3, r14]				/* Perform jump on src/dst alignment bits */
+MEM_DataCopyJump:
+
+	.align 4
+
+/* Bits:  Src=01, Dst=01 - Byte before half word to byte before half word - Same alignment
+ * 3 bytes to read for long word aligning
+ */
+
+MEM_DataCopy5:
+	ldrb	r3, [r1], #0x01
+	strb	r3, [r0], #0x01
+	sub		r2, r2, #0x01
+
+/* Bits:  Src=10, Dst=10 - Half word to half word - Same Alignment
+ * 2 bytes to read for long word aligning
+ */
+
+MEM_DataCopy10:
+	ldrb	r3, [r1], #0x01
+	strb	r3, [r0], #0x01
+	sub		r2, r2, #0x01
+
+/* Bits:  Src=11, Dst=11 - Byte before long word to Byte before long word - Same alignment
+ * 1 byte to read for long word aligning
+ */
+
+MEM_DataCopy15:
+	ldrb	r3, [r1], #0x01
+	strb	r3, [r0], #0x01
+	sub		r2, r2, #0x01
+
+/* Bits:  Src=00, Dst=00 - Long to Long copy */
+
+MEM_DataCopy0:
+	/* Save regs that may be used by memcpy */
+
+	push	{r4-r12}
+
+	/* Check for short word-aligned copy */
+
+	cmp		r2, #0x28
+	blt		MEM_DataCopy0_2
+
+	/* Bulk copy loop */
+
+MEM_DataCopy0_1:
+	ldmia	r1!, {r3-r12}		/* 10 words = 0x28 bytes per pass */
+	stmia	r0!, {r3-r12}
+	sub		r2, r2, #0x28
+	cmp		r2, #0x28
+	bge		MEM_DataCopy0_1
+
+MEM_DataCopy0_2:
+	/* Copy remaining long words */
+
+	ldr		r14, =MEM_LongCopyTable
+	lsr		r11, r2, #0x02		/* r11 = whole words left (0..9) */
+	tbb		[r14, r11]
+
+	/* Longword copy branch table anchor; each entry below is exactly 10 bytes */
+
+MEM_LongCopyJump:
+	ldr.w	r3, [r1], #0x04		/* 4 bytes remain */
+	str.w	r3, [r0], #0x04
+	b.n		MEM_LongCopyEnd
+	ldmia.w	r1!, {r3-r4}		/* 8 bytes remain */
+	stmia.w	r0!, {r3-r4}
+	b.n		MEM_LongCopyEnd
+	ldmia.w	r1!, {r3-r5}		/* 12 bytes remain */
+	stmia.w	r0!, {r3-r5}
+	b.n		MEM_LongCopyEnd
+	ldmia.w	r1!, {r3-r6}		/* 16 bytes remain */
+	stmia.w	r0!, {r3-r6}
+	b.n		MEM_LongCopyEnd
+	ldmia.w	r1!, {r3-r7}		/* 20 bytes remain */
+	stmia.w	r0!, {r3-r7}
+	b.n		MEM_LongCopyEnd
+	ldmia.w	r1!, {r3-r8}		/* 24 bytes remain */
+	stmia.w	r0!, {r3-r8}
+	b.n		MEM_LongCopyEnd
+	ldmia.w	r1!, {r3-r9}		/* 28 bytes remain */
+	stmia.w	r0!, {r3-r9}
+	b.n		MEM_LongCopyEnd
+	ldmia.w	r1!, {r3-r10}		/* 32 bytes remain */
+	stmia.w	r0!, {r3-r10}
+	b.n		MEM_LongCopyEnd
+	ldmia.w	r1!, {r3-r11}		/* 36 bytes remain */
+	stmia.w	r0!, {r3-r11}
+
+MEM_LongCopyEnd:
+	pop		{r4-r12}
+	and		r2, r2, #0x03		/* All the longs have been copied */
+
+MEM_DataCopyBytes:
+	/* Deal with up to 3 remaining bytes; every exit reloads the saved dst into r0 */
+
+	cmp		r2, #0x00
+	it		eq
+	popeq	{r0, pc}
+	ldrb	r3, [r1], #0x01
+	strb	r3, [r0], #0x01
+	subs	r2, r2, #0x01
+	it		eq
+	popeq	{r0, pc}
+	ldrb	r3, [r1], #0x01
+	strb	r3, [r0], #0x01
+	subs	r2, r2, #0x01
+	it		eq
+	popeq	{r0, pc}
+	ldrb	r3, [r1], #0x01
+	strb	r3, [r0], #0x01
+	pop		{r0, pc}
+
+	.align 4
+
+/* Bits:  Src=01, Dst=11 - Byte before half word to byte before long word
+ * 3 bytes to read for long word aligning the source
+ */
+
+MEM_DataCopy7:
+	ldrb	r3, [r1], #0x01
+	strb	r3, [r0], #0x01
+	sub		r2, r2, #0x01
+
+/* Bits:  Src=10, Dst=00 - Half word to long word
+ * 2 bytes to read for long word aligning the source
+ */
+
+MEM_DataCopy8:
+	ldrb	r3, [r1], #0x01
+	strb	r3, [r0], #0x01
+	sub		r2, r2, #0x01
+
+/* Bits:  Src=11, Dst=01 - Byte before long word to byte before half word
+ * 1 byte to read for long word aligning the source
+ */
+
+MEM_DataCopy13:
+	ldrb	r3, [r1], #0x01
+	strb	r3, [r0], #0x01
+	sub		r2, r2, #0x01
+
+/* Bits:  Src=00, Dst=10 - Long to Half word (little-endian lane order assumed) */
+
+MEM_DataCopy2:
+	cmp		r2, #0x28
+	blt		MEM_DataCopy2_1
+
+	/* Save regs */
+
+	push	{r4-r12}
+
+	/* Bulk copy loop */
+
+MEM_DataCopy2_2:
+	ldmia	r1!, {r3-r12}		/* 10 source words */
+
+	strh	r3, [r0], #0x02		/* Low half first; dst is word aligned afterwards */
+
+	lsr		r3, r3, #0x10		/* Re-pack: high half of word n + low half of n+1 */
+	bfi		r3, r4, #0x10, #0x10
+	lsr		r4, r4, #0x10
+	bfi		r4, r5, #0x10, #0x10
+	lsr		r5, r5, #0x10
+	bfi		r5, r6, #0x10, #0x10
+	lsr		r6, r6, #0x10
+	bfi		r6, r7, #0x10, #0x10
+	lsr		r7, r7, #0x10
+	bfi		r7, r8, #0x10, #0x10
+	lsr		r8, r8, #0x10
+	bfi		r8, r9, #0x10, #0x10
+	lsr		r9, r9, #0x10
+	bfi		r9, r10, #0x10, #0x10
+	lsr		r10, r10, #0x10
+	bfi		r10, r11, #0x10, #0x10
+	lsr		r11, r11, #0x10
+	bfi		r11, r12, #0x10, #0x10
+	stmia	r0!, {r3-r11}		/* 9 re-packed words */
+	lsr		r12, r12, #0x10
+	strh	r12, [r0], #0x02	/* Trailing high half of the last source word */
+
+	sub		r2, r2, #0x28
+	cmp		r2, #0x28
+	bge		MEM_DataCopy2_2
+	pop		{r4-r12}
+
+MEM_DataCopy2_1: /* Read longs and write 2 x half words */
+	cmp		r2, #4
+	blt		MEM_DataCopyBytes
+	ldr		r3, [r1], #0x04
+	strh	r3, [r0], #0x02
+	lsr		r3, r3, #0x10
+	strh	r3, [r0], #0x02
+	sub		r2, r2, #0x04
+	b		MEM_DataCopy2
+
+/* Bits:  Src=01, Dst=00 - Byte before half word to long
+ * Bits:  Src=01, Dst=10 - Byte before half word to half word
+ * 3 bytes to read for long word aligning the source
+ */
+
+MEM_DataCopy4:
+MEM_DataCopy6:
+	/* Read B and write B */
+
+	ldrb	r3, [r1], #0x01
+	strb	r3, [r0], #0x01
+	sub		r2, r2, #0x01
+
+/* Bits:  Src=10, Dst=01 - Half word to byte before half word
+ * Bits:  Src=10, Dst=11 - Half word to byte before long word
+ * 2 bytes to read for long word aligning the source
+ */
+
+MEM_DataCopy9:
+MEM_DataCopy11:
+	ldrb	r3, [r1], #0x01
+	strb	r3, [r0], #0x01
+	sub		r2, r2, #0x01
+
+/* Bits:  Src=11, Dst=00 - Byte before long word to long word
+ * Bits:  Src=11, Dst=10 - Byte before long word to half word
+ * 1 byte to read for long word aligning the source
+ */
+
+MEM_DataCopy12:
+MEM_DataCopy14:
+	/* Read B and write B */
+
+	ldrb	r3, [r1], #0x01
+	strb	r3, [r0], #0x01
+	sub		r2, r2, #0x01
+
+/* Bits:  Src=00, Dst=01 - Long to Byte before half word
+ * Bits:  Src=00, Dst=11 - Long to Byte before long word
+ */
+
+MEM_DataCopy1: /* Read longs, write B->H->B */
+MEM_DataCopy3:
+	cmp		r2, #4
+	blt		MEM_DataCopyBytes
+	ldr		r3, [r1], #0x04
+	strb	r3, [r0], #0x01		/* Byte 0; dst becomes half word aligned */
+	lsr		r3, r3, #0x08
+	strh	r3, [r0], #0x02		/* Bytes 1-2 */
+	lsr		r3, r3, #0x10
+	strb	r3, [r0], #0x01		/* Byte 3 */
+	sub		r2, r2, #0x04
+	b		MEM_DataCopy3
+
+	.size	memcpy, .-memcpy
+	.end