20 files changed, 2748 insertions, 0 deletions
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_copy_f32.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_copy_f32.c
new file mode 100644
index 000000000..619c1577a
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_copy_f32.c
@@ -0,0 +1,130 @@
+/* ----------------------------------------------------------------------------    
+* Copyright (C) 2010 ARM Limited. All rights reserved.    
+*    
+* $Date:        15. February 2012  
+* $Revision: 	V1.1.0  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:		arm_copy_f32.c    
+*    
+* Description:	Copies the elements of a floating-point vector.  
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.1.0 2012/02/15 
+*    Updated with more optimizations, bug fixes and minor API changes.  
+*   
+* Version 1.0.10 2011/7/15  
+*    Big Endian support added and Merged M0 and M3/M4 Source code.   
+*    
+* Version 1.0.3 2010/11/29   
+*    Re-organized the CMSIS folders and updated documentation.    
+*     
+* Version 1.0.2 2010/11/11    
+*    Documentation updated.     
+*    
+* Version 1.0.1 2010/10/05     
+*    Production release and review comments incorporated.    
+*    
+* Version 1.0.0 2010/09/20     
+*    Production release and review comments incorporated.    
+*    
+* Version 0.0.7  2010/06/10     
+*    Misra-C changes done    
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**    
+ * @ingroup groupSupport    
+ */
+
+/**    
+ * @defgroup copy Vector Copy    
+ *    
+ * Copies sample by sample from source vector to destination vector.    
+ *    
+ * <pre>    
+ * 	pDst[n] = pSrc[n];   0 <= n < blockSize.    
+ * </pre>    
+ *   
+ * There are separate functions for floating point, Q31, Q15, and Q7 data types.     
+ */
+
+/**    
+ * @addtogroup copy    
+ * @{    
+ */
+
+/**    
+ * @brief Copies the elements of a floating-point vector.     
+ * @param[in]       *pSrc points to input vector    
+ * @param[out]      *pDst points to output vector    
+ * @param[in]       blockSize length of the input vector   
+ * @return none.    
+ *    
+ */
+
+
+void arm_copy_f32(
+  float32_t * pSrc,
+  float32_t * pDst,
+  uint32_t blockSize)
+{
+  uint32_t blkCnt;                               /* loop counter */
+
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+  float32_t in1, in2, in3, in4;
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* C = A */
+    /* Copy and then store the results in the destination buffer */
+    in1 = *pSrc++;
+    in2 = *pSrc++;
+    in3 = *pSrc++;
+    in4 = *pSrc++;
+
+    *pDst++ = in1;
+    *pDst++ = in2;
+    *pDst++ = in3;
+    *pDst++ = in4;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.    
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  /* Loop over blockSize number of values */
+  blkCnt = blockSize;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  while(blkCnt > 0u)
+  {
+    /* C = A */
+    /* Copy and then store the results in the destination buffer */
+    *pDst++ = *pSrc++;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+}
+
+/**    
+ * @} end of BasicCopy group    
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_copy_q15.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_copy_q15.c
new file mode 100644
index 000000000..00be9dc0b
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_copy_q15.c
@@ -0,0 +1,109 @@
+/* ----------------------------------------------------------------------    
+* Copyright (C) 2010 ARM Limited. All rights reserved.    
+*    
+* $Date:        15. February 2012  
+* $Revision: 	V1.1.0  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:		arm_copy_q15.c    
+*    
+* Description:	Copies the elements of a Q15 vector.   
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.1.0 2012/02/15 
+*    Updated with more optimizations, bug fixes and minor API changes.  
+*   
+* Version 1.0.10 2011/7/15  
+*    Big Endian support added and Merged M0 and M3/M4 Source code.   
+*    
+* Version 1.0.3 2010/11/29   
+*    Re-organized the CMSIS folders and updated documentation.    
+*     
+* Version 1.0.2 2010/11/11    
+*    Documentation updated.     
+*    
+* Version 1.0.1 2010/10/05     
+*    Production release and review comments incorporated.    
+*    
+* Version 1.0.0 2010/09/20     
+*    Production release and review comments incorporated.    
+*    
+* Version 0.0.7  2010/06/10     
+*    Misra-C changes done    
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**    
+ * @ingroup groupSupport    
+ */
+
+/**    
+ * @addtogroup copy    
+ * @{    
+ */
+/**    
+ * @brief Copies the elements of a Q15 vector.     
+ * @param[in]       *pSrc points to input vector    
+ * @param[out]      *pDst points to output vector    
+ * @param[in]       blockSize length of the input vector   
+ * @return none.    
+ *    
+ */
+
+void arm_copy_q15(
+  q15_t * pSrc,
+  q15_t * pDst,
+  uint32_t blockSize)
+{
+  uint32_t blkCnt;                               /* loop counter */
+
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* C = A */
+    /* Read two inputs */
+    *__SIMD32(pDst)++ = *__SIMD32(pSrc)++;
+    *__SIMD32(pDst)++ = *__SIMD32(pSrc)++;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.    
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  /* Loop over blockSize number of values */
+  blkCnt = blockSize;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  while(blkCnt > 0u)
+  {
+    /* C = A */
+    /* Copy and then store the value in the destination buffer */
+    *pDst++ = *pSrc++;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+}
+
+/**    
+ * @} end of BasicCopy group    
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_copy_q31.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_copy_q31.c
new file mode 100644
index 000000000..7ab0849f6
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_copy_q31.c
@@ -0,0 +1,118 @@
+/* ----------------------------------------------------------------------    
+* Copyright (C) 2010 ARM Limited. All rights reserved.    
+*    
+* $Date:        15. February 2012  
+* $Revision: 	V1.1.0  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:		arm_copy_q31.c    
+*    
+* Description:	Copies the elements of a Q31 vector.   
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.1.0 2012/02/15 
+*    Updated with more optimizations, bug fixes and minor API changes.  
+*   
+* Version 1.0.10 2011/7/15  
+*    Big Endian support added and Merged M0 and M3/M4 Source code.   
+*    
+* Version 1.0.3 2010/11/29   
+*    Re-organized the CMSIS folders and updated documentation.    
+*     
+* Version 1.0.2 2010/11/11    
+*    Documentation updated.     
+*    
+* Version 1.0.1 2010/10/05     
+*    Production release and review comments incorporated.    
+*    
+* Version 1.0.0 2010/09/20     
+*    Production release and review comments incorporated.    
+*    
+* Version 0.0.7  2010/06/10     
+*    Misra-C changes done    
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**    
+ * @ingroup groupSupport    
+ */
+
+/**    
+ * @addtogroup copy    
+ * @{    
+ */
+
+/**    
+ * @brief Copies the elements of a Q31 vector.     
+ * @param[in]       *pSrc points to input vector    
+ * @param[out]      *pDst points to output vector    
+ * @param[in]       blockSize length of the input vector   
+ * @return none.    
+ *    
+ */
+
+void arm_copy_q31(
+  q31_t * pSrc,
+  q31_t * pDst,
+  uint32_t blockSize)
+{
+  uint32_t blkCnt;                               /* loop counter */
+
+
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+  q31_t in1, in2, in3, in4;
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* C = A */
+    /* Copy and then store the values in the destination buffer */
+    in1 = *pSrc++;
+    in2 = *pSrc++;
+    in3 = *pSrc++;
+    in4 = *pSrc++;
+
+    *pDst++ = in1;
+    *pDst++ = in2;
+    *pDst++ = in3;
+    *pDst++ = in4;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.    
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  /* Loop over blockSize number of values */
+  blkCnt = blockSize;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  while(blkCnt > 0u)
+  {
+    /* C = A */
+    /* Copy and then store the value in the destination buffer */
+    *pDst++ = *pSrc++;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+}
+
+/**    
+ * @} end of BasicCopy group    
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_copy_q7.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_copy_q7.c
new file mode 100644
index 000000000..425885753
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_copy_q7.c
@@ -0,0 +1,110 @@
+/* ----------------------------------------------------------------------    
+* Copyright (C) 2010 ARM Limited. All rights reserved.    
+*    
+* $Date:        15. February 2012  
+* $Revision: 	V1.1.0  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:		arm_copy_q7.c    
+*    
+* Description:	Copies the elements of a Q7 vector.    
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.1.0 2012/02/15 
+*    Updated with more optimizations, bug fixes and minor API changes.  
+*   
+* Version 1.0.10 2011/7/15  
+*    Big Endian support added and Merged M0 and M3/M4 Source code.   
+*    
+* Version 1.0.3 2010/11/29   
+*    Re-organized the CMSIS folders and updated documentation.    
+*     
+* Version 1.0.2 2010/11/11    
+*    Documentation updated.     
+*    
+* Version 1.0.1 2010/10/05     
+*    Production release and review comments incorporated.    
+*    
+* Version 1.0.0 2010/09/20     
+*    Production release and review comments incorporated.    
+*    
+* Version 0.0.7  2010/06/10     
+*    Misra-C changes done    
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**    
+ * @ingroup groupSupport    
+ */
+
+/**    
+ * @addtogroup copy    
+ * @{    
+ */
+
+/**    
+ * @brief Copies the elements of a Q7 vector.    
+ * @param[in]       *pSrc points to input vector    
+ * @param[out]      *pDst points to output vector    
+ * @param[in]       blockSize length of the input vector   
+ * @return none.    
+ *    
+ */
+
+void arm_copy_q7(
+  q7_t * pSrc,
+  q7_t * pDst,
+  uint32_t blockSize)
+{
+  uint32_t blkCnt;                               /* loop counter */
+
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* C = A */
+    /* Copy and then store the results in the destination buffer */
+    /* 4 samples are copied and stored at a time using SIMD */
+    *__SIMD32(pDst)++ = *__SIMD32(pSrc)++;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.    
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  /* Loop over blockSize number of values */
+  blkCnt = blockSize;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+
+  while(blkCnt > 0u)
+  {
+    /* C = A */
+    /* Copy and then store the results in the destination buffer */
+    *pDst++ = *pSrc++;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+}
+
+/**    
+ * @} end of BasicCopy group    
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_fill_f32.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_fill_f32.c
new file mode 100644
index 000000000..439e60db0
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_fill_f32.c
@@ -0,0 +1,129 @@
+/* ----------------------------------------------------------------------    
+* Copyright (C) 2010 ARM Limited. All rights reserved.    
+*    
+* $Date:        15. February 2012  
+* $Revision: 	V1.1.0  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:		arm_fill_f32.c    
+*    
+* Description:	Fills a constant value into a floating-point vector.   
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.1.0 2012/02/15 
+*    Updated with more optimizations, bug fixes and minor API changes.  
+*   
+* Version 1.0.10 2011/7/15  
+*    Big Endian support added and Merged M0 and M3/M4 Source code.   
+*    
+* Version 1.0.3 2010/11/29   
+*    Re-organized the CMSIS folders and updated documentation.    
+*     
+* Version 1.0.2 2010/11/11    
+*    Documentation updated.     
+*    
+* Version 1.0.1 2010/10/05     
+*    Production release and review comments incorporated.    
+*    
+* Version 1.0.0 2010/09/20     
+*    Production release and review comments incorporated.    
+*    
+* Version 0.0.7  2010/06/10     
+*    Misra-C changes done    
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**    
+ * @ingroup groupSupport    
+ */
+
+/**    
+ * @defgroup Fill Vector Fill    
+ *    
+ * Fills the destination vector with a constant value.    
+ *    
+ * <pre>    
+ * 	pDst[n] = value;   0 <= n < blockSize.    
+ * </pre>    
+ *   
+ * There are separate functions for floating point, Q31, Q15, and Q7 data types.     
+ */
+
+/**    
+ * @addtogroup Fill    
+ * @{    
+ */
+
+/**    
+ * @brief Fills a constant value into a floating-point vector.     
+ * @param[in]       value input value to be filled   
+ * @param[out]      *pDst points to output vector    
+ * @param[in]       blockSize length of the output vector   
+ * @return none.    
+ *    
+ */
+
+
+void arm_fill_f32(
+  float32_t value,
+  float32_t * pDst,
+  uint32_t blockSize)
+{
+  uint32_t blkCnt;                               /* loop counter */
+
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+  float32_t in1 = value;
+  float32_t in2 = value;
+  float32_t in3 = value;
+  float32_t in4 = value;
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* C = value */
+    /* Fill the value in the destination buffer */
+    *pDst++ = in1;
+    *pDst++ = in2;
+    *pDst++ = in3;
+    *pDst++ = in4;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.    
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  /* Loop over blockSize number of values */
+  blkCnt = blockSize;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+
+  while(blkCnt > 0u)
+  {
+    /* C = value */
+    /* Fill the value in the destination buffer */
+    *pDst++ = value;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+}
+
+/**    
+ * @} end of Fill group    
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_fill_q15.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_fill_q15.c
new file mode 100644
index 000000000..8f60d3b4e
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_fill_q15.c
@@ -0,0 +1,115 @@
+/* ----------------------------------------------------------------------    
+* Copyright (C) 2010 ARM Limited. All rights reserved.    
+*    
+* $Date:        15. February 2012  
+* $Revision: 	V1.1.0  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:		arm_fill_q15.c    
+*    
+* Description:	Fills a constant value into a Q15 vector.   
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.1.0 2012/02/15 
+*    Updated with more optimizations, bug fixes and minor API changes.  
+*   
+* Version 1.0.10 2011/7/15  
+*    Big Endian support added and Merged M0 and M3/M4 Source code.   
+*    
+* Version 1.0.3 2010/11/29   
+*    Re-organized the CMSIS folders and updated documentation.    
+*     
+* Version 1.0.2 2010/11/11    
+*    Documentation updated.     
+*    
+* Version 1.0.1 2010/10/05     
+*    Production release and review comments incorporated.    
+*    
+* Version 1.0.0 2010/09/20     
+*    Production release and review comments incorporated.    
+*    
+* Version 0.0.7  2010/06/10     
+*    Misra-C changes done    
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**    
+ * @ingroup groupSupport    
+ */
+
+/**    
+ * @addtogroup Fill    
+ * @{    
+ */
+
+/**    
+ * @brief Fills a constant value into a Q15 vector.    
+ * @param[in]       value input value to be filled   
+ * @param[out]      *pDst points to output vector    
+ * @param[in]       blockSize length of the output vector   
+ * @return none.    
+ *    
+ */
+
+void arm_fill_q15(
+  q15_t value,
+  q15_t * pDst,
+  uint32_t blockSize)
+{
+  uint32_t blkCnt;                               /* loop counter */
+
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+  q31_t packedValue;                             /* value packed to 32 bits */
+
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* Packing two 16 bit values to 32 bit value in order to use SIMD */
+  packedValue = __PKHBT(value, value, 16u);
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* C = value */
+    /* Fill the value in the destination buffer */
+    *__SIMD32(pDst)++ = packedValue;
+    *__SIMD32(pDst)++ = packedValue;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.    
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  /* Loop over blockSize number of values */
+  blkCnt = blockSize;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  while(blkCnt > 0u)
+  {
+    /* C = value */
+    /* Fill the value in the destination buffer */
+    *pDst++ = value;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+}
+
+/**    
+ * @} end of Fill group    
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_fill_q31.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_fill_q31.c
new file mode 100644
index 000000000..35565c3cf
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_fill_q31.c
@@ -0,0 +1,116 @@
+/* ----------------------------------------------------------------------    
+* Copyright (C) 2010 ARM Limited. All rights reserved.    
+*    
+* $Date:        15. February 2012  
+* $Revision: 	V1.1.0  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:		arm_fill_q31.c    
+*    
+* Description:	Fills a constant value into a Q31 vector.   
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.1.0 2012/02/15 
+*    Updated with more optimizations, bug fixes and minor API changes.  
+*   
+* Version 1.0.10 2011/7/15  
+*    Big Endian support added and Merged M0 and M3/M4 Source code.   
+*    
+* Version 1.0.3 2010/11/29   
+*    Re-organized the CMSIS folders and updated documentation.    
+*     
+* Version 1.0.2 2010/11/11    
+*    Documentation updated.     
+*    
+* Version 1.0.1 2010/10/05     
+*    Production release and review comments incorporated.    
+*    
+* Version 1.0.0 2010/09/20     
+*    Production release and review comments incorporated.    
+*    
+* Version 0.0.7  2010/06/10     
+*    Misra-C changes done    
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**    
+ * @ingroup groupSupport    
+ */
+
+/**    
+ * @addtogroup Fill    
+ * @{    
+ */
+
+/**    
+ * @brief Fills a constant value into a Q31 vector.    
+ * @param[in]       value input value to be filled   
+ * @param[out]      *pDst points to output vector    
+ * @param[in]       blockSize length of the output vector   
+ * @return none.    
+ *    
+ */
+
+void arm_fill_q31(
+  q31_t value,
+  q31_t * pDst,
+  uint32_t blockSize)
+{
+  uint32_t blkCnt;                               /* loop counter */
+
+
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+  q31_t in1 = value;
+  q31_t in2 = value;
+  q31_t in3 = value;
+  q31_t in4 = value;
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* C = value */
+    /* Fill the value in the destination buffer */
+    *pDst++ = in1;
+    *pDst++ = in2;
+    *pDst++ = in3;
+    *pDst++ = in4;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.    
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  /* Loop over blockSize number of values */
+  blkCnt = blockSize;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  while(blkCnt > 0u)
+  {
+    /* C = value */
+    /* Fill the value in the destination buffer */
+    *pDst++ = value;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+}
+
+/**    
+ * @} end of Fill group    
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_fill_q7.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_fill_q7.c
new file mode 100644
index 000000000..481d4c8f7
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_fill_q7.c
@@ -0,0 +1,113 @@
+/* ----------------------------------------------------------------------    
+* Copyright (C) 2010 ARM Limited. All rights reserved.    
+*    
+* $Date:        15. February 2012  
+* $Revision: 	V1.1.0  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:		arm_fill_q7.c    
+*    
+* Description:	Fills a constant value into a Q7 vector.   
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.1.0 2012/02/15 
+*    Updated with more optimizations, bug fixes and minor API changes.  
+*   
+* Version 1.0.10 2011/7/15  
+*    Big Endian support added and Merged M0 and M3/M4 Source code.   
+*    
+* Version 1.0.3 2010/11/29   
+*    Re-organized the CMSIS folders and updated documentation.    
+*     
+* Version 1.0.2 2010/11/11    
+*    Documentation updated.     
+*    
+* Version 1.0.1 2010/10/05     
+*    Production release and review comments incorporated.    
+*    
+* Version 1.0.0 2010/09/20     
+*    Production release and review comments incorporated.    
+*    
+* Version 0.0.7  2010/06/10     
+*    Misra-C changes done    
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**    
+ * @ingroup groupSupport    
+ */
+
+/**    
+ * @addtogroup Fill    
+ * @{    
+ */
+
+/**    
+ * @brief Fills a constant value into a Q7 vector.    
+ * @param[in]       value input value to be filled   
+ * @param[out]      *pDst points to output vector    
+ * @param[in]       blockSize length of the output vector   
+ * @return none.    
+ *    
+ */
+
+void arm_fill_q7(
+  q7_t value,
+  q7_t * pDst,
+  uint32_t blockSize)
+{
+  uint32_t blkCnt;                               /* loop counter */
+
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+  q31_t packedValue;                             /* value packed to 32 bits */
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* Packing four 8 bit values to 32 bit value in order to use SIMD */
+  packedValue = __PACKq7(value, value, value, value);
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* C = value */
+    /* Fill the value in the destination buffer */
+    *__SIMD32(pDst)++ = packedValue;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.    
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  /* Loop over blockSize number of values */
+  blkCnt = blockSize;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  while(blkCnt > 0u)
+  {
+    /* C = value */
+    /* Fill the value in the destination buffer */
+    *pDst++ = value;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+}
+
+/**    
+ * @} end of Fill group    
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_float_to_q15.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_float_to_q15.c
new file mode 100644
index 000000000..1537b93ab
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_float_to_q15.c
@@ -0,0 +1,196 @@
+/* ----------------------------------------------------------------------------    
+* Copyright (C) 2010 ARM Limited. All rights reserved.    
+*    
+* $Date:        15. February 2012  
+* $Revision: 	V1.1.0  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:		arm_float_to_q15.c    
+*    
+* Description:	Converts the elements of the floating-point vector to Q15 vector.    
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.1.0 2012/02/15 
+*    Updated with more optimizations, bug fixes and minor API changes.  
+*   
+* Version 1.0.10 2011/7/15  
+*    Big Endian support added and Merged M0 and M3/M4 Source code.   
+*    
+* Version 1.0.3 2010/11/29   
+*    Re-organized the CMSIS folders and updated documentation.    
+*     
+* Version 1.0.2 2010/11/11    
+*    Documentation updated.     
+*    
+* Version 1.0.1 2010/10/05     
+*    Production release and review comments incorporated.    
+*    
+* Version 1.0.0 2010/09/20     
+*    Production release and review comments incorporated.    
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**    
+ * @ingroup groupSupport    
+ */
+
+/**    
+ * @addtogroup float_to_x    
+ * @{    
+ */
+
+/**    
+ * @brief Converts the elements of the floating-point vector to Q15 vector.    
+ * @param[in]       *pSrc points to the floating-point input vector    
+ * @param[out]      *pDst points to the Q15 output vector   
+ * @param[in]       blockSize length of the input vector    
+ * @return none.    
+ *    
+ * \par Description:    
+ * \par   
+ * The equation used for the conversion process is:    
+ * <pre>    
+ * 	pDst[n] = (q15_t)(pSrc[n] * 32768);   0 <= n < blockSize.    
+ * </pre>    
+ * \par Scaling and Overflow Behavior:    
+ * \par    
+ * The function uses saturating arithmetic.    
+ * Results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated.    
+ * \note   
+ * In order to apply rounding, the library should be rebuilt with the ROUNDING macro     
+ * defined in the preprocessor section of project options.     
+ *    
+ */
+
+
+void arm_float_to_q15(
+  float32_t * pSrc,
+  q15_t * pDst,
+  uint32_t blockSize)
+{
+  float32_t *pIn = pSrc;                         /* Src pointer */
+  uint32_t blkCnt;                               /* loop counter */
+
+#ifdef ARM_MATH_ROUNDING
+
+  float32_t in;
+
+#endif /*      #ifdef ARM_MATH_ROUNDING        */
+
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+
+#ifdef ARM_MATH_ROUNDING
+    /* C = A * 32768 */
+    /* convert from float to q15 and then store the results in the destination buffer */
+    in = *pIn++;
+    in = (in * 32768.0f);
+    in += in > 0 ? 0.5 : -0.5;
+    *pDst++ = (q15_t) (__SSAT((q31_t) (in), 16));
+
+    in = *pIn++;
+    in = (in * 32768.0f);
+    in += in > 0 ? 0.5 : -0.5;
+    *pDst++ = (q15_t) (__SSAT((q31_t) (in), 16));
+
+    in = *pIn++;
+    in = (in * 32768.0f);
+    in += in > 0 ? 0.5 : -0.5;
+    *pDst++ = (q15_t) (__SSAT((q31_t) (in), 16));
+
+    in = *pIn++;
+    in = (in * 32768.0f);
+    in += in > 0 ? 0.5 : -0.5;
+    *pDst++ = (q15_t) (__SSAT((q31_t) (in), 16));
+
+#else
+
+    /* C = A * 32768 */
+    /* convert from float to q15 and then store the results in the destination buffer */
+    *pDst++ = (q15_t) __SSAT((q31_t) (*pIn++ * 32768.0f), 16);
+    *pDst++ = (q15_t) __SSAT((q31_t) (*pIn++ * 32768.0f), 16);
+    *pDst++ = (q15_t) __SSAT((q31_t) (*pIn++ * 32768.0f), 16);
+    *pDst++ = (q15_t) __SSAT((q31_t) (*pIn++ * 32768.0f), 16);
+
+#endif /*      #ifdef ARM_MATH_ROUNDING        */
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.    
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+  while(blkCnt > 0u)
+  {
+
+#ifdef ARM_MATH_ROUNDING
+    /* C = A * 32768 */
+    /* convert from float to q15 and then store the results in the destination buffer */
+    in = *pIn++;
+    in = (in * 32768.0f);
+    in += in > 0 ? 0.5 : -0.5;
+    *pDst++ = (q15_t) (__SSAT((q31_t) (in), 16));
+
+#else
+
+    /* C = A * 32768 */
+    /* convert from float to q15 and then store the results in the destination buffer */
+    *pDst++ = (q15_t) __SSAT((q31_t) (*pIn++ * 32768.0f), 16);
+
+#endif /*      #ifdef ARM_MATH_ROUNDING        */
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  /* Loop over blockSize number of values */
+  blkCnt = blockSize;
+
+  while(blkCnt > 0u)
+  {
+
+#ifdef ARM_MATH_ROUNDING
+    /* C = A * 32768 */
+    /* convert from float to q15 and then store the results in the destination buffer */
+    in = *pIn++;
+    in = (in * 32768.0f);
+    in += in > 0 ? 0.5f : -0.5f;
+    *pDst++ = (q15_t) (__SSAT((q31_t) (in), 16));
+
+#else
+
+    /* C = A * 32768 */
+    /* convert from float to q15 and then store the results in the destination buffer */
+    *pDst++ = (q15_t) __SSAT((q31_t) (*pIn++ * 32768.0f), 16);
+
+#endif /*      #ifdef ARM_MATH_ROUNDING        */
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+}
+
+/**    
+ * @} end of float_to_x group    
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_float_to_q31.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_float_to_q31.c
new file mode 100644
index 000000000..3ab52b9ff
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_float_to_q31.c
@@ -0,0 +1,203 @@
+/* ----------------------------------------------------------------------------    
+* Copyright (C) 2010 ARM Limited. All rights reserved.    
+*    
+* $Date:        15. February 2012  
+* $Revision: 	V1.1.0  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:		arm_float_to_q31.c    
+*    
+* Description:	Converts the elements of the floating-point vector to Q31 vector.    
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.1.0 2012/02/15 
+*    Updated with more optimizations, bug fixes and minor API changes.  
+*   
+* Version 1.0.10 2011/7/15  
+*    Big Endian support added and Merged M0 and M3/M4 Source code.   
+*    
+* Version 1.0.3 2010/11/29   
+*    Re-organized the CMSIS folders and updated documentation.    
+*     
+* Version 1.0.2 2010/11/11    
+*    Documentation updated.     
+*    
+* Version 1.0.1 2010/10/05     
+*    Production release and review comments incorporated.    
+*    
+* Version 1.0.0 2010/09/20     
+*    Production release and review comments incorporated.    
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**    
+ * @ingroup groupSupport    
+ */
+
+/**    
+ * @defgroup float_to_x  Convert 32-bit floating point value    
+ */
+
+/**    
+ * @addtogroup float_to_x    
+ * @{    
+ */
+
+/**    
+ * @brief Converts the elements of the floating-point vector to Q31 vector.    
+ * @param[in]       *pSrc points to the floating-point input vector    
+ * @param[out]      *pDst points to the Q31 output vector   
+ * @param[in]       blockSize length of the input vector    
+ * @return none.    
+ *    
+ *\par Description:    
+ * \par   
+ * The equation used for the conversion process is:    
+ *   
+ * <pre>    
+ * 	pDst[n] = (q31_t)(pSrc[n] * 2147483648);   0 <= n < blockSize.    
+ * </pre>    
+ * <b>Scaling and Overflow Behavior:</b>    
+ * \par    
+ * The function uses saturating arithmetic.    
+ * Results outside of the allowable Q31 range[0x80000000 0x7FFFFFFF] will be saturated.    
+ *   
+ * \note In order to apply rounding, the library should be rebuilt with the ROUNDING macro     
+ * defined in the preprocessor section of project options.     
+ */
+
+
+void arm_float_to_q31(
+  float32_t * pSrc,
+  q31_t * pDst,
+  uint32_t blockSize)
+{
+  float32_t *pIn = pSrc;                         /* Src pointer */
+  uint32_t blkCnt;                               /* loop counter */
+
+#ifdef ARM_MATH_ROUNDING
+
+  float32_t in;
+
+#endif /*      #ifdef ARM_MATH_ROUNDING        */
+
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+
+#ifdef ARM_MATH_ROUNDING
+
+    /* C = A * 32768 */
+    /* convert from float to Q31 and then store the results in the destination buffer */
+    in = *pIn++;
+    in = (in * 2147483648.0f);
+    in += in > 0 ? 0.5 : -0.5;
+    *pDst++ = clip_q63_to_q31((q63_t) (in));
+
+    in = *pIn++;
+    in = (in * 2147483648.0f);
+    in += in > 0 ? 0.5 : -0.5;
+    *pDst++ = clip_q63_to_q31((q63_t) (in));
+
+    in = *pIn++;
+    in = (in * 2147483648.0f);
+    in += in > 0 ? 0.5 : -0.5;
+    *pDst++ = clip_q63_to_q31((q63_t) (in));
+
+    in = *pIn++;
+    in = (in * 2147483648.0f);
+    in += in > 0 ? 0.5 : -0.5;
+    *pDst++ = clip_q63_to_q31((q63_t) (in));
+
+#else
+
+    /* C = A * 2147483648 */
+    /* convert from float to Q31 and then store the results in the destination buffer */
+    *pDst++ = clip_q63_to_q31((q63_t) (*pIn++ * 2147483648.0f));
+    *pDst++ = clip_q63_to_q31((q63_t) (*pIn++ * 2147483648.0f));
+    *pDst++ = clip_q63_to_q31((q63_t) (*pIn++ * 2147483648.0f));
+    *pDst++ = clip_q63_to_q31((q63_t) (*pIn++ * 2147483648.0f));
+
+#endif /*      #ifdef ARM_MATH_ROUNDING        */
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.    
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+  while(blkCnt > 0u)
+  {
+
+#ifdef ARM_MATH_ROUNDING
+
+    /* C = A * 2147483648 */
+    /* convert from float to Q31 and then store the results in the destination buffer */
+    in = *pIn++;
+    in = (in * 2147483648.0f);
+    in += in > 0 ? 0.5 : -0.5;
+    *pDst++ = clip_q63_to_q31((q63_t) (in));
+
+#else
+
+    /* C = A * 2147483648 */
+    /* convert from float to Q31 and then store the results in the destination buffer */
+    *pDst++ = clip_q63_to_q31((q63_t) (*pIn++ * 2147483648.0f));
+
+#endif /*      #ifdef ARM_MATH_ROUNDING        */
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  /* Loop over blockSize number of values */
+  blkCnt = blockSize;
+
+  while(blkCnt > 0u)
+  {
+
+#ifdef ARM_MATH_ROUNDING
+
+    /* C = A * 2147483648 */
+    /* convert from float to Q31 and then store the results in the destination buffer */
+    in = *pIn++;
+    in = (in * 2147483648.0f);
+    in += in > 0 ? 0.5f : -0.5f;
+    *pDst++ = clip_q63_to_q31((q63_t) (in));
+
+#else
+
+    /* C = A * 2147483648 */
+    /* convert from float to Q31 and then store the results in the destination buffer */
+    *pDst++ = clip_q63_to_q31((q63_t) (*pIn++ * 2147483648.0f));
+
+#endif /*      #ifdef ARM_MATH_ROUNDING        */
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+}
+
+/**    
+ * @} end of float_to_x group    
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_float_to_q7.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_float_to_q7.c
new file mode 100644
index 000000000..dea14c6a1
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_float_to_q7.c
@@ -0,0 +1,195 @@
+/* ----------------------------------------------------------------------------    
+* Copyright (C) 2010 ARM Limited. All rights reserved.    
+*    
+* $Date:        15. February 2012  
+* $Revision: 	V1.1.0  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:		arm_float_to_q7.c    
+*    
+* Description:	Converts the elements of the floating-point vector to Q7 vector.   
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.1.0 2012/02/15 
+*    Updated with more optimizations, bug fixes and minor API changes.  
+*   
+* Version 1.0.10 2011/7/15  
+*    Big Endian support added and Merged M0 and M3/M4 Source code.   
+*    
+* Version 1.0.3 2010/11/29   
+*    Re-organized the CMSIS folders and updated documentation.    
+*     
+* Version 1.0.2 2010/11/11    
+*    Documentation updated.     
+*    
+* Version 1.0.1 2010/10/05     
+*    Production release and review comments incorporated.    
+*    
+* Version 1.0.0 2010/09/20     
+*    Production release and review comments incorporated.    
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**    
+ * @ingroup groupSupport    
+ */
+
+/**    
+ * @addtogroup float_to_x    
+ * @{    
+ */
+
+/**    
+ * @brief Converts the elements of the floating-point vector to Q7 vector.    
+ * @param[in]       *pSrc points to the floating-point input vector    
+ * @param[out]      *pDst points to the Q7 output vector   
+ * @param[in]       blockSize length of the input vector    
+ * @return none.    
+ *    
+ *\par Description:    
+ * \par   
+ * The equation used for the conversion process is:    
+ * <pre>    
+ * 	pDst[n] = (q7_t)(pSrc[n] * 128);   0 <= n < blockSize.    
+ * </pre>    
+ * \par Scaling and Overflow Behavior:    
+ * \par    
+ * The function uses saturating arithmetic.    
+ * Results outside of the allowable Q7 range [0x80 0x7F] will be saturated.    
+ * \note   
+ * In order to apply rounding, the library should be rebuilt with the ROUNDING macro     
+ * defined in the preprocessor section of project options.     
+ */
+
+
+void arm_float_to_q7(
+  float32_t * pSrc,
+  q7_t * pDst,
+  uint32_t blockSize)
+{
+  float32_t *pIn = pSrc;                         /* Src pointer */
+  uint32_t blkCnt;                               /* loop counter */
+
+#ifdef ARM_MATH_ROUNDING
+
+  float32_t in;
+
+#endif /*      #ifdef ARM_MATH_ROUNDING        */
+
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+
+#ifdef ARM_MATH_ROUNDING
+    /* C = A * 128 */
+    /* convert from float to q7 and then store the results in the destination buffer */
+    in = *pIn++;
+    in = (in * 128);
+    in += in > 0 ? 0.5 : -0.5;
+    *pDst++ = (q7_t) (__SSAT((q15_t) (in), 8));
+
+    in = *pIn++;
+    in = (in * 128);
+    in += in > 0 ? 0.5 : -0.5;
+    *pDst++ = (q7_t) (__SSAT((q15_t) (in), 8));
+
+    in = *pIn++;
+    in = (in * 128);
+    in += in > 0 ? 0.5 : -0.5;
+    *pDst++ = (q7_t) (__SSAT((q15_t) (in), 8));
+
+    in = *pIn++;
+    in = (in * 128);
+    in += in > 0 ? 0.5 : -0.5;
+    *pDst++ = (q7_t) (__SSAT((q15_t) (in), 8));
+
+#else
+
+    /* C = A * 128 */
+    /* convert from float to q7 and then store the results in the destination buffer */
+    *pDst++ = __SSAT((q31_t) (*pIn++ * 128.0f), 8);
+    *pDst++ = __SSAT((q31_t) (*pIn++ * 128.0f), 8);
+    *pDst++ = __SSAT((q31_t) (*pIn++ * 128.0f), 8);
+    *pDst++ = __SSAT((q31_t) (*pIn++ * 128.0f), 8);
+
+#endif /*      #ifdef ARM_MATH_ROUNDING        */
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.    
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+  while(blkCnt > 0u)
+  {
+
+#ifdef ARM_MATH_ROUNDING
+    /* C = A * 128 */
+    /* convert from float to q7 and then store the results in the destination buffer */
+    in = *pIn++;
+    in = (in * 128);
+    in += in > 0 ? 0.5 : -0.5;
+    *pDst++ = (q7_t) (__SSAT((q15_t) (in), 8));
+
+#else
+
+    /* C = A * 128 */
+    /* convert from float to q7 and then store the results in the destination buffer */
+    *pDst++ = __SSAT((q31_t) (*pIn++ * 128.0f), 8);
+
+#endif /*      #ifdef ARM_MATH_ROUNDING        */
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+
+  /* Loop over blockSize number of values */
+  blkCnt = blockSize;
+
+  while(blkCnt > 0u)
+  {
+#ifdef ARM_MATH_ROUNDING
+    /* C = A * 128 */
+    /* convert from float to q7 and then store the results in the destination buffer */
+    in = *pIn++;
+    in = (in * 128.0f);
+    in += in > 0 ? 0.5f : -0.5f;
+    *pDst++ = (q7_t) (__SSAT((q31_t) (in), 8));
+
+#else
+
+    /* C = A * 128 */
+    /* convert from float to q7 and then store the results in the destination buffer */
+    *pDst++ = (q7_t) __SSAT((q31_t) (*pIn++ * 128.0f), 8);
+
+#endif /*      #ifdef ARM_MATH_ROUNDING        */
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+}
+
+/**    
+ * @} end of float_to_x group    
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_q15_to_float.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_q15_to_float.c
new file mode 100644
index 000000000..ce51e9a39
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_q15_to_float.c
@@ -0,0 +1,126 @@
+/* ----------------------------------------------------------------------------    
+* Copyright (C) 2010 ARM Limited. All rights reserved.    
+*    
+* $Date:        15. February 2012  
+* $Revision: 	V1.1.0  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:		arm_q15_to_float.c    
+*    
+* Description:	Converts the elements of the Q15 vector to floating-point vector.     
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.1.0 2012/02/15 
+*    Updated with more optimizations, bug fixes and minor API changes.  
+*   
+* Version 1.0.10 2011/7/15  
+*    Big Endian support added and Merged M0 and M3/M4 Source code.   
+*    
+* Version 1.0.3 2010/11/29   
+*    Re-organized the CMSIS folders and updated documentation.    
+*     
+* Version 1.0.2 2010/11/11    
+*    Documentation updated.     
+*    
+* Version 1.0.1 2010/10/05     
+*    Production release and review comments incorporated.    
+*    
+* Version 1.0.0 2010/09/20     
+*    Production release and review comments incorporated.    
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**    
+ * @ingroup groupSupport    
+ */
+
+/**    
+ * @defgroup q15_to_x  Convert 16-bit Integer value    
+ */
+
+/**    
+ * @addtogroup q15_to_x    
+ * @{    
+ */
+
+
+
+
+/**    
+ * @brief  Converts the elements of the Q15 vector to floating-point vector.     
+ * @param[in]       *pSrc points to the Q15 input vector    
+ * @param[out]      *pDst points to the floating-point output vector   
+ * @param[in]       blockSize length of the input vector    
+ * @return none.    
+ *    
+ * \par Description:    
+ *    
+ * The equation used for the conversion process is:    
+ *   
+ * <pre>    
+ * 	pDst[n] = (float32_t) pSrc[n] / 32768;   0 <= n < blockSize.    
+ * </pre>    
+ *   
+ */
+
+
+void arm_q15_to_float(
+  q15_t * pSrc,
+  float32_t * pDst,
+  uint32_t blockSize)
+{
+  q15_t *pIn = pSrc;                             /* Src pointer */
+  uint32_t blkCnt;                               /* loop counter */
+
+
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* C = (float32_t) A / 32768 */
+    /* convert from q15 to float and then store the results in the destination buffer */
+    *pDst++ = ((float32_t) * pIn++ / 32768.0f);
+    *pDst++ = ((float32_t) * pIn++ / 32768.0f);
+    *pDst++ = ((float32_t) * pIn++ / 32768.0f);
+    *pDst++ = ((float32_t) * pIn++ / 32768.0f);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.    
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  /* Loop over blockSize number of values */
+  blkCnt = blockSize;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  while(blkCnt > 0u)
+  {
+    /* C = (float32_t) A / 32768 */
+    /* convert from q15 to float and then store the results in the destination buffer */
+    *pDst++ = ((float32_t) * pIn++ / 32768.0f);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+}
+
+/**    
+ * @} end of q15_to_x group    
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_q15_to_q31.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_q15_to_q31.c
new file mode 100644
index 000000000..a0f66c29e
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_q15_to_q31.c
@@ -0,0 +1,148 @@
+/* ----------------------------------------------------------------------------    
+* Copyright (C) 2010 ARM Limited. All rights reserved.    
+*    
+* $Date:        15. February 2012  
+* $Revision: 	V1.1.0  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:		arm_q15_to_q31.c    
+*    
+* Description:	Converts the elements of the Q15 vector to Q31 vector.  
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.1.0 2012/02/15 
+*    Updated with more optimizations, bug fixes and minor API changes.  
+*   
+* Version 1.0.10 2011/7/15  
+*    Big Endian support added and Merged M0 and M3/M4 Source code.   
+*    
+* Version 1.0.3 2010/11/29   
+*    Re-organized the CMSIS folders and updated documentation.    
+*     
+* Version 1.0.2 2010/11/11    
+*    Documentation updated.     
+*    
+* Version 1.0.1 2010/10/05     
+*    Production release and review comments incorporated.    
+*    
+* Version 1.0.0 2010/09/20     
+*    Production release and review comments incorporated.    
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**    
+ * @ingroup groupSupport    
+ */
+
+/**    
+ * @addtogroup q15_to_x    
+ * @{    
+ */
+
+/**    
+ * @brief Converts the elements of the Q15 vector to Q31 vector.     
+ * @param[in]       *pSrc points to the Q15 input vector    
+ * @param[out]      *pDst points to the Q31 output vector   
+ * @param[in]       blockSize length of the input vector    
+ * @return none.    
+ *    
+ * \par Description:    
+ *    
+ * The equation used for the conversion process is:   
+ *   
+ * <pre>    
+ * 	pDst[n] = (q31_t) pSrc[n] << 16;   0 <= n < blockSize.    
+ * </pre>    
+ *   
+ */
+
+
+void arm_q15_to_q31(
+  q15_t * pSrc,
+  q31_t * pDst,
+  uint32_t blockSize)
+{
+  q15_t *pIn = pSrc;                             /* Src pointer */
+  uint32_t blkCnt;                               /* loop counter */
+
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+  q31_t in1, in2;
+  q31_t out1, out2, out3, out4;
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* C = (q31_t)A << 16 */
+    /* convert from q15 to q31 and then store the results in the destination buffer */
+    in1 = *__SIMD32(pIn)++;
+    in2 = *__SIMD32(pIn)++;
+
+#ifndef ARM_MATH_BIG_ENDIAN
+
+    /* extract lower 16 bits to 32 bit result */
+    out1 = in1 << 16u;
+    /* extract upper 16 bits to 32 bit result */
+    out2 = in1 & 0xFFFF0000;
+    /* extract lower 16 bits to 32 bit result */
+    out3 = in2 << 16u;
+    /* extract upper 16 bits to 32 bit result */
+    out4 = in2 & 0xFFFF0000;
+
+#else
+
+    /* extract upper 16 bits to 32 bit result */
+    out1 = in1 & 0xFFFF0000;
+    /* extract lower 16 bits to 32 bit result */
+    out2 = in1 << 16u;
+    /* extract upper 16 bits to 32 bit result */
+    out3 = in2 & 0xFFFF0000;
+    /* extract lower 16 bits to 32 bit result */
+    out4 = in2 << 16u;
+
+#endif //      #ifndef ARM_MATH_BIG_ENDIAN
+
+    *pDst++ = out1;
+    *pDst++ = out2;
+    *pDst++ = out3;
+    *pDst++ = out4;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.    
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  /* Loop over blockSize number of values */
+  blkCnt = blockSize;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  while(blkCnt > 0u)
+  {
+    /* C = (q31_t)A << 16 */
+    /* convert from q15 to q31 and then store the results in the destination buffer */
+    *pDst++ = (q31_t) * pIn++ << 16;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+}
+
+/**    
+ * @} end of q15_to_x group    
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_q15_to_q7.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_q15_to_q7.c
new file mode 100644
index 000000000..87fe63d9e
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_q15_to_q7.c
@@ -0,0 +1,146 @@
+/* ----------------------------------------------------------------------------    
+* Copyright (C) 2010 ARM Limited. All rights reserved.    
+*    
+* $Date:        15. February 2012  
+* $Revision: 	V1.1.0  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:		arm_q15_to_q7.c    
+*    
+* Description:	Converts the elements of the Q15 vector to Q7 vector.  
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.1.0 2012/02/15 
+*    Updated with more optimizations, bug fixes and minor API changes.  
+*   
+* Version 1.0.10 2011/7/15  
+*    Big Endian support added and Merged M0 and M3/M4 Source code.   
+*    
+* Version 1.0.3 2010/11/29   
+*    Re-organized the CMSIS folders and updated documentation.    
+*     
+* Version 1.0.2 2010/11/11    
+*    Documentation updated.     
+*    
+* Version 1.0.1 2010/10/05     
+*    Production release and review comments incorporated.    
+*    
+* Version 1.0.0 2010/09/20     
+*    Production release and review comments incorporated.    
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**    
+ * @ingroup groupSupport    
+ */
+
+/**    
+ * @addtogroup q15_to_x    
+ * @{    
+ */
+
+
+/**    
+ * @brief Converts the elements of the Q15 vector to Q7 vector.     
+ * @param[in]       *pSrc points to the Q15 input vector    
+ * @param[out]      *pDst points to the Q7 output vector   
+ * @param[in]       blockSize length of the input vector    
+ * @return none.    
+ *    
+ * \par Description:    
+ *    
+ * The equation used for the conversion process is:    
+ *   
+ * <pre>    
+ * 	pDst[n] = (q7_t) pSrc[n] >> 8;   0 <= n < blockSize.    
+ * </pre>   
+ *   
+ */
+
+
+void arm_q15_to_q7(
+  q15_t * pSrc,
+  q7_t * pDst,
+  uint32_t blockSize)
+{
+  q15_t *pIn = pSrc;                             /* Src pointer */
+  uint32_t blkCnt;                               /* loop counter */
+
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+  q31_t in1, in2;
+  q31_t out1, out2;
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* C = (q7_t) A >> 8 */
+    /* convert from q15 to q7 and then store the results in the destination buffer */
+    in1 = *__SIMD32(pIn)++;
+    in2 = *__SIMD32(pIn)++;
+
+#ifndef ARM_MATH_BIG_ENDIAN
+
+    out1 = __PKHTB(in2, in1, 16);
+    out2 = __PKHBT(in2, in1, 16);
+
+#else
+
+    out1 = __PKHTB(in1, in2, 16);
+    out2 = __PKHBT(in1, in2, 16);
+
+#endif //      #ifndef ARM_MATH_BIG_ENDIAN
+
+    /* rotate packed value by 24 */
+    out2 = ((uint32_t) out2 << 8) | ((uint32_t) out2 >> 24);
+
+    /* anding with 0xff00ff00 to get two 8 bit values */
+    out1 = out1 & 0xFF00FF00;
+    /* anding with 0x00ff00ff to get two 8 bit values */
+    out2 = out2 & 0x00FF00FF;
+
+    /* oring two values(contains two 8 bit values) to get four packed 8 bit values */
+    out1 = out1 | out2;
+
+    /* store 4 samples at a time to destiantion buffer */
+    *__SIMD32(pDst)++ = out1;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.    
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  /* Loop over blockSize number of values */
+  blkCnt = blockSize;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  while(blkCnt > 0u)
+  {
+    /* C = (q7_t) A >> 8 */
+    /* convert from q15 to q7 and then store the results in the destination buffer */
+    *pDst++ = (q7_t) (*pIn++ >> 8);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+}
+
+/**    
+ * @} end of q15_to_x group    
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_q31_to_float.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_q31_to_float.c
new file mode 100644
index 000000000..94deec9a1
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_q31_to_float.c
@@ -0,0 +1,123 @@
+/* ----------------------------------------------------------------------------    
+* Copyright (C) 2010 ARM Limited. All rights reserved.    
+*    
+* $Date:        15. February 2012  
+* $Revision: 	V1.1.0  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:		arm_q31_to_float.c    
+*    
+* Description:	Converts the elements of the Q31 vector to floating-point vector.      
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.1.0 2012/02/15 
+*    Updated with more optimizations, bug fixes and minor API changes.  
+*   
+* Version 1.0.10 2011/7/15  
+*    Big Endian support added and Merged M0 and M3/M4 Source code.   
+*    
+* Version 1.0.3 2010/11/29   
+*    Re-organized the CMSIS folders and updated documentation.    
+*     
+* Version 1.0.2 2010/11/11    
+*    Documentation updated.     
+*    
+* Version 1.0.1 2010/10/05     
+*    Production release and review comments incorporated.    
+*    
+* Version 1.0.0 2010/09/20     
+*    Production release and review comments incorporated.    
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**    
+ * @ingroup groupSupport    
+ */
+
+/**    
+ * @defgroup q31_to_x  Convert 32-bit Integer value    
+ */
+
+/**    
+ * @addtogroup q31_to_x    
+ * @{    
+ */
+
+/**    
+ * @brief Converts the elements of the Q31 vector to floating-point vector.    
+ * @param[in]       *pSrc points to the Q31 input vector    
+ * @param[out]      *pDst points to the floating-point output vector   
+ * @param[in]       blockSize length of the input vector    
+ * @return none.    
+ *    
+ * \par Description:    
+ *    
+ * The equation used for the conversion process is:    
+ *   
+ * <pre>    
+ * 	pDst[n] = (float32_t) pSrc[n] / 2147483648;   0 <= n < blockSize.    
+ * </pre>    
+ *   
+ */
+
+
+void arm_q31_to_float(
+  q31_t * pSrc,
+  float32_t * pDst,
+  uint32_t blockSize)
+{
+  q31_t *pIn = pSrc;                             /* Src pointer */
+  uint32_t blkCnt;                               /* loop counter */
+
+
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* C = (float32_t) A / 2147483648 */
+    /* convert from q31 to float and then store the results in the destination buffer */
+    *pDst++ = ((float32_t) * pIn++ / 2147483648.0f);
+    *pDst++ = ((float32_t) * pIn++ / 2147483648.0f);
+    *pDst++ = ((float32_t) * pIn++ / 2147483648.0f);
+    *pDst++ = ((float32_t) * pIn++ / 2147483648.0f);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.    
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  /* Loop over blockSize number of values */
+  blkCnt = blockSize;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  while(blkCnt > 0u)
+  {
+    /* C = (float32_t) A / 2147483648 */
+    /* convert from q31 to float and then store the results in the destination buffer */
+    *pDst++ = ((float32_t) * pIn++ / 2147483648.0f);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+}
+
+/**    
+ * @} end of q31_to_x group    
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_q31_to_q15.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_q31_to_q15.c
new file mode 100644
index 000000000..ba79f85a1
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_q31_to_q15.c
@@ -0,0 +1,137 @@
+/* ----------------------------------------------------------------------------    
+* Copyright (C) 2010 ARM Limited. All rights reserved.    
+*    
+* $Date:        15. February 2012  
+* $Revision: 	V1.1.0  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:		arm_q31_to_q15.c    
+*    
+* Description:	Converts the elements of the Q31 vector to Q15 vector.    
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.1.0 2012/02/15 
+*    Updated with more optimizations, bug fixes and minor API changes.  
+*   
+* Version 1.0.10 2011/7/15  
+*    Big Endian support added and Merged M0 and M3/M4 Source code.   
+*    
+* Version 1.0.3 2010/11/29   
+*    Re-organized the CMSIS folders and updated documentation.    
+*     
+* Version 1.0.2 2010/11/11    
+*    Documentation updated.     
+*    
+* Version 1.0.1 2010/10/05     
+*    Production release and review comments incorporated.    
+*    
+* Version 1.0.0 2010/09/20     
+*    Production release and review comments incorporated.    
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**    
+ * @ingroup groupSupport    
+ */
+
+/**    
+ * @addtogroup q31_to_x    
+ * @{    
+ */
+
+/**    
+ * @brief Converts the elements of the Q31 vector to Q15 vector.    
+ * @param[in]       *pSrc points to the Q31 input vector    
+ * @param[out]      *pDst points to the Q15 output vector   
+ * @param[in]       blockSize length of the input vector    
+ * @return none.    
+ *     
+ * \par Description:    
+ *    
+ * The equation used for the conversion process is:    
+ *   
+ * <pre>    
+ * 	pDst[n] = (q15_t) pSrc[n] >> 16;   0 <= n < blockSize.    
+ * </pre>    
+ *   
+ */
+
+
+void arm_q31_to_q15(
+  q31_t * pSrc,
+  q15_t * pDst,
+  uint32_t blockSize)
+{
+  q31_t *pIn = pSrc;                             /* Src pointer */
+  uint32_t blkCnt;                               /* loop counter */
+
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+  q31_t in1, in2, in3, in4;
+  q31_t out1, out2;
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* C = (q15_t) A >> 16 */
+    /* convert from q31 to q15 and then store the results in the destination buffer */
+    in1 = *pIn++;
+    in2 = *pIn++;
+    in3 = *pIn++;
+    in4 = *pIn++;
+
+    /* pack two higher 16-bit values from two 32-bit values */
+#ifndef ARM_MATH_BIG_ENDIAN
+
+    out1 = __PKHTB(in2, in1, 16);
+    out2 = __PKHTB(in4, in3, 16);
+
+#else
+
+    out1 = __PKHTB(in1, in2, 16);
+    out2 = __PKHTB(in3, in4, 16);
+
+#endif //      #ifdef ARM_MATH_BIG_ENDIAN
+
+    *__SIMD32(pDst)++ = out1;
+    *__SIMD32(pDst)++ = out2;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.    
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  /* Loop over blockSize number of values */
+  blkCnt = blockSize;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  while(blkCnt > 0u)
+  {
+    /* C = (q15_t) A >> 16 */
+    /* convert from q31 to q15 and then store the results in the destination buffer */
+    *pDst++ = (q15_t) (*pIn++ >> 16);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+}
+
+/**    
+ * @} end of q31_to_x group    
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_q31_to_q7.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_q31_to_q7.c
new file mode 100644
index 000000000..afc8fdbed
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_q31_to_q7.c
@@ -0,0 +1,128 @@
+/* ----------------------------------------------------------------------------    
+* Copyright (C) 2010 ARM Limited. All rights reserved.    
+*    
+* $Date:        15. February 2012  
+* $Revision: 	V1.1.0  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:		arm_q31_to_q7.c    
+*    
+* Description:	Converts the elements of the Q31 vector to Q7 vector.    
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.1.0 2012/02/15 
+*    Updated with more optimizations, bug fixes and minor API changes.  
+*   
+* Version 1.0.10 2011/7/15  
+*    Big Endian support added and Merged M0 and M3/M4 Source code.   
+*    
+* Version 1.0.3 2010/11/29   
+*    Re-organized the CMSIS folders and updated documentation.    
+*     
+* Version 1.0.2 2010/11/11    
+*    Documentation updated.     
+*    
+* Version 1.0.1 2010/10/05     
+*    Production release and review comments incorporated.    
+*    
+* Version 1.0.0 2010/09/20     
+*    Production release and review comments incorporated.    
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**    
+ * @ingroup groupSupport    
+ */
+
+/**    
+ * @addtogroup q31_to_x    
+ * @{    
+ */
+
+/**    
+ * @brief Converts the elements of the Q31 vector to Q7 vector.    
+ * @param[in]       *pSrc points to the Q31 input vector    
+ * @param[out]      *pDst points to the Q7 output vector   
+ * @param[in]       blockSize length of the input vector    
+ * @return none.    
+ *    
+ * \par Description:    
+ *    
+ * The equation used for the conversion process is:    
+ *   
+ * <pre>    
+ * 	pDst[n] = (q7_t) pSrc[n] >> 24;   0 <= n < blockSize.     
+ * </pre>    
+ *   
+ */
+
+
+void arm_q31_to_q7(
+  q31_t * pSrc,
+  q7_t * pDst,
+  uint32_t blockSize)
+{
+  q31_t *pIn = pSrc;                             /* Src pointer */
+  uint32_t blkCnt;                               /* loop counter */
+
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+  q31_t in1, in2, in3, in4;
+  q7_t out1, out2, out3, out4;
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* C = (q7_t) A >> 24 */
+    /* convert from q31 to q7 and then store the results in the destination buffer */
+    in1 = *pIn++;
+    in2 = *pIn++;
+    in3 = *pIn++;
+    in4 = *pIn++;
+
+    out1 = (q7_t) (in1 >> 24);
+    out2 = (q7_t) (in2 >> 24);
+    out3 = (q7_t) (in3 >> 24);
+    out4 = (q7_t) (in4 >> 24);
+
+    *__SIMD32(pDst)++ = __PACKq7(out1, out2, out3, out4);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.    
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  /* Loop over blockSize number of values */
+  blkCnt = blockSize;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  while(blkCnt > 0u)
+  {
+    /* C = (q7_t) A >> 24 */
+    /* convert from q31 to q7 and then store the results in the destination buffer */
+    *pDst++ = (q7_t) (*pIn++ >> 24);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+}
+
+/**    
+ * @} end of q31_to_x group    
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_q7_to_float.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_q7_to_float.c
new file mode 100644
index 000000000..be4ac5fe3
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_q7_to_float.c
@@ -0,0 +1,123 @@
+/* ----------------------------------------------------------------------------    
+* Copyright (C) 2010 ARM Limited. All rights reserved.    
+*    
+* $Date:        15. February 2012  
+* $Revision: 	V1.1.0  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:		arm_q7_to_float.c    
+*    
+* Description:	Converts the elements of the Q7 vector to floating-point vector.    
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.1.0 2012/02/15 
+*    Updated with more optimizations, bug fixes and minor API changes.  
+*   
+* Version 1.0.10 2011/7/15  
+*    Big Endian support added and Merged M0 and M3/M4 Source code.   
+*    
+* Version 1.0.3 2010/11/29   
+*    Re-organized the CMSIS folders and updated documentation.    
+*     
+* Version 1.0.2 2010/11/11    
+*    Documentation updated.     
+*    
+* Version 1.0.1 2010/10/05     
+*    Production release and review comments incorporated.    
+*    
+* Version 1.0.0 2010/09/20     
+*    Production release and review comments incorporated.    
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**    
+ * @ingroup groupSupport    
+ */
+
+/**    
+ * @defgroup q7_to_x  Convert 8-bit Integer value    
+ */
+
+/**    
+ * @addtogroup q7_to_x    
+ * @{    
+ */
+
+/**    
+ * @brief Converts the elements of the Q7 vector to floating-point vector.    
+ * @param[in]       *pSrc points to the Q7 input vector    
+ * @param[out]      *pDst points to the floating-point output vector   
+ * @param[in]       blockSize length of the input vector    
+ * @return none.    
+ *		     
+ * \par Description:    
+ *    
+ * The equation used for the conversion process is:    
+ *   
+ * <pre>    
+ * 	pDst[n] = (float32_t) pSrc[n] / 128;   0 <= n < blockSize.    
+ * </pre>    
+ *   
+ */
+
+
+void arm_q7_to_float(
+  q7_t * pSrc,
+  float32_t * pDst,
+  uint32_t blockSize)
+{
+  q7_t *pIn = pSrc;                              /* Src pointer */
+  uint32_t blkCnt;                               /* loop counter */
+
+
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* C = (float32_t) A / 128 */
+    /* convert from q7 to float and then store the results in the destination buffer */
+    *pDst++ = ((float32_t) * pIn++ / 128.0f);
+    *pDst++ = ((float32_t) * pIn++ / 128.0f);
+    *pDst++ = ((float32_t) * pIn++ / 128.0f);
+    *pDst++ = ((float32_t) * pIn++ / 128.0f);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.    
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  /* Loop over blockSize number of values */
+  blkCnt = blockSize;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  while(blkCnt > 0u)
+  {
+    /* C = (float32_t) A / 128 */
+    /* convert from q7 to float and then store the results in the destination buffer */
+    *pDst++ = ((float32_t) * pIn++ / 128.0f);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+}
+
+/**    
+ * @} end of q7_to_x group    
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_q7_to_q15.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_q7_to_q15.c
new file mode 100644
index 000000000..019ca4815
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_q7_to_q15.c
@@ -0,0 +1,149 @@
+/* ----------------------------------------------------------------------------    
+* Copyright (C) 2010 ARM Limited. All rights reserved.    
+*    
+* $Date:        15. February 2012  
+* $Revision: 	V1.1.0  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:		arm_q7_to_q15.c    
+*    
+* Description:	Converts the elements of the Q7 vector to Q15 vector.    
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.1.0 2012/02/15 
+*    Updated with more optimizations, bug fixes and minor API changes.  
+*   
+* Version 1.0.10 2011/7/15  
+*    Big Endian support added and Merged M0 and M3/M4 Source code.   
+*    
+* Version 1.0.3 2010/11/29   
+*    Re-organized the CMSIS folders and updated documentation.    
+*     
+* Version 1.0.2 2010/11/11    
+*    Documentation updated.     
+*    
+* Version 1.0.1 2010/10/05     
+*    Production release and review comments incorporated.    
+*    
+* Version 1.0.0 2010/09/20     
+*    Production release and review comments incorporated.    
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**    
+ * @ingroup groupSupport    
+ */
+
+/**    
+ * @addtogroup q7_to_x    
+ * @{    
+ */
+
+
+
+
+/**    
+ * @brief Converts the elements of the Q7 vector to Q15 vector.    
+ * @param[in]       *pSrc points to the Q7 input vector    
+ * @param[out]      *pDst points to the Q15 output vector   
+ * @param[in]       blockSize length of the input vector    
+ * @return none.    
+ *    
+ * \par Description:    
+ *    
+ * The equation used for the conversion process is:    
+ *   
+ * <pre>    
+ * 	pDst[n] = (q15_t) pSrc[n] << 8;   0 <= n < blockSize.    
+ * </pre>    
+ *   
+ */
+
+
+void arm_q7_to_q15(
+  q7_t * pSrc,
+  q15_t * pDst,
+  uint32_t blockSize)
+{
+  q7_t *pIn = pSrc;                              /* Src pointer */
+  uint32_t blkCnt;                               /* loop counter */
+
+#ifndef ARM_MATH_CM0
+  q31_t in;
+  q31_t in1, in2;
+  q31_t out1, out2;
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* C = (q15_t) A << 8 */
+    /* convert from q7 to q15 and then store the results in the destination buffer */
+    in = *__SIMD32(pIn)++;
+
+    /* rotatate in by 8 and extend two q7_t values to q15_t values */
+    in1 = __SXTB16(__ROR(in, 8));
+
+    /* extend remainig two q7_t values to q15_t values */
+    in2 = __SXTB16(in);
+
+    in1 = in1 << 8u;
+    in2 = in2 << 8u;
+
+    in1 = in1 & 0xFF00FF00;
+    in2 = in2 & 0xFF00FF00;
+
+#ifndef ARM_MATH_BIG_ENDIAN
+
+    out2 = __PKHTB(in1, in2, 16);
+    out1 = __PKHBT(in2, in1, 16);
+
+#else
+
+    out1 = __PKHTB(in1, in2, 16);
+    out2 = __PKHBT(in2, in1, 16);
+
+#endif
+
+    *__SIMD32(pDst)++ = out1;
+    *__SIMD32(pDst)++ = out2;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.    
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  /* Loop over blockSize number of values */
+  blkCnt = blockSize;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  while(blkCnt > 0u)
+  {
+    /* C = (q15_t) A << 8 */
+    /* convert from q7 to q15 and then store the results in the destination buffer */
+    *pDst++ = (q15_t) * pIn++ << 8;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+}
+
+/**    
+ * @} end of q7_to_x group    
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_q7_to_q31.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_q7_to_q31.c
new file mode 100644
index 000000000..bbbcc6f4a
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/SupportFunctions/arm_q7_to_q31.c
@@ -0,0 +1,134 @@
+/* ----------------------------------------------------------------------------    
+* Copyright (C) 2010 ARM Limited. All rights reserved.    
+*    
+* $Date:        15. February 2012  
+* $Revision: 	V1.1.0  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:		arm_q7_to_q31.c    
+*    
+* Description:	Converts the elements of the Q7 vector to Q31 vector.  
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.1.0 2012/02/15 
+*    Updated with more optimizations, bug fixes and minor API changes.  
+*   
+* Version 1.0.10 2011/7/15  
+*    Big Endian support added and Merged M0 and M3/M4 Source code.   
+*    
+* Version 1.0.3 2010/11/29   
+*    Re-organized the CMSIS folders and updated documentation.    
+*     
+* Version 1.0.2 2010/11/11    
+*    Documentation updated.     
+*    
+* Version 1.0.1 2010/10/05     
+*    Production release and review comments incorporated.    
+*    
+* Version 1.0.0 2010/09/20     
+*    Production release and review comments incorporated.    
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**    
+ * @ingroup groupSupport    
+ */
+
+/**    
+ * @addtogroup q7_to_x    
+ * @{    
+ */
+
+/**    
+ * @brief Converts the elements of the Q7 vector to Q31 vector.    
+ * @param[in]       *pSrc points to the Q7 input vector    
+ * @param[out]      *pDst points to the Q31 output vector   
+ * @param[in]       blockSize length of the input vector    
+ * @return none.    
+ *    
+ * \par Description:    
+ *    
+ * The equation used for the conversion process is:    
+ *   
+ * <pre>    
+ * 	pDst[n] = (q31_t) pSrc[n] << 24;   0 <= n < blockSize.   
+ * </pre>     
+ *   
+ */
+
+
+void arm_q7_to_q31(
+  q7_t * pSrc,
+  q31_t * pDst,
+  uint32_t blockSize)
+{
+  q7_t *pIn = pSrc;                              /* Src pointer */
+  uint32_t blkCnt;                               /* loop counter */
+
+#ifndef ARM_MATH_CM0
+
+  q31_t in;
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* C = (q31_t) A << 24 */
+    /* convert from q7 to q31 and then store the results in the destination buffer */
+    in = *__SIMD32(pIn)++;
+
+#ifndef ARM_MATH_BIG_ENDIAN
+
+    *pDst++ = (__ROR(in, 8)) & 0xFF000000;
+    *pDst++ = (__ROR(in, 16)) & 0xFF000000;
+    *pDst++ = (__ROR(in, 24)) & 0xFF000000;
+    *pDst++ = (in & 0xFF000000);
+
+#else
+
+    *pDst++ = (in & 0xFF000000);
+    *pDst++ = (__ROR(in, 24)) & 0xFF000000;
+    *pDst++ = (__ROR(in, 16)) & 0xFF000000;
+    *pDst++ = (__ROR(in, 8)) & 0xFF000000;
+
+#endif //              #ifndef ARM_MATH_BIG_ENDIAN
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.    
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  /* Loop over blockSize number of values */
+  blkCnt = blockSize;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  while(blkCnt > 0u)
+  {
+    /* C = (q31_t) A << 24 */
+    /* convert from q7 to q31 and then store the results in the destination buffer */
+    *pDst++ = (q31_t) * pIn++ << 24;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+}
+
+/**    
+ * @} end of q7_to_x group    
+ */