From 13fc6703862862f4263d8d5d085b7a16b87190e1 Mon Sep 17 00:00:00 2001
From: Lorenz Meier <lm@inf.ethz.ch>
Date: Sun, 28 Apr 2013 09:54:11 +0200
Subject: Moved last libs, drivers and headers, cleaned up IO build

---
 .../Source/StatisticsFunctions/arm_max_f32.c       | 178 +++++++++++++++++++
 .../Source/StatisticsFunctions/arm_max_q15.c       | 168 ++++++++++++++++++
 .../Source/StatisticsFunctions/arm_max_q31.c       | 169 ++++++++++++++++++
 .../Source/StatisticsFunctions/arm_max_q7.c        | 169 ++++++++++++++++++
 .../Source/StatisticsFunctions/arm_mean_f32.c      | 131 ++++++++++++++
 .../Source/StatisticsFunctions/arm_mean_q15.c      | 125 +++++++++++++
 .../Source/StatisticsFunctions/arm_mean_q31.c      | 128 +++++++++++++
 .../Source/StatisticsFunctions/arm_mean_q7.c       | 125 +++++++++++++
 .../Source/StatisticsFunctions/arm_min_f32.c       | 175 ++++++++++++++++++
 .../Source/StatisticsFunctions/arm_min_q15.c       | 169 ++++++++++++++++++
 .../Source/StatisticsFunctions/arm_min_q31.c       | 168 ++++++++++++++++++
 .../Source/StatisticsFunctions/arm_min_q7.c        | 170 ++++++++++++++++++
 .../Source/StatisticsFunctions/arm_power_f32.c     | 138 +++++++++++++++
 .../Source/StatisticsFunctions/arm_power_q15.c     | 144 +++++++++++++++
 .../Source/StatisticsFunctions/arm_power_q31.c     | 135 ++++++++++++++
 .../Source/StatisticsFunctions/arm_power_q7.c      | 133 ++++++++++++++
 .../Source/StatisticsFunctions/arm_rms_f32.c       | 133 ++++++++++++++
 .../Source/StatisticsFunctions/arm_rms_q15.c       | 153 ++++++++++++++++
 .../Source/StatisticsFunctions/arm_rms_q31.c       | 146 +++++++++++++++
 .../Source/StatisticsFunctions/arm_std_f32.c       | 188 ++++++++++++++++++++
 .../Source/StatisticsFunctions/arm_std_q15.c       | 197 +++++++++++++++++++++
 .../Source/StatisticsFunctions/arm_std_q31.c       | 184 +++++++++++++++++++
 .../Source/StatisticsFunctions/arm_var_f32.c       | 184 +++++++++++++++++++
 .../Source/StatisticsFunctions/arm_var_q15.c       | 180 +++++++++++++++++++
 .../Source/StatisticsFunctions/arm_var_q31.c       | 170 ++++++++++++++++++
 25 files changed, 3960 insertions(+)
 create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_f32.c
 create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_q15.c
 create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_q31.c
 create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_q7.c
 create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_f32.c
 create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_q15.c
 create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_q31.c
 create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_q7.c
 create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_f32.c
 create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_q15.c
 create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_q31.c
 create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_q7.c
 create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_f32.c
 create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_q15.c
 create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_q31.c
 create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_q7.c
 create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_rms_f32.c
 create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_rms_q15.c
 create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_rms_q31.c
 create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_std_f32.c
 create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_std_q15.c
 create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_std_q31.c
 create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_var_f32.c
 create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_var_q15.c
 create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_var_q31.c

(limited to 'src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions')

diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_f32.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_f32.c
new file mode 100644
index 000000000..afd964885
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_f32.c
@@ -0,0 +1,178 @@
+/* ----------------------------------------------------------------------    
+* Copyright (C) 2010 ARM Limited. All rights reserved.    
+*    
+* $Date:        15. February 2012  
+* $Revision: 	V1.1.0  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:		arm_max_f32.c    
+*    
+* Description:	Maximum value of a floating-point vector.    
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.1.0 2012/02/15 
+*    Updated with more optimizations, bug fixes and minor API changes.  
+*   
+* Version 1.0.10 2011/7/15  
+*    Big Endian support added and Merged M0 and M3/M4 Source code.   
+*    
+* Version 1.0.3 2010/11/29   
+*    Re-organized the CMSIS folders and updated documentation.    
+*     
+* Version 1.0.2 2010/11/11    
+*    Documentation updated.     
+*    
+* Version 1.0.1 2010/10/05     
+*    Production release and review comments incorporated.    
+*    
+* Version 1.0.0 2010/09/20     
+*    Production release and review comments incorporated.    
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**    
+ * @ingroup groupStats    
+ */
+
+/**    
+ * @defgroup Max Maximum    
+ *    
+ * Computes the maximum value of an array of data.     
+ * The function returns both the maximum value and its position within the array.     
+ * There are separate functions for floating-point, Q31, Q15, and Q7 data types.    
+ */
+
+/**    
+ * @addtogroup Max    
+ * @{    
+ */
+
+
+/**    
+ * @brief Maximum value of a floating-point vector.    
+ * @param[in]       *pSrc points to the input vector    
+ * @param[in]       blockSize length of the input vector    
+ * @param[out]      *pResult maximum value returned here    
+ * @param[out]      *pIndex index of maximum value returned here    
+ * @return none.    
+ */
+
+void arm_max_f32(
+  float32_t * pSrc,
+  uint32_t blockSize,
+  float32_t * pResult,
+  uint32_t * pIndex)
+{
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+  float32_t maxVal1, maxVal2, out;               /* Temporary variables to store the output value. */
+  uint32_t blkCnt, outIndex, count;              /* loop counter */
+
+  /* Initialise the count value. */
+  count = 0u;
+  /* Initialise the index value to zero. */
+  outIndex = 0u;
+  /* Load first input value that act as reference value for comparision */
+  out = *pSrc++;
+
+  /* Loop unrolling */
+  blkCnt = (blockSize - 1u) >> 2u;
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+  while(blkCnt > 0u)
+  {
+    /* Initialize maxVal to the next consecutive values one by one */
+    maxVal1 = *pSrc++;
+
+    maxVal2 = *pSrc++;
+
+    /* compare for the maximum value */
+    if(out < maxVal1)
+    {
+      /* Update the maximum value and its index */
+      out = maxVal1;
+      outIndex = count + 1u;
+    }
+
+    maxVal1 = *pSrc++;
+
+    /* compare for the maximum value */
+    if(out < maxVal2)
+    {
+      /* Update the maximum value and its index */
+      out = maxVal2;
+      outIndex = count + 2u;
+    }
+
+    maxVal2 = *pSrc++;
+
+    /* compare for the maximum value */
+    if(out < maxVal1)
+    {
+      /* Update the maximum value and its index */
+      out = maxVal1;
+      outIndex = count + 3u;
+    }
+
+    /* compare for the maximum value */
+    if(out < maxVal2)
+    {
+      /* Update the maximum value and its index */
+      out = maxVal2;
+      outIndex = count + 4u;
+    }
+
+    count += 4u;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* if (blockSize - 1u) is not multiple of 4 */
+  blkCnt = (blockSize - 1u) % 4u;
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+  float32_t maxVal1, out;                        /* Temporary variables to store the output value. */
+  uint32_t blkCnt, outIndex;                     /* loop counter */
+
+  /* Initialise the index value to zero. */
+  outIndex = 0u;
+  /* Load first input value that act as reference value for comparision */
+  out = *pSrc++;
+
+  blkCnt = (blockSize - 1u);
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  while(blkCnt > 0u)
+  {
+    /* Initialize maxVal to the next consecutive values one by one */
+    maxVal1 = *pSrc++;
+
+    /* compare for the maximum value */
+    if(out < maxVal1)
+    {
+      /* Update the maximum value and it's index */
+      out = maxVal1;
+      outIndex = blockSize - blkCnt;
+    }
+
+
+    /* Decrement the loop counter */
+    blkCnt--;
+
+  }
+
+  /* Store the maximum value and it's index into destination pointers */
+  *pResult = out;
+  *pIndex = outIndex;
+}
+
+/**    
+ * @} end of Max group    
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_q15.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_q15.c
new file mode 100644
index 000000000..939b5236f
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_q15.c
@@ -0,0 +1,168 @@
+/* ----------------------------------------------------------------------    
+* Copyright (C) 2010 ARM Limited. All rights reserved.    
+*    
+* $Date:        15. February 2012  
+* $Revision: 	V1.1.0  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:		arm_max_q15.c    
+*    
+* Description:	Maximum value of a Q15 vector.    
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.1.0 2012/02/15 
+*    Updated with more optimizations, bug fixes and minor API changes.  
+*   
+* Version 1.0.10 2011/7/15  
+*    Big Endian support added and Merged M0 and M3/M4 Source code.   
+*    
+* Version 1.0.3 2010/11/29   
+*    Re-organized the CMSIS folders and updated documentation.    
+*     
+* Version 1.0.2 2010/11/11    
+*    Documentation updated.     
+*    
+* Version 1.0.1 2010/10/05     
+*    Production release and review comments incorporated.    
+*    
+* Version 1.0.0 2010/09/20     
+*    Production release and review comments incorporated.    
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**    
+ * @ingroup groupStats    
+ */
+
+/**    
+ * @addtogroup Max    
+ * @{    
+ */
+
+
+/**    
+ * @brief Maximum value of a Q15 vector.    
+ * @param[in]       *pSrc points to the input vector    
+ * @param[in]       blockSize length of the input vector    
+ * @param[out]      *pResult maximum value returned here    
+ * @param[out]      *pIndex index of maximum value returned here    
+ * @return none.    
+ */
+
+void arm_max_q15(
+  q15_t * pSrc,
+  uint32_t blockSize,
+  q15_t * pResult,
+  uint32_t * pIndex)
+{
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+  q15_t maxVal1, maxVal2, out;                   /* Temporary variables to store the output value. */
+  uint32_t blkCnt, outIndex, count;              /* loop counter */
+
+  /* Initialise the count value. */
+  count = 0u;
+  /* Initialise the index value to zero. */
+  outIndex = 0u;
+  /* Load first input value that act as reference value for comparision */
+  out = *pSrc++;
+
+  /* Loop unrolling */
+  blkCnt = (blockSize - 1u) >> 2u;
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+  while(blkCnt > 0u)
+  {
+    /* Initialize maxVal to the next consecutive values one by one */
+    maxVal1 = *pSrc++;
+
+    maxVal2 = *pSrc++;
+
+    /* compare for the maximum value */
+    if(out < maxVal1)
+    {
+      /* Update the maximum value and its index */
+      out = maxVal1;
+      outIndex = count + 1u;
+    }
+
+    maxVal1 = *pSrc++;
+
+    /* compare for the maximum value */
+    if(out < maxVal2)
+    {
+      /* Update the maximum value and its index */
+      out = maxVal2;
+      outIndex = count + 2u;
+    }
+
+    maxVal2 = *pSrc++;
+
+    /* compare for the maximum value */
+    if(out < maxVal1)
+    {
+      /* Update the maximum value and its index */
+      out = maxVal1;
+      outIndex = count + 3u;
+    }
+
+    /* compare for the maximum value */
+    if(out < maxVal2)
+    {
+      /* Update the maximum value and its index */
+      out = maxVal2;
+      outIndex = count + 4u;
+    }
+
+    count += 4u;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* if (blockSize - 1u) is not multiple of 4 */
+  blkCnt = (blockSize - 1u) % 4u;
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+  q15_t maxVal1, out;                            /* Temporary variables to store the output value. */
+  uint32_t blkCnt, outIndex;                     /* loop counter */
+
+  blkCnt = (blockSize - 1u);
+
+  /* Initialise the index value to zero. */
+  outIndex = 0u;
+  /* Load first input value that act as reference value for comparision */
+  out = *pSrc++;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  while(blkCnt > 0u)
+  {
+    /* Initialize maxVal to the next consecutive values one by one */
+    maxVal1 = *pSrc++;
+
+    /* compare for the maximum value */
+    if(out < maxVal1)
+    {
+      /* Update the maximum value and it's index */
+      out = maxVal1;
+      outIndex = blockSize - blkCnt;
+    }
+    /* Decrement the loop counter */
+    blkCnt--;
+
+  }
+
+  /* Store the maximum value and its index into destination pointers */
+  *pResult = out;
+  *pIndex = outIndex;
+}
+
+/**    
+ * @} end of Max group    
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_q31.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_q31.c
new file mode 100644
index 000000000..18e757297
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_q31.c
@@ -0,0 +1,169 @@
+/* ----------------------------------------------------------------------    
+* Copyright (C) 2010 ARM Limited. All rights reserved.    
+*    
+* $Date:        15. February 2012  
+* $Revision: 	V1.1.0  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:		arm_max_q31.c    
+*    
+* Description:	Maximum value of a Q31 vector.    
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.1.0 2012/02/15 
+*    Updated with more optimizations, bug fixes and minor API changes.  
+*   
+* Version 1.0.10 2011/7/15  
+*    Big Endian support added and Merged M0 and M3/M4 Source code.   
+*    
+* Version 1.0.3 2010/11/29   
+*    Re-organized the CMSIS folders and updated documentation.    
+*     
+* Version 1.0.2 2010/11/11    
+*    Documentation updated.     
+*    
+* Version 1.0.1 2010/10/05     
+*    Production release and review comments incorporated.    
+*    
+* Version 1.0.0 2010/09/20     
+*    Production release and review comments incorporated.    
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**    
+ * @ingroup groupStats    
+ */
+
+/**    
+ * @addtogroup Max    
+ * @{    
+ */
+
+
+/**    
+ * @brief Maximum value of a Q31 vector.    
+ * @param[in]       *pSrc points to the input vector    
+ * @param[in]       blockSize length of the input vector    
+ * @param[out]      *pResult maximum value returned here    
+ * @param[out]      *pIndex index of maximum value returned here    
+ * @return none.    
+ */
+
+void arm_max_q31(
+  q31_t * pSrc,
+  uint32_t blockSize,
+  q31_t * pResult,
+  uint32_t * pIndex)
+{
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+  q31_t maxVal1, maxVal2, out;                   /* Temporary variables to store the output value. */
+  uint32_t blkCnt, outIndex, count;              /* loop counter */
+
+  /* Initialise the count value. */
+  count = 0u;
+  /* Initialise the index value to zero. */
+  outIndex = 0u;
+  /* Load first input value that act as reference value for comparision */
+  out = *pSrc++;
+
+  /* Loop unrolling */
+  blkCnt = (blockSize - 1u) >> 2u;
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+  while(blkCnt > 0u)
+  {
+    /* Initialize maxVal to the next consecutive values one by one */
+    maxVal1 = *pSrc++;
+
+    maxVal2 = *pSrc++;
+
+    /* compare for the maximum value */
+    if(out < maxVal1)
+    {
+      /* Update the maximum value and its index */
+      out = maxVal1;
+      outIndex = count + 1u;
+    }
+
+    maxVal1 = *pSrc++;
+
+    /* compare for the maximum value */
+    if(out < maxVal2)
+    {
+      /* Update the maximum value and its index */
+      out = maxVal2;
+      outIndex = count + 2u;
+    }
+
+    maxVal2 = *pSrc++;
+
+    /* compare for the maximum value */
+    if(out < maxVal1)
+    {
+      /* Update the maximum value and its index */
+      out = maxVal1;
+      outIndex = count + 3u;
+    }
+
+    /* compare for the maximum value */
+    if(out < maxVal2)
+    {
+      /* Update the maximum value and its index */
+      out = maxVal2;
+      outIndex = count + 4u;
+    }
+
+    count += 4u;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* if (blockSize - 1u) is not multiple of 4 */
+  blkCnt = (blockSize - 1u) % 4u;
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+  q31_t maxVal1, out;                            /* Temporary variables to store the output value. */
+  uint32_t blkCnt, outIndex;                     /* loop counter */
+
+  /* Initialise the index value to zero. */
+  outIndex = 0u;
+  /* Load first input value that act as reference value for comparision */
+  out = *pSrc++;
+
+  blkCnt = (blockSize - 1u);
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  while(blkCnt > 0u)
+  {
+    /* Initialize maxVal to the next consecutive values one by one */
+    maxVal1 = *pSrc++;
+
+    /* compare for the maximum value */
+    if(out < maxVal1)
+    {
+      /* Update the maximum value and it's index */
+      out = maxVal1;
+      outIndex = blockSize - blkCnt;
+    }
+
+    /* Decrement the loop counter */
+    blkCnt--;
+
+  }
+
+  /* Store the maximum value and its index into destination pointers */
+  *pResult = out;
+  *pIndex = outIndex;
+}
+
+/**    
+ * @} end of Max group    
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_q7.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_q7.c
new file mode 100644
index 000000000..110a8fe5f
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_q7.c
@@ -0,0 +1,169 @@
+/* ----------------------------------------------------------------------    
+* Copyright (C) 2010 ARM Limited. All rights reserved.    
+*    
+* $Date:        15. February 2012  
+* $Revision: 	V1.1.0  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:		arm_max_q7.c    
+*    
+* Description:	Maximum value of a Q7 vector.    
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.1.0 2012/02/15 
+*    Updated with more optimizations, bug fixes and minor API changes.  
+*   
+* Version 1.0.10 2011/7/15  
+*    Big Endian support added and Merged M0 and M3/M4 Source code.   
+*    
+* Version 1.0.3 2010/11/29   
+*    Re-organized the CMSIS folders and updated documentation.    
+*     
+* Version 1.0.2 2010/11/11    
+*    Documentation updated.     
+*    
+* Version 1.0.1 2010/10/05     
+*    Production release and review comments incorporated.    
+*    
+* Version 1.0.0 2010/09/20     
+*    Production release and review comments incorporated.    
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**    
+ * @ingroup groupStats    
+ */
+
+/**    
+ * @addtogroup Max    
+ * @{    
+ */
+
+
+/**    
+ * @brief Maximum value of a Q7 vector.    
+ * @param[in]       *pSrc points to the input vector    
+ * @param[in]       blockSize length of the input vector    
+ * @param[out]      *pResult maximum value returned here    
+ * @param[out]      *pIndex index of maximum value returned here    
+  * @return none.    
+ */
+
+void arm_max_q7(
+  q7_t * pSrc,
+  uint32_t blockSize,
+  q7_t * pResult,
+  uint32_t * pIndex)
+{
+#ifndef ARM_MATH_CM0
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+  q7_t maxVal1, maxVal2, out;                    /* Temporary variables to store the output value. */
+  uint32_t blkCnt, outIndex, count;              /* loop counter */
+
+  /* Initialise the count value. */
+  count = 0u;
+  /* Initialise the index value to zero. */
+  outIndex = 0u;
+  /* Load first input value that act as reference value for comparision */
+  out = *pSrc++;
+
+  /* Loop unrolling */
+  blkCnt = (blockSize - 1u) >> 2u;
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+  while(blkCnt > 0u)
+  {
+    /* Initialize maxVal to the next consecutive values one by one */
+    maxVal1 = *pSrc++;
+
+    maxVal2 = *pSrc++;
+
+    /* compare for the maximum value */
+    if(out < maxVal1)
+    {
+      /* Update the maximum value and its index */
+      out = maxVal1;
+      outIndex = count + 1u;
+    }
+
+    maxVal1 = *pSrc++;
+
+    /* compare for the maximum value */
+    if(out < maxVal2)
+    {
+      /* Update the maximum value and its index */
+      out = maxVal2;
+      outIndex = count + 2u;
+    }
+
+    maxVal2 = *pSrc++;
+
+    /* compare for the maximum value */
+    if(out < maxVal1)
+    {
+      /* Update the maximum value and its index */
+      out = maxVal1;
+      outIndex = count + 3u;
+    }
+
+    /* compare for the maximum value */
+    if(out < maxVal2)
+    {
+      /* Update the maximum value and its index */
+      out = maxVal2;
+      outIndex = count + 4u;
+    }
+
+    count += 4u;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* if (blockSize - 1u) is not multiple of 4 */
+  blkCnt = (blockSize - 1u) % 4u;
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+  q7_t maxVal1, out;                             /* Temporary variables to store the output value. */
+  uint32_t blkCnt, outIndex;                     /* loop counter */
+
+  /* Initialise the index value to zero. */
+  outIndex = 0u;
+  /* Load first input value that act as reference value for comparision */
+  out = *pSrc++;
+
+  blkCnt = (blockSize - 1u);
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  while(blkCnt > 0u)
+  {
+    /* Initialize maxVal to the next consecutive values one by one */
+    maxVal1 = *pSrc++;
+
+    /* compare for the maximum value */
+    if(out < maxVal1)
+    {
+      /* Update the maximum value and it's index */
+      out = maxVal1;
+      outIndex = blockSize - blkCnt;
+    }
+    /* Decrement the loop counter */
+    blkCnt--;
+
+  }
+
+  /* Store the maximum value and its index into destination pointers */
+  *pResult = out;
+  *pIndex = outIndex;
+
+}
+
+/**    
+ * @} end of Max group    
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_f32.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_f32.c
new file mode 100644
index 000000000..7823d746c
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_f32.c
@@ -0,0 +1,131 @@
+/* ----------------------------------------------------------------------    
+* Copyright (C) 2010 ARM Limited. All rights reserved.    
+*    
+* $Date:        15. February 2012  
+* $Revision: 	V1.1.0  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:		arm_mean_f32.c    
+*    
+* Description:	Mean value of a floating-point vector.   
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.1.0 2012/02/15 
+*    Updated with more optimizations, bug fixes and minor API changes.  
+*   
+* Version 1.0.10 2011/7/15  
+*    Big Endian support added and Merged M0 and M3/M4 Source code.   
+*    
+* Version 1.0.3 2010/11/29   
+*    Re-organized the CMSIS folders and updated documentation.    
+*     
+* Version 1.0.2 2010/11/11    
+*    Documentation updated.     
+*    
+* Version 1.0.1 2010/10/05     
+*    Production release and review comments incorporated.    
+*    
+* Version 1.0.0 2010/09/20     
+*    Production release and review comments incorporated.    
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**    
+ * @ingroup groupStats    
+ */
+
+/**    
+ * @defgroup mean Mean    
+ *    
+ * Calculates the mean of the input vector. Mean is defined as the average of the elements in the vector.    
+ * The underlying algorithm is used:    
+ *    
+ * <pre>    
+ * 	Result = (pSrc[0] + pSrc[1] + pSrc[2] + ... + pSrc[blockSize-1]) / blockSize;    
+ * </pre>    
+ *    
+ * There are separate functions for floating-point, Q31, Q15, and Q7 data types.    
+ */
+
+/**    
+ * @addtogroup mean    
+ * @{    
+ */
+
+
+/**    
+ * @brief Mean value of a floating-point vector.    
+ * @param[in]       *pSrc points to the input vector    
+ * @param[in]       blockSize length of the input vector    
+ * @param[out]      *pResult mean value returned here    
+ * @return none.    
+ */
+
+
+void arm_mean_f32(
+  float32_t * pSrc,
+  uint32_t blockSize,
+  float32_t * pResult)
+{
+  float32_t sum = 0.0f;                          /* Temporary result storage */
+  uint32_t blkCnt;                               /* loop counter */
+
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+  float32_t in1, in2, in3, in4;
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+    in1 = *pSrc++;
+    in2 = *pSrc++;
+    in3 = *pSrc++;
+    in4 = *pSrc++;
+
+    sum += in1;
+    sum += in2;
+    sum += in3;
+    sum += in4;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.    
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  /* Loop over blockSize number of values */
+  blkCnt = blockSize;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  while(blkCnt > 0u)
+  {
+    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+    sum += *pSrc++;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize  */
+  /* Store the result to the destination */
+  *pResult = sum / (float32_t) blockSize;
+}
+
+/**    
+ * @} end of mean group    
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_q15.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_q15.c
new file mode 100644
index 000000000..49ec1e62e
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_q15.c
@@ -0,0 +1,125 @@
+/* ----------------------------------------------------------------------    
+* Copyright (C) 2010 ARM Limited. All rights reserved.    
+*    
+* $Date:        15. February 2012  
+* $Revision: 	V1.1.0  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:		arm_mean_q15.c    
+*    
+* Description:	Mean value of a Q15 vector.   
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.1.0 2012/02/15 
+*    Updated with more optimizations, bug fixes and minor API changes.  
+*   
+* Version 1.0.10 2011/7/15  
+*    Big Endian support added and Merged M0 and M3/M4 Source code.   
+*    
+* Version 1.0.3 2010/11/29   
+*    Re-organized the CMSIS folders and updated documentation.    
+*     
+* Version 1.0.2 2010/11/11    
+*    Documentation updated.     
+*    
+* Version 1.0.1 2010/10/05     
+*    Production release and review comments incorporated.    
+*    
+* Version 1.0.0 2010/09/20     
+*    Production release and review comments incorporated.    
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**    
+ * @ingroup groupStats    
+ */
+
+/**    
+ * @addtogroup mean    
+ * @{    
+ */
+
+/**    
+ * @brief Mean value of a Q15 vector.    
+ * @param[in]       *pSrc points to the input vector    
+ * @param[in]       blockSize length of the input vector    
+ * @param[out]      *pResult mean value returned here    
+ * @return none.    
+ *    
+ * @details    
+ * <b>Scaling and Overflow Behavior:</b>    
+ * \par    
+ * The function is implemented using a 32-bit internal accumulator.    
+ * The input is represented in 1.15 format and is accumulated in a 32-bit     
+ * accumulator in 17.15 format.     
+ * There is no risk of internal overflow with this approach, and the     
+ * full precision of intermediate result is preserved.     
+ * Finally, the accumulator is saturated and truncated to yield a result of 1.15 format.    
+ *    
+ */
+
+
+void arm_mean_q15(
+  q15_t * pSrc,
+  uint32_t blockSize,
+  q15_t * pResult)
+{
+  q31_t sum = 0;                                 /* Temporary result storage */
+  uint32_t blkCnt;                               /* loop counter */
+
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+  q31_t in;
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+    in = *__SIMD32(pSrc)++;
+    sum += ((in << 16) >> 16);
+    sum += (in >> 16);
+    in = *__SIMD32(pSrc)++;
+    sum += ((in << 16) >> 16);
+    sum += (in >> 16);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.    
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  /* Loop over blockSize number of values */
+  blkCnt = blockSize;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  while(blkCnt > 0u)
+  {
+    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+    sum += *pSrc++;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize  */
+  /* Store the result to the destination */
+  *pResult = (q15_t) (sum / blockSize);
+}
+
+/**    
+ * @} end of mean group    
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_q31.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_q31.c
new file mode 100644
index 000000000..88a5de299
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_q31.c
@@ -0,0 +1,128 @@
+/* ----------------------------------------------------------------------    
+* Copyright (C) 2010 ARM Limited. All rights reserved.    
+*    
+* $Date:        15. February 2012  
+* $Revision: 	V1.1.0  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:		arm_mean_q31.c    
+*    
+* Description:	Mean value of a Q31 vector.   
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.1.0 2012/02/15 
+*    Updated with more optimizations, bug fixes and minor API changes.  
+*   
+* Version 1.0.10 2011/7/15  
+*    Big Endian support added and Merged M0 and M3/M4 Source code.   
+*    
+* Version 1.0.3 2010/11/29   
+*    Re-organized the CMSIS folders and updated documentation.    
+*     
+* Version 1.0.2 2010/11/11    
+*    Documentation updated.     
+*    
+* Version 1.0.1 2010/10/05     
+*    Production release and review comments incorporated.    
+*    
+* Version 1.0.0 2010/09/20     
+*    Production release and review comments incorporated.    
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**    
+ * @ingroup groupStats    
+ */
+
+/**    
+ * @addtogroup mean    
+ * @{    
+ */
+
+/**    
+ * @brief Mean value of a Q31 vector.    
+ * @param[in]       *pSrc points to the input vector    
+ * @param[in]       blockSize length of the input vector    
+ * @param[out]      *pResult mean value returned here    
+ * @return none.    
+ *    
+ * @details    
+ * <b>Scaling and Overflow Behavior:</b>    
+ *\par    
+ * The function is implemented using a 64-bit internal accumulator.    
+ * The input is represented in 1.31 format and is accumulated in a 64-bit    
+ * accumulator in 33.31 format.    
+ * There is no risk of internal overflow with this approach, and the     
+ * full precision of intermediate result is preserved.     
+ * Finally, the accumulator is truncated to yield a result of 1.31 format.    
+ *    
+ */
+
+
+void arm_mean_q31(
+  q31_t * pSrc,
+  uint32_t blockSize,
+  q31_t * pResult)
+{
+  q63_t sum = 0;                                 /* Temporary result storage */
+  uint32_t blkCnt;                               /* loop counter */
+
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+  q31_t in1, in2, in3, in4;
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+    in1 = *pSrc++;
+    in2 = *pSrc++;
+    in3 = *pSrc++;
+    in4 = *pSrc++;
+
+    sum += in1;
+    sum += in2;
+    sum += in3;
+    sum += in4;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.    
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  /* Loop over blockSize number of values */
+  blkCnt = blockSize;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  while(blkCnt > 0u)
+  {
+    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+    sum += *pSrc++;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize  */
+  /* Store the result to the destination */
+  *pResult = (q31_t) (sum / (int32_t) blockSize);
+}
+
+/**    
+ * @} end of mean group    
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_q7.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_q7.c
new file mode 100644
index 000000000..a34c011bc
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_q7.c
@@ -0,0 +1,125 @@
+/* ----------------------------------------------------------------------    
+* Copyright (C) 2010 ARM Limited. All rights reserved.    
+*    
+* $Date:        15. February 2012  
+* $Revision: 	V1.1.0  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:		arm_mean_q7.c    
+*    
+* Description:	Mean value of a Q7 vector.   
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.1.0 2012/02/15 
+*    Updated with more optimizations, bug fixes and minor API changes.  
+*   
+* Version 1.0.10 2011/7/15  
+*    Big Endian support added and Merged M0 and M3/M4 Source code.   
+*    
+* Version 1.0.3 2010/11/29   
+*    Re-organized the CMSIS folders and updated documentation.    
+*     
+* Version 1.0.2 2010/11/11    
+*    Documentation updated.     
+*    
+* Version 1.0.1 2010/10/05     
+*    Production release and review comments incorporated.    
+*    
+* Version 1.0.0 2010/09/20     
+*    Production release and review comments incorporated.    
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**    
+ * @ingroup groupStats    
+ */
+
+/**    
+ * @addtogroup mean    
+ * @{    
+ */
+
+/**    
+ * @brief Mean value of a Q7 vector.    
+ * @param[in]       *pSrc points to the input vector    
+ * @param[in]       blockSize length of the input vector    
+ * @param[out]      *pResult mean value returned here    
+ * @return none.    
+ *    
+ * @details    
+ * <b>Scaling and Overflow Behavior:</b>    
+ * \par    
+ * The function is implemented using a 32-bit internal accumulator.     
+ * The input is represented in 1.7 format and is accumulated in a 32-bit    
+ * accumulator in 25.7 format.    
+ * There is no risk of internal overflow with this approach, and the     
+ * full precision of intermediate result is preserved.     
+ * Finally, the accumulator is truncated to yield a result of 1.7 format.    
+ *    
+ */
+
+
+void arm_mean_q7(
+  q7_t * pSrc,
+  uint32_t blockSize,
+  q7_t * pResult)
+{
+  q31_t sum = 0;                                 /* Temporary result storage */
+  uint32_t blkCnt;                               /* loop counter */
+
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+  q31_t in;
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+    in = *__SIMD32(pSrc)++;
+
+    sum += ((in << 24) >> 24);
+    sum += ((in << 16) >> 24);
+    sum += ((in << 8) >> 24);
+    sum += (in >> 24);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.    
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  /* Loop over blockSize number of values */
+  blkCnt = blockSize;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  while(blkCnt > 0u)
+  {
+    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+    sum += *pSrc++;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize  */
+  /* Store the result to the destination */
+  *pResult = (q7_t) (sum / (int32_t) blockSize);
+}
+
+/**    
+ * @} end of mean group    
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_f32.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_f32.c
new file mode 100644
index 000000000..dc2fdf624
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_f32.c
@@ -0,0 +1,175 @@
+/* ----------------------------------------------------------------------    
+* Copyright (C) 2010 ARM Limited. All rights reserved.    
+*    
+* $Date:        15. February 2012  
+* $Revision: 	V1.1.0  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:		arm_min_f32.c    
+*    
+* Description:	Minimum value of a floating-point vector.    
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.1.0 2012/02/15 
+*    Updated with more optimizations, bug fixes and minor API changes.  
+*   
+* Version 1.0.10 2011/7/15  
+*    Big Endian support added and Merged M0 and M3/M4 Source code.   
+*    
+* Version 1.0.3 2010/11/29   
+*    Re-organized the CMSIS folders and updated documentation.    
+*     
+* Version 1.0.2 2010/11/11    
+*    Documentation updated.     
+*    
+* Version 1.0.1 2010/10/05     
+*    Production release and review comments incorporated.    
+*    
+* Version 1.0.0 2010/09/20     
+*    Production release and review comments incorporated.    
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**    
+ * @ingroup groupStats    
+ */
+
+/**    
+ * @defgroup Min Minimum    
+ *    
+ * Computes the minimum value of an array of data.     
+ * The function returns both the minimum value and its position within the array.     
+ * There are separate functions for floating-point, Q31, Q15, and Q7 data types.    
+ */
+
+/**    
+ * @addtogroup Min    
+ * @{    
+ */
+
+
+/**    
+ * @brief Minimum value of a floating-point vector.    
+ * @param[in]       *pSrc points to the input vector    
+ * @param[in]       blockSize length of the input vector    
+ * @param[out]      *pResult minimum value returned here    
+ * @param[out]      *pIndex index of minimum value returned here    
+  * @return none.    
+ *    
+ */
+
+void arm_min_f32(
+  float32_t * pSrc,
+  uint32_t blockSize,
+  float32_t * pResult,
+  uint32_t * pIndex)
+{
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+  float32_t minVal1, minVal2, out;               /* Temporary variables to store the output value. */
+  uint32_t blkCnt, outIndex, count;              /* loop counter */
+
+  /* Initialise the count value. */
+  count = 0u;
+  /* Initialise the index value to zero. */
+  outIndex = 0u;
+  /* Load first input value that act as reference value for comparision */
+  out = *pSrc++;
+
+  /* Loop unrolling */
+  blkCnt = (blockSize - 1u) >> 2u;
+
+  while(blkCnt > 0)
+  {
+    /* Initialize minVal to the next consecutive values one by one */
+    minVal1 = *pSrc++;
+    minVal2 = *pSrc++;
+
+    /* compare for the minimum value */
+    if(out > minVal1)
+    {
+      /* Update the minimum value and its index */
+      out = minVal1;
+      outIndex = count + 1u;
+    }
+
+    minVal1 = *pSrc++;
+
+    /* compare for the minimum value */
+    if(out > minVal2)
+    {
+      /* Update the minimum value and its index */
+      out = minVal2;
+      outIndex = count + 2u;
+    }
+
+    minVal2 = *pSrc++;
+
+    /* compare for the minimum value */
+    if(out > minVal1)
+    {
+      /* Update the minimum value and its index */
+      out = minVal1;
+      outIndex = count + 3u;
+    }
+
+    /* compare for the minimum value */
+    if(out > minVal2)
+    {
+      /* Update the minimum value and its index */
+      out = minVal2;
+      outIndex = count + 4u;
+    }
+
+    count += 4u;
+
+    blkCnt--;
+  }
+
+  /* if (blockSize - 1u ) is not multiple of 4 */
+  blkCnt = (blockSize - 1u) % 4u;
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+  float32_t minVal1, out;                        /* Temporary variables to store the output value. */
+  uint32_t blkCnt, outIndex;                     /* loop counter */
+
+  /* Initialise the index value to zero. */
+  outIndex = 0u;
+  /* Load first input value that act as reference value for comparision */
+  out = *pSrc++;
+
+  blkCnt = (blockSize - 1u);
+
+#endif //      #ifndef ARM_MATH_CM0
+
+  while(blkCnt > 0)
+  {
+    /* Initialize minVal to the next consecutive values one by one */
+    minVal1 = *pSrc++;
+
+    /* compare for the minimum value */
+    if(out > minVal1)
+    {
+      /* Update the minimum value and it's index */
+      out = minVal1;
+      outIndex = blockSize - blkCnt;
+    }
+
+    blkCnt--;
+
+  }
+
+  /* Store the minimum value and it's index into destination pointers */
+  *pResult = out;
+  *pIndex = outIndex;
+}
+
+/**    
+ * @} end of Min group    
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_q15.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_q15.c
new file mode 100644
index 000000000..35b67bb1e
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_q15.c
@@ -0,0 +1,169 @@
+/* ----------------------------------------------------------------------    
+* Copyright (C) 2010 ARM Limited. All rights reserved.    
+*    
+* $Date:        15. February 2012  
+* $Revision: 	V1.1.0  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:		arm_min_q15.c    
+*    
+* Description:	Minimum value of a Q15 vector.    
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.1.0 2012/02/15 
+*    Updated with more optimizations, bug fixes and minor API changes.  
+*   
+* Version 1.0.10 2011/7/15  
+*    Big Endian support added and Merged M0 and M3/M4 Source code.   
+*    
+* Version 1.0.3 2010/11/29   
+*    Re-organized the CMSIS folders and updated documentation.    
+*     
+* Version 1.0.2 2010/11/11    
+*    Documentation updated.     
+*    
+* Version 1.0.1 2010/10/05     
+*    Production release and review comments incorporated.    
+*    
+* Version 1.0.0 2010/09/20     
+*    Production release and review comments incorporated.    
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**    
+ * @ingroup groupStats    
+ */
+
+
+/**    
+ * @addtogroup Min    
+ * @{    
+ */
+
+
+/**    
+ * @brief Minimum value of a Q15 vector.    
+ * @param[in]       *pSrc points to the input vector    
+ * @param[in]       blockSize length of the input vector    
+ * @param[out]      *pResult minimum value returned here    
+ * @param[out]      *pIndex index of minimum value returned here    
+ * @return none.    
+ *    
+ */
+
+void arm_min_q15(
+  q15_t * pSrc,
+  uint32_t blockSize,
+  q15_t * pResult,
+  uint32_t * pIndex)
+{
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+  q15_t minVal1, minVal2, out;                   /* Temporary variables to store the output value. */
+  uint32_t blkCnt, outIndex, count;              /* loop counter */
+
+  /* Initialise the count value. */
+  count = 0u;
+  /* Initialise the index value to zero. */
+  outIndex = 0u;
+  /* Load first input value that act as reference value for comparision */
+  out = *pSrc++;
+
+  /* Loop unrolling */
+  blkCnt = (blockSize - 1u) >> 2u;
+
+  while(blkCnt > 0)
+  {
+    /* Initialize minVal to the next consecutive values one by one */
+    minVal1 = *pSrc++;
+    minVal2 = *pSrc++;
+
+    /* compare for the minimum value */
+    if(out > minVal1)
+    {
+      /* Update the minimum value and its index */
+      out = minVal1;
+      outIndex = count + 1u;
+    }
+
+    minVal1 = *pSrc++;
+
+    /* compare for the minimum value */
+    if(out > minVal2)
+    {
+      /* Update the minimum value and its index */
+      out = minVal2;
+      outIndex = count + 2u;
+    }
+
+    minVal2 = *pSrc++;
+
+    /* compare for the minimum value */
+    if(out > minVal1)
+    {
+      /* Update the minimum value and its index */
+      out = minVal1;
+      outIndex = count + 3u;
+    }
+
+    /* compare for the minimum value */
+    if(out > minVal2)
+    {
+      /* Update the minimum value and its index */
+      out = minVal2;
+      outIndex = count + 4u;
+    }
+
+    count += 4u;
+
+    blkCnt--;
+  }
+
+  /* if (blockSize - 1u ) is not multiple of 4 */
+  blkCnt = (blockSize - 1u) % 4u;
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+  q15_t minVal1, out;                            /* Temporary variables to store the output value. */
+  uint32_t blkCnt, outIndex;                     /* loop counter */
+
+  blkCnt = (blockSize - 1u);
+
+  /* Initialise the index value to zero. */
+  outIndex = 0u;
+  /* Load first input value that act as reference value for comparision */
+  out = *pSrc++;
+
+#endif //      #ifndef ARM_MATH_CM0
+
+  while(blkCnt > 0)
+  {
+    /* Initialize minVal to the next consecutive values one by one */
+    minVal1 = *pSrc++;
+
+    /* compare for the minimum value */
+    if(out > minVal1)
+    {
+      /* Update the minimum value and it's index */
+      out = minVal1;
+      outIndex = blockSize - blkCnt;
+    }
+
+    blkCnt--;
+
+  }
+
+
+
+  /* Store the minimum value and its index into destination pointers */
+  *pResult = out;
+  *pIndex = outIndex;
+}
+
+/**    
+ * @} end of Min group    
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_q31.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_q31.c
new file mode 100644
index 000000000..4a9befc75
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_q31.c
@@ -0,0 +1,168 @@
+/* ----------------------------------------------------------------------    
+* Copyright (C) 2010 ARM Limited. All rights reserved.    
+*    
+* $Date:        15. February 2012  
+* $Revision: 	V1.1.0  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:		arm_min_q31.c    
+*    
+* Description:	Minimum value of a Q31 vector.    
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.1.0 2012/02/15 
+*    Updated with more optimizations, bug fixes and minor API changes.  
+*   
+* Version 1.0.10 2011/7/15  
+*    Big Endian support added and Merged M0 and M3/M4 Source code.   
+*    
+* Version 1.0.3 2010/11/29   
+*    Re-organized the CMSIS folders and updated documentation.    
+*     
+* Version 1.0.2 2010/11/11    
+*    Documentation updated.     
+*    
+* Version 1.0.1 2010/10/05     
+*    Production release and review comments incorporated.    
+*    
+* Version 1.0.0 2010/09/20     
+*    Production release and review comments incorporated.    
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**    
+ * @ingroup groupStats    
+ */
+
+
+/**    
+ * @addtogroup Min    
+ * @{    
+ */
+
+
+/**    
+ * @brief Minimum value of a Q31 vector.    
+ * @param[in]       *pSrc points to the input vector    
+ * @param[in]       blockSize length of the input vector    
+ * @param[out]      *pResult minimum value returned here    
+ * @param[out]      *pIndex index of minimum value returned here    
+ * @return none.    
+ *    
+ */
+
+void arm_min_q31(
+  q31_t * pSrc,
+  uint32_t blockSize,
+  q31_t * pResult,
+  uint32_t * pIndex)
+{
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+  q31_t minVal1, minVal2, out;                   /* Temporary variables to store the output value. */
+  uint32_t blkCnt, outIndex, count;              /* loop counter */
+
+  /* Initialise the count value. */
+  count = 0u;
+  /* Initialise the index value to zero. */
+  outIndex = 0u;
+  /* Load first input value that act as reference value for comparision */
+  out = *pSrc++;
+
+
+  /* Loop unrolling */
+  blkCnt = (blockSize - 1u) >> 2u;
+
+  while(blkCnt > 0)
+  {
+    /* Initialize minVal to the next consecutive values one by one */
+    minVal1 = *pSrc++;
+    minVal2 = *pSrc++;
+
+    /* compare for the minimum value */
+    if(out > minVal1)
+    {
+      /* Update the minimum value and its index */
+      out = minVal1;
+      outIndex = count + 1u;
+    }
+
+    minVal1 = *pSrc++;
+
+    /* compare for the minimum value */
+    if(out > minVal2)
+    {
+      /* Update the minimum value and its index */
+      out = minVal2;
+      outIndex = count + 2u;
+    }
+
+    minVal2 = *pSrc++;
+
+    /* compare for the minimum value */
+    if(out > minVal1)
+    {
+      /* Update the minimum value and its index */
+      out = minVal1;
+      outIndex = count + 3u;
+    }
+
+    /* compare for the minimum value */
+    if(out > minVal2)
+    {
+      /* Update the minimum value and its index */
+      out = minVal2;
+      outIndex = count + 4u;
+    }
+
+    count += 4u;
+
+    blkCnt--;
+  }
+
+  /* if (blockSize - 1u ) is not multiple of 4 */
+  blkCnt = (blockSize - 1u) % 4u;
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+  q31_t minVal1, out;                            /* Temporary variables to store the output value. */
+  uint32_t blkCnt, outIndex;                     /* loop counter */
+
+  blkCnt = (blockSize - 1u);
+
+  /* Initialise the index value to zero. */
+  outIndex = 0u;
+  /* Load first input value that act as reference value for comparision */
+  out = *pSrc++;
+
+#endif //      #ifndef ARM_MATH_CM0
+
+  while(blkCnt > 0)
+  {
+    /* Initialize minVal to the next consecutive values one by one */
+    minVal1 = *pSrc++;
+
+    /* compare for the minimum value */
+    if(out > minVal1)
+    {
+      /* Update the minimum value and it's index */
+      out = minVal1;
+      outIndex = blockSize - blkCnt;
+    }
+
+    blkCnt--;
+
+  }
+
+  /* Store the minimum value and its index into destination pointers */
+  *pResult = out;
+  *pIndex = outIndex;
+}
+
+/**    
+ * @} end of Min group    
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_q7.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_q7.c
new file mode 100644
index 000000000..aaf8ad9ff
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_q7.c
@@ -0,0 +1,170 @@
+/* ----------------------------------------------------------------------    
+* Copyright (C) 2010 ARM Limited. All rights reserved.    
+*    
+* $Date:        15. February 2012  
+* $Revision: 	V1.1.0  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:		arm_min_q7.c    
+*    
+* Description:	Minimum value of a Q7 vector.    
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.1.0 2012/02/15 
+*    Updated with more optimizations, bug fixes and minor API changes.  
+*   
+* Version 1.0.10 2011/7/15  
+*    Big Endian support added and Merged M0 and M3/M4 Source code.   
+*    
+* Version 1.0.3 2010/11/29   
+*    Re-organized the CMSIS folders and updated documentation.    
+*     
+* Version 1.0.2 2010/11/11    
+*    Documentation updated.     
+*    
+* Version 1.0.1 2010/10/05     
+*    Production release and review comments incorporated.    
+*    
+* Version 1.0.0 2010/09/20     
+*    Production release and review comments incorporated.    
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**    
+ * @ingroup groupStats    
+ */
+
+/**    
+ * @addtogroup Min    
+ * @{    
+ */
+
+
+/**    
+ * @brief Minimum value of a Q7 vector.    
+ * @param[in]       *pSrc points to the input vector    
+ * @param[in]       blockSize length of the input vector    
+ * @param[out]      *pResult minimum value returned here    
+ * @param[out]      *pIndex index of minimum value returned here    
+ * @return none.    
+ *    
+ */
+
+void arm_min_q7(
+  q7_t * pSrc,
+  uint32_t blockSize,
+  q7_t * pResult,
+  uint32_t * pIndex)
+{
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+  q7_t minVal1, minVal2, out;                    /* Temporary variables to store the output value. */
+  uint32_t blkCnt, outIndex, count;              /* loop counter */
+
+  /* Initialise the count value. */
+  count = 0u;
+  /* Initialise the index value to zero. */
+  outIndex = 0u;
+  /* Load first input value that act as reference value for comparision */
+  out = *pSrc++;
+
+  /* Loop unrolling */
+  blkCnt = (blockSize - 1u) >> 2u;
+
+  while(blkCnt > 0)
+  {
+    /* Initialize minVal to the next consecutive values one by one */
+    minVal1 = *pSrc++;
+    minVal2 = *pSrc++;
+
+    /* compare for the minimum value */
+    if(out > minVal1)
+    {
+      /* Update the minimum value and its index */
+      out = minVal1;
+      outIndex = count + 1u;
+    }
+
+    minVal1 = *pSrc++;
+
+    /* compare for the minimum value */
+    if(out > minVal2)
+    {
+      /* Update the minimum value and its index */
+      out = minVal2;
+      outIndex = count + 2u;
+    }
+
+    minVal2 = *pSrc++;
+
+    /* compare for the minimum value */
+    if(out > minVal1)
+    {
+      /* Update the minimum value and its index */
+      out = minVal1;
+      outIndex = count + 3u;
+    }
+
+    /* compare for the minimum value */
+    if(out > minVal2)
+    {
+      /* Update the minimum value and its index */
+      out = minVal2;
+      outIndex = count + 4u;
+    }
+
+    count += 4u;
+
+    blkCnt--;
+  }
+
+  /* if (blockSize - 1u ) is not multiple of 4 */
+  blkCnt = (blockSize - 1u) % 4u;
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  q7_t minVal1, out;                             /* Temporary variables to store the output value. */
+  uint32_t blkCnt, outIndex;                     /* loop counter */
+
+  /* Initialise the index value to zero. */
+  outIndex = 0u;
+  /* Load first input value that act as reference value for comparision */
+  out = *pSrc++;
+
+  blkCnt = (blockSize - 1u);
+
+#endif //      #ifndef ARM_MATH_CM0
+
+  while(blkCnt > 0)
+  {
+    /* Initialize minVal to the next consecutive values one by one */
+    minVal1 = *pSrc++;
+
+    /* compare for the minimum value */
+    if(out > minVal1)
+    {
+      /* Update the minimum value and it's index */
+      out = minVal1;
+      outIndex = blockSize - blkCnt;
+    }
+
+    blkCnt--;
+
+  }
+
+  /* Store the minimum value and its index into destination pointers */
+  *pResult = out;
+  *pIndex = outIndex;
+
+
+}
+
+/**    
+ * @} end of Min group    
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_f32.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_f32.c
new file mode 100644
index 000000000..5ee0060d8
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_f32.c
@@ -0,0 +1,138 @@
+/* ----------------------------------------------------------------------    
+* Copyright (C) 2010 ARM Limited. All rights reserved.    
+*    
+* $Date:        15. February 2012  
+* $Revision: 	V1.1.0  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:		arm_power_f32.c    
+*    
+* Description:	Sum of the squares of the elements of a floating-point vector.    
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.1.0 2012/02/15 
+*    Updated with more optimizations, bug fixes and minor API changes.  
+*   
+* Version 1.0.10 2011/7/15  
+*    Big Endian support added and Merged M0 and M3/M4 Source code.   
+*    
+* Version 1.0.3 2010/11/29   
+*    Re-organized the CMSIS folders and updated documentation.    
+*     
+* Version 1.0.2 2010/11/11    
+*    Documentation updated.     
+*    
+* Version 1.0.1 2010/10/05     
+*    Production release and review comments incorporated.    
+*    
+* Version 1.0.0 2010/09/20     
+*    Production release and review comments incorporated.    
+*    
+* Version 0.0.7  2010/06/10     
+*    Misra-C changes done    
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**    
+ * @ingroup groupStats    
+ */
+
+/**    
+ * @defgroup power Power    
+ *    
+ * Calculates the sum of the squares of the elements in the input vector.    
+ * The underlying algorithm is used:    
+ *    
+ * <pre>    
+ * 	Result = pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + pSrc[2] * pSrc[2] + ... + pSrc[blockSize-1] * pSrc[blockSize-1];    
+ * </pre>    
+ *   
+ * There are separate functions for floating point, Q31, Q15, and Q7 data types.     
+ */
+
+/**    
+ * @addtogroup power    
+ * @{    
+ */
+
+
+/**    
+ * @brief Sum of the squares of the elements of a floating-point vector.    
+ * @param[in]       *pSrc points to the input vector    
+ * @param[in]       blockSize length of the input vector    
+ * @param[out]      *pResult sum of the squares value returned here    
+ * @return none.    
+ *    
+ */
+
+
+void arm_power_f32(
+  float32_t * pSrc,
+  uint32_t blockSize,
+  float32_t * pResult)
+{
+  float32_t sum = 0.0f;                          /* accumulator */
+  float32_t in;                                  /* Temporary variable to store input value */
+  uint32_t blkCnt;                               /* loop counter */
+
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+    /* Compute Power and then store the result in a temporary variable, sum. */
+    in = *pSrc++;
+    sum += in * in;
+    in = *pSrc++;
+    sum += in * in;
+    in = *pSrc++;
+    sum += in * in;
+    in = *pSrc++;
+    sum += in * in;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.    
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  /* Loop over blockSize number of values */
+  blkCnt = blockSize;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+
+  while(blkCnt > 0u)
+  {
+    /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+    /* compute power and then store the result in a temporary variable, sum. */
+    in = *pSrc++;
+    sum += in * in;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Store the result to the destination */
+  *pResult = sum;
+}
+
+/**    
+ * @} end of power group    
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_q15.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_q15.c
new file mode 100644
index 000000000..aed06a17a
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_q15.c
@@ -0,0 +1,144 @@
+/* ----------------------------------------------------------------------    
+* Copyright (C) 2010 ARM Limited. All rights reserved.    
+*    
+* $Date:        15. February 2012  
+* $Revision: 	V1.1.0  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:		arm_power_q15.c    
+*    
+* Description:	Sum of the squares of the elements of a Q15 vector.    
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.1.0 2012/02/15 
+*    Updated with more optimizations, bug fixes and minor API changes.  
+*   
+* Version 1.0.10 2011/7/15  
+*    Big Endian support added and Merged M0 and M3/M4 Source code.   
+*    
+* Version 1.0.3 2010/11/29   
+*    Re-organized the CMSIS folders and updated documentation.    
+*     
+* Version 1.0.2 2010/11/11    
+*    Documentation updated.     
+*    
+* Version 1.0.1 2010/10/05     
+*    Production release and review comments incorporated.    
+*    
+* Version 1.0.0 2010/09/20     
+*    Production release and review comments incorporated.    
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**    
+ * @ingroup groupStats    
+ */
+
+/**    
+ * @addtogroup power    
+ * @{    
+ */
+
+/**    
+ * @brief Sum of the squares of the elements of a Q15 vector.    
+ * @param[in]       *pSrc points to the input vector    
+ * @param[in]       blockSize length of the input vector    
+ * @param[out]      *pResult sum of the squares value returned here    
+ * @return none.    
+ *    
+ * @details    
+ * <b>Scaling and Overflow Behavior:</b>    
+ *    
+ * \par    
+ * The function is implemented using a 64-bit internal accumulator.     
+ * The input is represented in 1.15 format.   
+ * Intermediate multiplication yields a 2.30 format, and this    
+ * result is added without saturation to a 64-bit accumulator in 34.30 format.    
+ * With 33 guard bits in the accumulator, there is no risk of overflow, and the    
+ * full precision of the intermediate multiplication is preserved.    
+ * Finally, the return result is in 34.30 format.     
+ *    
+ */
+
+void arm_power_q15(
+  q15_t * pSrc,
+  uint32_t blockSize,
+  q63_t * pResult)
+{
+  q63_t sum = 0;                                 /* Temporary result storage */
+
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+  q31_t in32;                                    /* Temporary variable to store input value */
+  q15_t in16;                                    /* Temporary variable to store input value */
+  uint32_t blkCnt;                               /* loop counter */
+
+
+  /* loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+    /* Compute Power and then store the result in a temporary variable, sum. */
+    in32 = *__SIMD32(pSrc)++;
+    sum = __SMLALD(in32, in32, sum);
+    in32 = *__SIMD32(pSrc)++;
+    sum = __SMLALD(in32, in32, sum);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.    
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+  while(blkCnt > 0u)
+  {
+    /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+    /* Compute Power and then store the result in a temporary variable, sum. */
+    in16 = *pSrc++;
+    sum = __SMLALD(in16, in16, sum);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  q15_t in;                                      /* Temporary variable to store input value */
+  uint32_t blkCnt;                               /* loop counter */
+
+
+  /* Loop over blockSize number of values */
+  blkCnt = blockSize;
+
+  while(blkCnt > 0u)
+  {
+    /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+    /* Compute Power and then store the result in a temporary variable, sum. */
+    in = *pSrc++;
+    sum += ((q31_t) in * in);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  /* Store the results in 34.30 format  */
+  *pResult = sum;
+}
+
+/**    
+ * @} end of power group    
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_q31.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_q31.c
new file mode 100644
index 000000000..43f03f757
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_q31.c
@@ -0,0 +1,135 @@
+/* ----------------------------------------------------------------------    
+* Copyright (C) 2010 ARM Limited. All rights reserved.    
+*    
+* $Date:        15. February 2012  
+* $Revision: 	V1.1.0  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:		arm_power_q31.c    
+*    
+* Description:	Sum of the squares of the elements of a Q31 vector.   
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.1.0 2012/02/15 
+*    Updated with more optimizations, bug fixes and minor API changes.  
+*   
+* Version 1.0.10 2011/7/15  
+*    Big Endian support added and Merged M0 and M3/M4 Source code.   
+*    
+* Version 1.0.3 2010/11/29   
+*    Re-organized the CMSIS folders and updated documentation.    
+*     
+* Version 1.0.2 2010/11/11    
+*    Documentation updated.     
+*    
+* Version 1.0.1 2010/10/05     
+*    Production release and review comments incorporated.    
+*    
+* Version 1.0.0 2010/09/20     
+*    Production release and review comments incorporated.    
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**    
+ * @ingroup groupStats    
+ */
+
+/**    
+ * @addtogroup power    
+ * @{    
+ */
+
+/**    
+ * @brief Sum of the squares of the elements of a Q31 vector.    
+ * @param[in]       *pSrc points to the input vector    
+ * @param[in]       blockSize length of the input vector    
+ * @param[out]      *pResult sum of the squares value returned here    
+ * @return none.    
+ *    
+ * @details    
+ * <b>Scaling and Overflow Behavior:</b>    
+ *    
+ * \par    
+ * The function is implemented using a 64-bit internal accumulator.    
+ * The input is represented in 1.31 format.    
+ * Intermediate multiplication yields a 2.62 format, and this    
+ * result is truncated to 2.48 format by discarding the lower 14 bits.    
+ * The 2.48 result is then added without saturation to a 64-bit accumulator in 16.48 format.    
+ * With 15 guard bits in the accumulator, there is no risk of overflow, and the    
+ * full precision of the intermediate multiplication is preserved.    
+ * Finally, the return result is in 16.48 format.     
+ *    
+ */
+
+void arm_power_q31(
+  q31_t * pSrc,
+  uint32_t blockSize,
+  q63_t * pResult)
+{
+  q63_t sum = 0;                                 /* Temporary result storage */
+  q31_t in;
+  uint32_t blkCnt;                               /* loop counter */
+
+
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+    /* Compute Power then shift intermediate results by 14 bits to maintain 16.48 format and then store the result in a temporary variable sum, providing 15 guard bits. */
+    in = *pSrc++;
+    sum += ((q63_t) in * in) >> 14u;
+
+    in = *pSrc++;
+    sum += ((q63_t) in * in) >> 14u;
+
+    in = *pSrc++;
+    sum += ((q63_t) in * in) >> 14u;
+
+    in = *pSrc++;
+    sum += ((q63_t) in * in) >> 14u;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.    
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  /* Loop over blockSize number of values */
+  blkCnt = blockSize;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  while(blkCnt > 0u)
+  {
+    /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+    /* Compute Power and then store the result in a temporary variable, sum. */
+    in = *pSrc++;
+    sum += ((q63_t) in * in) >> 14u;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Store the results in 16.48 format  */
+  *pResult = sum;
+}
+
+/**    
+ * @} end of power group    
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_q7.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_q7.c
new file mode 100644
index 000000000..5cb99b4c2
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_q7.c
@@ -0,0 +1,133 @@
+/* ----------------------------------------------------------------------    
+* Copyright (C) 2010 ARM Limited. All rights reserved.    
+*    
+* $Date:        15. February 2012  
+* $Revision: 	V1.1.0  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:		arm_power_q7.c    
+*    
+* Description:	Sum of the squares of the elements of a Q7 vector.    
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.1.0 2012/02/15 
+*    Updated with more optimizations, bug fixes and minor API changes.  
+*   
+* Version 1.0.10 2011/7/15  
+*    Big Endian support added and Merged M0 and M3/M4 Source code.   
+*    
+* Version 1.0.3 2010/11/29   
+*    Re-organized the CMSIS folders and updated documentation.    
+*     
+* Version 1.0.2 2010/11/11    
+*    Documentation updated.     
+*    
+* Version 1.0.1 2010/10/05     
+*    Production release and review comments incorporated.    
+*    
+* Version 1.0.0 2010/09/20     
+*    Production release and review comments incorporated.    
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**    
+ * @ingroup groupStats    
+ */
+
+/**    
+ * @addtogroup power    
+ * @{    
+ */
+
+/**    
+ * @brief Sum of the squares of the elements of a Q7 vector.    
+ * @param[in]       *pSrc points to the input vector    
+ * @param[in]       blockSize length of the input vector    
+ * @param[out]      *pResult sum of the squares value returned here    
+ * @return none.    
+ *    
+ * @details    
+ * <b>Scaling and Overflow Behavior:</b>    
+ *    
+ * \par    
+ * The function is implemented using a 32-bit internal accumulator.     
+ * The input is represented in 1.7 format.   
+ * Intermediate multiplication yields a 2.14 format, and this    
+ * result is added without saturation to an accumulator in 18.14 format.    
+ * With 17 guard bits in the accumulator, there is no risk of overflow, and the    
+ * full precision of the intermediate multiplication is preserved.    
+ * Finally, the return result is in 18.14 format.     
+ *    
+ */
+
+void arm_power_q7(
+  q7_t * pSrc,
+  uint32_t blockSize,
+  q31_t * pResult)
+{
+  q31_t sum = 0;                                 /* Temporary result storage */
+  q7_t in;                                       /* Temporary variable to store input */
+  uint32_t blkCnt;                               /* loop counter */
+
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+  q31_t input1;                                  /* Temporary variable to store packed input */
+  q31_t in1, in2;                                /* Temporary variables to store input */
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* Reading two inputs of pSrc vector and packing */
+    input1 = *__SIMD32(pSrc)++;
+
+    in1 = __SXTB16(__ROR(input1, 8));
+    in2 = __SXTB16(input1);
+
+    /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+    /* calculate power and accumulate to accumulator */
+    sum = __SMLAD(in1, in1, sum);
+    sum = __SMLAD(in2, in2, sum);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.    
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  /* Loop over blockSize number of values */
+  blkCnt = blockSize;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  while(blkCnt > 0u)
+  {
+    /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+    /* Compute Power and then store the result in a temporary variable, sum. */
+    in = *pSrc++;
+    sum += ((q15_t) in * in);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Store the result in 18.14 format  */
+  *pResult = sum;
+}
+
+/**    
+ * @} end of power group    
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_rms_f32.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_rms_f32.c
new file mode 100644
index 000000000..573896ed4
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_rms_f32.c
@@ -0,0 +1,133 @@
+/* ----------------------------------------------------------------------    
+* Copyright (C) 2010 ARM Limited. All rights reserved.    
+*    
+* $Date:        15. February 2012  
+* $Revision: 	V1.1.0  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:		arm_rms_f32.c    
+*    
+* Description:	Root mean square value of an array of F32 type    
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.1.0 2012/02/15 
+*    Updated with more optimizations, bug fixes and minor API changes.  
+*   
+* Version 1.0.10 2011/7/15  
+*    Big Endian support added and Merged M0 and M3/M4 Source code.   
+*    
+* Version 1.0.3 2010/11/29   
+*    Re-organized the CMSIS folders and updated documentation.    
+*     
+* Version 1.0.2 2010/11/11    
+*    Documentation updated.     
+*    
+* Version 1.0.1 2010/10/05     
+*    Production release and review comments incorporated.    
+*    
+* Version 1.0.0 2010/09/20     
+*    Production release and review comments incorporated.    
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**    
+ * @ingroup groupStats    
+ */
+
+/**    
+ * @defgroup RMS Root mean square (RMS)    
+ *    
+ *     
+ * Calculates the Root Mean Sqaure of the elements in the input vector.    
+ * The underlying algorithm is used:    
+ *    
+ * <pre>    
+ * 	Result = sqrt(((pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] * pSrc[blockSize-1]) / blockSize));    
+ * </pre>    
+ *   
+ * There are separate functions for floating point, Q31, and Q15 data types.     
+ */
+
+/**    
+ * @addtogroup RMS    
+ * @{    
+ */
+
+
+/**    
+ * @brief Root Mean Square of the elements of a floating-point vector.    
+ * @param[in]       *pSrc points to the input vector    
+ * @param[in]       blockSize length of the input vector    
+ * @param[out]      *pResult rms value returned here    
+ * @return none.    
+ *    
+ */
+
+void arm_rms_f32(
+  float32_t * pSrc,
+  uint32_t blockSize,
+  float32_t * pResult)
+{
+  float32_t sum = 0.0f;                          /* Accumulator */
+  float32_t in;                                  /* Tempoprary variable to store input value */
+  uint32_t blkCnt;                               /* loop counter */
+
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+  /* loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+    /* Compute sum of the squares and then store the result in a temporary variable, sum  */
+    in = *pSrc++;
+    sum += in * in;
+    in = *pSrc++;
+    sum += in * in;
+    in = *pSrc++;
+    sum += in * in;
+    in = *pSrc++;
+    sum += in * in;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.    
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  /* Loop over blockSize number of values */
+  blkCnt = blockSize;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  while(blkCnt > 0u)
+  {
+    /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+    /* Compute sum of the squares and then store the results in a temporary variable, sum  */
+    in = *pSrc++;
+    sum += in * in;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Compute Rms and store the result in the destination */
+  arm_sqrt_f32(sum / (float32_t) blockSize, pResult);
+}
+
+/**    
+ * @} end of RMS group    
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_rms_q15.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_rms_q15.c
new file mode 100644
index 000000000..491c652e7
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_rms_q15.c
@@ -0,0 +1,153 @@
+/* ----------------------------------------------------------------------    
+* Copyright (C) 2010 ARM Limited. All rights reserved.    
+*    
+* $Date:        15. February 2012  
+* $Revision: 	V1.1.0  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:		arm_rms_q15.c    
+*    
+* Description:	Root Mean Square of the elements of a Q15 vector.  
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.1.0 2012/02/15 
+*    Updated with more optimizations, bug fixes and minor API changes.  
+*   
+* Version 1.0.10 2011/7/15  
+*    Big Endian support added and Merged M0 and M3/M4 Source code.   
+*    
+* Version 1.0.3 2010/11/29   
+*    Re-organized the CMSIS folders and updated documentation.    
+*     
+* Version 1.0.2 2010/11/11    
+*    Documentation updated.     
+*    
+* Version 1.0.1 2010/10/05     
+*    Production release and review comments incorporated.    
+*    
+* Version 1.0.0 2010/09/20     
+*    Production release and review comments incorporated.    
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**    
+ * @addtogroup RMS    
+ * @{    
+ */
+
+/**    
+ * @brief Root Mean Square of the elements of a Q15 vector.    
+ * @param[in]       *pSrc points to the input vector    
+ * @param[in]       blockSize length of the input vector    
+ * @param[out]      *pResult rms value returned here    
+ * @return none.    
+ *    
+ * @details    
+ * <b>Scaling and Overflow Behavior:</b>    
+ *    
+ * \par    
+ * The function is implemented using a 64-bit internal accumulator.    
+ * The input is represented in 1.15 format.    
+ * Intermediate multiplication yields a 2.30 format, and this    
+ * result is added without saturation to a 64-bit accumulator in 34.30 format.    
+ * With 33 guard bits in the accumulator, there is no risk of overflow, and the    
+ * full precision of the intermediate multiplication is preserved.    
+ * Finally, the 34.30 result is truncated to 34.15 format by discarding the lower     
+ * 15 bits, and then saturated to yield a result in 1.15 format.    
+ *    
+ */
+
+void arm_rms_q15(
+  q15_t * pSrc,
+  uint32_t blockSize,
+  q15_t * pResult)
+{
+  q63_t sum = 0;                                 /* accumulator */
+
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+  q31_t in;                                      /* temporary variable to store the input value */
+  q15_t in1;                                     /* temporary variable to store the input value */
+  uint32_t blkCnt;                               /* loop counter */
+
+  /* loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+    /* Compute sum of the squares and then store the results in a temporary variable, sum */
+    in = *__SIMD32(pSrc)++;
+    sum = __SMLALD(in, in, sum);
+    in = *__SIMD32(pSrc)++;
+    sum = __SMLALD(in, in, sum);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.    
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+  while(blkCnt > 0u)
+  {
+    /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+    /* Compute sum of the squares and then store the results in a temporary variable, sum */
+    in1 = *pSrc++;
+    sum = __SMLALD(in1, in1, sum);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Truncating and saturating the accumulator to 1.15 format */
+  sum = __SSAT((q31_t) (sum >> 15), 16);
+
+  in1 = (q15_t) (sum / blockSize);
+
+  /* Store the result in the destination */
+  arm_sqrt_q15(in1, pResult);
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  q15_t in;                                      /* temporary variable to store the input value */
+  uint32_t blkCnt;                               /* loop counter */
+
+  /* Loop over blockSize number of values */
+  blkCnt = blockSize;
+
+  while(blkCnt > 0u)
+  {
+    /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+    /* Compute sum of the squares and then store the results in a temporary variable, sum */
+    in = *pSrc++;
+    sum += ((q31_t) in * in);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Truncating and saturating the accumulator to 1.15 format */
+  sum = __SSAT((q31_t) (sum >> 15), 16);
+
+  in = (q15_t) (sum / blockSize);
+
+  /* Store the result in the destination */
+  arm_sqrt_q15(in, pResult);
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+}
+
+/**    
+ * @} end of RMS group    
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_rms_q31.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_rms_q31.c
new file mode 100644
index 000000000..54038eb12
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_rms_q31.c
@@ -0,0 +1,146 @@
+/* ----------------------------------------------------------------------    
+* Copyright (C) 2010 ARM Limited. All rights reserved.    
+*    
+* $Date:        15. February 2012  
+* $Revision: 	V1.1.0  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:		arm_rms_q31.c    
+*    
+* Description:	Root Mean Square of the elements of a Q31 vector.    
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.1.0 2012/02/15 
+*    Updated with more optimizations, bug fixes and minor API changes.  
+*   
+* Version 1.0.10 2011/7/15  
+*    Big Endian support added and Merged M0 and M3/M4 Source code.   
+*    
+* Version 1.0.3 2010/11/29   
+*    Re-organized the CMSIS folders and updated documentation.    
+*     
+* Version 1.0.2 2010/11/11    
+*    Documentation updated.     
+*    
+* Version 1.0.1 2010/10/05     
+*    Production release and review comments incorporated.    
+*    
+* Version 1.0.0 2010/09/20     
+*    Production release and review comments incorporated.    
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**        
+ * @addtogroup RMS        
+ * @{        
+ */
+
+
+/**        
+ * @brief Root Mean Square of the elements of a Q31 vector.        
+ * @param[in]       *pSrc points to the input vector        
+ * @param[in]       blockSize length of the input vector        
+ * @param[out]      *pResult rms value returned here        
+ * @return none.        
+ *        
+ * @details        
+ * <b>Scaling and Overflow Behavior:</b>        
+ *        
+ *\par        
+ * The function is implemented using an internal 64-bit accumulator.        
+ * The input is represented in 1.31 format, and intermediate multiplication        
+ * yields a 2.62 format.        
+ * The accumulator maintains full precision of the intermediate multiplication results,         
+ * but provides only a single guard bit.        
+ * There is no saturation on intermediate additions.        
+ * If the accumulator overflows, it wraps around and distorts the result.         
+ * In order to avoid overflows completely, the input signal must be scaled down by         
+ * log2(blockSize) bits, as a total of blockSize additions are performed internally.         
+ * Finally, the 2.62 accumulator is right shifted by 31 bits to yield a 1.31 format value.        
+ *        
+ */
+
+void arm_rms_q31(
+  q31_t * pSrc,
+  uint32_t blockSize,
+  q31_t * pResult)
+{
+  q63_t sum = 0;                                 /* accumulator */
+  q31_t in;                                      /* Temporary variable to store the input */
+  uint32_t blkCnt;                               /* loop counter */
+
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+  q31_t in1, in2, in3, in4;                      /* Temporary input variables */
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 8 outputs at a time.        
+   ** a second loop below computes the remaining 1 to 7 samples. */
+  while(blkCnt > 0u)
+  {
+    /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+    /* Compute sum of the squares and then store the result in a temporary variable, sum */
+    /* read two samples from source buffer */
+    in1 = pSrc[0];
+    in2 = pSrc[1];
+
+    /* calculate power and accumulate to accumulator */
+    sum += (q63_t) in1 *in1;
+    sum += (q63_t) in2 *in2;
+
+    /* read two samples from source buffer */
+    in3 = pSrc[2];
+    in4 = pSrc[3];
+
+    /* calculate power and accumulate to accumulator */
+    sum += (q63_t) in3 *in3;
+    sum += (q63_t) in4 *in4;
+
+
+    /* update source buffer to process next samples */
+    pSrc += 4u;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 8, compute any remaining output samples here.        
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+  blkCnt = blockSize;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  while(blkCnt > 0u)
+  {
+    /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+    /* Compute sum of the squares and then store the results in a temporary variable, sum */
+    in = *pSrc++;
+    sum += (q63_t) in *in;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Convert data in 2.62 to 1.31 by 31 right shifts and saturate */
+
+  sum = __SSAT(sum >> 31, 31);
+
+
+  /* Compute Rms and store the result in the destination vector */
+  arm_sqrt_q31((q31_t) ((q31_t) sum / (int32_t) blockSize), pResult);
+}
+
+/**        
+ * @} end of RMS group        
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_std_f32.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_std_f32.c
new file mode 100644
index 000000000..6442d5c29
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_std_f32.c
@@ -0,0 +1,188 @@
+/* ----------------------------------------------------------------------    
+* Copyright (C) 2010 ARM Limited. All rights reserved.    
+*    
+* $Date:        15. February 2012  
+* $Revision: 	V1.1.0  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:		arm_std_f32.c    
+*    
+* Description:	Standard deviation of the elements of a floating-point vector.  
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.1.0 2012/02/15 
+*    Updated with more optimizations, bug fixes and minor API changes.  
+*   
+* Version 1.0.10 2011/7/15  
+*    Big Endian support added and Merged M0 and M3/M4 Source code.   
+*    
+* Version 1.0.3 2010/11/29   
+*    Re-organized the CMSIS folders and updated documentation.    
+*     
+* Version 1.0.2 2010/11/11    
+*    Documentation updated.     
+*    
+* Version 1.0.1 2010/10/05     
+*    Production release and review comments incorporated.    
+*    
+* Version 1.0.0 2010/09/20     
+*    Production release and review comments incorporated.    
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**    
+ * @ingroup groupStats    
+ */
+
+/**    
+ * @defgroup STD Standard deviation    
+ *    
+ * Calculates the standard deviation of the elements in the input vector.     
+ * The underlying algorithm is used:    
+ *   
+ * <pre>    
+ * 	Result = sqrt((sumOfSquares - sum<sup>2</sup> / blockSize) / (blockSize - 1))   
+ *   
+ *	   where, sumOfSquares = pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] * pSrc[blockSize-1]   
+ *   
+ *	                   sum = pSrc[0] + pSrc[1] + pSrc[2] + ... + pSrc[blockSize-1]   
+ * </pre>   
+ *    
+ * There are separate functions for floating point, Q31, and Q15 data types.    
+ */
+
+/**    
+ * @addtogroup STD    
+ * @{    
+ */
+
+
+/**    
+ * @brief Standard deviation of the elements of a floating-point vector.    
+ * @param[in]       *pSrc points to the input vector    
+ * @param[in]       blockSize length of the input vector    
+ * @param[out]      *pResult standard deviation value returned here    
+ * @return none.    
+ *    
+ */
+
+
+void arm_std_f32(
+  float32_t * pSrc,
+  uint32_t blockSize,
+  float32_t * pResult)
+{
+  float32_t sum = 0.0f;                          /* Temporary result storage */
+  float32_t sumOfSquares = 0.0f;                 /* Sum of squares */
+  float32_t in;                                  /* input value */
+  uint32_t blkCnt;                               /* loop counter */
+
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+  float32_t meanOfSquares, mean, squareOfMean;
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1])  */
+    /* Compute Sum of squares of the input samples    
+     * and then store the result in a temporary variable, sum. */
+    in = *pSrc++;
+    sum += in;
+    sumOfSquares += in * in;
+    in = *pSrc++;
+    sum += in;
+    sumOfSquares += in * in;
+    in = *pSrc++;
+    sum += in;
+    sumOfSquares += in * in;
+    in = *pSrc++;
+    sum += in;
+    sumOfSquares += in * in;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.    
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+  while(blkCnt > 0u)
+  {
+    /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+    /* Compute Sum of squares of the input samples    
+     * and then store the result in a temporary variable, sum. */
+    in = *pSrc++;
+    sum += in;
+    sumOfSquares += in * in;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Compute Mean of squares of the input samples    
+   * and then store the result in a temporary variable, meanOfSquares. */
+  meanOfSquares = sumOfSquares / ((float32_t) blockSize - 1.0f);
+
+  /* Compute mean of all input values */
+  mean = sum / (float32_t) blockSize;
+
+  /* Compute square of mean */
+  squareOfMean = (mean * mean) * (((float32_t) blockSize) /
+                                  ((float32_t) blockSize - 1.0f));
+
+  /* Compute standard deviation and then store the result to the destination */
+  arm_sqrt_f32((meanOfSquares - squareOfMean), pResult);
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  float32_t squareOfSum;                         /* Square of Sum */
+  float32_t var;                                 /* Temporary varaince storage */
+
+  /* Loop over blockSize number of values */
+  blkCnt = blockSize;
+
+  while(blkCnt > 0u)
+  {
+    /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+    /* Compute Sum of squares of the input samples     
+     * and then store the result in a temporary variable, sumOfSquares. */
+    in = *pSrc++;
+    sumOfSquares += in * in;
+
+    /* C = (A[0] + A[1] + ... + A[blockSize-1]) */
+    /* Compute Sum of the input samples     
+     * and then store the result in a temporary variable, sum. */
+    sum += in;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Compute the square of sum */
+  squareOfSum = ((sum * sum) / (float32_t) blockSize);
+
+  /* Compute the variance */
+  var = ((sumOfSquares - squareOfSum) / (float32_t) (blockSize - 1.0f));
+
+  /* Compute standard deviation and then store the result to the destination */
+  arm_sqrt_f32(var, pResult);
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+}
+
+/**    
+ * @} end of STD group    
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_std_q15.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_std_q15.c
new file mode 100644
index 000000000..2d1a49a50
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_std_q15.c
@@ -0,0 +1,197 @@
+/* ----------------------------------------------------------------------    
+* Copyright (C) 2010 ARM Limited. All rights reserved.    
+*    
+* $Date:        15. February 2012  
+* $Revision: 	V1.1.0  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:		arm_std_q15.c    
+*    
+* Description:	Standard deviation of an array of Q15 type.    
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.1.0 2012/02/15 
+*    Updated with more optimizations, bug fixes and minor API changes.  
+*   
+* Version 1.0.10 2011/7/15  
+*    Big Endian support added and Merged M0 and M3/M4 Source code.   
+*    
+* Version 1.0.3 2010/11/29   
+*    Re-organized the CMSIS folders and updated documentation.    
+*     
+* Version 1.0.2 2010/11/11    
+*    Documentation updated.     
+*    
+* Version 1.0.1 2010/10/05     
+*    Production release and review comments incorporated.    
+*    
+* Version 1.0.0 2010/09/20     
+*    Production release and review comments incorporated.    
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**    
+ * @ingroup groupStats    
+ */
+
+/**    
+ * @addtogroup STD    
+ * @{    
+ */
+
+/**    
+ * @brief Standard deviation of the elements of a Q15 vector.    
+ * @param[in]       *pSrc points to the input vector    
+ * @param[in]       blockSize length of the input vector    
+ * @param[out]      *pResult standard deviation value returned here    
+ * @return none.    
+ *    
+ * @details    
+ * <b>Scaling and Overflow Behavior:</b>    
+ *    
+ * \par    
+ * The function is implemented using a 64-bit internal accumulator.    
+ * The input is represented in 1.15 format.   
+ * Intermediate multiplication yields a 2.30 format, and this    
+ * result is added without saturation to a 64-bit accumulator in 34.30 format.    
+ * With 33 guard bits in the accumulator, there is no risk of overflow, and the    
+ * full precision of the intermediate multiplication is preserved.    
+ * Finally, the 34.30 result is truncated to 34.15 format by discarding the lower     
+ * 15 bits, and then saturated to yield a result in 1.15 format.    
+ */
+
+void arm_std_q15(
+  q15_t * pSrc,
+  uint32_t blockSize,
+  q15_t * pResult)
+{
+  q31_t sum = 0;                                 /* Accumulator */
+  q31_t meanOfSquares, squareOfMean;             /* square of mean and mean of square */
+  q15_t mean;                                    /* mean */
+  uint32_t blkCnt;                               /* loop counter */
+  q15_t t;                                       /* Temporary variable */
+  q63_t sumOfSquares = 0;                        /* Accumulator */
+
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+  q31_t in;                                      /* input value */
+  q15_t in1;                                     /* input value */
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1])  */
+    /* Compute Sum of squares of the input samples    
+     * and then store the result in a temporary variable, sum. */
+    in = *__SIMD32(pSrc)++;
+    sum += ((in << 16) >> 16);
+    sum += (in >> 16);
+    sumOfSquares = __SMLALD(in, in, sumOfSquares);
+    in = *__SIMD32(pSrc)++;
+    sum += ((in << 16) >> 16);
+    sum += (in >> 16);
+    sumOfSquares = __SMLALD(in, in, sumOfSquares);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.    
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+  while(blkCnt > 0u)
+  {
+    /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+    /* Compute Sum of squares of the input samples    
+     * and then store the result in a temporary variable, sum. */
+    in1 = *pSrc++;
+    sumOfSquares = __SMLALD(in1, in1, sumOfSquares);
+    sum += in1;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Compute Mean of squares of the input samples    
+   * and then store the result in a temporary variable, meanOfSquares. */
+  t = (q15_t) ((1.0 / (blockSize - 1)) * 16384LL);
+  sumOfSquares = __SSAT((sumOfSquares >> 15u), 16u);
+
+  meanOfSquares = (q31_t) ((sumOfSquares * t) >> 14u);
+
+  /* Compute mean of all input values */
+  t = (q15_t) ((1.0 / (blockSize * (blockSize - 1))) * 32768LL);
+  mean = (q15_t) __SSAT(sum, 16u);
+
+  /* Compute square of mean */
+  squareOfMean = ((q31_t) mean * mean) >> 15;
+  squareOfMean = (q31_t) (((q63_t) squareOfMean * t) >> 15);
+
+  /* mean of the squares minus the square of the mean. */
+  in1 = (q15_t) (meanOfSquares - squareOfMean);
+
+  /* Compute standard deviation and store the result to the destination */
+  arm_sqrt_q15(in1, pResult);
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+  q15_t in;                                      /* input value */
+
+  /* Loop over blockSize number of values */
+  blkCnt = blockSize;
+
+  while(blkCnt > 0u)
+  {
+    /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+    /* Compute Sum of squares of the input samples     
+     * and then store the result in a temporary variable, sumOfSquares. */
+    in = *pSrc++;
+    sumOfSquares += (in * in);
+
+    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+    /* Compute sum of all input values and then store the result in a temporary variable, sum. */
+    sum += in;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Compute Mean of squares of the input samples     
+   * and then store the result in a temporary variable, meanOfSquares. */
+  t = (q15_t) ((1.0 / (blockSize - 1)) * 16384LL);
+  sumOfSquares = __SSAT((sumOfSquares >> 15u), 16u);
+  meanOfSquares = (q31_t) ((sumOfSquares * t) >> 14u);
+
+  /* Compute mean of all input values */
+  mean = (q15_t) __SSAT(sum, 16u);
+
+  /* Compute square of mean of the input samples   
+   * and then store the result in a temporary variable, squareOfMean.*/
+  t = (q15_t) ((1.0 / (blockSize * (blockSize - 1))) * 32768LL);
+  squareOfMean = ((q31_t) mean * mean) >> 15;
+  squareOfMean = (q31_t) (((q63_t) squareOfMean * t) >> 15);
+
+  /* mean of the squares minus the square of the mean. */
+  in = (q15_t) (meanOfSquares - squareOfMean);
+
+  /* Compute standard deviation and store the result to the destination */
+  arm_sqrt_q15(in, pResult);
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+
+}
+
+/**    
+ * @} end of STD group    
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_std_q31.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_std_q31.c
new file mode 100644
index 000000000..45bec01a1
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_std_q31.c
@@ -0,0 +1,184 @@
+/* ----------------------------------------------------------------------    
+* Copyright (C) 2010 ARM Limited. All rights reserved.    
+*    
+* $Date:        15. February 2012  
+* $Revision: 	V1.1.0  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:		arm_std_q31.c    
+*    
+* Description:	Standard deviation of an array of Q31 type.    
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.1.0 2012/02/15 
+*    Updated with more optimizations, bug fixes and minor API changes.  
+*   
+* Version 1.0.10 2011/7/15  
+*    Big Endian support added and Merged M0 and M3/M4 Source code.   
+*    
+* Version 1.0.3 2010/11/29   
+*    Re-organized the CMSIS folders and updated documentation.    
+*     
+* Version 1.0.2 2010/11/11    
+*    Documentation updated.     
+*    
+* Version 1.0.1 2010/10/05     
+*    Production release and review comments incorporated.    
+*    
+* Version 1.0.0 2010/09/20     
+*    Production release and review comments incorporated.    
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**    
+ * @ingroup groupStats    
+ */
+
+/**    
+ * @addtogroup STD    
+ * @{    
+ */
+
+
+/**    
+ * @brief Standard deviation of the elements of a Q31 vector.    
+ * @param[in]       *pSrc points to the input vector    
+ * @param[in]       blockSize length of the input vector    
+ * @param[out]      *pResult standard deviation value returned here    
+ * @return none.    
+ * @details    
+ * <b>Scaling and Overflow Behavior:</b>    
+ *    
+ *\par    
+ * The function is implemented using an internal 64-bit accumulator.    
+ * The input is represented in 1.31 format, and intermediate multiplication    
+ * yields a 2.62 format.    
+ * The accumulator maintains full precision of the intermediate multiplication results,     
+ * but provides only a single guard bit.    
+ * There is no saturation on intermediate additions.    
+ * If the accumulator overflows it wraps around and distorts the result.    
+ * In order to avoid overflows completely the input signal must be scaled down by     
+ * log2(blockSize) bits, as a total of blockSize additions are performed internally.     
+ * Finally, the 2.62 accumulator is right shifted by 31 bits to yield a 1.31 format value.    
+ *    
+ */
+
+
+void arm_std_q31(
+  q31_t * pSrc,
+  uint32_t blockSize,
+  q31_t * pResult)
+{
+  q63_t sum = 0;                                 /* Accumulator */
+  q31_t meanOfSquares, squareOfMean;             /* square of mean and mean of square */
+  q31_t mean;                                    /* mean */
+  q31_t in;                                      /* input value */
+  q31_t t;                                       /* Temporary variable */
+  uint32_t blkCnt;                               /* loop counter */
+  q63_t sumOfSquares = 0;                        /* Accumulator */
+
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1])  */
+    /* Compute Sum of squares of the input samples    
+     * and then store the result in a temporary variable, sum. */
+    in = *pSrc++;
+    sum += in;
+    sumOfSquares += ((q63_t) (in) * (in));
+    in = *pSrc++;
+    sum += in;
+    sumOfSquares += ((q63_t) (in) * (in));
+    in = *pSrc++;
+    sum += in;
+    sumOfSquares += ((q63_t) (in) * (in));
+    in = *pSrc++;
+    sum += in;
+    sumOfSquares += ((q63_t) (in) * (in));
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.    
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+  while(blkCnt > 0u)
+  {
+    /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+    /* Compute Sum of squares of the input samples    
+     * and then store the result in a temporary variable, sum. */
+    in = *pSrc++;
+    sum += in;
+    sumOfSquares += ((q63_t) (in) * (in));
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  t = (q31_t) ((1.0f / (float32_t) (blockSize - 1u)) * 1073741824.0f);
+
+  /* Compute Mean of squares of the input samples    
+   * and then store the result in a temporary variable, meanOfSquares. */
+  sumOfSquares = (sumOfSquares >> 31);
+  meanOfSquares = (q31_t) ((sumOfSquares * t) >> 30);
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  /* Loop over blockSize number of values */
+  blkCnt = blockSize;
+
+  while(blkCnt > 0u)
+  {
+    /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+    /* Compute Sum of squares of the input samples     
+     * and then store the result in a temporary variable, sumOfSquares. */
+    in = *pSrc++;
+    sumOfSquares += ((q63_t) (in) * (in));
+
+    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+    /* Compute sum of all input values and then store the result in a temporary variable, sum. */
+    sum += in;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Compute Mean of squares of the input samples     
+   * and then store the result in a temporary variable, meanOfSquares. */
+  t = (q31_t) ((1.0f / (float32_t) (blockSize - 1u)) * 1073741824.0f);
+  sumOfSquares = (sumOfSquares >> 31);
+  meanOfSquares = (q31_t) ((sumOfSquares * t) >> 30);
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  /* Compute mean of all input values */
+  t = (q31_t) ((1.0f / (blockSize * (blockSize - 1u))) * 2147483648.0f);
+  mean = (q31_t) (sum);
+
+  /* Compute square of mean */
+  squareOfMean = (q31_t) (((q63_t) mean * mean) >> 31);
+  squareOfMean = (q31_t) (((q63_t) squareOfMean * t) >> 31);
+
+
+  /* Compute standard deviation and then store the result to the destination */
+  arm_sqrt_q31(meanOfSquares - squareOfMean, pResult);
+
+}
+
+/**    
+ * @} end of STD group    
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_var_f32.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_var_f32.c
new file mode 100644
index 000000000..2adcfb443
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_var_f32.c
@@ -0,0 +1,184 @@
+/* ----------------------------------------------------------------------    
+* Copyright (C) 2010 ARM Limited. All rights reserved.    
+*    
+* $Date:        15. February 2012  
+* $Revision: 	V1.1.0  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:		arm_var_f32.c    
+*    
+* Description:	Variance of the elements of a floating-point vector.    
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.1.0 2012/02/15 
+*    Updated with more optimizations, bug fixes and minor API changes.  
+*   
+* Version 1.0.10 2011/7/15  
+*    Big Endian support added and Merged M0 and M3/M4 Source code.   
+*    
+* Version 1.0.3 2010/11/29   
+*    Re-organized the CMSIS folders and updated documentation.    
+*     
+* Version 1.0.2 2010/11/11    
+*    Documentation updated.     
+*    
+* Version 1.0.1 2010/10/05     
+*    Production release and review comments incorporated.    
+*    
+* Version 1.0.0 2010/09/20     
+*    Production release and review comments incorporated.    
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**    
+ * @ingroup groupStats    
+ */
+
+/**    
+ * @defgroup variance  Variance    
+ *    
+ * Calculates the variance of the elements in the input vector.    
+ * The underlying algorithm is used:    
+ *    
+ * <pre>    
+ * 	Result = (sumOfSquares - sum<sup>2</sup> / blockSize) / (blockSize - 1)   
+ *   
+ *	   where, sumOfSquares = pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] * pSrc[blockSize-1]   
+ *   
+ *	                   sum = pSrc[0] + pSrc[1] + pSrc[2] + ... + pSrc[blockSize-1]   
+ * </pre>   
+ *    
+ * There are separate functions for floating point, Q31, and Q15 data types.    
+ */
+
+/**    
+ * @addtogroup variance    
+ * @{    
+ */
+
+
+/**    
+ * @brief Variance of the elements of a floating-point vector.    
+ * @param[in]       *pSrc points to the input vector    
+ * @param[in]       blockSize length of the input vector    
+ * @param[out]      *pResult variance value returned here    
+ * @return none.    
+ *    
+ */
+
+
+void arm_var_f32(
+  float32_t * pSrc,
+  uint32_t blockSize,
+  float32_t * pResult)
+{
+
+  float32_t sum = 0.0f;                          /* Temporary result storage */
+  float32_t sumOfSquares = 0.0f;                 /* Sum of squares */
+  float32_t in;                                  /* input value */
+  uint32_t blkCnt;                               /* loop counter */
+
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+  float32_t meanOfSquares, mean, squareOfMean;   /* Temporary variables */
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1])  */
+    /* Compute Sum of squares of the input samples    
+     * and then store the result in a temporary variable, sum. */
+    in = *pSrc++;
+    sum += in;
+    sumOfSquares += in * in;
+    in = *pSrc++;
+    sum += in;
+    sumOfSquares += in * in;
+    in = *pSrc++;
+    sum += in;
+    sumOfSquares += in * in;
+    in = *pSrc++;
+    sum += in;
+    sumOfSquares += in * in;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.    
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+  while(blkCnt > 0u)
+  {
+    /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+    /* Compute Sum of squares of the input samples    
+     * and then store the result in a temporary variable, sum. */
+    in = *pSrc++;
+    sum += in;
+    sumOfSquares += in * in;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Compute Mean of squares of the input samples    
+   * and then store the result in a temporary variable, meanOfSquares. */
+  meanOfSquares = sumOfSquares / ((float32_t) blockSize - 1.0f);
+
+  /* Compute mean of all input values */
+  mean = sum / (float32_t) blockSize;
+
+  /* Compute square of mean */
+  squareOfMean = (mean * mean) * (((float32_t) blockSize) /
+                                  ((float32_t) blockSize - 1.0f));
+
+  /* Compute variance and then store the result to the destination */
+  *pResult = meanOfSquares - squareOfMean;
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+  float32_t squareOfSum;                         /* Square of Sum */
+
+  /* Loop over blockSize number of values */
+  blkCnt = blockSize;
+
+  while(blkCnt > 0u)
+  {
+    /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+    /* Compute Sum of squares of the input samples     
+     * and then store the result in a temporary variable, sumOfSquares. */
+    in = *pSrc++;
+    sumOfSquares += in * in;
+
+    /* C = (A[0] + A[1] + ... + A[blockSize-1]) */
+    /* Compute Sum of the input samples     
+     * and then store the result in a temporary variable, sum. */
+    sum += in;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Compute the square of sum */
+  squareOfSum = ((sum * sum) / (float32_t) blockSize);
+
+  /* Compute the variance */
+  *pResult = ((sumOfSquares - squareOfSum) / (float32_t) (blockSize - 1.0f));
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+}
+
+/**    
+ * @} end of variance group    
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_var_q15.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_var_q15.c
new file mode 100644
index 000000000..08dbc6587
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_var_q15.c
@@ -0,0 +1,180 @@
+/* ----------------------------------------------------------------------    
+* Copyright (C) 2010 ARM Limited. All rights reserved.    
+*    
+* $Date:        15. February 2012  
+* $Revision: 	V1.1.0  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:		arm_var_q15.c    
+*    
+* Description:	Variance of an array of Q15 type.    
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.1.0 2012/02/15 
+*    Updated with more optimizations, bug fixes and minor API changes.  
+*   
+* Version 1.0.10 2011/7/15  
+*    Big Endian support added and Merged M0 and M3/M4 Source code.   
+*    
+* Version 1.0.3 2010/11/29   
+*    Re-organized the CMSIS folders and updated documentation.    
+*     
+* Version 1.0.2 2010/11/11    
+*    Documentation updated.     
+*    
+* Version 1.0.1 2010/10/05     
+*    Production release and review comments incorporated.    
+*    
+* Version 1.0.0 2010/09/20     
+*    Production release and review comments incorporated.    
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**    
+ * @ingroup groupStats    
+ */
+
+/**    
+ * @addtogroup variance    
+ * @{    
+ */
+
+/**    
+ * @brief Variance of the elements of a Q15 vector.    
+ * @param[in]       *pSrc points to the input vector    
+ * @param[in]       blockSize length of the input vector    
+ * @param[out]      *pResult variance value returned here    
+ * @return none.    
+ *    
+ * @details    
+ * <b>Scaling and Overflow Behavior:</b>    
+ *    
+ * \par    
+ * The function is implemented using a 64-bit internal accumulator.    
+ * The input is represented in 1.15 format.   
+ * Intermediate multiplication yields a 2.30 format, and this    
+ * result is added without saturation to a 64-bit accumulator in 34.30 format.    
+ * With 33 guard bits in the accumulator, there is no risk of overflow, and the    
+ * full precision of the intermediate multiplication is preserved.    
+ * Finally, the 34.30 result is truncated to 34.15 format by discarding the lower     
+ * 15 bits, and then saturated to yield a result in 1.15 format.    
+ *    
+ */
+
+
+void arm_var_q15(
+  q15_t * pSrc,
+  uint32_t blockSize,
+  q31_t * pResult)
+{
+  q31_t sum = 0;                                 /* Accumulator */
+  q31_t meanOfSquares, squareOfMean;             /* Mean of square and square of mean */
+  q15_t mean;                                    /* mean */
+  uint32_t blkCnt;                               /* loop counter */
+  q15_t t;                                       /* Temporary variable */
+  q63_t sumOfSquares = 0;                        /* Accumulator */
+
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+  q31_t in;                                      /* Input variable */
+  q15_t in1;                                     /* Temporary variable */
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1])  */
+    /* Compute Sum of squares of the input samples    
+     * and then store the result in a temporary variable, sum. */
+    in = *__SIMD32(pSrc)++;
+    sum += ((in << 16) >> 16);
+    sum += (in >> 16);
+    sumOfSquares = __SMLALD(in, in, sumOfSquares);
+    in = *__SIMD32(pSrc)++;
+    sum += ((in << 16) >> 16);
+    sum += (in >> 16);
+    sumOfSquares = __SMLALD(in, in, sumOfSquares);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.    
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+  while(blkCnt > 0u)
+  {
+    /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+    /* Compute Sum of squares of the input samples    
+     * and then store the result in a temporary variable, sum. */
+    in1 = *pSrc++;
+    sum += in1;
+    sumOfSquares = __SMLALD(in1, in1, sumOfSquares);
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Compute Mean of squares of the input samples    
+   * and then store the result in a temporary variable, meanOfSquares. */
+  t = (q15_t) ((1.0f / (float32_t) (blockSize - 1u)) * 16384);
+  sumOfSquares = __SSAT((sumOfSquares >> 15u), 16u);
+
+  meanOfSquares = (q31_t) ((sumOfSquares * t) >> 14u);
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  q15_t in;                                      /* Temporary variable */
+  /* Loop over blockSize number of values */
+  blkCnt = blockSize;
+
+  while(blkCnt > 0u)
+  {
+    /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+    /* Compute Sum of squares of the input samples     
+     * and then store the result in a temporary variable, sumOfSquares. */
+    in = *pSrc++;
+    sumOfSquares += (in * in);
+
+    /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+    /* Compute sum of all input values and then store the result in a temporary variable, sum. */
+    sum += in;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Compute Mean of squares of the input samples     
+   * and then store the result in a temporary variable, meanOfSquares. */
+  t = (q15_t) ((1.0f / (float32_t) (blockSize - 1u)) * 16384);
+  sumOfSquares = __SSAT((sumOfSquares >> 15u), 16u);
+  meanOfSquares = (q31_t) ((sumOfSquares * t) >> 14u);
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  /* Compute mean of all input values */
+  t = (q15_t) ((1.0f / (float32_t) (blockSize * (blockSize - 1u))) * 32768);
+  mean = __SSAT(sum, 16u);
+
+  /* Compute square of mean */
+  squareOfMean = ((q31_t) mean * mean) >> 15;
+  squareOfMean = (q31_t) (((q63_t) squareOfMean * t) >> 15);
+
+  /* Compute variance and then store the result to the destination */
+  *pResult = (meanOfSquares - squareOfMean);
+
+}
+
+/**    
+ * @} end of variance group    
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_var_q31.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_var_q31.c
new file mode 100644
index 000000000..13d6c15cb
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_var_q31.c
@@ -0,0 +1,170 @@
+/* ----------------------------------------------------------------------    
+* Copyright (C) 2010 ARM Limited. All rights reserved.    
+*    
+* $Date:        15. February 2012  
+* $Revision: 	V1.1.0  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:		arm_var_q31.c    
+*    
+* Description:	Variance of an array of Q31 type.    
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Version 1.1.0 2012/02/15 
+*    Updated with more optimizations, bug fixes and minor API changes.  
+*   
+* Version 1.0.10 2011/7/15  
+*    Big Endian support added and Merged M0 and M3/M4 Source code.   
+*    
+* Version 1.0.3 2010/11/29   
+*    Re-organized the CMSIS folders and updated documentation.    
+*     
+* Version 1.0.2 2010/11/11    
+*    Documentation updated.     
+*    
+* Version 1.0.1 2010/10/05     
+*    Production release and review comments incorporated.    
+*    
+* Version 1.0.0 2010/09/20     
+*    Production release and review comments incorporated.    
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**        
+ * @ingroup groupStats        
+ */
+
+/**        
+ * @addtogroup variance        
+ * @{        
+ */
+
+/**        
+ * @brief Variance of the elements of a Q31 vector.        
+ * @param[in]       *pSrc points to the input vector        
+ * @param[in]       blockSize length of the input vector        
+ * @param[out]      *pResult variance value returned here        
+ * @return none.        
+ *        
+ * @details        
+ * <b>Scaling and Overflow Behavior:</b>        
+ *        
+ *\par        
+ * The function is implemented using an internal 64-bit accumulator.        
+ * The input is represented in 1.31 format, and intermediate multiplication        
+ * yields a 2.62 format.        
+ * The accumulator maintains full precision of the intermediate multiplication results,         
+ * but provides only a single guard bit.        
+ * There is no saturation on intermediate additions.        
+ * If the accumulator overflows it wraps around and distorts the result.        
+ * In order to avoid overflows completely the input signal must be scaled down by         
+ * log2(blockSize) bits, as a total of blockSize additions are performed internally.         
+ * Finally, the 2.62 accumulator is right shifted by 31 bits to yield a 1.31 format value.        
+ *        
+ */
+
+
+void arm_var_q31(
+  q31_t * pSrc,
+  uint32_t blockSize,
+  q63_t * pResult)
+{
+  q63_t sum = 0, sumSquare = 0;                  /* Accumulator */
+  q31_t meanOfSquares, squareOfMean;             /* square of mean and mean of square */
+  q31_t mean;                                    /* mean */
+  q31_t in;                                      /* input value */
+  q31_t t;                                       /* Temporary variable */
+  uint32_t blkCnt;                               /* loop counter */
+
+#ifndef ARM_MATH_CM0
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+  q63_t sumSquare1 = 0;                          /* Accumulator */
+  q31_t in1, in2, in3, in4;                      /* Temporary input variables */
+
+  /*loop Unrolling */
+  blkCnt = blockSize >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  while(blkCnt > 0u)
+  {
+    /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1])  */
+    /* Compute Sum of squares of the input samples        
+     * and then store the result in a temporary variable, sum. */
+    /* read input samples from source buffer */
+    in1 = pSrc[0];
+    in2 = pSrc[1];
+
+    /* calculate sum of inputs */
+    sum += in1;
+    /* calculate sum of squares */
+    sumSquare += ((q63_t) (in1) * (in1));
+    in3 = pSrc[2];
+    sum += in2;
+    sumSquare1 += ((q63_t) (in2) * (in2));
+    in4 = pSrc[3];
+    sum += in3;
+    sumSquare += ((q63_t) (in3) * (in3));
+    sum += in4;
+    sumSquare1 += ((q63_t) (in4) * (in4));
+
+    /* update input pointer to process next samples */
+    pSrc += 4u;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* add two accumulators */
+  sumSquare = sumSquare + sumSquare1;
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.    
+   ** No loop unrolling is used. */
+  blkCnt = blockSize % 0x4u;
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+  blkCnt = blockSize;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+  while(blkCnt > 0u)
+  {
+    /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+    /* Compute Sum of squares of the input samples        
+     * and then store the result in a temporary variable, sum. */
+    in = *pSrc++;
+    sumSquare += ((q63_t) (in) * (in));
+    sum += in;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  t = (q31_t) ((1.0f / (float32_t) (blockSize - 1u)) * 1073741824.0f);
+
+  /* Compute Mean of squares of the input samples        
+   * and then store the result in a temporary variable, meanOfSquares. */
+  sumSquare = (sumSquare >> 31);
+  meanOfSquares = (q31_t) ((sumSquare * t) >> 30);
+
+  /* Compute mean of all input values */
+  t = (q31_t) ((1.0f / (blockSize * (blockSize - 1u))) * 2147483648.0f);
+  mean = (q31_t) (sum);
+
+  /* Compute square of mean */
+  squareOfMean = (q31_t) (((q63_t) mean * mean) >> 31);
+  squareOfMean = (q31_t) (((q63_t) squareOfMean * t) >> 31);
+
+  /* Compute variance and then store the result to the destination */
+  *pResult = (q63_t) meanOfSquares - squareOfMean;
+
+}
+
+/**        
+ * @} end of variance group        
+ */
-- 
cgit v1.2.3