aboutsummaryrefslogtreecommitdiff
path: root/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions
diff options
context:
space:
mode:
authorLorenz Meier <lm@inf.ethz.ch>2013-04-28 09:54:11 +0200
committerLorenz Meier <lm@inf.ethz.ch>2013-04-28 09:54:11 +0200
commit13fc6703862862f4263d8d5d085b7a16b87190e1 (patch)
tree47f3a17cb6f38b1aafe22e1cdef085cd73cd3a1d /src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions
parentf57439b90e23de260259dec051d3e2ead2d61c8c (diff)
downloadpx4-firmware-13fc6703862862f4263d8d5d085b7a16b87190e1.tar.gz
px4-firmware-13fc6703862862f4263d8d5d085b7a16b87190e1.tar.bz2
px4-firmware-13fc6703862862f4263d8d5d085b7a16b87190e1.zip
Moved last libs, drivers and headers, cleaned up IO build
Diffstat (limited to 'src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions')
-rw-r--r--src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_f32.c178
-rw-r--r--src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_q15.c168
-rw-r--r--src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_q31.c169
-rw-r--r--src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_q7.c169
-rw-r--r--src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_f32.c131
-rw-r--r--src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_q15.c125
-rw-r--r--src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_q31.c128
-rw-r--r--src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_q7.c125
-rw-r--r--src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_f32.c175
-rw-r--r--src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_q15.c169
-rw-r--r--src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_q31.c168
-rw-r--r--src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_q7.c170
-rw-r--r--src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_f32.c138
-rw-r--r--src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_q15.c144
-rw-r--r--src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_q31.c135
-rw-r--r--src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_q7.c133
-rw-r--r--src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_rms_f32.c133
-rw-r--r--src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_rms_q15.c153
-rw-r--r--src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_rms_q31.c146
-rw-r--r--src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_std_f32.c188
-rw-r--r--src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_std_q15.c197
-rw-r--r--src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_std_q31.c184
-rw-r--r--src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_var_f32.c184
-rw-r--r--src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_var_q15.c180
-rw-r--r--src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_var_q31.c170
25 files changed, 3960 insertions, 0 deletions
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_f32.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_f32.c
new file mode 100644
index 000000000..afd964885
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_f32.c
@@ -0,0 +1,178 @@
+/* ----------------------------------------------------------------------
+* Copyright (C) 2010 ARM Limited. All rights reserved.
+*
+* $Date: 15. February 2012
+* $Revision: V1.1.0
+*
+* Project: CMSIS DSP Library
+* Title: arm_max_f32.c
+*
+* Description: Maximum value of a floating-point vector.
+*
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*
+* Version 1.1.0 2012/02/15
+* Updated with more optimizations, bug fixes and minor API changes.
+*
+* Version 1.0.10 2011/7/15
+* Big Endian support added and Merged M0 and M3/M4 Source code.
+*
+* Version 1.0.3 2010/11/29
+* Re-organized the CMSIS folders and updated documentation.
+*
+* Version 1.0.2 2010/11/11
+* Documentation updated.
+*
+* Version 1.0.1 2010/10/05
+* Production release and review comments incorporated.
+*
+* Version 1.0.0 2010/09/20
+* Production release and review comments incorporated.
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupStats
+ */
+
+/**
+ * @defgroup Max Maximum
+ *
+ * Computes the maximum value of an array of data.
+ * The function returns both the maximum value and its position within the array.
+ * There are separate functions for floating-point, Q31, Q15, and Q7 data types.
+ */
+
+/**
+ * @addtogroup Max
+ * @{
+ */
+
+
+/**
+ * @brief Maximum value of a floating-point vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult maximum value returned here
+ * @param[out] *pIndex index of maximum value returned here
+ * @return none.
+ */
+
+void arm_max_f32(
+ float32_t * pSrc,
+ uint32_t blockSize,
+ float32_t * pResult,
+ uint32_t * pIndex)
+{
+#ifndef ARM_MATH_CM0
+
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+ float32_t maxVal1, maxVal2, out; /* Temporary variables to store the output value. */
+ uint32_t blkCnt, outIndex, count; /* loop counter */
+
+ /* Initialise the count value. */
+ count = 0u;
+ /* Initialise the index value to zero. */
+ outIndex = 0u;
+ /* Load first input value that act as reference value for comparision */
+ out = *pSrc++;
+
+ /* Loop unrolling */
+ blkCnt = (blockSize - 1u) >> 2u;
+
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+ while(blkCnt > 0u)
+ {
+ /* Initialize maxVal to the next consecutive values one by one */
+ maxVal1 = *pSrc++;
+
+ maxVal2 = *pSrc++;
+
+ /* compare for the maximum value */
+ if(out < maxVal1)
+ {
+ /* Update the maximum value and its index */
+ out = maxVal1;
+ outIndex = count + 1u;
+ }
+
+ maxVal1 = *pSrc++;
+
+ /* compare for the maximum value */
+ if(out < maxVal2)
+ {
+ /* Update the maximum value and its index */
+ out = maxVal2;
+ outIndex = count + 2u;
+ }
+
+ maxVal2 = *pSrc++;
+
+ /* compare for the maximum value */
+ if(out < maxVal1)
+ {
+ /* Update the maximum value and its index */
+ out = maxVal1;
+ outIndex = count + 3u;
+ }
+
+ /* compare for the maximum value */
+ if(out < maxVal2)
+ {
+ /* Update the maximum value and its index */
+ out = maxVal2;
+ outIndex = count + 4u;
+ }
+
+ count += 4u;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* if (blockSize - 1u) is not multiple of 4 */
+ blkCnt = (blockSize - 1u) % 4u;
+
+#else
+
+ /* Run the below code for Cortex-M0 */
+ float32_t maxVal1, out; /* Temporary variables to store the output value. */
+ uint32_t blkCnt, outIndex; /* loop counter */
+
+ /* Initialise the index value to zero. */
+ outIndex = 0u;
+ /* Load first input value that act as reference value for comparision */
+ out = *pSrc++;
+
+ blkCnt = (blockSize - 1u);
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+ while(blkCnt > 0u)
+ {
+ /* Initialize maxVal to the next consecutive values one by one */
+ maxVal1 = *pSrc++;
+
+ /* compare for the maximum value */
+ if(out < maxVal1)
+ {
+ /* Update the maximum value and it's index */
+ out = maxVal1;
+ outIndex = blockSize - blkCnt;
+ }
+
+
+ /* Decrement the loop counter */
+ blkCnt--;
+
+ }
+
+ /* Store the maximum value and it's index into destination pointers */
+ *pResult = out;
+ *pIndex = outIndex;
+}
+
+/**
+ * @} end of Max group
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_q15.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_q15.c
new file mode 100644
index 000000000..939b5236f
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_q15.c
@@ -0,0 +1,168 @@
+/* ----------------------------------------------------------------------
+* Copyright (C) 2010 ARM Limited. All rights reserved.
+*
+* $Date: 15. February 2012
+* $Revision: V1.1.0
+*
+* Project: CMSIS DSP Library
+* Title: arm_max_q15.c
+*
+* Description: Maximum value of a Q15 vector.
+*
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*
+* Version 1.1.0 2012/02/15
+* Updated with more optimizations, bug fixes and minor API changes.
+*
+* Version 1.0.10 2011/7/15
+* Big Endian support added and Merged M0 and M3/M4 Source code.
+*
+* Version 1.0.3 2010/11/29
+* Re-organized the CMSIS folders and updated documentation.
+*
+* Version 1.0.2 2010/11/11
+* Documentation updated.
+*
+* Version 1.0.1 2010/10/05
+* Production release and review comments incorporated.
+*
+* Version 1.0.0 2010/09/20
+* Production release and review comments incorporated.
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupStats
+ */
+
+/**
+ * @addtogroup Max
+ * @{
+ */
+
+
+/**
+ * @brief Maximum value of a Q15 vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult maximum value returned here
+ * @param[out] *pIndex index of maximum value returned here
+ * @return none.
+ */
+
+void arm_max_q15(
+ q15_t * pSrc,
+ uint32_t blockSize,
+ q15_t * pResult,
+ uint32_t * pIndex)
+{
+#ifndef ARM_MATH_CM0
+
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+ q15_t maxVal1, maxVal2, out; /* Temporary variables to store the output value. */
+ uint32_t blkCnt, outIndex, count; /* loop counter */
+
+ /* Initialise the count value. */
+ count = 0u;
+ /* Initialise the index value to zero. */
+ outIndex = 0u;
+ /* Load first input value that act as reference value for comparision */
+ out = *pSrc++;
+
+ /* Loop unrolling */
+ blkCnt = (blockSize - 1u) >> 2u;
+
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+ while(blkCnt > 0u)
+ {
+ /* Initialize maxVal to the next consecutive values one by one */
+ maxVal1 = *pSrc++;
+
+ maxVal2 = *pSrc++;
+
+ /* compare for the maximum value */
+ if(out < maxVal1)
+ {
+ /* Update the maximum value and its index */
+ out = maxVal1;
+ outIndex = count + 1u;
+ }
+
+ maxVal1 = *pSrc++;
+
+ /* compare for the maximum value */
+ if(out < maxVal2)
+ {
+ /* Update the maximum value and its index */
+ out = maxVal2;
+ outIndex = count + 2u;
+ }
+
+ maxVal2 = *pSrc++;
+
+ /* compare for the maximum value */
+ if(out < maxVal1)
+ {
+ /* Update the maximum value and its index */
+ out = maxVal1;
+ outIndex = count + 3u;
+ }
+
+ /* compare for the maximum value */
+ if(out < maxVal2)
+ {
+ /* Update the maximum value and its index */
+ out = maxVal2;
+ outIndex = count + 4u;
+ }
+
+ count += 4u;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* if (blockSize - 1u) is not multiple of 4 */
+ blkCnt = (blockSize - 1u) % 4u;
+
+#else
+
+ /* Run the below code for Cortex-M0 */
+ q15_t maxVal1, out; /* Temporary variables to store the output value. */
+ uint32_t blkCnt, outIndex; /* loop counter */
+
+ blkCnt = (blockSize - 1u);
+
+ /* Initialise the index value to zero. */
+ outIndex = 0u;
+ /* Load first input value that act as reference value for comparision */
+ out = *pSrc++;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+ while(blkCnt > 0u)
+ {
+ /* Initialize maxVal to the next consecutive values one by one */
+ maxVal1 = *pSrc++;
+
+ /* compare for the maximum value */
+ if(out < maxVal1)
+ {
+ /* Update the maximum value and it's index */
+ out = maxVal1;
+ outIndex = blockSize - blkCnt;
+ }
+ /* Decrement the loop counter */
+ blkCnt--;
+
+ }
+
+ /* Store the maximum value and its index into destination pointers */
+ *pResult = out;
+ *pIndex = outIndex;
+}
+
+/**
+ * @} end of Max group
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_q31.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_q31.c
new file mode 100644
index 000000000..18e757297
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_q31.c
@@ -0,0 +1,169 @@
+/* ----------------------------------------------------------------------
+* Copyright (C) 2010 ARM Limited. All rights reserved.
+*
+* $Date: 15. February 2012
+* $Revision: V1.1.0
+*
+* Project: CMSIS DSP Library
+* Title: arm_max_q31.c
+*
+* Description: Maximum value of a Q31 vector.
+*
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*
+* Version 1.1.0 2012/02/15
+* Updated with more optimizations, bug fixes and minor API changes.
+*
+* Version 1.0.10 2011/7/15
+* Big Endian support added and Merged M0 and M3/M4 Source code.
+*
+* Version 1.0.3 2010/11/29
+* Re-organized the CMSIS folders and updated documentation.
+*
+* Version 1.0.2 2010/11/11
+* Documentation updated.
+*
+* Version 1.0.1 2010/10/05
+* Production release and review comments incorporated.
+*
+* Version 1.0.0 2010/09/20
+* Production release and review comments incorporated.
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupStats
+ */
+
+/**
+ * @addtogroup Max
+ * @{
+ */
+
+
+/**
+ * @brief Maximum value of a Q31 vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult maximum value returned here
+ * @param[out] *pIndex index of maximum value returned here
+ * @return none.
+ */
+
+void arm_max_q31(
+ q31_t * pSrc,
+ uint32_t blockSize,
+ q31_t * pResult,
+ uint32_t * pIndex)
+{
+#ifndef ARM_MATH_CM0
+
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+ q31_t maxVal1, maxVal2, out; /* Temporary variables to store the output value. */
+ uint32_t blkCnt, outIndex, count; /* loop counter */
+
+ /* Initialise the count value. */
+ count = 0u;
+ /* Initialise the index value to zero. */
+ outIndex = 0u;
+ /* Load first input value that act as reference value for comparision */
+ out = *pSrc++;
+
+ /* Loop unrolling */
+ blkCnt = (blockSize - 1u) >> 2u;
+
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+ while(blkCnt > 0u)
+ {
+ /* Initialize maxVal to the next consecutive values one by one */
+ maxVal1 = *pSrc++;
+
+ maxVal2 = *pSrc++;
+
+ /* compare for the maximum value */
+ if(out < maxVal1)
+ {
+ /* Update the maximum value and its index */
+ out = maxVal1;
+ outIndex = count + 1u;
+ }
+
+ maxVal1 = *pSrc++;
+
+ /* compare for the maximum value */
+ if(out < maxVal2)
+ {
+ /* Update the maximum value and its index */
+ out = maxVal2;
+ outIndex = count + 2u;
+ }
+
+ maxVal2 = *pSrc++;
+
+ /* compare for the maximum value */
+ if(out < maxVal1)
+ {
+ /* Update the maximum value and its index */
+ out = maxVal1;
+ outIndex = count + 3u;
+ }
+
+ /* compare for the maximum value */
+ if(out < maxVal2)
+ {
+ /* Update the maximum value and its index */
+ out = maxVal2;
+ outIndex = count + 4u;
+ }
+
+ count += 4u;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* if (blockSize - 1u) is not multiple of 4 */
+ blkCnt = (blockSize - 1u) % 4u;
+
+#else
+
+ /* Run the below code for Cortex-M0 */
+ q31_t maxVal1, out; /* Temporary variables to store the output value. */
+ uint32_t blkCnt, outIndex; /* loop counter */
+
+ /* Initialise the index value to zero. */
+ outIndex = 0u;
+ /* Load first input value that act as reference value for comparision */
+ out = *pSrc++;
+
+ blkCnt = (blockSize - 1u);
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+ while(blkCnt > 0u)
+ {
+ /* Initialize maxVal to the next consecutive values one by one */
+ maxVal1 = *pSrc++;
+
+ /* compare for the maximum value */
+ if(out < maxVal1)
+ {
+ /* Update the maximum value and it's index */
+ out = maxVal1;
+ outIndex = blockSize - blkCnt;
+ }
+
+ /* Decrement the loop counter */
+ blkCnt--;
+
+ }
+
+ /* Store the maximum value and its index into destination pointers */
+ *pResult = out;
+ *pIndex = outIndex;
+}
+
+/**
+ * @} end of Max group
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_q7.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_q7.c
new file mode 100644
index 000000000..110a8fe5f
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_q7.c
@@ -0,0 +1,169 @@
+/* ----------------------------------------------------------------------
+* Copyright (C) 2010 ARM Limited. All rights reserved.
+*
+* $Date: 15. February 2012
+* $Revision: V1.1.0
+*
+* Project: CMSIS DSP Library
+* Title: arm_max_q7.c
+*
+* Description: Maximum value of a Q7 vector.
+*
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*
+* Version 1.1.0 2012/02/15
+* Updated with more optimizations, bug fixes and minor API changes.
+*
+* Version 1.0.10 2011/7/15
+* Big Endian support added and Merged M0 and M3/M4 Source code.
+*
+* Version 1.0.3 2010/11/29
+* Re-organized the CMSIS folders and updated documentation.
+*
+* Version 1.0.2 2010/11/11
+* Documentation updated.
+*
+* Version 1.0.1 2010/10/05
+* Production release and review comments incorporated.
+*
+* Version 1.0.0 2010/09/20
+* Production release and review comments incorporated.
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupStats
+ */
+
+/**
+ * @addtogroup Max
+ * @{
+ */
+
+
+/**
+ * @brief Maximum value of a Q7 vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult maximum value returned here
+ * @param[out] *pIndex index of maximum value returned here
+ * @return none.
+ */
+
+void arm_max_q7(
+ q7_t * pSrc,
+ uint32_t blockSize,
+ q7_t * pResult,
+ uint32_t * pIndex)
+{
+#ifndef ARM_MATH_CM0
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+ q7_t maxVal1, maxVal2, out; /* Temporary variables to store the output value. */
+ uint32_t blkCnt, outIndex, count; /* loop counter */
+
+ /* Initialise the count value. */
+ count = 0u;
+ /* Initialise the index value to zero. */
+ outIndex = 0u;
+ /* Load first input value that act as reference value for comparision */
+ out = *pSrc++;
+
+ /* Loop unrolling */
+ blkCnt = (blockSize - 1u) >> 2u;
+
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+ while(blkCnt > 0u)
+ {
+ /* Initialize maxVal to the next consecutive values one by one */
+ maxVal1 = *pSrc++;
+
+ maxVal2 = *pSrc++;
+
+ /* compare for the maximum value */
+ if(out < maxVal1)
+ {
+ /* Update the maximum value and its index */
+ out = maxVal1;
+ outIndex = count + 1u;
+ }
+
+ maxVal1 = *pSrc++;
+
+ /* compare for the maximum value */
+ if(out < maxVal2)
+ {
+ /* Update the maximum value and its index */
+ out = maxVal2;
+ outIndex = count + 2u;
+ }
+
+ maxVal2 = *pSrc++;
+
+ /* compare for the maximum value */
+ if(out < maxVal1)
+ {
+ /* Update the maximum value and its index */
+ out = maxVal1;
+ outIndex = count + 3u;
+ }
+
+ /* compare for the maximum value */
+ if(out < maxVal2)
+ {
+ /* Update the maximum value and its index */
+ out = maxVal2;
+ outIndex = count + 4u;
+ }
+
+ count += 4u;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* if (blockSize - 1u) is not multiple of 4 */
+ blkCnt = (blockSize - 1u) % 4u;
+
+#else
+
+ /* Run the below code for Cortex-M0 */
+ q7_t maxVal1, out; /* Temporary variables to store the output value. */
+ uint32_t blkCnt, outIndex; /* loop counter */
+
+ /* Initialise the index value to zero. */
+ outIndex = 0u;
+ /* Load first input value that act as reference value for comparision */
+ out = *pSrc++;
+
+ blkCnt = (blockSize - 1u);
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+ while(blkCnt > 0u)
+ {
+ /* Initialize maxVal to the next consecutive values one by one */
+ maxVal1 = *pSrc++;
+
+ /* compare for the maximum value */
+ if(out < maxVal1)
+ {
+ /* Update the maximum value and it's index */
+ out = maxVal1;
+ outIndex = blockSize - blkCnt;
+ }
+ /* Decrement the loop counter */
+ blkCnt--;
+
+ }
+
+ /* Store the maximum value and its index into destination pointers */
+ *pResult = out;
+ *pIndex = outIndex;
+
+}
+
+/**
+ * @} end of Max group
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_f32.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_f32.c
new file mode 100644
index 000000000..7823d746c
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_f32.c
@@ -0,0 +1,131 @@
+/* ----------------------------------------------------------------------
+* Copyright (C) 2010 ARM Limited. All rights reserved.
+*
+* $Date: 15. February 2012
+* $Revision: V1.1.0
+*
+* Project: CMSIS DSP Library
+* Title: arm_mean_f32.c
+*
+* Description: Mean value of a floating-point vector.
+*
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*
+* Version 1.1.0 2012/02/15
+* Updated with more optimizations, bug fixes and minor API changes.
+*
+* Version 1.0.10 2011/7/15
+* Big Endian support added and Merged M0 and M3/M4 Source code.
+*
+* Version 1.0.3 2010/11/29
+* Re-organized the CMSIS folders and updated documentation.
+*
+* Version 1.0.2 2010/11/11
+* Documentation updated.
+*
+* Version 1.0.1 2010/10/05
+* Production release and review comments incorporated.
+*
+* Version 1.0.0 2010/09/20
+* Production release and review comments incorporated.
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupStats
+ */
+
+/**
+ * @defgroup mean Mean
+ *
+ * Calculates the mean of the input vector. Mean is defined as the average of the elements in the vector.
+ * The underlying algorithm is used:
+ *
+ * <pre>
+ * Result = (pSrc[0] + pSrc[1] + pSrc[2] + ... + pSrc[blockSize-1]) / blockSize;
+ * </pre>
+ *
+ * There are separate functions for floating-point, Q31, Q15, and Q7 data types.
+ */
+
+/**
+ * @addtogroup mean
+ * @{
+ */
+
+
+/**
+ * @brief Mean value of a floating-point vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult mean value returned here
+ * @return none.
+ */
+
+
+void arm_mean_f32(
+ float32_t * pSrc,
+ uint32_t blockSize,
+ float32_t * pResult)
+{
+ float32_t sum = 0.0f; /* Temporary result storage */
+ uint32_t blkCnt; /* loop counter */
+
+#ifndef ARM_MATH_CM0
+
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+ float32_t in1, in2, in3, in4;
+
+ /*loop Unrolling */
+ blkCnt = blockSize >> 2u;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** a second loop below computes the remaining 1 to 3 samples. */
+ while(blkCnt > 0u)
+ {
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+ in1 = *pSrc++;
+ in2 = *pSrc++;
+ in3 = *pSrc++;
+ in4 = *pSrc++;
+
+ sum += in1;
+ sum += in2;
+ sum += in3;
+ sum += in4;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+ ** No loop unrolling is used. */
+ blkCnt = blockSize % 0x4u;
+
+#else
+
+ /* Run the below code for Cortex-M0 */
+
+ /* Loop over blockSize number of values */
+ blkCnt = blockSize;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+ while(blkCnt > 0u)
+ {
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+ sum += *pSrc++;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */
+ /* Store the result to the destination */
+ *pResult = sum / (float32_t) blockSize;
+}
+
+/**
+ * @} end of mean group
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_q15.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_q15.c
new file mode 100644
index 000000000..49ec1e62e
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_q15.c
@@ -0,0 +1,125 @@
+/* ----------------------------------------------------------------------
+* Copyright (C) 2010 ARM Limited. All rights reserved.
+*
+* $Date: 15. February 2012
+* $Revision: V1.1.0
+*
+* Project: CMSIS DSP Library
+* Title: arm_mean_q15.c
+*
+* Description: Mean value of a Q15 vector.
+*
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*
+* Version 1.1.0 2012/02/15
+* Updated with more optimizations, bug fixes and minor API changes.
+*
+* Version 1.0.10 2011/7/15
+* Big Endian support added and Merged M0 and M3/M4 Source code.
+*
+* Version 1.0.3 2010/11/29
+* Re-organized the CMSIS folders and updated documentation.
+*
+* Version 1.0.2 2010/11/11
+* Documentation updated.
+*
+* Version 1.0.1 2010/10/05
+* Production release and review comments incorporated.
+*
+* Version 1.0.0 2010/09/20
+* Production release and review comments incorporated.
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupStats
+ */
+
+/**
+ * @addtogroup mean
+ * @{
+ */
+
+/**
+ * @brief Mean value of a Q15 vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult mean value returned here
+ * @return none.
+ *
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ * \par
+ * The function is implemented using a 32-bit internal accumulator.
+ * The input is represented in 1.15 format and is accumulated in a 32-bit
+ * accumulator in 17.15 format.
+ * There is no risk of internal overflow with this approach, and the
+ * full precision of intermediate result is preserved.
+ * Finally, the accumulator is saturated and truncated to yield a result of 1.15 format.
+ *
+ */
+
+
+void arm_mean_q15(
+ q15_t * pSrc,
+ uint32_t blockSize,
+ q15_t * pResult)
+{
+ q31_t sum = 0; /* Temporary result storage */
+ uint32_t blkCnt; /* loop counter */
+
+#ifndef ARM_MATH_CM0
+
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+ q31_t in;
+
+ /*loop Unrolling */
+ blkCnt = blockSize >> 2u;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** a second loop below computes the remaining 1 to 3 samples. */
+ while(blkCnt > 0u)
+ {
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+ in = *__SIMD32(pSrc)++;
+ sum += ((in << 16) >> 16);
+ sum += (in >> 16);
+ in = *__SIMD32(pSrc)++;
+ sum += ((in << 16) >> 16);
+ sum += (in >> 16);
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+ ** No loop unrolling is used. */
+ blkCnt = blockSize % 0x4u;
+
+#else
+
+ /* Run the below code for Cortex-M0 */
+
+ /* Loop over blockSize number of values */
+ blkCnt = blockSize;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+ while(blkCnt > 0u)
+ {
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+ sum += *pSrc++;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */
+ /* Store the result to the destination */
+ *pResult = (q15_t) (sum / blockSize);
+}
+
+/**
+ * @} end of mean group
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_q31.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_q31.c
new file mode 100644
index 000000000..88a5de299
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_q31.c
@@ -0,0 +1,128 @@
+/* ----------------------------------------------------------------------
+* Copyright (C) 2010 ARM Limited. All rights reserved.
+*
+* $Date: 15. February 2012
+* $Revision: V1.1.0
+*
+* Project: CMSIS DSP Library
+* Title: arm_mean_q31.c
+*
+* Description: Mean value of a Q31 vector.
+*
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*
+* Version 1.1.0 2012/02/15
+* Updated with more optimizations, bug fixes and minor API changes.
+*
+* Version 1.0.10 2011/7/15
+* Big Endian support added and Merged M0 and M3/M4 Source code.
+*
+* Version 1.0.3 2010/11/29
+* Re-organized the CMSIS folders and updated documentation.
+*
+* Version 1.0.2 2010/11/11
+* Documentation updated.
+*
+* Version 1.0.1 2010/10/05
+* Production release and review comments incorporated.
+*
+* Version 1.0.0 2010/09/20
+* Production release and review comments incorporated.
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupStats
+ */
+
+/**
+ * @addtogroup mean
+ * @{
+ */
+
+/**
+ * @brief Mean value of a Q31 vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult mean value returned here
+ * @return none.
+ *
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ *\par
+ * The function is implemented using a 64-bit internal accumulator.
+ * The input is represented in 1.31 format and is accumulated in a 64-bit
+ * accumulator in 33.31 format.
+ * There is no risk of internal overflow with this approach, and the
+ * full precision of intermediate result is preserved.
+ * Finally, the accumulator is truncated to yield a result of 1.31 format.
+ *
+ */
+
+
+void arm_mean_q31(
+ q31_t * pSrc,
+ uint32_t blockSize,
+ q31_t * pResult)
+{
+ q63_t sum = 0; /* Temporary result storage */
+ uint32_t blkCnt; /* loop counter */
+
+#ifndef ARM_MATH_CM0
+
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+ q31_t in1, in2, in3, in4;
+
+ /*loop Unrolling */
+ blkCnt = blockSize >> 2u;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** a second loop below computes the remaining 1 to 3 samples. */
+ while(blkCnt > 0u)
+ {
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+ in1 = *pSrc++;
+ in2 = *pSrc++;
+ in3 = *pSrc++;
+ in4 = *pSrc++;
+
+ sum += in1;
+ sum += in2;
+ sum += in3;
+ sum += in4;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+ ** No loop unrolling is used. */
+ blkCnt = blockSize % 0x4u;
+
+#else
+
+ /* Run the below code for Cortex-M0 */
+
+ /* Loop over blockSize number of values */
+ blkCnt = blockSize;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+ while(blkCnt > 0u)
+ {
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+ sum += *pSrc++;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */
+ /* Store the result to the destination */
+ *pResult = (q31_t) (sum / (int32_t) blockSize);
+}
+
+/**
+ * @} end of mean group
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_q7.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_q7.c
new file mode 100644
index 000000000..a34c011bc
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_q7.c
@@ -0,0 +1,125 @@
+/* ----------------------------------------------------------------------
+* Copyright (C) 2010 ARM Limited. All rights reserved.
+*
+* $Date: 15. February 2012
+* $Revision: V1.1.0
+*
+* Project: CMSIS DSP Library
+* Title: arm_mean_q7.c
+*
+* Description: Mean value of a Q7 vector.
+*
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*
+* Version 1.1.0 2012/02/15
+* Updated with more optimizations, bug fixes and minor API changes.
+*
+* Version 1.0.10 2011/7/15
+* Big Endian support added and Merged M0 and M3/M4 Source code.
+*
+* Version 1.0.3 2010/11/29
+* Re-organized the CMSIS folders and updated documentation.
+*
+* Version 1.0.2 2010/11/11
+* Documentation updated.
+*
+* Version 1.0.1 2010/10/05
+* Production release and review comments incorporated.
+*
+* Version 1.0.0 2010/09/20
+* Production release and review comments incorporated.
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupStats
+ */
+
+/**
+ * @addtogroup mean
+ * @{
+ */
+
+/**
+ * @brief Mean value of a Q7 vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult mean value returned here
+ * @return none.
+ *
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ * \par
+ * The function is implemented using a 32-bit internal accumulator.
+ * The input is represented in 1.7 format and is accumulated in a 32-bit
+ * accumulator in 25.7 format.
+ * There is no risk of internal overflow with this approach, and the
+ * full precision of intermediate result is preserved.
+ * Finally, the accumulator is truncated to yield a result of 1.7 format.
+ *
+ */
+
+
+void arm_mean_q7(
+ q7_t * pSrc,
+ uint32_t blockSize,
+ q7_t * pResult)
+{
+ q31_t sum = 0; /* Temporary result storage */
+ uint32_t blkCnt; /* loop counter */
+
+#ifndef ARM_MATH_CM0
+
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+ q31_t in;
+
+ /*loop Unrolling */
+ blkCnt = blockSize >> 2u;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** a second loop below computes the remaining 1 to 3 samples. */
+ while(blkCnt > 0u)
+ {
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+ in = *__SIMD32(pSrc)++;
+
+ sum += ((in << 24) >> 24);
+ sum += ((in << 16) >> 24);
+ sum += ((in << 8) >> 24);
+ sum += (in >> 24);
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+ ** No loop unrolling is used. */
+ blkCnt = blockSize % 0x4u;
+
+#else
+
+ /* Run the below code for Cortex-M0 */
+
+ /* Loop over blockSize number of values */
+ blkCnt = blockSize;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+ while(blkCnt > 0u)
+ {
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+ sum += *pSrc++;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */
+ /* Store the result to the destination */
+ *pResult = (q7_t) (sum / (int32_t) blockSize);
+}
+
+/**
+ * @} end of mean group
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_f32.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_f32.c
new file mode 100644
index 000000000..dc2fdf624
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_f32.c
@@ -0,0 +1,175 @@
+/* ----------------------------------------------------------------------
+* Copyright (C) 2010 ARM Limited. All rights reserved.
+*
+* $Date: 15. February 2012
+* $Revision: V1.1.0
+*
+* Project: CMSIS DSP Library
+* Title: arm_min_f32.c
+*
+* Description: Minimum value of a floating-point vector.
+*
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*
+* Version 1.1.0 2012/02/15
+* Updated with more optimizations, bug fixes and minor API changes.
+*
+* Version 1.0.10 2011/7/15
+* Big Endian support added and Merged M0 and M3/M4 Source code.
+*
+* Version 1.0.3 2010/11/29
+* Re-organized the CMSIS folders and updated documentation.
+*
+* Version 1.0.2 2010/11/11
+* Documentation updated.
+*
+* Version 1.0.1 2010/10/05
+* Production release and review comments incorporated.
+*
+* Version 1.0.0 2010/09/20
+* Production release and review comments incorporated.
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupStats
+ */
+
+/**
+ * @defgroup Min Minimum
+ *
+ * Computes the minimum value of an array of data.
+ * The function returns both the minimum value and its position within the array.
+ * There are separate functions for floating-point, Q31, Q15, and Q7 data types.
+ */
+
+/**
+ * @addtogroup Min
+ * @{
+ */
+
+
+/**
+ * @brief Minimum value of a floating-point vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult minimum value returned here
+ * @param[out] *pIndex index of minimum value returned here
+ * @return none.
+ *
+ */
+
+void arm_min_f32(
+ float32_t * pSrc,
+ uint32_t blockSize,
+ float32_t * pResult,
+ uint32_t * pIndex)
+{
+#ifndef ARM_MATH_CM0
+
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+ float32_t minVal1, minVal2, out; /* Temporary variables to store the output value. */
+ uint32_t blkCnt, outIndex, count; /* loop counter */
+
+ /* Initialise the count value. */
+ count = 0u;
+ /* Initialise the index value to zero. */
+ outIndex = 0u;
+ /* Load first input value that act as reference value for comparision */
+ out = *pSrc++;
+
+ /* Loop unrolling */
+ blkCnt = (blockSize - 1u) >> 2u;
+
+ while(blkCnt > 0)
+ {
+ /* Initialize minVal to the next consecutive values one by one */
+ minVal1 = *pSrc++;
+ minVal2 = *pSrc++;
+
+ /* compare for the minimum value */
+ if(out > minVal1)
+ {
+ /* Update the minimum value and its index */
+ out = minVal1;
+ outIndex = count + 1u;
+ }
+
+ minVal1 = *pSrc++;
+
+ /* compare for the minimum value */
+ if(out > minVal2)
+ {
+ /* Update the minimum value and its index */
+ out = minVal2;
+ outIndex = count + 2u;
+ }
+
+ minVal2 = *pSrc++;
+
+ /* compare for the minimum value */
+ if(out > minVal1)
+ {
+ /* Update the minimum value and its index */
+ out = minVal1;
+ outIndex = count + 3u;
+ }
+
+ /* compare for the minimum value */
+ if(out > minVal2)
+ {
+ /* Update the minimum value and its index */
+ out = minVal2;
+ outIndex = count + 4u;
+ }
+
+ count += 4u;
+
+ blkCnt--;
+ }
+
+ /* if (blockSize - 1u ) is not multiple of 4 */
+ blkCnt = (blockSize - 1u) % 4u;
+
+#else
+
+ /* Run the below code for Cortex-M0 */
+ float32_t minVal1, out; /* Temporary variables to store the output value. */
+ uint32_t blkCnt, outIndex; /* loop counter */
+
+ /* Initialise the index value to zero. */
+ outIndex = 0u;
+ /* Load first input value that act as reference value for comparision */
+ out = *pSrc++;
+
+ blkCnt = (blockSize - 1u);
+
+#endif // #ifndef ARM_MATH_CM0
+
+ while(blkCnt > 0)
+ {
+ /* Initialize minVal to the next consecutive values one by one */
+ minVal1 = *pSrc++;
+
+ /* compare for the minimum value */
+ if(out > minVal1)
+ {
+ /* Update the minimum value and it's index */
+ out = minVal1;
+ outIndex = blockSize - blkCnt;
+ }
+
+ blkCnt--;
+
+ }
+
+ /* Store the minimum value and it's index into destination pointers */
+ *pResult = out;
+ *pIndex = outIndex;
+}
+
+/**
+ * @} end of Min group
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_q15.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_q15.c
new file mode 100644
index 000000000..35b67bb1e
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_q15.c
@@ -0,0 +1,169 @@
+/* ----------------------------------------------------------------------
+* Copyright (C) 2010 ARM Limited. All rights reserved.
+*
+* $Date: 15. February 2012
+* $Revision: V1.1.0
+*
+* Project: CMSIS DSP Library
+* Title: arm_min_q15.c
+*
+* Description: Minimum value of a Q15 vector.
+*
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*
+* Version 1.1.0 2012/02/15
+* Updated with more optimizations, bug fixes and minor API changes.
+*
+* Version 1.0.10 2011/7/15
+* Big Endian support added and Merged M0 and M3/M4 Source code.
+*
+* Version 1.0.3 2010/11/29
+* Re-organized the CMSIS folders and updated documentation.
+*
+* Version 1.0.2 2010/11/11
+* Documentation updated.
+*
+* Version 1.0.1 2010/10/05
+* Production release and review comments incorporated.
+*
+* Version 1.0.0 2010/09/20
+* Production release and review comments incorporated.
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupStats
+ */
+
+
+/**
+ * @addtogroup Min
+ * @{
+ */
+
+
+/**
+ * @brief Minimum value of a Q15 vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult minimum value returned here
+ * @param[out] *pIndex index of minimum value returned here
+ * @return none.
+ *
+ */
+
+void arm_min_q15(
+ q15_t * pSrc,
+ uint32_t blockSize,
+ q15_t * pResult,
+ uint32_t * pIndex)
+{
+#ifndef ARM_MATH_CM0
+
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+ q15_t minVal1, minVal2, out; /* Temporary variables to store the output value. */
+ uint32_t blkCnt, outIndex, count; /* loop counter */
+
+ /* Initialise the count value. */
+ count = 0u;
+ /* Initialise the index value to zero. */
+ outIndex = 0u;
+ /* Load first input value that act as reference value for comparision */
+ out = *pSrc++;
+
+ /* Loop unrolling */
+ blkCnt = (blockSize - 1u) >> 2u;
+
+ while(blkCnt > 0)
+ {
+ /* Initialize minVal to the next consecutive values one by one */
+ minVal1 = *pSrc++;
+ minVal2 = *pSrc++;
+
+ /* compare for the minimum value */
+ if(out > minVal1)
+ {
+ /* Update the minimum value and its index */
+ out = minVal1;
+ outIndex = count + 1u;
+ }
+
+ minVal1 = *pSrc++;
+
+ /* compare for the minimum value */
+ if(out > minVal2)
+ {
+ /* Update the minimum value and its index */
+ out = minVal2;
+ outIndex = count + 2u;
+ }
+
+ minVal2 = *pSrc++;
+
+ /* compare for the minimum value */
+ if(out > minVal1)
+ {
+ /* Update the minimum value and its index */
+ out = minVal1;
+ outIndex = count + 3u;
+ }
+
+ /* compare for the minimum value */
+ if(out > minVal2)
+ {
+ /* Update the minimum value and its index */
+ out = minVal2;
+ outIndex = count + 4u;
+ }
+
+ count += 4u;
+
+ blkCnt--;
+ }
+
+ /* if (blockSize - 1u ) is not multiple of 4 */
+ blkCnt = (blockSize - 1u) % 4u;
+
+#else
+
+ /* Run the below code for Cortex-M0 */
+ q15_t minVal1, out; /* Temporary variables to store the output value. */
+ uint32_t blkCnt, outIndex; /* loop counter */
+
+ blkCnt = (blockSize - 1u);
+
+ /* Initialise the index value to zero. */
+ outIndex = 0u;
+ /* Load first input value that act as reference value for comparision */
+ out = *pSrc++;
+
+#endif // #ifndef ARM_MATH_CM0
+
+ while(blkCnt > 0)
+ {
+ /* Initialize minVal to the next consecutive values one by one */
+ minVal1 = *pSrc++;
+
+ /* compare for the minimum value */
+ if(out > minVal1)
+ {
+ /* Update the minimum value and it's index */
+ out = minVal1;
+ outIndex = blockSize - blkCnt;
+ }
+
+ blkCnt--;
+
+ }
+
+
+
+ /* Store the minimum value and its index into destination pointers */
+ *pResult = out;
+ *pIndex = outIndex;
+}
+
+/**
+ * @} end of Min group
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_q31.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_q31.c
new file mode 100644
index 000000000..4a9befc75
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_q31.c
@@ -0,0 +1,168 @@
+/* ----------------------------------------------------------------------
+* Copyright (C) 2010 ARM Limited. All rights reserved.
+*
+* $Date: 15. February 2012
+* $Revision: V1.1.0
+*
+* Project: CMSIS DSP Library
+* Title: arm_min_q31.c
+*
+* Description: Minimum value of a Q31 vector.
+*
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*
+* Version 1.1.0 2012/02/15
+* Updated with more optimizations, bug fixes and minor API changes.
+*
+* Version 1.0.10 2011/7/15
+* Big Endian support added and Merged M0 and M3/M4 Source code.
+*
+* Version 1.0.3 2010/11/29
+* Re-organized the CMSIS folders and updated documentation.
+*
+* Version 1.0.2 2010/11/11
+* Documentation updated.
+*
+* Version 1.0.1 2010/10/05
+* Production release and review comments incorporated.
+*
+* Version 1.0.0 2010/09/20
+* Production release and review comments incorporated.
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupStats
+ */
+
+
+/**
+ * @addtogroup Min
+ * @{
+ */
+
+
+/**
+ * @brief Minimum value of a Q31 vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult minimum value returned here
+ * @param[out] *pIndex index of minimum value returned here
+ * @return none.
+ *
+ */
+
+void arm_min_q31(
+ q31_t * pSrc,
+ uint32_t blockSize,
+ q31_t * pResult,
+ uint32_t * pIndex)
+{
+#ifndef ARM_MATH_CM0
+
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+ q31_t minVal1, minVal2, out; /* Temporary variables to store the output value. */
+ uint32_t blkCnt, outIndex, count; /* loop counter */
+
+ /* Initialise the count value. */
+ count = 0u;
+ /* Initialise the index value to zero. */
+ outIndex = 0u;
+ /* Load first input value that act as reference value for comparision */
+ out = *pSrc++;
+
+
+ /* Loop unrolling */
+ blkCnt = (blockSize - 1u) >> 2u;
+
+ while(blkCnt > 0)
+ {
+ /* Initialize minVal to the next consecutive values one by one */
+ minVal1 = *pSrc++;
+ minVal2 = *pSrc++;
+
+ /* compare for the minimum value */
+ if(out > minVal1)
+ {
+ /* Update the minimum value and its index */
+ out = minVal1;
+ outIndex = count + 1u;
+ }
+
+ minVal1 = *pSrc++;
+
+ /* compare for the minimum value */
+ if(out > minVal2)
+ {
+ /* Update the minimum value and its index */
+ out = minVal2;
+ outIndex = count + 2u;
+ }
+
+ minVal2 = *pSrc++;
+
+ /* compare for the minimum value */
+ if(out > minVal1)
+ {
+ /* Update the minimum value and its index */
+ out = minVal1;
+ outIndex = count + 3u;
+ }
+
+ /* compare for the minimum value */
+ if(out > minVal2)
+ {
+ /* Update the minimum value and its index */
+ out = minVal2;
+ outIndex = count + 4u;
+ }
+
+ count += 4u;
+
+ blkCnt--;
+ }
+
+ /* if (blockSize - 1u ) is not multiple of 4 */
+ blkCnt = (blockSize - 1u) % 4u;
+
+#else
+
+ /* Run the below code for Cortex-M0 */
+ q31_t minVal1, out; /* Temporary variables to store the output value. */
+ uint32_t blkCnt, outIndex; /* loop counter */
+
+ blkCnt = (blockSize - 1u);
+
+ /* Initialise the index value to zero. */
+ outIndex = 0u;
+ /* Load first input value that act as reference value for comparision */
+ out = *pSrc++;
+
+#endif // #ifndef ARM_MATH_CM0
+
+ while(blkCnt > 0)
+ {
+ /* Initialize minVal to the next consecutive values one by one */
+ minVal1 = *pSrc++;
+
+ /* compare for the minimum value */
+ if(out > minVal1)
+ {
+ /* Update the minimum value and it's index */
+ out = minVal1;
+ outIndex = blockSize - blkCnt;
+ }
+
+ blkCnt--;
+
+ }
+
+ /* Store the minimum value and its index into destination pointers */
+ *pResult = out;
+ *pIndex = outIndex;
+}
+
+/**
+ * @} end of Min group
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_q7.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_q7.c
new file mode 100644
index 000000000..aaf8ad9ff
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_q7.c
@@ -0,0 +1,170 @@
+/* ----------------------------------------------------------------------
+* Copyright (C) 2010 ARM Limited. All rights reserved.
+*
+* $Date: 15. February 2012
+* $Revision: V1.1.0
+*
+* Project: CMSIS DSP Library
+* Title: arm_min_q7.c
+*
+* Description: Minimum value of a Q7 vector.
+*
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*
+* Version 1.1.0 2012/02/15
+* Updated with more optimizations, bug fixes and minor API changes.
+*
+* Version 1.0.10 2011/7/15
+* Big Endian support added and Merged M0 and M3/M4 Source code.
+*
+* Version 1.0.3 2010/11/29
+* Re-organized the CMSIS folders and updated documentation.
+*
+* Version 1.0.2 2010/11/11
+* Documentation updated.
+*
+* Version 1.0.1 2010/10/05
+* Production release and review comments incorporated.
+*
+* Version 1.0.0 2010/09/20
+* Production release and review comments incorporated.
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupStats
+ */
+
+/**
+ * @addtogroup Min
+ * @{
+ */
+
+
+/**
+ * @brief Minimum value of a Q7 vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult minimum value returned here
+ * @param[out] *pIndex index of minimum value returned here
+ * @return none.
+ *
+ */
+
+void arm_min_q7(
+ q7_t * pSrc,
+ uint32_t blockSize,
+ q7_t * pResult,
+ uint32_t * pIndex)
+{
+#ifndef ARM_MATH_CM0
+
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+ q7_t minVal1, minVal2, out; /* Temporary variables to store the output value. */
+ uint32_t blkCnt, outIndex, count; /* loop counter */
+
+ /* Initialise the count value. */
+ count = 0u;
+ /* Initialise the index value to zero. */
+ outIndex = 0u;
+ /* Load first input value that act as reference value for comparision */
+ out = *pSrc++;
+
+ /* Loop unrolling */
+ blkCnt = (blockSize - 1u) >> 2u;
+
+ while(blkCnt > 0)
+ {
+ /* Initialize minVal to the next consecutive values one by one */
+ minVal1 = *pSrc++;
+ minVal2 = *pSrc++;
+
+ /* compare for the minimum value */
+ if(out > minVal1)
+ {
+ /* Update the minimum value and its index */
+ out = minVal1;
+ outIndex = count + 1u;
+ }
+
+ minVal1 = *pSrc++;
+
+ /* compare for the minimum value */
+ if(out > minVal2)
+ {
+ /* Update the minimum value and its index */
+ out = minVal2;
+ outIndex = count + 2u;
+ }
+
+ minVal2 = *pSrc++;
+
+ /* compare for the minimum value */
+ if(out > minVal1)
+ {
+ /* Update the minimum value and its index */
+ out = minVal1;
+ outIndex = count + 3u;
+ }
+
+ /* compare for the minimum value */
+ if(out > minVal2)
+ {
+ /* Update the minimum value and its index */
+ out = minVal2;
+ outIndex = count + 4u;
+ }
+
+ count += 4u;
+
+ blkCnt--;
+ }
+
+ /* if (blockSize - 1u ) is not multiple of 4 */
+ blkCnt = (blockSize - 1u) % 4u;
+
+#else
+
+ /* Run the below code for Cortex-M0 */
+
+ q7_t minVal1, out; /* Temporary variables to store the output value. */
+ uint32_t blkCnt, outIndex; /* loop counter */
+
+ /* Initialise the index value to zero. */
+ outIndex = 0u;
+ /* Load first input value that act as reference value for comparision */
+ out = *pSrc++;
+
+ blkCnt = (blockSize - 1u);
+
+#endif // #ifndef ARM_MATH_CM0
+
+ while(blkCnt > 0)
+ {
+ /* Initialize minVal to the next consecutive values one by one */
+ minVal1 = *pSrc++;
+
+ /* compare for the minimum value */
+ if(out > minVal1)
+ {
+ /* Update the minimum value and it's index */
+ out = minVal1;
+ outIndex = blockSize - blkCnt;
+ }
+
+ blkCnt--;
+
+ }
+
+ /* Store the minimum value and its index into destination pointers */
+ *pResult = out;
+ *pIndex = outIndex;
+
+
+}
+
+/**
+ * @} end of Min group
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_f32.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_f32.c
new file mode 100644
index 000000000..5ee0060d8
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_f32.c
@@ -0,0 +1,138 @@
+/* ----------------------------------------------------------------------
+* Copyright (C) 2010 ARM Limited. All rights reserved.
+*
+* $Date: 15. February 2012
+* $Revision: V1.1.0
+*
+* Project: CMSIS DSP Library
+* Title: arm_power_f32.c
+*
+* Description: Sum of the squares of the elements of a floating-point vector.
+*
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*
+* Version 1.1.0 2012/02/15
+* Updated with more optimizations, bug fixes and minor API changes.
+*
+* Version 1.0.10 2011/7/15
+* Big Endian support added and Merged M0 and M3/M4 Source code.
+*
+* Version 1.0.3 2010/11/29
+* Re-organized the CMSIS folders and updated documentation.
+*
+* Version 1.0.2 2010/11/11
+* Documentation updated.
+*
+* Version 1.0.1 2010/10/05
+* Production release and review comments incorporated.
+*
+* Version 1.0.0 2010/09/20
+* Production release and review comments incorporated.
+*
+* Version 0.0.7 2010/06/10
+* Misra-C changes done
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupStats
+ */
+
+/**
+ * @defgroup power Power
+ *
+ * Calculates the sum of the squares of the elements in the input vector.
+ * The underlying algorithm is used:
+ *
+ * <pre>
+ * Result = pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + pSrc[2] * pSrc[2] + ... + pSrc[blockSize-1] * pSrc[blockSize-1];
+ * </pre>
+ *
+ * There are separate functions for floating point, Q31, Q15, and Q7 data types.
+ */
+
+/**
+ * @addtogroup power
+ * @{
+ */
+
+
+/**
+ * @brief Sum of the squares of the elements of a floating-point vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult sum of the squares value returned here
+ * @return none.
+ *
+ */
+
+
+void arm_power_f32(
+ float32_t * pSrc,
+ uint32_t blockSize,
+ float32_t * pResult)
+{
+ float32_t sum = 0.0f; /* accumulator */
+ float32_t in; /* Temporary variable to store input value */
+ uint32_t blkCnt; /* loop counter */
+
+#ifndef ARM_MATH_CM0
+
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+ /*loop Unrolling */
+ blkCnt = blockSize >> 2u;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** a second loop below computes the remaining 1 to 3 samples. */
+ while(blkCnt > 0u)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+ /* Compute Power and then store the result in a temporary variable, sum. */
+ in = *pSrc++;
+ sum += in * in;
+ in = *pSrc++;
+ sum += in * in;
+ in = *pSrc++;
+ sum += in * in;
+ in = *pSrc++;
+ sum += in * in;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+ ** No loop unrolling is used. */
+ blkCnt = blockSize % 0x4u;
+
+
+#else
+
+ /* Run the below code for Cortex-M0 */
+
+ /* Loop over blockSize number of values */
+ blkCnt = blockSize;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+
+ while(blkCnt > 0u)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+ /* compute power and then store the result in a temporary variable, sum. */
+ in = *pSrc++;
+ sum += in * in;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Store the result to the destination */
+ *pResult = sum;
+}
+
+/**
+ * @} end of power group
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_q15.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_q15.c
new file mode 100644
index 000000000..aed06a17a
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_q15.c
@@ -0,0 +1,144 @@
+/* ----------------------------------------------------------------------
+* Copyright (C) 2010 ARM Limited. All rights reserved.
+*
+* $Date: 15. February 2012
+* $Revision: V1.1.0
+*
+* Project: CMSIS DSP Library
+* Title: arm_power_q15.c
+*
+* Description: Sum of the squares of the elements of a Q15 vector.
+*
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*
+* Version 1.1.0 2012/02/15
+* Updated with more optimizations, bug fixes and minor API changes.
+*
+* Version 1.0.10 2011/7/15
+* Big Endian support added and Merged M0 and M3/M4 Source code.
+*
+* Version 1.0.3 2010/11/29
+* Re-organized the CMSIS folders and updated documentation.
+*
+* Version 1.0.2 2010/11/11
+* Documentation updated.
+*
+* Version 1.0.1 2010/10/05
+* Production release and review comments incorporated.
+*
+* Version 1.0.0 2010/09/20
+* Production release and review comments incorporated.
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupStats
+ */
+
+/**
+ * @addtogroup power
+ * @{
+ */
+
+/**
+ * @brief Sum of the squares of the elements of a Q15 vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult sum of the squares value returned here
+ * @return none.
+ *
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ *
+ * \par
+ * The function is implemented using a 64-bit internal accumulator.
+ * The input is represented in 1.15 format.
+ * Intermediate multiplication yields a 2.30 format, and this
+ * result is added without saturation to a 64-bit accumulator in 34.30 format.
+ * With 33 guard bits in the accumulator, there is no risk of overflow, and the
+ * full precision of the intermediate multiplication is preserved.
+ * Finally, the return result is in 34.30 format.
+ *
+ */
+
+void arm_power_q15(
+ q15_t * pSrc,
+ uint32_t blockSize,
+ q63_t * pResult)
+{
+ q63_t sum = 0; /* Temporary result storage */
+
+#ifndef ARM_MATH_CM0
+
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+ q31_t in32; /* Temporary variable to store input value */
+ q15_t in16; /* Temporary variable to store input value */
+ uint32_t blkCnt; /* loop counter */
+
+
+ /* loop Unrolling */
+ blkCnt = blockSize >> 2u;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** a second loop below computes the remaining 1 to 3 samples. */
+ while(blkCnt > 0u)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+ /* Compute Power and then store the result in a temporary variable, sum. */
+ in32 = *__SIMD32(pSrc)++;
+ sum = __SMLALD(in32, in32, sum);
+ in32 = *__SIMD32(pSrc)++;
+ sum = __SMLALD(in32, in32, sum);
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+ ** No loop unrolling is used. */
+ blkCnt = blockSize % 0x4u;
+
+ while(blkCnt > 0u)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+ /* Compute Power and then store the result in a temporary variable, sum. */
+ in16 = *pSrc++;
+ sum = __SMLALD(in16, in16, sum);
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+#else
+
+ /* Run the below code for Cortex-M0 */
+
+ q15_t in; /* Temporary variable to store input value */
+ uint32_t blkCnt; /* loop counter */
+
+
+ /* Loop over blockSize number of values */
+ blkCnt = blockSize;
+
+ while(blkCnt > 0u)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+ /* Compute Power and then store the result in a temporary variable, sum. */
+ in = *pSrc++;
+ sum += ((q31_t) in * in);
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+ /* Store the results in 34.30 format */
+ *pResult = sum;
+}
+
+/**
+ * @} end of power group
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_q31.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_q31.c
new file mode 100644
index 000000000..43f03f757
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_q31.c
@@ -0,0 +1,135 @@
+/* ----------------------------------------------------------------------
+* Copyright (C) 2010 ARM Limited. All rights reserved.
+*
+* $Date: 15. February 2012
+* $Revision: V1.1.0
+*
+* Project: CMSIS DSP Library
+* Title: arm_power_q31.c
+*
+* Description: Sum of the squares of the elements of a Q31 vector.
+*
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*
+* Version 1.1.0 2012/02/15
+* Updated with more optimizations, bug fixes and minor API changes.
+*
+* Version 1.0.10 2011/7/15
+* Big Endian support added and Merged M0 and M3/M4 Source code.
+*
+* Version 1.0.3 2010/11/29
+* Re-organized the CMSIS folders and updated documentation.
+*
+* Version 1.0.2 2010/11/11
+* Documentation updated.
+*
+* Version 1.0.1 2010/10/05
+* Production release and review comments incorporated.
+*
+* Version 1.0.0 2010/09/20
+* Production release and review comments incorporated.
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupStats
+ */
+
+/**
+ * @addtogroup power
+ * @{
+ */
+
+/**
+ * @brief Sum of the squares of the elements of a Q31 vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult sum of the squares value returned here
+ * @return none.
+ *
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ *
+ * \par
+ * The function is implemented using a 64-bit internal accumulator.
+ * The input is represented in 1.31 format.
+ * Intermediate multiplication yields a 2.62 format, and this
+ * result is truncated to 2.48 format by discarding the lower 14 bits.
+ * The 2.48 result is then added without saturation to a 64-bit accumulator in 16.48 format.
+ * With 15 guard bits in the accumulator, there is no risk of overflow, and the
+ * full precision of the intermediate multiplication is preserved.
+ * Finally, the return result is in 16.48 format.
+ *
+ */
+
+void arm_power_q31(
+ q31_t * pSrc,
+ uint32_t blockSize,
+ q63_t * pResult)
+{
+ q63_t sum = 0; /* Temporary result storage */
+ q31_t in;
+ uint32_t blkCnt; /* loop counter */
+
+
+#ifndef ARM_MATH_CM0
+
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+ /*loop Unrolling */
+ blkCnt = blockSize >> 2u;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** a second loop below computes the remaining 1 to 3 samples. */
+ while(blkCnt > 0u)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+ /* Compute Power then shift intermediate results by 14 bits to maintain 16.48 format and then store the result in a temporary variable sum, providing 15 guard bits. */
+ in = *pSrc++;
+ sum += ((q63_t) in * in) >> 14u;
+
+ in = *pSrc++;
+ sum += ((q63_t) in * in) >> 14u;
+
+ in = *pSrc++;
+ sum += ((q63_t) in * in) >> 14u;
+
+ in = *pSrc++;
+ sum += ((q63_t) in * in) >> 14u;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+ ** No loop unrolling is used. */
+ blkCnt = blockSize % 0x4u;
+
+#else
+
+ /* Run the below code for Cortex-M0 */
+
+ /* Loop over blockSize number of values */
+ blkCnt = blockSize;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+ while(blkCnt > 0u)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+ /* Compute Power and then store the result in a temporary variable, sum. */
+ in = *pSrc++;
+ sum += ((q63_t) in * in) >> 14u;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Store the results in 16.48 format */
+ *pResult = sum;
+}
+
+/**
+ * @} end of power group
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_q7.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_q7.c
new file mode 100644
index 000000000..5cb99b4c2
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_q7.c
@@ -0,0 +1,133 @@
+/* ----------------------------------------------------------------------
+* Copyright (C) 2010 ARM Limited. All rights reserved.
+*
+* $Date: 15. February 2012
+* $Revision: V1.1.0
+*
+* Project: CMSIS DSP Library
+* Title: arm_power_q7.c
+*
+* Description: Sum of the squares of the elements of a Q7 vector.
+*
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*
+* Version 1.1.0 2012/02/15
+* Updated with more optimizations, bug fixes and minor API changes.
+*
+* Version 1.0.10 2011/7/15
+* Big Endian support added and Merged M0 and M3/M4 Source code.
+*
+* Version 1.0.3 2010/11/29
+* Re-organized the CMSIS folders and updated documentation.
+*
+* Version 1.0.2 2010/11/11
+* Documentation updated.
+*
+* Version 1.0.1 2010/10/05
+* Production release and review comments incorporated.
+*
+* Version 1.0.0 2010/09/20
+* Production release and review comments incorporated.
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupStats
+ */
+
+/**
+ * @addtogroup power
+ * @{
+ */
+
+/**
+ * @brief Sum of the squares of the elements of a Q7 vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult sum of the squares value returned here
+ * @return none.
+ *
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ *
+ * \par
+ * The function is implemented using a 32-bit internal accumulator.
+ * The input is represented in 1.7 format.
+ * Intermediate multiplication yields a 2.14 format, and this
+ * result is added without saturation to an accumulator in 18.14 format.
+ * With 17 guard bits in the accumulator, there is no risk of overflow, and the
+ * full precision of the intermediate multiplication is preserved.
+ * Finally, the return result is in 18.14 format.
+ *
+ */
+
+void arm_power_q7(
+ q7_t * pSrc,
+ uint32_t blockSize,
+ q31_t * pResult)
+{
+ q31_t sum = 0; /* Temporary result storage */
+ q7_t in; /* Temporary variable to store input */
+ uint32_t blkCnt; /* loop counter */
+
+#ifndef ARM_MATH_CM0
+
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+ q31_t input1; /* Temporary variable to store packed input */
+ q31_t in1, in2; /* Temporary variables to store input */
+
+ /*loop Unrolling */
+ blkCnt = blockSize >> 2u;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** a second loop below computes the remaining 1 to 3 samples. */
+ while(blkCnt > 0u)
+ {
+ /* Reading two inputs of pSrc vector and packing */
+ input1 = *__SIMD32(pSrc)++;
+
+ in1 = __SXTB16(__ROR(input1, 8));
+ in2 = __SXTB16(input1);
+
+ /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+ /* calculate power and accumulate to accumulator */
+ sum = __SMLAD(in1, in1, sum);
+ sum = __SMLAD(in2, in2, sum);
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+ ** No loop unrolling is used. */
+ blkCnt = blockSize % 0x4u;
+
+#else
+
+ /* Run the below code for Cortex-M0 */
+
+ /* Loop over blockSize number of values */
+ blkCnt = blockSize;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+ while(blkCnt > 0u)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+ /* Compute Power and then store the result in a temporary variable, sum. */
+ in = *pSrc++;
+ sum += ((q15_t) in * in);
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Store the result in 18.14 format */
+ *pResult = sum;
+}
+
+/**
+ * @} end of power group
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_rms_f32.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_rms_f32.c
new file mode 100644
index 000000000..573896ed4
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_rms_f32.c
@@ -0,0 +1,133 @@
+/* ----------------------------------------------------------------------
+* Copyright (C) 2010 ARM Limited. All rights reserved.
+*
+* $Date: 15. February 2012
+* $Revision: V1.1.0
+*
+* Project: CMSIS DSP Library
+* Title: arm_rms_f32.c
+*
+* Description: Root mean square value of an array of F32 type
+*
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*
+* Version 1.1.0 2012/02/15
+* Updated with more optimizations, bug fixes and minor API changes.
+*
+* Version 1.0.10 2011/7/15
+* Big Endian support added and Merged M0 and M3/M4 Source code.
+*
+* Version 1.0.3 2010/11/29
+* Re-organized the CMSIS folders and updated documentation.
+*
+* Version 1.0.2 2010/11/11
+* Documentation updated.
+*
+* Version 1.0.1 2010/10/05
+* Production release and review comments incorporated.
+*
+* Version 1.0.0 2010/09/20
+* Production release and review comments incorporated.
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupStats
+ */
+
+/**
+ * @defgroup RMS Root mean square (RMS)
+ *
+ *
+ * Calculates the Root Mean Sqaure of the elements in the input vector.
+ * The underlying algorithm is used:
+ *
+ * <pre>
+ * Result = sqrt(((pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] * pSrc[blockSize-1]) / blockSize));
+ * </pre>
+ *
+ * There are separate functions for floating point, Q31, and Q15 data types.
+ */
+
+/**
+ * @addtogroup RMS
+ * @{
+ */
+
+
+/**
+ * @brief Root Mean Square of the elements of a floating-point vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult rms value returned here
+ * @return none.
+ *
+ */
+
+void arm_rms_f32(
+ float32_t * pSrc,
+ uint32_t blockSize,
+ float32_t * pResult)
+{
+ float32_t sum = 0.0f; /* Accumulator */
+ float32_t in; /* Tempoprary variable to store input value */
+ uint32_t blkCnt; /* loop counter */
+
+#ifndef ARM_MATH_CM0
+
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+ /* loop Unrolling */
+ blkCnt = blockSize >> 2u;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** a second loop below computes the remaining 1 to 3 samples. */
+ while(blkCnt > 0u)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+ /* Compute sum of the squares and then store the result in a temporary variable, sum */
+ in = *pSrc++;
+ sum += in * in;
+ in = *pSrc++;
+ sum += in * in;
+ in = *pSrc++;
+ sum += in * in;
+ in = *pSrc++;
+ sum += in * in;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+ ** No loop unrolling is used. */
+ blkCnt = blockSize % 0x4u;
+
+#else
+
+ /* Run the below code for Cortex-M0 */
+
+ /* Loop over blockSize number of values */
+ blkCnt = blockSize;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+ while(blkCnt > 0u)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+ /* Compute sum of the squares and then store the results in a temporary variable, sum */
+ in = *pSrc++;
+ sum += in * in;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Compute Rms and store the result in the destination */
+ arm_sqrt_f32(sum / (float32_t) blockSize, pResult);
+}
+
+/**
+ * @} end of RMS group
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_rms_q15.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_rms_q15.c
new file mode 100644
index 000000000..491c652e7
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_rms_q15.c
@@ -0,0 +1,153 @@
+/* ----------------------------------------------------------------------
+* Copyright (C) 2010 ARM Limited. All rights reserved.
+*
+* $Date: 15. February 2012
+* $Revision: V1.1.0
+*
+* Project: CMSIS DSP Library
+* Title: arm_rms_q15.c
+*
+* Description: Root Mean Square of the elements of a Q15 vector.
+*
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*
+* Version 1.1.0 2012/02/15
+* Updated with more optimizations, bug fixes and minor API changes.
+*
+* Version 1.0.10 2011/7/15
+* Big Endian support added and Merged M0 and M3/M4 Source code.
+*
+* Version 1.0.3 2010/11/29
+* Re-organized the CMSIS folders and updated documentation.
+*
+* Version 1.0.2 2010/11/11
+* Documentation updated.
+*
+* Version 1.0.1 2010/10/05
+* Production release and review comments incorporated.
+*
+* Version 1.0.0 2010/09/20
+* Production release and review comments incorporated.
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**
+ * @addtogroup RMS
+ * @{
+ */
+
+/**
+ * @brief Root Mean Square of the elements of a Q15 vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult rms value returned here
+ * @return none.
+ *
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ *
+ * \par
+ * The function is implemented using a 64-bit internal accumulator.
+ * The input is represented in 1.15 format.
+ * Intermediate multiplication yields a 2.30 format, and this
+ * result is added without saturation to a 64-bit accumulator in 34.30 format.
+ * With 33 guard bits in the accumulator, there is no risk of overflow, and the
+ * full precision of the intermediate multiplication is preserved.
+ * Finally, the 34.30 result is truncated to 34.15 format by discarding the lower
+ * 15 bits, and then saturated to yield a result in 1.15 format.
+ *
+ */
+
+void arm_rms_q15(
+ q15_t * pSrc,
+ uint32_t blockSize,
+ q15_t * pResult)
+{
+ q63_t sum = 0; /* accumulator */
+
+#ifndef ARM_MATH_CM0
+
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+ q31_t in; /* temporary variable to store the input value */
+ q15_t in1; /* temporary variable to store the input value */
+ uint32_t blkCnt; /* loop counter */
+
+ /* loop Unrolling */
+ blkCnt = blockSize >> 2u;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** a second loop below computes the remaining 1 to 3 samples. */
+ while(blkCnt > 0u)
+ {
+ /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+ /* Compute sum of the squares and then store the results in a temporary variable, sum */
+ in = *__SIMD32(pSrc)++;
+ sum = __SMLALD(in, in, sum);
+ in = *__SIMD32(pSrc)++;
+ sum = __SMLALD(in, in, sum);
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+ ** No loop unrolling is used. */
+ blkCnt = blockSize % 0x4u;
+
+ while(blkCnt > 0u)
+ {
+ /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+ /* Compute sum of the squares and then store the results in a temporary variable, sum */
+ in1 = *pSrc++;
+ sum = __SMLALD(in1, in1, sum);
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Truncating and saturating the accumulator to 1.15 format */
+ sum = __SSAT((q31_t) (sum >> 15), 16);
+
+ in1 = (q15_t) (sum / blockSize);
+
+ /* Store the result in the destination */
+ arm_sqrt_q15(in1, pResult);
+
+#else
+
+ /* Run the below code for Cortex-M0 */
+
+ q15_t in; /* temporary variable to store the input value */
+ uint32_t blkCnt; /* loop counter */
+
+ /* Loop over blockSize number of values */
+ blkCnt = blockSize;
+
+ while(blkCnt > 0u)
+ {
+ /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+ /* Compute sum of the squares and then store the results in a temporary variable, sum */
+ in = *pSrc++;
+ sum += ((q31_t) in * in);
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Truncating and saturating the accumulator to 1.15 format */
+ sum = __SSAT((q31_t) (sum >> 15), 16);
+
+ in = (q15_t) (sum / blockSize);
+
+ /* Store the result in the destination */
+ arm_sqrt_q15(in, pResult);
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+}
+
+/**
+ * @} end of RMS group
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_rms_q31.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_rms_q31.c
new file mode 100644
index 000000000..54038eb12
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_rms_q31.c
@@ -0,0 +1,146 @@
+/* ----------------------------------------------------------------------
+* Copyright (C) 2010 ARM Limited. All rights reserved.
+*
+* $Date: 15. February 2012
+* $Revision: V1.1.0
+*
+* Project: CMSIS DSP Library
+* Title: arm_rms_q31.c
+*
+* Description: Root Mean Square of the elements of a Q31 vector.
+*
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*
+* Version 1.1.0 2012/02/15
+* Updated with more optimizations, bug fixes and minor API changes.
+*
+* Version 1.0.10 2011/7/15
+* Big Endian support added and Merged M0 and M3/M4 Source code.
+*
+* Version 1.0.3 2010/11/29
+* Re-organized the CMSIS folders and updated documentation.
+*
+* Version 1.0.2 2010/11/11
+* Documentation updated.
+*
+* Version 1.0.1 2010/10/05
+* Production release and review comments incorporated.
+*
+* Version 1.0.0 2010/09/20
+* Production release and review comments incorporated.
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**
+ * @addtogroup RMS
+ * @{
+ */
+
+
+/**
+ * @brief Root Mean Square of the elements of a Q31 vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult rms value returned here
+ * @return none.
+ *
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ *
+ *\par
+ * The function is implemented using an internal 64-bit accumulator.
+ * The input is represented in 1.31 format, and intermediate multiplication
+ * yields a 2.62 format.
+ * The accumulator maintains full precision of the intermediate multiplication results,
+ * but provides only a single guard bit.
+ * There is no saturation on intermediate additions.
+ * If the accumulator overflows, it wraps around and distorts the result.
+ * In order to avoid overflows completely, the input signal must be scaled down by
+ * log2(blockSize) bits, as a total of blockSize additions are performed internally.
+ * Finally, the 2.62 accumulator is right shifted by 31 bits to yield a 1.31 format value.
+ *
+ */
+
+void arm_rms_q31(
+ q31_t * pSrc,
+ uint32_t blockSize,
+ q31_t * pResult)
+{
+ q63_t sum = 0; /* accumulator */
+ q31_t in; /* Temporary variable to store the input */
+ uint32_t blkCnt; /* loop counter */
+
+#ifndef ARM_MATH_CM0
+
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+ q31_t in1, in2, in3, in4; /* Temporary input variables */
+
+ /*loop Unrolling */
+ blkCnt = blockSize >> 2u;
+
+ /* First part of the processing with loop unrolling. Compute 8 outputs at a time.
+ ** a second loop below computes the remaining 1 to 7 samples. */
+ while(blkCnt > 0u)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+ /* Compute sum of the squares and then store the result in a temporary variable, sum */
+ /* read two samples from source buffer */
+ in1 = pSrc[0];
+ in2 = pSrc[1];
+
+ /* calculate power and accumulate to accumulator */
+ sum += (q63_t) in1 *in1;
+ sum += (q63_t) in2 *in2;
+
+ /* read two samples from source buffer */
+ in3 = pSrc[2];
+ in4 = pSrc[3];
+
+ /* calculate power and accumulate to accumulator */
+ sum += (q63_t) in3 *in3;
+ sum += (q63_t) in4 *in4;
+
+
+ /* update source buffer to process next samples */
+ pSrc += 4u;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* If the blockSize is not a multiple of 8, compute any remaining output samples here.
+ ** No loop unrolling is used. */
+ blkCnt = blockSize % 0x4u;
+
+#else
+
+ /* Run the below code for Cortex-M0 */
+ blkCnt = blockSize;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+ while(blkCnt > 0u)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+ /* Compute sum of the squares and then store the results in a temporary variable, sum */
+ in = *pSrc++;
+ sum += (q63_t) in *in;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Convert data in 2.62 to 1.31 by 31 right shifts and saturate */
+
+ sum = __SSAT(sum >> 31, 31);
+
+
+ /* Compute Rms and store the result in the destination vector */
+ arm_sqrt_q31((q31_t) ((q31_t) sum / (int32_t) blockSize), pResult);
+}
+
+/**
+ * @} end of RMS group
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_std_f32.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_std_f32.c
new file mode 100644
index 000000000..6442d5c29
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_std_f32.c
@@ -0,0 +1,188 @@
+/* ----------------------------------------------------------------------
+* Copyright (C) 2010 ARM Limited. All rights reserved.
+*
+* $Date: 15. February 2012
+* $Revision: V1.1.0
+*
+* Project: CMSIS DSP Library
+* Title: arm_std_f32.c
+*
+* Description: Standard deviation of the elements of a floating-point vector.
+*
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*
+* Version 1.1.0 2012/02/15
+* Updated with more optimizations, bug fixes and minor API changes.
+*
+* Version 1.0.10 2011/7/15
+* Big Endian support added and Merged M0 and M3/M4 Source code.
+*
+* Version 1.0.3 2010/11/29
+* Re-organized the CMSIS folders and updated documentation.
+*
+* Version 1.0.2 2010/11/11
+* Documentation updated.
+*
+* Version 1.0.1 2010/10/05
+* Production release and review comments incorporated.
+*
+* Version 1.0.0 2010/09/20
+* Production release and review comments incorporated.
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupStats
+ */
+
+/**
+ * @defgroup STD Standard deviation
+ *
+ * Calculates the standard deviation of the elements in the input vector.
+ * The underlying algorithm is used:
+ *
+ * <pre>
+ * Result = sqrt((sumOfSquares - sum<sup>2</sup> / blockSize) / (blockSize - 1))
+ *
+ * where, sumOfSquares = pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] * pSrc[blockSize-1]
+ *
+ * sum = pSrc[0] + pSrc[1] + pSrc[2] + ... + pSrc[blockSize-1]
+ * </pre>
+ *
+ * There are separate functions for floating point, Q31, and Q15 data types.
+ */
+
+/**
+ * @addtogroup STD
+ * @{
+ */
+
+
+/**
+ * @brief Standard deviation of the elements of a floating-point vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult standard deviation value returned here
+ * @return none.
+ *
+ */
+
+
+void arm_std_f32(
+ float32_t * pSrc,
+ uint32_t blockSize,
+ float32_t * pResult)
+{
+ float32_t sum = 0.0f; /* Temporary result storage */
+ float32_t sumOfSquares = 0.0f; /* Sum of squares */
+ float32_t in; /* input value */
+ uint32_t blkCnt; /* loop counter */
+
+#ifndef ARM_MATH_CM0
+
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+ float32_t meanOfSquares, mean, squareOfMean;
+
+ /*loop Unrolling */
+ blkCnt = blockSize >> 2u;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** a second loop below computes the remaining 1 to 3 samples. */
+ while(blkCnt > 0u)
+ {
+ /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+ /* Compute Sum of squares of the input samples
+ * and then store the result in a temporary variable, sum. */
+ in = *pSrc++;
+ sum += in;
+ sumOfSquares += in * in;
+ in = *pSrc++;
+ sum += in;
+ sumOfSquares += in * in;
+ in = *pSrc++;
+ sum += in;
+ sumOfSquares += in * in;
+ in = *pSrc++;
+ sum += in;
+ sumOfSquares += in * in;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+ ** No loop unrolling is used. */
+ blkCnt = blockSize % 0x4u;
+
+ while(blkCnt > 0u)
+ {
+ /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+ /* Compute Sum of squares of the input samples
+ * and then store the result in a temporary variable, sum. */
+ in = *pSrc++;
+ sum += in;
+ sumOfSquares += in * in;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Compute Mean of squares of the input samples
+ * and then store the result in a temporary variable, meanOfSquares. */
+ meanOfSquares = sumOfSquares / ((float32_t) blockSize - 1.0f);
+
+ /* Compute mean of all input values */
+ mean = sum / (float32_t) blockSize;
+
+ /* Compute square of mean */
+ squareOfMean = (mean * mean) * (((float32_t) blockSize) /
+ ((float32_t) blockSize - 1.0f));
+
+ /* Compute standard deviation and then store the result to the destination */
+ arm_sqrt_f32((meanOfSquares - squareOfMean), pResult);
+
+#else
+
+ /* Run the below code for Cortex-M0 */
+
+ float32_t squareOfSum; /* Square of Sum */
+ float32_t var; /* Temporary varaince storage */
+
+ /* Loop over blockSize number of values */
+ blkCnt = blockSize;
+
+ while(blkCnt > 0u)
+ {
+ /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+ /* Compute Sum of squares of the input samples
+ * and then store the result in a temporary variable, sumOfSquares. */
+ in = *pSrc++;
+ sumOfSquares += in * in;
+
+ /* C = (A[0] + A[1] + ... + A[blockSize-1]) */
+ /* Compute Sum of the input samples
+ * and then store the result in a temporary variable, sum. */
+ sum += in;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Compute the square of sum */
+ squareOfSum = ((sum * sum) / (float32_t) blockSize);
+
+ /* Compute the variance */
+ var = ((sumOfSquares - squareOfSum) / (float32_t) (blockSize - 1.0f));
+
+ /* Compute standard deviation and then store the result to the destination */
+ arm_sqrt_f32(var, pResult);
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+}
+
+/**
+ * @} end of STD group
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_std_q15.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_std_q15.c
new file mode 100644
index 000000000..2d1a49a50
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_std_q15.c
@@ -0,0 +1,197 @@
+/* ----------------------------------------------------------------------
+* Copyright (C) 2010 ARM Limited. All rights reserved.
+*
+* $Date: 15. February 2012
+* $Revision: V1.1.0
+*
+* Project: CMSIS DSP Library
+* Title: arm_std_q15.c
+*
+* Description: Standard deviation of an array of Q15 type.
+*
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*
+* Version 1.1.0 2012/02/15
+* Updated with more optimizations, bug fixes and minor API changes.
+*
+* Version 1.0.10 2011/7/15
+* Big Endian support added and Merged M0 and M3/M4 Source code.
+*
+* Version 1.0.3 2010/11/29
+* Re-organized the CMSIS folders and updated documentation.
+*
+* Version 1.0.2 2010/11/11
+* Documentation updated.
+*
+* Version 1.0.1 2010/10/05
+* Production release and review comments incorporated.
+*
+* Version 1.0.0 2010/09/20
+* Production release and review comments incorporated.
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupStats
+ */
+
+/**
+ * @addtogroup STD
+ * @{
+ */
+
+/**
+ * @brief Standard deviation of the elements of a Q15 vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult standard deviation value returned here
+ * @return none.
+ *
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ *
+ * \par
+ * The function is implemented using a 64-bit internal accumulator.
+ * The input is represented in 1.15 format.
+ * Intermediate multiplication yields a 2.30 format, and this
+ * result is added without saturation to a 64-bit accumulator in 34.30 format.
+ * With 33 guard bits in the accumulator, there is no risk of overflow, and the
+ * full precision of the intermediate multiplication is preserved.
+ * Finally, the 34.30 result is truncated to 34.15 format by discarding the lower
+ * 15 bits, and then saturated to yield a result in 1.15 format.
+ */
+
+void arm_std_q15(
+ q15_t * pSrc,
+ uint32_t blockSize,
+ q15_t * pResult)
+{
+ q31_t sum = 0; /* Accumulator */
+ q31_t meanOfSquares, squareOfMean; /* square of mean and mean of square */
+ q15_t mean; /* mean */
+ uint32_t blkCnt; /* loop counter */
+ q15_t t; /* Temporary variable */
+ q63_t sumOfSquares = 0; /* Accumulator */
+
+#ifndef ARM_MATH_CM0
+
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+ q31_t in; /* input value */
+ q15_t in1; /* input value */
+
+ /*loop Unrolling */
+ blkCnt = blockSize >> 2u;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** a second loop below computes the remaining 1 to 3 samples. */
+ while(blkCnt > 0u)
+ {
+ /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+ /* Compute Sum of squares of the input samples
+ * and then store the result in a temporary variable, sum. */
+ in = *__SIMD32(pSrc)++;
+ sum += ((in << 16) >> 16);
+ sum += (in >> 16);
+ sumOfSquares = __SMLALD(in, in, sumOfSquares);
+ in = *__SIMD32(pSrc)++;
+ sum += ((in << 16) >> 16);
+ sum += (in >> 16);
+ sumOfSquares = __SMLALD(in, in, sumOfSquares);
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+ ** No loop unrolling is used. */
+ blkCnt = blockSize % 0x4u;
+
+ while(blkCnt > 0u)
+ {
+ /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+ /* Compute Sum of squares of the input samples
+ * and then store the result in a temporary variable, sum. */
+ in1 = *pSrc++;
+ sumOfSquares = __SMLALD(in1, in1, sumOfSquares);
+ sum += in1;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Compute Mean of squares of the input samples
+ * and then store the result in a temporary variable, meanOfSquares. */
+ t = (q15_t) ((1.0 / (blockSize - 1)) * 16384LL);
+ sumOfSquares = __SSAT((sumOfSquares >> 15u), 16u);
+
+ meanOfSquares = (q31_t) ((sumOfSquares * t) >> 14u);
+
+ /* Compute mean of all input values */
+ t = (q15_t) ((1.0 / (blockSize * (blockSize - 1))) * 32768LL);
+ mean = (q15_t) __SSAT(sum, 16u);
+
+ /* Compute square of mean */
+ squareOfMean = ((q31_t) mean * mean) >> 15;
+ squareOfMean = (q31_t) (((q63_t) squareOfMean * t) >> 15);
+
+ /* mean of the squares minus the square of the mean. */
+ in1 = (q15_t) (meanOfSquares - squareOfMean);
+
+ /* Compute standard deviation and store the result to the destination */
+ arm_sqrt_q15(in1, pResult);
+
+#else
+
+ /* Run the below code for Cortex-M0 */
+ q15_t in; /* input value */
+
+ /* Loop over blockSize number of values */
+ blkCnt = blockSize;
+
+ while(blkCnt > 0u)
+ {
+ /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+ /* Compute Sum of squares of the input samples
+ * and then store the result in a temporary variable, sumOfSquares. */
+ in = *pSrc++;
+ sumOfSquares += (in * in);
+
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+ /* Compute sum of all input values and then store the result in a temporary variable, sum. */
+ sum += in;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Compute Mean of squares of the input samples
+ * and then store the result in a temporary variable, meanOfSquares. */
+ t = (q15_t) ((1.0 / (blockSize - 1)) * 16384LL);
+ sumOfSquares = __SSAT((sumOfSquares >> 15u), 16u);
+ meanOfSquares = (q31_t) ((sumOfSquares * t) >> 14u);
+
+ /* Compute mean of all input values */
+ mean = (q15_t) __SSAT(sum, 16u);
+
+ /* Compute square of mean of the input samples
+ * and then store the result in a temporary variable, squareOfMean.*/
+ t = (q15_t) ((1.0 / (blockSize * (blockSize - 1))) * 32768LL);
+ squareOfMean = ((q31_t) mean * mean) >> 15;
+ squareOfMean = (q31_t) (((q63_t) squareOfMean * t) >> 15);
+
+ /* mean of the squares minus the square of the mean. */
+ in = (q15_t) (meanOfSquares - squareOfMean);
+
+ /* Compute standard deviation and store the result to the destination */
+ arm_sqrt_q15(in, pResult);
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+
+}
+
+/**
+ * @} end of STD group
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_std_q31.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_std_q31.c
new file mode 100644
index 000000000..45bec01a1
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_std_q31.c
@@ -0,0 +1,184 @@
+/* ----------------------------------------------------------------------
+* Copyright (C) 2010 ARM Limited. All rights reserved.
+*
+* $Date: 15. February 2012
+* $Revision: V1.1.0
+*
+* Project: CMSIS DSP Library
+* Title: arm_std_q31.c
+*
+* Description: Standard deviation of an array of Q31 type.
+*
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*
+* Version 1.1.0 2012/02/15
+* Updated with more optimizations, bug fixes and minor API changes.
+*
+* Version 1.0.10 2011/7/15
+* Big Endian support added and Merged M0 and M3/M4 Source code.
+*
+* Version 1.0.3 2010/11/29
+* Re-organized the CMSIS folders and updated documentation.
+*
+* Version 1.0.2 2010/11/11
+* Documentation updated.
+*
+* Version 1.0.1 2010/10/05
+* Production release and review comments incorporated.
+*
+* Version 1.0.0 2010/09/20
+* Production release and review comments incorporated.
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupStats
+ */
+
+/**
+ * @addtogroup STD
+ * @{
+ */
+
+
+/**
+ * @brief Standard deviation of the elements of a Q31 vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult standard deviation value returned here
+ * @return none.
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ *
+ *\par
+ * The function is implemented using an internal 64-bit accumulator.
+ * The input is represented in 1.31 format, and intermediate multiplication
+ * yields a 2.62 format.
+ * The accumulator maintains full precision of the intermediate multiplication results,
+ * but provides only a single guard bit.
+ * There is no saturation on intermediate additions.
+ * If the accumulator overflows it wraps around and distorts the result.
+ * In order to avoid overflows completely the input signal must be scaled down by
+ * log2(blockSize) bits, as a total of blockSize additions are performed internally.
+ * Finally, the 2.62 accumulator is right shifted by 31 bits to yield a 1.31 format value.
+ *
+ */
+
+
+void arm_std_q31(
+ q31_t * pSrc,
+ uint32_t blockSize,
+ q31_t * pResult)
+{
+ q63_t sum = 0; /* Accumulator */
+ q31_t meanOfSquares, squareOfMean; /* square of mean and mean of square */
+ q31_t mean; /* mean */
+ q31_t in; /* input value */
+ q31_t t; /* Temporary variable */
+ uint32_t blkCnt; /* loop counter */
+ q63_t sumOfSquares = 0; /* Accumulator */
+
+#ifndef ARM_MATH_CM0
+
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+ /*loop Unrolling */
+ blkCnt = blockSize >> 2u;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** a second loop below computes the remaining 1 to 3 samples. */
+ while(blkCnt > 0u)
+ {
+ /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+ /* Compute Sum of squares of the input samples
+ * and then store the result in a temporary variable, sum. */
+ in = *pSrc++;
+ sum += in;
+ sumOfSquares += ((q63_t) (in) * (in));
+ in = *pSrc++;
+ sum += in;
+ sumOfSquares += ((q63_t) (in) * (in));
+ in = *pSrc++;
+ sum += in;
+ sumOfSquares += ((q63_t) (in) * (in));
+ in = *pSrc++;
+ sum += in;
+ sumOfSquares += ((q63_t) (in) * (in));
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+ ** No loop unrolling is used. */
+ blkCnt = blockSize % 0x4u;
+
+ while(blkCnt > 0u)
+ {
+ /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+ /* Compute Sum of squares of the input samples
+ * and then store the result in a temporary variable, sum. */
+ in = *pSrc++;
+ sum += in;
+ sumOfSquares += ((q63_t) (in) * (in));
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ t = (q31_t) ((1.0f / (float32_t) (blockSize - 1u)) * 1073741824.0f);
+
+ /* Compute Mean of squares of the input samples
+ * and then store the result in a temporary variable, meanOfSquares. */
+ sumOfSquares = (sumOfSquares >> 31);
+ meanOfSquares = (q31_t) ((sumOfSquares * t) >> 30);
+
+#else
+
+ /* Run the below code for Cortex-M0 */
+
+ /* Loop over blockSize number of values */
+ blkCnt = blockSize;
+
+ while(blkCnt > 0u)
+ {
+ /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+ /* Compute Sum of squares of the input samples
+ * and then store the result in a temporary variable, sumOfSquares. */
+ in = *pSrc++;
+ sumOfSquares += ((q63_t) (in) * (in));
+
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+ /* Compute sum of all input values and then store the result in a temporary variable, sum. */
+ sum += in;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Compute Mean of squares of the input samples
+ * and then store the result in a temporary variable, meanOfSquares. */
+ t = (q31_t) ((1.0f / (float32_t) (blockSize - 1u)) * 1073741824.0f);
+ sumOfSquares = (sumOfSquares >> 31);
+ meanOfSquares = (q31_t) ((sumOfSquares * t) >> 30);
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+ /* Compute mean of all input values */
+ t = (q31_t) ((1.0f / (blockSize * (blockSize - 1u))) * 2147483648.0f);
+ mean = (q31_t) (sum);
+
+ /* Compute square of mean */
+ squareOfMean = (q31_t) (((q63_t) mean * mean) >> 31);
+ squareOfMean = (q31_t) (((q63_t) squareOfMean * t) >> 31);
+
+
+ /* Compute standard deviation and then store the result to the destination */
+ arm_sqrt_q31(meanOfSquares - squareOfMean, pResult);
+
+}
+
+/**
+ * @} end of STD group
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_var_f32.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_var_f32.c
new file mode 100644
index 000000000..2adcfb443
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_var_f32.c
@@ -0,0 +1,184 @@
+/* ----------------------------------------------------------------------
+* Copyright (C) 2010 ARM Limited. All rights reserved.
+*
+* $Date: 15. February 2012
+* $Revision: V1.1.0
+*
+* Project: CMSIS DSP Library
+* Title: arm_var_f32.c
+*
+* Description: Variance of the elements of a floating-point vector.
+*
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*
+* Version 1.1.0 2012/02/15
+* Updated with more optimizations, bug fixes and minor API changes.
+*
+* Version 1.0.10 2011/7/15
+* Big Endian support added and Merged M0 and M3/M4 Source code.
+*
+* Version 1.0.3 2010/11/29
+* Re-organized the CMSIS folders and updated documentation.
+*
+* Version 1.0.2 2010/11/11
+* Documentation updated.
+*
+* Version 1.0.1 2010/10/05
+* Production release and review comments incorporated.
+*
+* Version 1.0.0 2010/09/20
+* Production release and review comments incorporated.
+* ---------------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupStats
+ */
+
+/**
+ * @defgroup variance Variance
+ *
+ * Calculates the variance of the elements in the input vector.
+ * The underlying algorithm is used:
+ *
+ * <pre>
+ * Result = (sumOfSquares - sum<sup>2</sup> / blockSize) / (blockSize - 1)
+ *
+ * where, sumOfSquares = pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] * pSrc[blockSize-1]
+ *
+ * sum = pSrc[0] + pSrc[1] + pSrc[2] + ... + pSrc[blockSize-1]
+ * </pre>
+ *
+ * There are separate functions for floating point, Q31, and Q15 data types.
+ */
+
+/**
+ * @addtogroup variance
+ * @{
+ */
+
+
+/**
+ * @brief Variance of the elements of a floating-point vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult variance value returned here
+ * @return none.
+ *
+ */
+
+
+void arm_var_f32(
+ float32_t * pSrc,
+ uint32_t blockSize,
+ float32_t * pResult)
+{
+
+ float32_t sum = 0.0f; /* Temporary result storage */
+ float32_t sumOfSquares = 0.0f; /* Sum of squares */
+ float32_t in; /* input value */
+ uint32_t blkCnt; /* loop counter */
+
+#ifndef ARM_MATH_CM0
+
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+ float32_t meanOfSquares, mean, squareOfMean; /* Temporary variables */
+
+ /*loop Unrolling */
+ blkCnt = blockSize >> 2u;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** a second loop below computes the remaining 1 to 3 samples. */
+ while(blkCnt > 0u)
+ {
+ /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+ /* Compute Sum of squares of the input samples
+ * and then store the result in a temporary variable, sum. */
+ in = *pSrc++;
+ sum += in;
+ sumOfSquares += in * in;
+ in = *pSrc++;
+ sum += in;
+ sumOfSquares += in * in;
+ in = *pSrc++;
+ sum += in;
+ sumOfSquares += in * in;
+ in = *pSrc++;
+ sum += in;
+ sumOfSquares += in * in;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+ ** No loop unrolling is used. */
+ blkCnt = blockSize % 0x4u;
+
+ while(blkCnt > 0u)
+ {
+ /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+ /* Compute Sum of squares of the input samples
+ * and then store the result in a temporary variable, sum. */
+ in = *pSrc++;
+ sum += in;
+ sumOfSquares += in * in;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Compute Mean of squares of the input samples
+ * and then store the result in a temporary variable, meanOfSquares. */
+ meanOfSquares = sumOfSquares / ((float32_t) blockSize - 1.0f);
+
+ /* Compute mean of all input values */
+ mean = sum / (float32_t) blockSize;
+
+ /* Compute square of mean */
+ squareOfMean = (mean * mean) * (((float32_t) blockSize) /
+ ((float32_t) blockSize - 1.0f));
+
+ /* Compute variance and then store the result to the destination */
+ *pResult = meanOfSquares - squareOfMean;
+
+#else
+
+ /* Run the below code for Cortex-M0 */
+ float32_t squareOfSum; /* Square of Sum */
+
+ /* Loop over blockSize number of values */
+ blkCnt = blockSize;
+
+ while(blkCnt > 0u)
+ {
+ /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+ /* Compute Sum of squares of the input samples
+ * and then store the result in a temporary variable, sumOfSquares. */
+ in = *pSrc++;
+ sumOfSquares += in * in;
+
+ /* C = (A[0] + A[1] + ... + A[blockSize-1]) */
+ /* Compute Sum of the input samples
+ * and then store the result in a temporary variable, sum. */
+ sum += in;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Compute the square of sum */
+ squareOfSum = ((sum * sum) / (float32_t) blockSize);
+
+ /* Compute the variance */
+ *pResult = ((sumOfSquares - squareOfSum) / (float32_t) (blockSize - 1.0f));
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+}
+
+/**
+ * @} end of variance group
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_var_q15.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_var_q15.c
new file mode 100644
index 000000000..08dbc6587
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_var_q15.c
@@ -0,0 +1,180 @@
+/* ----------------------------------------------------------------------
+* Copyright (C) 2010 ARM Limited. All rights reserved.
+*
+* $Date: 15. February 2012
+* $Revision: V1.1.0
+*
+* Project: CMSIS DSP Library
+* Title: arm_var_q15.c
+*
+* Description: Variance of an array of Q15 type.
+*
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*
+* Version 1.1.0 2012/02/15
+* Updated with more optimizations, bug fixes and minor API changes.
+*
+* Version 1.0.10 2011/7/15
+* Big Endian support added and Merged M0 and M3/M4 Source code.
+*
+* Version 1.0.3 2010/11/29
+* Re-organized the CMSIS folders and updated documentation.
+*
+* Version 1.0.2 2010/11/11
+* Documentation updated.
+*
+* Version 1.0.1 2010/10/05
+* Production release and review comments incorporated.
+*
+* Version 1.0.0 2010/09/20
+* Production release and review comments incorporated.
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupStats
+ */
+
+/**
+ * @addtogroup variance
+ * @{
+ */
+
+/**
+ * @brief Variance of the elements of a Q15 vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult variance value returned here
+ * @return none.
+ *
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ *
+ * \par
+ * The function is implemented using a 64-bit internal accumulator.
+ * The input is represented in 1.15 format.
+ * Intermediate multiplication yields a 2.30 format, and this
+ * result is added without saturation to a 64-bit accumulator in 34.30 format.
+ * With 33 guard bits in the accumulator, there is no risk of overflow, and the
+ * full precision of the intermediate multiplication is preserved.
+ * Finally, the 34.30 result is truncated to 34.15 format by discarding the lower
+ * 15 bits, and then saturated to yield a result in 1.15 format.
+ *
+ */
+
+
+void arm_var_q15(
+ q15_t * pSrc,
+ uint32_t blockSize,
+ q31_t * pResult)
+{
+ q31_t sum = 0; /* Accumulator */
+ q31_t meanOfSquares, squareOfMean; /* Mean of square and square of mean */
+ q15_t mean; /* mean */
+ uint32_t blkCnt; /* loop counter */
+ q15_t t; /* Temporary variable */
+ q63_t sumOfSquares = 0; /* Accumulator */
+
+#ifndef ARM_MATH_CM0
+
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+ q31_t in; /* Input variable */
+ q15_t in1; /* Temporary variable */
+
+ /*loop Unrolling */
+ blkCnt = blockSize >> 2u;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** a second loop below computes the remaining 1 to 3 samples. */
+ while(blkCnt > 0u)
+ {
+ /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+ /* Compute Sum of squares of the input samples
+ * and then store the result in a temporary variable, sum. */
+ in = *__SIMD32(pSrc)++;
+ sum += ((in << 16) >> 16);
+ sum += (in >> 16);
+ sumOfSquares = __SMLALD(in, in, sumOfSquares);
+ in = *__SIMD32(pSrc)++;
+ sum += ((in << 16) >> 16);
+ sum += (in >> 16);
+ sumOfSquares = __SMLALD(in, in, sumOfSquares);
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+ ** No loop unrolling is used. */
+ blkCnt = blockSize % 0x4u;
+
+ while(blkCnt > 0u)
+ {
+ /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+ /* Compute Sum of squares of the input samples
+ * and then store the result in a temporary variable, sum. */
+ in1 = *pSrc++;
+ sum += in1;
+ sumOfSquares = __SMLALD(in1, in1, sumOfSquares);
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Compute Mean of squares of the input samples
+ * and then store the result in a temporary variable, meanOfSquares. */
+ t = (q15_t) ((1.0f / (float32_t) (blockSize - 1u)) * 16384);
+ sumOfSquares = __SSAT((sumOfSquares >> 15u), 16u);
+
+ meanOfSquares = (q31_t) ((sumOfSquares * t) >> 14u);
+
+#else
+
+ /* Run the below code for Cortex-M0 */
+
+ q15_t in; /* Temporary variable */
+ /* Loop over blockSize number of values */
+ blkCnt = blockSize;
+
+ while(blkCnt > 0u)
+ {
+ /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+ /* Compute Sum of squares of the input samples
+ * and then store the result in a temporary variable, sumOfSquares. */
+ in = *pSrc++;
+ sumOfSquares += (in * in);
+
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+ /* Compute sum of all input values and then store the result in a temporary variable, sum. */
+ sum += in;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Compute Mean of squares of the input samples
+ * and then store the result in a temporary variable, meanOfSquares. */
+ t = (q15_t) ((1.0f / (float32_t) (blockSize - 1u)) * 16384);
+ sumOfSquares = __SSAT((sumOfSquares >> 15u), 16u);
+ meanOfSquares = (q31_t) ((sumOfSquares * t) >> 14u);
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+ /* Compute mean of all input values */
+ t = (q15_t) ((1.0f / (float32_t) (blockSize * (blockSize - 1u))) * 32768);
+ mean = __SSAT(sum, 16u);
+
+ /* Compute square of mean */
+ squareOfMean = ((q31_t) mean * mean) >> 15;
+ squareOfMean = (q31_t) (((q63_t) squareOfMean * t) >> 15);
+
+ /* Compute variance and then store the result to the destination */
+ *pResult = (meanOfSquares - squareOfMean);
+
+}
+
+/**
+ * @} end of variance group
+ */
diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_var_q31.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_var_q31.c
new file mode 100644
index 000000000..13d6c15cb
--- /dev/null
+++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_var_q31.c
@@ -0,0 +1,170 @@
+/* ----------------------------------------------------------------------
+* Copyright (C) 2010 ARM Limited. All rights reserved.
+*
+* $Date: 15. February 2012
+* $Revision: V1.1.0
+*
+* Project: CMSIS DSP Library
+* Title: arm_var_q31.c
+*
+* Description: Variance of an array of Q31 type.
+*
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*
+* Version 1.1.0 2012/02/15
+* Updated with more optimizations, bug fixes and minor API changes.
+*
+* Version 1.0.10 2011/7/15
+* Big Endian support added and Merged M0 and M3/M4 Source code.
+*
+* Version 1.0.3 2010/11/29
+* Re-organized the CMSIS folders and updated documentation.
+*
+* Version 1.0.2 2010/11/11
+* Documentation updated.
+*
+* Version 1.0.1 2010/10/05
+* Production release and review comments incorporated.
+*
+* Version 1.0.0 2010/09/20
+* Production release and review comments incorporated.
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupStats
+ */
+
+/**
+ * @addtogroup variance
+ * @{
+ */
+
+/**
+ * @brief Variance of the elements of a Q31 vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult variance value returned here
+ * @return none.
+ *
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ *
+ *\par
+ * The function is implemented using an internal 64-bit accumulator.
+ * The input is represented in 1.31 format, and intermediate multiplication
+ * yields a 2.62 format.
+ * The accumulator maintains full precision of the intermediate multiplication results,
+ * but provides only a single guard bit.
+ * There is no saturation on intermediate additions.
+ * If the accumulator overflows it wraps around and distorts the result.
+ * In order to avoid overflows completely the input signal must be scaled down by
+ * log2(blockSize) bits, as a total of blockSize additions are performed internally.
+ * Finally, the 2.62 accumulator is right shifted by 31 bits to yield a 1.31 format value.
+ *
+ */
+
+
+void arm_var_q31(
+ q31_t * pSrc,
+ uint32_t blockSize,
+ q63_t * pResult)
+{
+ q63_t sum = 0, sumSquare = 0; /* Accumulator */
+ q31_t meanOfSquares, squareOfMean; /* square of mean and mean of square */
+ q31_t mean; /* mean */
+ q31_t in; /* input value */
+ q31_t t; /* Temporary variable */
+ uint32_t blkCnt; /* loop counter */
+
+#ifndef ARM_MATH_CM0
+
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+ q63_t sumSquare1 = 0; /* Accumulator */
+ q31_t in1, in2, in3, in4; /* Temporary input variables */
+
+ /*loop Unrolling */
+ blkCnt = blockSize >> 2u;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** a second loop below computes the remaining 1 to 3 samples. */
+ while(blkCnt > 0u)
+ {
+ /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+ /* Compute Sum of squares of the input samples
+ * and then store the result in a temporary variable, sum. */
+ /* read input samples from source buffer */
+ in1 = pSrc[0];
+ in2 = pSrc[1];
+
+ /* calculate sum of inputs */
+ sum += in1;
+ /* calculate sum of squares */
+ sumSquare += ((q63_t) (in1) * (in1));
+ in3 = pSrc[2];
+ sum += in2;
+ sumSquare1 += ((q63_t) (in2) * (in2));
+ in4 = pSrc[3];
+ sum += in3;
+ sumSquare += ((q63_t) (in3) * (in3));
+ sum += in4;
+ sumSquare1 += ((q63_t) (in4) * (in4));
+
+ /* update input pointer to process next samples */
+ pSrc += 4u;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* add two accumulators */
+ sumSquare = sumSquare + sumSquare1;
+
+ /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+ ** No loop unrolling is used. */
+ blkCnt = blockSize % 0x4u;
+
+#else
+
+ /* Run the below code for Cortex-M0 */
+ blkCnt = blockSize;
+
+#endif /* #ifndef ARM_MATH_CM0 */
+
+ while(blkCnt > 0u)
+ {
+ /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+ /* Compute Sum of squares of the input samples
+ * and then store the result in a temporary variable, sum. */
+ in = *pSrc++;
+ sumSquare += ((q63_t) (in) * (in));
+ sum += in;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ t = (q31_t) ((1.0f / (float32_t) (blockSize - 1u)) * 1073741824.0f);
+
+ /* Compute Mean of squares of the input samples
+ * and then store the result in a temporary variable, meanOfSquares. */
+ sumSquare = (sumSquare >> 31);
+ meanOfSquares = (q31_t) ((sumSquare * t) >> 30);
+
+ /* Compute mean of all input values */
+ t = (q31_t) ((1.0f / (blockSize * (blockSize - 1u))) * 2147483648.0f);
+ mean = (q31_t) (sum);
+
+ /* Compute square of mean */
+ squareOfMean = (q31_t) (((q63_t) mean * mean) >> 31);
+ squareOfMean = (q31_t) (((q63_t) squareOfMean * t) >> 31);
+
+ /* Compute variance and then store the result to the destination */
+ *pResult = (q63_t) meanOfSquares - squareOfMean;
+
+}
+
+/**
+ * @} end of variance group
+ */