From 13fc6703862862f4263d8d5d085b7a16b87190e1 Mon Sep 17 00:00:00 2001 From: Lorenz Meier Date: Sun, 28 Apr 2013 09:54:11 +0200 Subject: Moved last libs, drivers and headers, cleaned up IO build --- .../Source/StatisticsFunctions/arm_max_f32.c | 178 +++++++++++++++++++ .../Source/StatisticsFunctions/arm_max_q15.c | 168 ++++++++++++++++++ .../Source/StatisticsFunctions/arm_max_q31.c | 169 ++++++++++++++++++ .../Source/StatisticsFunctions/arm_max_q7.c | 169 ++++++++++++++++++ .../Source/StatisticsFunctions/arm_mean_f32.c | 131 ++++++++++++++ .../Source/StatisticsFunctions/arm_mean_q15.c | 125 +++++++++++++ .../Source/StatisticsFunctions/arm_mean_q31.c | 128 +++++++++++++ .../Source/StatisticsFunctions/arm_mean_q7.c | 125 +++++++++++++ .../Source/StatisticsFunctions/arm_min_f32.c | 175 ++++++++++++++++++ .../Source/StatisticsFunctions/arm_min_q15.c | 169 ++++++++++++++++++ .../Source/StatisticsFunctions/arm_min_q31.c | 168 ++++++++++++++++++ .../Source/StatisticsFunctions/arm_min_q7.c | 170 ++++++++++++++++++ .../Source/StatisticsFunctions/arm_power_f32.c | 138 +++++++++++++++ .../Source/StatisticsFunctions/arm_power_q15.c | 144 +++++++++++++++ .../Source/StatisticsFunctions/arm_power_q31.c | 135 ++++++++++++++ .../Source/StatisticsFunctions/arm_power_q7.c | 133 ++++++++++++++ .../Source/StatisticsFunctions/arm_rms_f32.c | 133 ++++++++++++++ .../Source/StatisticsFunctions/arm_rms_q15.c | 153 ++++++++++++++++ .../Source/StatisticsFunctions/arm_rms_q31.c | 146 +++++++++++++++ .../Source/StatisticsFunctions/arm_std_f32.c | 188 ++++++++++++++++++++ .../Source/StatisticsFunctions/arm_std_q15.c | 197 +++++++++++++++++++++ .../Source/StatisticsFunctions/arm_std_q31.c | 184 +++++++++++++++++++ .../Source/StatisticsFunctions/arm_var_f32.c | 184 +++++++++++++++++++ .../Source/StatisticsFunctions/arm_var_q15.c | 180 +++++++++++++++++++ .../Source/StatisticsFunctions/arm_var_q31.c | 170 ++++++++++++++++++ 25 files changed, 3960 insertions(+) create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_f32.c create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_q15.c create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_q31.c create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_q7.c create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_f32.c create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_q15.c create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_q31.c create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_q7.c create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_f32.c create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_q15.c create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_q31.c create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_q7.c create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_f32.c create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_q15.c create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_q31.c create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_q7.c create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_rms_f32.c create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_rms_q15.c create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_rms_q31.c create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_std_f32.c create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_std_q15.c create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_std_q31.c create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_var_f32.c create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_var_q15.c create mode 100644 src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_var_q31.c (limited to 'src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions') diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_f32.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_f32.c new file mode 100644 index 000000000..afd964885 --- /dev/null +++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_f32.c @@ -0,0 +1,178 @@ +/* ---------------------------------------------------------------------- +* Copyright (C) 2010 ARM Limited. All rights reserved. +* +* $Date: 15. February 2012 +* $Revision: V1.1.0 +* +* Project: CMSIS DSP Library +* Title: arm_max_f32.c +* +* Description: Maximum value of a floating-point vector. +* +* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 +* +* Version 1.1.0 2012/02/15 +* Updated with more optimizations, bug fixes and minor API changes. +* +* Version 1.0.10 2011/7/15 +* Big Endian support added and Merged M0 and M3/M4 Source code. +* +* Version 1.0.3 2010/11/29 +* Re-organized the CMSIS folders and updated documentation. +* +* Version 1.0.2 2010/11/11 +* Documentation updated. +* +* Version 1.0.1 2010/10/05 +* Production release and review comments incorporated. +* +* Version 1.0.0 2010/09/20 +* Production release and review comments incorporated. +* ---------------------------------------------------------------------------- */ + +#include "arm_math.h" + +/** + * @ingroup groupStats + */ + +/** + * @defgroup Max Maximum + * + * Computes the maximum value of an array of data. + * The function returns both the maximum value and its position within the array. + * There are separate functions for floating-point, Q31, Q15, and Q7 data types. + */ + +/** + * @addtogroup Max + * @{ + */ + + +/** + * @brief Maximum value of a floating-point vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult maximum value returned here + * @param[out] *pIndex index of maximum value returned here + * @return none. + */ + +void arm_max_f32( + float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult, + uint32_t * pIndex) +{ +#ifndef ARM_MATH_CM0 + + /* Run the below code for Cortex-M4 and Cortex-M3 */ + float32_t maxVal1, maxVal2, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex, count; /* loop counter */ + + /* Initialise the count value. */ + count = 0u; + /* Initialise the index value to zero. */ + outIndex = 0u; + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + + /* Loop unrolling */ + blkCnt = (blockSize - 1u) >> 2u; + + /* Run the below code for Cortex-M4 and Cortex-M3 */ + while(blkCnt > 0u) + { + /* Initialize maxVal to the next consecutive values one by one */ + maxVal1 = *pSrc++; + + maxVal2 = *pSrc++; + + /* compare for the maximum value */ + if(out < maxVal1) + { + /* Update the maximum value and its index */ + out = maxVal1; + outIndex = count + 1u; + } + + maxVal1 = *pSrc++; + + /* compare for the maximum value */ + if(out < maxVal2) + { + /* Update the maximum value and its index */ + out = maxVal2; + outIndex = count + 2u; + } + + maxVal2 = *pSrc++; + + /* compare for the maximum value */ + if(out < maxVal1) + { + /* Update the maximum value and its index */ + out = maxVal1; + outIndex = count + 3u; + } + + /* compare for the maximum value */ + if(out < maxVal2) + { + /* Update the maximum value and its index */ + out = maxVal2; + outIndex = count + 4u; + } + + count += 4u; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* if (blockSize - 1u) is not multiple of 4 */ + blkCnt = (blockSize - 1u) % 4u; + +#else + + /* Run the below code for Cortex-M0 */ + float32_t maxVal1, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex; /* loop counter */ + + /* Initialise the index value to zero. */ + outIndex = 0u; + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + + blkCnt = (blockSize - 1u); + +#endif /* #ifndef ARM_MATH_CM0 */ + + while(blkCnt > 0u) + { + /* Initialize maxVal to the next consecutive values one by one */ + maxVal1 = *pSrc++; + + /* compare for the maximum value */ + if(out < maxVal1) + { + /* Update the maximum value and it's index */ + out = maxVal1; + outIndex = blockSize - blkCnt; + } + + + /* Decrement the loop counter */ + blkCnt--; + + } + + /* Store the maximum value and it's index into destination pointers */ + *pResult = out; + *pIndex = outIndex; +} + +/** + * @} end of Max group + */ diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_q15.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_q15.c new file mode 100644 index 000000000..939b5236f --- /dev/null +++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_q15.c @@ -0,0 +1,168 @@ +/* ---------------------------------------------------------------------- +* Copyright (C) 2010 ARM Limited. All rights reserved. +* +* $Date: 15. February 2012 +* $Revision: V1.1.0 +* +* Project: CMSIS DSP Library +* Title: arm_max_q15.c +* +* Description: Maximum value of a Q15 vector. +* +* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 +* +* Version 1.1.0 2012/02/15 +* Updated with more optimizations, bug fixes and minor API changes. +* +* Version 1.0.10 2011/7/15 +* Big Endian support added and Merged M0 and M3/M4 Source code. +* +* Version 1.0.3 2010/11/29 +* Re-organized the CMSIS folders and updated documentation. +* +* Version 1.0.2 2010/11/11 +* Documentation updated. +* +* Version 1.0.1 2010/10/05 +* Production release and review comments incorporated. +* +* Version 1.0.0 2010/09/20 +* Production release and review comments incorporated. +* ---------------------------------------------------------------------------- */ + +#include "arm_math.h" + +/** + * @ingroup groupStats + */ + +/** + * @addtogroup Max + * @{ + */ + + +/** + * @brief Maximum value of a Q15 vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult maximum value returned here + * @param[out] *pIndex index of maximum value returned here + * @return none. + */ + +void arm_max_q15( + q15_t * pSrc, + uint32_t blockSize, + q15_t * pResult, + uint32_t * pIndex) +{ +#ifndef ARM_MATH_CM0 + + /* Run the below code for Cortex-M4 and Cortex-M3 */ + q15_t maxVal1, maxVal2, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex, count; /* loop counter */ + + /* Initialise the count value. */ + count = 0u; + /* Initialise the index value to zero. */ + outIndex = 0u; + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + + /* Loop unrolling */ + blkCnt = (blockSize - 1u) >> 2u; + + /* Run the below code for Cortex-M4 and Cortex-M3 */ + while(blkCnt > 0u) + { + /* Initialize maxVal to the next consecutive values one by one */ + maxVal1 = *pSrc++; + + maxVal2 = *pSrc++; + + /* compare for the maximum value */ + if(out < maxVal1) + { + /* Update the maximum value and its index */ + out = maxVal1; + outIndex = count + 1u; + } + + maxVal1 = *pSrc++; + + /* compare for the maximum value */ + if(out < maxVal2) + { + /* Update the maximum value and its index */ + out = maxVal2; + outIndex = count + 2u; + } + + maxVal2 = *pSrc++; + + /* compare for the maximum value */ + if(out < maxVal1) + { + /* Update the maximum value and its index */ + out = maxVal1; + outIndex = count + 3u; + } + + /* compare for the maximum value */ + if(out < maxVal2) + { + /* Update the maximum value and its index */ + out = maxVal2; + outIndex = count + 4u; + } + + count += 4u; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* if (blockSize - 1u) is not multiple of 4 */ + blkCnt = (blockSize - 1u) % 4u; + +#else + + /* Run the below code for Cortex-M0 */ + q15_t maxVal1, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex; /* loop counter */ + + blkCnt = (blockSize - 1u); + + /* Initialise the index value to zero. */ + outIndex = 0u; + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + +#endif /* #ifndef ARM_MATH_CM0 */ + + while(blkCnt > 0u) + { + /* Initialize maxVal to the next consecutive values one by one */ + maxVal1 = *pSrc++; + + /* compare for the maximum value */ + if(out < maxVal1) + { + /* Update the maximum value and it's index */ + out = maxVal1; + outIndex = blockSize - blkCnt; + } + /* Decrement the loop counter */ + blkCnt--; + + } + + /* Store the maximum value and its index into destination pointers */ + *pResult = out; + *pIndex = outIndex; +} + +/** + * @} end of Max group + */ diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_q31.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_q31.c new file mode 100644 index 000000000..18e757297 --- /dev/null +++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_q31.c @@ -0,0 +1,169 @@ +/* ---------------------------------------------------------------------- +* Copyright (C) 2010 ARM Limited. All rights reserved. +* +* $Date: 15. February 2012 +* $Revision: V1.1.0 +* +* Project: CMSIS DSP Library +* Title: arm_max_q31.c +* +* Description: Maximum value of a Q31 vector. +* +* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 +* +* Version 1.1.0 2012/02/15 +* Updated with more optimizations, bug fixes and minor API changes. +* +* Version 1.0.10 2011/7/15 +* Big Endian support added and Merged M0 and M3/M4 Source code. +* +* Version 1.0.3 2010/11/29 +* Re-organized the CMSIS folders and updated documentation. +* +* Version 1.0.2 2010/11/11 +* Documentation updated. +* +* Version 1.0.1 2010/10/05 +* Production release and review comments incorporated. +* +* Version 1.0.0 2010/09/20 +* Production release and review comments incorporated. +* ---------------------------------------------------------------------------- */ + +#include "arm_math.h" + +/** + * @ingroup groupStats + */ + +/** + * @addtogroup Max + * @{ + */ + + +/** + * @brief Maximum value of a Q31 vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult maximum value returned here + * @param[out] *pIndex index of maximum value returned here + * @return none. + */ + +void arm_max_q31( + q31_t * pSrc, + uint32_t blockSize, + q31_t * pResult, + uint32_t * pIndex) +{ +#ifndef ARM_MATH_CM0 + + /* Run the below code for Cortex-M4 and Cortex-M3 */ + q31_t maxVal1, maxVal2, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex, count; /* loop counter */ + + /* Initialise the count value. */ + count = 0u; + /* Initialise the index value to zero. */ + outIndex = 0u; + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + + /* Loop unrolling */ + blkCnt = (blockSize - 1u) >> 2u; + + /* Run the below code for Cortex-M4 and Cortex-M3 */ + while(blkCnt > 0u) + { + /* Initialize maxVal to the next consecutive values one by one */ + maxVal1 = *pSrc++; + + maxVal2 = *pSrc++; + + /* compare for the maximum value */ + if(out < maxVal1) + { + /* Update the maximum value and its index */ + out = maxVal1; + outIndex = count + 1u; + } + + maxVal1 = *pSrc++; + + /* compare for the maximum value */ + if(out < maxVal2) + { + /* Update the maximum value and its index */ + out = maxVal2; + outIndex = count + 2u; + } + + maxVal2 = *pSrc++; + + /* compare for the maximum value */ + if(out < maxVal1) + { + /* Update the maximum value and its index */ + out = maxVal1; + outIndex = count + 3u; + } + + /* compare for the maximum value */ + if(out < maxVal2) + { + /* Update the maximum value and its index */ + out = maxVal2; + outIndex = count + 4u; + } + + count += 4u; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* if (blockSize - 1u) is not multiple of 4 */ + blkCnt = (blockSize - 1u) % 4u; + +#else + + /* Run the below code for Cortex-M0 */ + q31_t maxVal1, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex; /* loop counter */ + + /* Initialise the index value to zero. */ + outIndex = 0u; + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + + blkCnt = (blockSize - 1u); + +#endif /* #ifndef ARM_MATH_CM0 */ + + while(blkCnt > 0u) + { + /* Initialize maxVal to the next consecutive values one by one */ + maxVal1 = *pSrc++; + + /* compare for the maximum value */ + if(out < maxVal1) + { + /* Update the maximum value and it's index */ + out = maxVal1; + outIndex = blockSize - blkCnt; + } + + /* Decrement the loop counter */ + blkCnt--; + + } + + /* Store the maximum value and its index into destination pointers */ + *pResult = out; + *pIndex = outIndex; +} + +/** + * @} end of Max group + */ diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_q7.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_q7.c new file mode 100644 index 000000000..110a8fe5f --- /dev/null +++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_max_q7.c @@ -0,0 +1,169 @@ +/* ---------------------------------------------------------------------- +* Copyright (C) 2010 ARM Limited. All rights reserved. +* +* $Date: 15. February 2012 +* $Revision: V1.1.0 +* +* Project: CMSIS DSP Library +* Title: arm_max_q7.c +* +* Description: Maximum value of a Q7 vector. +* +* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 +* +* Version 1.1.0 2012/02/15 +* Updated with more optimizations, bug fixes and minor API changes. +* +* Version 1.0.10 2011/7/15 +* Big Endian support added and Merged M0 and M3/M4 Source code. +* +* Version 1.0.3 2010/11/29 +* Re-organized the CMSIS folders and updated documentation. +* +* Version 1.0.2 2010/11/11 +* Documentation updated. +* +* Version 1.0.1 2010/10/05 +* Production release and review comments incorporated. +* +* Version 1.0.0 2010/09/20 +* Production release and review comments incorporated. +* ---------------------------------------------------------------------------- */ + +#include "arm_math.h" + +/** + * @ingroup groupStats + */ + +/** + * @addtogroup Max + * @{ + */ + + +/** + * @brief Maximum value of a Q7 vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult maximum value returned here + * @param[out] *pIndex index of maximum value returned here + * @return none. + */ + +void arm_max_q7( + q7_t * pSrc, + uint32_t blockSize, + q7_t * pResult, + uint32_t * pIndex) +{ +#ifndef ARM_MATH_CM0 + /* Run the below code for Cortex-M4 and Cortex-M3 */ + + q7_t maxVal1, maxVal2, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex, count; /* loop counter */ + + /* Initialise the count value. */ + count = 0u; + /* Initialise the index value to zero. */ + outIndex = 0u; + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + + /* Loop unrolling */ + blkCnt = (blockSize - 1u) >> 2u; + + /* Run the below code for Cortex-M4 and Cortex-M3 */ + while(blkCnt > 0u) + { + /* Initialize maxVal to the next consecutive values one by one */ + maxVal1 = *pSrc++; + + maxVal2 = *pSrc++; + + /* compare for the maximum value */ + if(out < maxVal1) + { + /* Update the maximum value and its index */ + out = maxVal1; + outIndex = count + 1u; + } + + maxVal1 = *pSrc++; + + /* compare for the maximum value */ + if(out < maxVal2) + { + /* Update the maximum value and its index */ + out = maxVal2; + outIndex = count + 2u; + } + + maxVal2 = *pSrc++; + + /* compare for the maximum value */ + if(out < maxVal1) + { + /* Update the maximum value and its index */ + out = maxVal1; + outIndex = count + 3u; + } + + /* compare for the maximum value */ + if(out < maxVal2) + { + /* Update the maximum value and its index */ + out = maxVal2; + outIndex = count + 4u; + } + + count += 4u; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* if (blockSize - 1u) is not multiple of 4 */ + blkCnt = (blockSize - 1u) % 4u; + +#else + + /* Run the below code for Cortex-M0 */ + q7_t maxVal1, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex; /* loop counter */ + + /* Initialise the index value to zero. */ + outIndex = 0u; + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + + blkCnt = (blockSize - 1u); + +#endif /* #ifndef ARM_MATH_CM0 */ + + while(blkCnt > 0u) + { + /* Initialize maxVal to the next consecutive values one by one */ + maxVal1 = *pSrc++; + + /* compare for the maximum value */ + if(out < maxVal1) + { + /* Update the maximum value and it's index */ + out = maxVal1; + outIndex = blockSize - blkCnt; + } + /* Decrement the loop counter */ + blkCnt--; + + } + + /* Store the maximum value and its index into destination pointers */ + *pResult = out; + *pIndex = outIndex; + +} + +/** + * @} end of Max group + */ diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_f32.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_f32.c new file mode 100644 index 000000000..7823d746c --- /dev/null +++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_f32.c @@ -0,0 +1,131 @@ +/* ---------------------------------------------------------------------- +* Copyright (C) 2010 ARM Limited. All rights reserved. +* +* $Date: 15. February 2012 +* $Revision: V1.1.0 +* +* Project: CMSIS DSP Library +* Title: arm_mean_f32.c +* +* Description: Mean value of a floating-point vector. +* +* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 +* +* Version 1.1.0 2012/02/15 +* Updated with more optimizations, bug fixes and minor API changes. +* +* Version 1.0.10 2011/7/15 +* Big Endian support added and Merged M0 and M3/M4 Source code. +* +* Version 1.0.3 2010/11/29 +* Re-organized the CMSIS folders and updated documentation. +* +* Version 1.0.2 2010/11/11 +* Documentation updated. +* +* Version 1.0.1 2010/10/05 +* Production release and review comments incorporated. +* +* Version 1.0.0 2010/09/20 +* Production release and review comments incorporated. +* ---------------------------------------------------------------------------- */ + +#include "arm_math.h" + +/** + * @ingroup groupStats + */ + +/** + * @defgroup mean Mean + * + * Calculates the mean of the input vector. Mean is defined as the average of the elements in the vector. + * The underlying algorithm is used: + * + *
    
+ * 	Result = (pSrc[0] + pSrc[1] + pSrc[2] + ... + pSrc[blockSize-1]) / blockSize;    
+ * 
+ * + * There are separate functions for floating-point, Q31, Q15, and Q7 data types. + */ + +/** + * @addtogroup mean + * @{ + */ + + +/** + * @brief Mean value of a floating-point vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult mean value returned here + * @return none. + */ + + +void arm_mean_f32( + float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult) +{ + float32_t sum = 0.0f; /* Temporary result storage */ + uint32_t blkCnt; /* loop counter */ + +#ifndef ARM_MATH_CM0 + + /* Run the below code for Cortex-M4 and Cortex-M3 */ + float32_t in1, in2, in3, in4; + + /*loop Unrolling */ + blkCnt = blockSize >> 2u; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while(blkCnt > 0u) + { + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ + in1 = *pSrc++; + in2 = *pSrc++; + in3 = *pSrc++; + in4 = *pSrc++; + + sum += in1; + sum += in2; + sum += in3; + sum += in4; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* If the blockSize is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = blockSize % 0x4u; + +#else + + /* Run the below code for Cortex-M0 */ + + /* Loop over blockSize number of values */ + blkCnt = blockSize; + +#endif /* #ifndef ARM_MATH_CM0 */ + + while(blkCnt > 0u) + { + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ + sum += *pSrc++; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */ + /* Store the result to the destination */ + *pResult = sum / (float32_t) blockSize; +} + +/** + * @} end of mean group + */ diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_q15.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_q15.c new file mode 100644 index 000000000..49ec1e62e --- /dev/null +++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_q15.c @@ -0,0 +1,125 @@ +/* ---------------------------------------------------------------------- +* Copyright (C) 2010 ARM Limited. All rights reserved. +* +* $Date: 15. February 2012 +* $Revision: V1.1.0 +* +* Project: CMSIS DSP Library +* Title: arm_mean_q15.c +* +* Description: Mean value of a Q15 vector. +* +* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 +* +* Version 1.1.0 2012/02/15 +* Updated with more optimizations, bug fixes and minor API changes. +* +* Version 1.0.10 2011/7/15 +* Big Endian support added and Merged M0 and M3/M4 Source code. +* +* Version 1.0.3 2010/11/29 +* Re-organized the CMSIS folders and updated documentation. +* +* Version 1.0.2 2010/11/11 +* Documentation updated. +* +* Version 1.0.1 2010/10/05 +* Production release and review comments incorporated. +* +* Version 1.0.0 2010/09/20 +* Production release and review comments incorporated. +* -------------------------------------------------------------------- */ + +#include "arm_math.h" + +/** + * @ingroup groupStats + */ + +/** + * @addtogroup mean + * @{ + */ + +/** + * @brief Mean value of a Q15 vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult mean value returned here + * @return none. + * + * @details + * Scaling and Overflow Behavior: + * \par + * The function is implemented using a 32-bit internal accumulator. + * The input is represented in 1.15 format and is accumulated in a 32-bit + * accumulator in 17.15 format. + * There is no risk of internal overflow with this approach, and the + * full precision of intermediate result is preserved. + * Finally, the accumulator is saturated and truncated to yield a result of 1.15 format. + * + */ + + +void arm_mean_q15( + q15_t * pSrc, + uint32_t blockSize, + q15_t * pResult) +{ + q31_t sum = 0; /* Temporary result storage */ + uint32_t blkCnt; /* loop counter */ + +#ifndef ARM_MATH_CM0 + + /* Run the below code for Cortex-M4 and Cortex-M3 */ + q31_t in; + + /*loop Unrolling */ + blkCnt = blockSize >> 2u; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while(blkCnt > 0u) + { + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ + in = *__SIMD32(pSrc)++; + sum += ((in << 16) >> 16); + sum += (in >> 16); + in = *__SIMD32(pSrc)++; + sum += ((in << 16) >> 16); + sum += (in >> 16); + + /* Decrement the loop counter */ + blkCnt--; + } + + /* If the blockSize is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = blockSize % 0x4u; + +#else + + /* Run the below code for Cortex-M0 */ + + /* Loop over blockSize number of values */ + blkCnt = blockSize; + +#endif /* #ifndef ARM_MATH_CM0 */ + + while(blkCnt > 0u) + { + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ + sum += *pSrc++; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */ + /* Store the result to the destination */ + *pResult = (q15_t) (sum / blockSize); +} + +/** + * @} end of mean group + */ diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_q31.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_q31.c new file mode 100644 index 000000000..88a5de299 --- /dev/null +++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_q31.c @@ -0,0 +1,128 @@ +/* ---------------------------------------------------------------------- +* Copyright (C) 2010 ARM Limited. All rights reserved. +* +* $Date: 15. February 2012 +* $Revision: V1.1.0 +* +* Project: CMSIS DSP Library +* Title: arm_mean_q31.c +* +* Description: Mean value of a Q31 vector. +* +* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 +* +* Version 1.1.0 2012/02/15 +* Updated with more optimizations, bug fixes and minor API changes. +* +* Version 1.0.10 2011/7/15 +* Big Endian support added and Merged M0 and M3/M4 Source code. +* +* Version 1.0.3 2010/11/29 +* Re-organized the CMSIS folders and updated documentation. +* +* Version 1.0.2 2010/11/11 +* Documentation updated. +* +* Version 1.0.1 2010/10/05 +* Production release and review comments incorporated. +* +* Version 1.0.0 2010/09/20 +* Production release and review comments incorporated. +* -------------------------------------------------------------------- */ + +#include "arm_math.h" + +/** + * @ingroup groupStats + */ + +/** + * @addtogroup mean + * @{ + */ + +/** + * @brief Mean value of a Q31 vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult mean value returned here + * @return none. + * + * @details + * Scaling and Overflow Behavior: + *\par + * The function is implemented using a 64-bit internal accumulator. + * The input is represented in 1.31 format and is accumulated in a 64-bit + * accumulator in 33.31 format. + * There is no risk of internal overflow with this approach, and the + * full precision of intermediate result is preserved. + * Finally, the accumulator is truncated to yield a result of 1.31 format. + * + */ + + +void arm_mean_q31( + q31_t * pSrc, + uint32_t blockSize, + q31_t * pResult) +{ + q63_t sum = 0; /* Temporary result storage */ + uint32_t blkCnt; /* loop counter */ + +#ifndef ARM_MATH_CM0 + + /* Run the below code for Cortex-M4 and Cortex-M3 */ + q31_t in1, in2, in3, in4; + + /*loop Unrolling */ + blkCnt = blockSize >> 2u; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while(blkCnt > 0u) + { + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ + in1 = *pSrc++; + in2 = *pSrc++; + in3 = *pSrc++; + in4 = *pSrc++; + + sum += in1; + sum += in2; + sum += in3; + sum += in4; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* If the blockSize is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = blockSize % 0x4u; + +#else + + /* Run the below code for Cortex-M0 */ + + /* Loop over blockSize number of values */ + blkCnt = blockSize; + +#endif /* #ifndef ARM_MATH_CM0 */ + + while(blkCnt > 0u) + { + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ + sum += *pSrc++; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */ + /* Store the result to the destination */ + *pResult = (q31_t) (sum / (int32_t) blockSize); +} + +/** + * @} end of mean group + */ diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_q7.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_q7.c new file mode 100644 index 000000000..a34c011bc --- /dev/null +++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_mean_q7.c @@ -0,0 +1,125 @@ +/* ---------------------------------------------------------------------- +* Copyright (C) 2010 ARM Limited. All rights reserved. +* +* $Date: 15. February 2012 +* $Revision: V1.1.0 +* +* Project: CMSIS DSP Library +* Title: arm_mean_q7.c +* +* Description: Mean value of a Q7 vector. +* +* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 +* +* Version 1.1.0 2012/02/15 +* Updated with more optimizations, bug fixes and minor API changes. +* +* Version 1.0.10 2011/7/15 +* Big Endian support added and Merged M0 and M3/M4 Source code. +* +* Version 1.0.3 2010/11/29 +* Re-organized the CMSIS folders and updated documentation. +* +* Version 1.0.2 2010/11/11 +* Documentation updated. +* +* Version 1.0.1 2010/10/05 +* Production release and review comments incorporated. +* +* Version 1.0.0 2010/09/20 +* Production release and review comments incorporated. +* -------------------------------------------------------------------- */ + +#include "arm_math.h" + +/** + * @ingroup groupStats + */ + +/** + * @addtogroup mean + * @{ + */ + +/** + * @brief Mean value of a Q7 vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult mean value returned here + * @return none. + * + * @details + * Scaling and Overflow Behavior: + * \par + * The function is implemented using a 32-bit internal accumulator. + * The input is represented in 1.7 format and is accumulated in a 32-bit + * accumulator in 25.7 format. + * There is no risk of internal overflow with this approach, and the + * full precision of intermediate result is preserved. + * Finally, the accumulator is truncated to yield a result of 1.7 format. + * + */ + + +void arm_mean_q7( + q7_t * pSrc, + uint32_t blockSize, + q7_t * pResult) +{ + q31_t sum = 0; /* Temporary result storage */ + uint32_t blkCnt; /* loop counter */ + +#ifndef ARM_MATH_CM0 + + /* Run the below code for Cortex-M4 and Cortex-M3 */ + q31_t in; + + /*loop Unrolling */ + blkCnt = blockSize >> 2u; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while(blkCnt > 0u) + { + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ + in = *__SIMD32(pSrc)++; + + sum += ((in << 24) >> 24); + sum += ((in << 16) >> 24); + sum += ((in << 8) >> 24); + sum += (in >> 24); + + /* Decrement the loop counter */ + blkCnt--; + } + + /* If the blockSize is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = blockSize % 0x4u; + +#else + + /* Run the below code for Cortex-M0 */ + + /* Loop over blockSize number of values */ + blkCnt = blockSize; + +#endif /* #ifndef ARM_MATH_CM0 */ + + while(blkCnt > 0u) + { + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ + sum += *pSrc++; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */ + /* Store the result to the destination */ + *pResult = (q7_t) (sum / (int32_t) blockSize); +} + +/** + * @} end of mean group + */ diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_f32.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_f32.c new file mode 100644 index 000000000..dc2fdf624 --- /dev/null +++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_f32.c @@ -0,0 +1,175 @@ +/* ---------------------------------------------------------------------- +* Copyright (C) 2010 ARM Limited. All rights reserved. +* +* $Date: 15. February 2012 +* $Revision: V1.1.0 +* +* Project: CMSIS DSP Library +* Title: arm_min_f32.c +* +* Description: Minimum value of a floating-point vector. +* +* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 +* +* Version 1.1.0 2012/02/15 +* Updated with more optimizations, bug fixes and minor API changes. +* +* Version 1.0.10 2011/7/15 +* Big Endian support added and Merged M0 and M3/M4 Source code. +* +* Version 1.0.3 2010/11/29 +* Re-organized the CMSIS folders and updated documentation. +* +* Version 1.0.2 2010/11/11 +* Documentation updated. +* +* Version 1.0.1 2010/10/05 +* Production release and review comments incorporated. +* +* Version 1.0.0 2010/09/20 +* Production release and review comments incorporated. +* ---------------------------------------------------------------------------- */ + +#include "arm_math.h" + +/** + * @ingroup groupStats + */ + +/** + * @defgroup Min Minimum + * + * Computes the minimum value of an array of data. + * The function returns both the minimum value and its position within the array. + * There are separate functions for floating-point, Q31, Q15, and Q7 data types. + */ + +/** + * @addtogroup Min + * @{ + */ + + +/** + * @brief Minimum value of a floating-point vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult minimum value returned here + * @param[out] *pIndex index of minimum value returned here + * @return none. + * + */ + +void arm_min_f32( + float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult, + uint32_t * pIndex) +{ +#ifndef ARM_MATH_CM0 + + /* Run the below code for Cortex-M4 and Cortex-M3 */ + + float32_t minVal1, minVal2, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex, count; /* loop counter */ + + /* Initialise the count value. */ + count = 0u; + /* Initialise the index value to zero. */ + outIndex = 0u; + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + + /* Loop unrolling */ + blkCnt = (blockSize - 1u) >> 2u; + + while(blkCnt > 0) + { + /* Initialize minVal to the next consecutive values one by one */ + minVal1 = *pSrc++; + minVal2 = *pSrc++; + + /* compare for the minimum value */ + if(out > minVal1) + { + /* Update the minimum value and its index */ + out = minVal1; + outIndex = count + 1u; + } + + minVal1 = *pSrc++; + + /* compare for the minimum value */ + if(out > minVal2) + { + /* Update the minimum value and its index */ + out = minVal2; + outIndex = count + 2u; + } + + minVal2 = *pSrc++; + + /* compare for the minimum value */ + if(out > minVal1) + { + /* Update the minimum value and its index */ + out = minVal1; + outIndex = count + 3u; + } + + /* compare for the minimum value */ + if(out > minVal2) + { + /* Update the minimum value and its index */ + out = minVal2; + outIndex = count + 4u; + } + + count += 4u; + + blkCnt--; + } + + /* if (blockSize - 1u ) is not multiple of 4 */ + blkCnt = (blockSize - 1u) % 4u; + +#else + + /* Run the below code for Cortex-M0 */ + float32_t minVal1, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex; /* loop counter */ + + /* Initialise the index value to zero. */ + outIndex = 0u; + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + + blkCnt = (blockSize - 1u); + +#endif // #ifndef ARM_MATH_CM0 + + while(blkCnt > 0) + { + /* Initialize minVal to the next consecutive values one by one */ + minVal1 = *pSrc++; + + /* compare for the minimum value */ + if(out > minVal1) + { + /* Update the minimum value and it's index */ + out = minVal1; + outIndex = blockSize - blkCnt; + } + + blkCnt--; + + } + + /* Store the minimum value and it's index into destination pointers */ + *pResult = out; + *pIndex = outIndex; +} + +/** + * @} end of Min group + */ diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_q15.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_q15.c new file mode 100644 index 000000000..35b67bb1e --- /dev/null +++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_q15.c @@ -0,0 +1,169 @@ +/* ---------------------------------------------------------------------- +* Copyright (C) 2010 ARM Limited. All rights reserved. +* +* $Date: 15. February 2012 +* $Revision: V1.1.0 +* +* Project: CMSIS DSP Library +* Title: arm_min_q15.c +* +* Description: Minimum value of a Q15 vector. +* +* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 +* +* Version 1.1.0 2012/02/15 +* Updated with more optimizations, bug fixes and minor API changes. +* +* Version 1.0.10 2011/7/15 +* Big Endian support added and Merged M0 and M3/M4 Source code. +* +* Version 1.0.3 2010/11/29 +* Re-organized the CMSIS folders and updated documentation. +* +* Version 1.0.2 2010/11/11 +* Documentation updated. +* +* Version 1.0.1 2010/10/05 +* Production release and review comments incorporated. +* +* Version 1.0.0 2010/09/20 +* Production release and review comments incorporated. +* ---------------------------------------------------------------------------- */ + +#include "arm_math.h" + +/** + * @ingroup groupStats + */ + + +/** + * @addtogroup Min + * @{ + */ + + +/** + * @brief Minimum value of a Q15 vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult minimum value returned here + * @param[out] *pIndex index of minimum value returned here + * @return none. + * + */ + +void arm_min_q15( + q15_t * pSrc, + uint32_t blockSize, + q15_t * pResult, + uint32_t * pIndex) +{ +#ifndef ARM_MATH_CM0 + + /* Run the below code for Cortex-M4 and Cortex-M3 */ + q15_t minVal1, minVal2, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex, count; /* loop counter */ + + /* Initialise the count value. */ + count = 0u; + /* Initialise the index value to zero. */ + outIndex = 0u; + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + + /* Loop unrolling */ + blkCnt = (blockSize - 1u) >> 2u; + + while(blkCnt > 0) + { + /* Initialize minVal to the next consecutive values one by one */ + minVal1 = *pSrc++; + minVal2 = *pSrc++; + + /* compare for the minimum value */ + if(out > minVal1) + { + /* Update the minimum value and its index */ + out = minVal1; + outIndex = count + 1u; + } + + minVal1 = *pSrc++; + + /* compare for the minimum value */ + if(out > minVal2) + { + /* Update the minimum value and its index */ + out = minVal2; + outIndex = count + 2u; + } + + minVal2 = *pSrc++; + + /* compare for the minimum value */ + if(out > minVal1) + { + /* Update the minimum value and its index */ + out = minVal1; + outIndex = count + 3u; + } + + /* compare for the minimum value */ + if(out > minVal2) + { + /* Update the minimum value and its index */ + out = minVal2; + outIndex = count + 4u; + } + + count += 4u; + + blkCnt--; + } + + /* if (blockSize - 1u ) is not multiple of 4 */ + blkCnt = (blockSize - 1u) % 4u; + +#else + + /* Run the below code for Cortex-M0 */ + q15_t minVal1, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex; /* loop counter */ + + blkCnt = (blockSize - 1u); + + /* Initialise the index value to zero. */ + outIndex = 0u; + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + +#endif // #ifndef ARM_MATH_CM0 + + while(blkCnt > 0) + { + /* Initialize minVal to the next consecutive values one by one */ + minVal1 = *pSrc++; + + /* compare for the minimum value */ + if(out > minVal1) + { + /* Update the minimum value and it's index */ + out = minVal1; + outIndex = blockSize - blkCnt; + } + + blkCnt--; + + } + + + + /* Store the minimum value and its index into destination pointers */ + *pResult = out; + *pIndex = outIndex; +} + +/** + * @} end of Min group + */ diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_q31.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_q31.c new file mode 100644 index 000000000..4a9befc75 --- /dev/null +++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_q31.c @@ -0,0 +1,168 @@ +/* ---------------------------------------------------------------------- +* Copyright (C) 2010 ARM Limited. All rights reserved. +* +* $Date: 15. February 2012 +* $Revision: V1.1.0 +* +* Project: CMSIS DSP Library +* Title: arm_min_q31.c +* +* Description: Minimum value of a Q31 vector. +* +* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 +* +* Version 1.1.0 2012/02/15 +* Updated with more optimizations, bug fixes and minor API changes. +* +* Version 1.0.10 2011/7/15 +* Big Endian support added and Merged M0 and M3/M4 Source code. +* +* Version 1.0.3 2010/11/29 +* Re-organized the CMSIS folders and updated documentation. +* +* Version 1.0.2 2010/11/11 +* Documentation updated. +* +* Version 1.0.1 2010/10/05 +* Production release and review comments incorporated. +* +* Version 1.0.0 2010/09/20 +* Production release and review comments incorporated. +* ---------------------------------------------------------------------------- */ + +#include "arm_math.h" + +/** + * @ingroup groupStats + */ + + +/** + * @addtogroup Min + * @{ + */ + + +/** + * @brief Minimum value of a Q31 vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult minimum value returned here + * @param[out] *pIndex index of minimum value returned here + * @return none. + * + */ + +void arm_min_q31( + q31_t * pSrc, + uint32_t blockSize, + q31_t * pResult, + uint32_t * pIndex) +{ +#ifndef ARM_MATH_CM0 + + /* Run the below code for Cortex-M4 and Cortex-M3 */ + q31_t minVal1, minVal2, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex, count; /* loop counter */ + + /* Initialise the count value. */ + count = 0u; + /* Initialise the index value to zero. */ + outIndex = 0u; + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + + + /* Loop unrolling */ + blkCnt = (blockSize - 1u) >> 2u; + + while(blkCnt > 0) + { + /* Initialize minVal to the next consecutive values one by one */ + minVal1 = *pSrc++; + minVal2 = *pSrc++; + + /* compare for the minimum value */ + if(out > minVal1) + { + /* Update the minimum value and its index */ + out = minVal1; + outIndex = count + 1u; + } + + minVal1 = *pSrc++; + + /* compare for the minimum value */ + if(out > minVal2) + { + /* Update the minimum value and its index */ + out = minVal2; + outIndex = count + 2u; + } + + minVal2 = *pSrc++; + + /* compare for the minimum value */ + if(out > minVal1) + { + /* Update the minimum value and its index */ + out = minVal1; + outIndex = count + 3u; + } + + /* compare for the minimum value */ + if(out > minVal2) + { + /* Update the minimum value and its index */ + out = minVal2; + outIndex = count + 4u; + } + + count += 4u; + + blkCnt--; + } + + /* if (blockSize - 1u ) is not multiple of 4 */ + blkCnt = (blockSize - 1u) % 4u; + +#else + + /* Run the below code for Cortex-M0 */ + q31_t minVal1, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex; /* loop counter */ + + blkCnt = (blockSize - 1u); + + /* Initialise the index value to zero. */ + outIndex = 0u; + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + +#endif // #ifndef ARM_MATH_CM0 + + while(blkCnt > 0) + { + /* Initialize minVal to the next consecutive values one by one */ + minVal1 = *pSrc++; + + /* compare for the minimum value */ + if(out > minVal1) + { + /* Update the minimum value and it's index */ + out = minVal1; + outIndex = blockSize - blkCnt; + } + + blkCnt--; + + } + + /* Store the minimum value and its index into destination pointers */ + *pResult = out; + *pIndex = outIndex; +} + +/** + * @} end of Min group + */ diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_q7.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_q7.c new file mode 100644 index 000000000..aaf8ad9ff --- /dev/null +++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_min_q7.c @@ -0,0 +1,170 @@ +/* ---------------------------------------------------------------------- +* Copyright (C) 2010 ARM Limited. All rights reserved. +* +* $Date: 15. February 2012 +* $Revision: V1.1.0 +* +* Project: CMSIS DSP Library +* Title: arm_min_q7.c +* +* Description: Minimum value of a Q7 vector. +* +* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 +* +* Version 1.1.0 2012/02/15 +* Updated with more optimizations, bug fixes and minor API changes. +* +* Version 1.0.10 2011/7/15 +* Big Endian support added and Merged M0 and M3/M4 Source code. +* +* Version 1.0.3 2010/11/29 +* Re-organized the CMSIS folders and updated documentation. +* +* Version 1.0.2 2010/11/11 +* Documentation updated. +* +* Version 1.0.1 2010/10/05 +* Production release and review comments incorporated. +* +* Version 1.0.0 2010/09/20 +* Production release and review comments incorporated. +* ---------------------------------------------------------------------------- */ + +#include "arm_math.h" + +/** + * @ingroup groupStats + */ + +/** + * @addtogroup Min + * @{ + */ + + +/** + * @brief Minimum value of a Q7 vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult minimum value returned here + * @param[out] *pIndex index of minimum value returned here + * @return none. + * + */ + +void arm_min_q7( + q7_t * pSrc, + uint32_t blockSize, + q7_t * pResult, + uint32_t * pIndex) +{ +#ifndef ARM_MATH_CM0 + + /* Run the below code for Cortex-M4 and Cortex-M3 */ + + q7_t minVal1, minVal2, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex, count; /* loop counter */ + + /* Initialise the count value. */ + count = 0u; + /* Initialise the index value to zero. */ + outIndex = 0u; + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + + /* Loop unrolling */ + blkCnt = (blockSize - 1u) >> 2u; + + while(blkCnt > 0) + { + /* Initialize minVal to the next consecutive values one by one */ + minVal1 = *pSrc++; + minVal2 = *pSrc++; + + /* compare for the minimum value */ + if(out > minVal1) + { + /* Update the minimum value and its index */ + out = minVal1; + outIndex = count + 1u; + } + + minVal1 = *pSrc++; + + /* compare for the minimum value */ + if(out > minVal2) + { + /* Update the minimum value and its index */ + out = minVal2; + outIndex = count + 2u; + } + + minVal2 = *pSrc++; + + /* compare for the minimum value */ + if(out > minVal1) + { + /* Update the minimum value and its index */ + out = minVal1; + outIndex = count + 3u; + } + + /* compare for the minimum value */ + if(out > minVal2) + { + /* Update the minimum value and its index */ + out = minVal2; + outIndex = count + 4u; + } + + count += 4u; + + blkCnt--; + } + + /* if (blockSize - 1u ) is not multiple of 4 */ + blkCnt = (blockSize - 1u) % 4u; + +#else + + /* Run the below code for Cortex-M0 */ + + q7_t minVal1, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex; /* loop counter */ + + /* Initialise the index value to zero. */ + outIndex = 0u; + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + + blkCnt = (blockSize - 1u); + +#endif // #ifndef ARM_MATH_CM0 + + while(blkCnt > 0) + { + /* Initialize minVal to the next consecutive values one by one */ + minVal1 = *pSrc++; + + /* compare for the minimum value */ + if(out > minVal1) + { + /* Update the minimum value and it's index */ + out = minVal1; + outIndex = blockSize - blkCnt; + } + + blkCnt--; + + } + + /* Store the minimum value and its index into destination pointers */ + *pResult = out; + *pIndex = outIndex; + + +} + +/** + * @} end of Min group + */ diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_f32.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_f32.c new file mode 100644 index 000000000..5ee0060d8 --- /dev/null +++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_f32.c @@ -0,0 +1,138 @@ +/* ---------------------------------------------------------------------- +* Copyright (C) 2010 ARM Limited. All rights reserved. +* +* $Date: 15. February 2012 +* $Revision: V1.1.0 +* +* Project: CMSIS DSP Library +* Title: arm_power_f32.c +* +* Description: Sum of the squares of the elements of a floating-point vector. +* +* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 +* +* Version 1.1.0 2012/02/15 +* Updated with more optimizations, bug fixes and minor API changes. +* +* Version 1.0.10 2011/7/15 +* Big Endian support added and Merged M0 and M3/M4 Source code. +* +* Version 1.0.3 2010/11/29 +* Re-organized the CMSIS folders and updated documentation. +* +* Version 1.0.2 2010/11/11 +* Documentation updated. +* +* Version 1.0.1 2010/10/05 +* Production release and review comments incorporated. +* +* Version 1.0.0 2010/09/20 +* Production release and review comments incorporated. +* +* Version 0.0.7 2010/06/10 +* Misra-C changes done +* ---------------------------------------------------------------------------- */ + +#include "arm_math.h" + +/** + * @ingroup groupStats + */ + +/** + * @defgroup power Power + * + * Calculates the sum of the squares of the elements in the input vector. + * The underlying algorithm is used: + * + *
    
+ * 	Result = pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + pSrc[2] * pSrc[2] + ... + pSrc[blockSize-1] * pSrc[blockSize-1];    
+ * 
+ * + * There are separate functions for floating point, Q31, Q15, and Q7 data types. + */ + +/** + * @addtogroup power + * @{ + */ + + +/** + * @brief Sum of the squares of the elements of a floating-point vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult sum of the squares value returned here + * @return none. + * + */ + + +void arm_power_f32( + float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult) +{ + float32_t sum = 0.0f; /* accumulator */ + float32_t in; /* Temporary variable to store input value */ + uint32_t blkCnt; /* loop counter */ + +#ifndef ARM_MATH_CM0 + + /* Run the below code for Cortex-M4 and Cortex-M3 */ + + /*loop Unrolling */ + blkCnt = blockSize >> 2u; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while(blkCnt > 0u) + { + /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */ + /* Compute Power and then store the result in a temporary variable, sum. */ + in = *pSrc++; + sum += in * in; + in = *pSrc++; + sum += in * in; + in = *pSrc++; + sum += in * in; + in = *pSrc++; + sum += in * in; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* If the blockSize is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = blockSize % 0x4u; + + +#else + + /* Run the below code for Cortex-M0 */ + + /* Loop over blockSize number of values */ + blkCnt = blockSize; + +#endif /* #ifndef ARM_MATH_CM0 */ + + + while(blkCnt > 0u) + { + /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */ + /* compute power and then store the result in a temporary variable, sum. */ + in = *pSrc++; + sum += in * in; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Store the result to the destination */ + *pResult = sum; +} + +/** + * @} end of power group + */ diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_q15.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_q15.c new file mode 100644 index 000000000..aed06a17a --- /dev/null +++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_q15.c @@ -0,0 +1,144 @@ +/* ---------------------------------------------------------------------- +* Copyright (C) 2010 ARM Limited. All rights reserved. +* +* $Date: 15. February 2012 +* $Revision: V1.1.0 +* +* Project: CMSIS DSP Library +* Title: arm_power_q15.c +* +* Description: Sum of the squares of the elements of a Q15 vector. +* +* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 +* +* Version 1.1.0 2012/02/15 +* Updated with more optimizations, bug fixes and minor API changes. +* +* Version 1.0.10 2011/7/15 +* Big Endian support added and Merged M0 and M3/M4 Source code. +* +* Version 1.0.3 2010/11/29 +* Re-organized the CMSIS folders and updated documentation. +* +* Version 1.0.2 2010/11/11 +* Documentation updated. +* +* Version 1.0.1 2010/10/05 +* Production release and review comments incorporated. +* +* Version 1.0.0 2010/09/20 +* Production release and review comments incorporated. +* -------------------------------------------------------------------- */ + +#include "arm_math.h" + +/** + * @ingroup groupStats + */ + +/** + * @addtogroup power + * @{ + */ + +/** + * @brief Sum of the squares of the elements of a Q15 vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult sum of the squares value returned here + * @return none. + * + * @details + * Scaling and Overflow Behavior: + * + * \par + * The function is implemented using a 64-bit internal accumulator. + * The input is represented in 1.15 format. + * Intermediate multiplication yields a 2.30 format, and this + * result is added without saturation to a 64-bit accumulator in 34.30 format. + * With 33 guard bits in the accumulator, there is no risk of overflow, and the + * full precision of the intermediate multiplication is preserved. + * Finally, the return result is in 34.30 format. + * + */ + +void arm_power_q15( + q15_t * pSrc, + uint32_t blockSize, + q63_t * pResult) +{ + q63_t sum = 0; /* Temporary result storage */ + +#ifndef ARM_MATH_CM0 + + /* Run the below code for Cortex-M4 and Cortex-M3 */ + + q31_t in32; /* Temporary variable to store input value */ + q15_t in16; /* Temporary variable to store input value */ + uint32_t blkCnt; /* loop counter */ + + + /* loop Unrolling */ + blkCnt = blockSize >> 2u; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while(blkCnt > 0u) + { + /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */ + /* Compute Power and then store the result in a temporary variable, sum. */ + in32 = *__SIMD32(pSrc)++; + sum = __SMLALD(in32, in32, sum); + in32 = *__SIMD32(pSrc)++; + sum = __SMLALD(in32, in32, sum); + + /* Decrement the loop counter */ + blkCnt--; + } + + /* If the blockSize is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = blockSize % 0x4u; + + while(blkCnt > 0u) + { + /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */ + /* Compute Power and then store the result in a temporary variable, sum. */ + in16 = *pSrc++; + sum = __SMLALD(in16, in16, sum); + + /* Decrement the loop counter */ + blkCnt--; + } + +#else + + /* Run the below code for Cortex-M0 */ + + q15_t in; /* Temporary variable to store input value */ + uint32_t blkCnt; /* loop counter */ + + + /* Loop over blockSize number of values */ + blkCnt = blockSize; + + while(blkCnt > 0u) + { + /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */ + /* Compute Power and then store the result in a temporary variable, sum. */ + in = *pSrc++; + sum += ((q31_t) in * in); + + /* Decrement the loop counter */ + blkCnt--; + } + +#endif /* #ifndef ARM_MATH_CM0 */ + + /* Store the results in 34.30 format */ + *pResult = sum; +} + +/** + * @} end of power group + */ diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_q31.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_q31.c new file mode 100644 index 000000000..43f03f757 --- /dev/null +++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_q31.c @@ -0,0 +1,135 @@ +/* ---------------------------------------------------------------------- +* Copyright (C) 2010 ARM Limited. All rights reserved. +* +* $Date: 15. February 2012 +* $Revision: V1.1.0 +* +* Project: CMSIS DSP Library +* Title: arm_power_q31.c +* +* Description: Sum of the squares of the elements of a Q31 vector. +* +* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 +* +* Version 1.1.0 2012/02/15 +* Updated with more optimizations, bug fixes and minor API changes. +* +* Version 1.0.10 2011/7/15 +* Big Endian support added and Merged M0 and M3/M4 Source code. +* +* Version 1.0.3 2010/11/29 +* Re-organized the CMSIS folders and updated documentation. +* +* Version 1.0.2 2010/11/11 +* Documentation updated. +* +* Version 1.0.1 2010/10/05 +* Production release and review comments incorporated. +* +* Version 1.0.0 2010/09/20 +* Production release and review comments incorporated. +* -------------------------------------------------------------------- */ + +#include "arm_math.h" + +/** + * @ingroup groupStats + */ + +/** + * @addtogroup power + * @{ + */ + +/** + * @brief Sum of the squares of the elements of a Q31 vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult sum of the squares value returned here + * @return none. + * + * @details + * Scaling and Overflow Behavior: + * + * \par + * The function is implemented using a 64-bit internal accumulator. + * The input is represented in 1.31 format. + * Intermediate multiplication yields a 2.62 format, and this + * result is truncated to 2.48 format by discarding the lower 14 bits. + * The 2.48 result is then added without saturation to a 64-bit accumulator in 16.48 format. + * With 15 guard bits in the accumulator, there is no risk of overflow, and the + * full precision of the intermediate multiplication is preserved. + * Finally, the return result is in 16.48 format. + * + */ + +void arm_power_q31( + q31_t * pSrc, + uint32_t blockSize, + q63_t * pResult) +{ + q63_t sum = 0; /* Temporary result storage */ + q31_t in; + uint32_t blkCnt; /* loop counter */ + + +#ifndef ARM_MATH_CM0 + + /* Run the below code for Cortex-M4 and Cortex-M3 */ + + /*loop Unrolling */ + blkCnt = blockSize >> 2u; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while(blkCnt > 0u) + { + /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */ + /* Compute Power then shift intermediate results by 14 bits to maintain 16.48 format and then store the result in a temporary variable sum, providing 15 guard bits. */ + in = *pSrc++; + sum += ((q63_t) in * in) >> 14u; + + in = *pSrc++; + sum += ((q63_t) in * in) >> 14u; + + in = *pSrc++; + sum += ((q63_t) in * in) >> 14u; + + in = *pSrc++; + sum += ((q63_t) in * in) >> 14u; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* If the blockSize is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = blockSize % 0x4u; + +#else + + /* Run the below code for Cortex-M0 */ + + /* Loop over blockSize number of values */ + blkCnt = blockSize; + +#endif /* #ifndef ARM_MATH_CM0 */ + + while(blkCnt > 0u) + { + /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */ + /* Compute Power and then store the result in a temporary variable, sum. */ + in = *pSrc++; + sum += ((q63_t) in * in) >> 14u; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Store the results in 16.48 format */ + *pResult = sum; +} + +/** + * @} end of power group + */ diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_q7.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_q7.c new file mode 100644 index 000000000..5cb99b4c2 --- /dev/null +++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_power_q7.c @@ -0,0 +1,133 @@ +/* ---------------------------------------------------------------------- +* Copyright (C) 2010 ARM Limited. All rights reserved. +* +* $Date: 15. February 2012 +* $Revision: V1.1.0 +* +* Project: CMSIS DSP Library +* Title: arm_power_q7.c +* +* Description: Sum of the squares of the elements of a Q7 vector. +* +* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 +* +* Version 1.1.0 2012/02/15 +* Updated with more optimizations, bug fixes and minor API changes. +* +* Version 1.0.10 2011/7/15 +* Big Endian support added and Merged M0 and M3/M4 Source code. +* +* Version 1.0.3 2010/11/29 +* Re-organized the CMSIS folders and updated documentation. +* +* Version 1.0.2 2010/11/11 +* Documentation updated. +* +* Version 1.0.1 2010/10/05 +* Production release and review comments incorporated. +* +* Version 1.0.0 2010/09/20 +* Production release and review comments incorporated. +* -------------------------------------------------------------------- */ + +#include "arm_math.h" + +/** + * @ingroup groupStats + */ + +/** + * @addtogroup power + * @{ + */ + +/** + * @brief Sum of the squares of the elements of a Q7 vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult sum of the squares value returned here + * @return none. + * + * @details + * Scaling and Overflow Behavior: + * + * \par + * The function is implemented using a 32-bit internal accumulator. + * The input is represented in 1.7 format. + * Intermediate multiplication yields a 2.14 format, and this + * result is added without saturation to an accumulator in 18.14 format. + * With 17 guard bits in the accumulator, there is no risk of overflow, and the + * full precision of the intermediate multiplication is preserved. + * Finally, the return result is in 18.14 format. + * + */ + +void arm_power_q7( + q7_t * pSrc, + uint32_t blockSize, + q31_t * pResult) +{ + q31_t sum = 0; /* Temporary result storage */ + q7_t in; /* Temporary variable to store input */ + uint32_t blkCnt; /* loop counter */ + +#ifndef ARM_MATH_CM0 + + /* Run the below code for Cortex-M4 and Cortex-M3 */ + + q31_t input1; /* Temporary variable to store packed input */ + q31_t in1, in2; /* Temporary variables to store input */ + + /*loop Unrolling */ + blkCnt = blockSize >> 2u; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while(blkCnt > 0u) + { + /* Reading two inputs of pSrc vector and packing */ + input1 = *__SIMD32(pSrc)++; + + in1 = __SXTB16(__ROR(input1, 8)); + in2 = __SXTB16(input1); + + /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */ + /* calculate power and accumulate to accumulator */ + sum = __SMLAD(in1, in1, sum); + sum = __SMLAD(in2, in2, sum); + + /* Decrement the loop counter */ + blkCnt--; + } + + /* If the blockSize is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = blockSize % 0x4u; + +#else + + /* Run the below code for Cortex-M0 */ + + /* Loop over blockSize number of values */ + blkCnt = blockSize; + +#endif /* #ifndef ARM_MATH_CM0 */ + + while(blkCnt > 0u) + { + /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */ + /* Compute Power and then store the result in a temporary variable, sum. */ + in = *pSrc++; + sum += ((q15_t) in * in); + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Store the result in 18.14 format */ + *pResult = sum; +} + +/** + * @} end of power group + */ diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_rms_f32.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_rms_f32.c new file mode 100644 index 000000000..573896ed4 --- /dev/null +++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_rms_f32.c @@ -0,0 +1,133 @@ +/* ---------------------------------------------------------------------- +* Copyright (C) 2010 ARM Limited. All rights reserved. +* +* $Date: 15. February 2012 +* $Revision: V1.1.0 +* +* Project: CMSIS DSP Library +* Title: arm_rms_f32.c +* +* Description: Root mean square value of an array of F32 type +* +* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 +* +* Version 1.1.0 2012/02/15 +* Updated with more optimizations, bug fixes and minor API changes. +* +* Version 1.0.10 2011/7/15 +* Big Endian support added and Merged M0 and M3/M4 Source code. +* +* Version 1.0.3 2010/11/29 +* Re-organized the CMSIS folders and updated documentation. +* +* Version 1.0.2 2010/11/11 +* Documentation updated. +* +* Version 1.0.1 2010/10/05 +* Production release and review comments incorporated. +* +* Version 1.0.0 2010/09/20 +* Production release and review comments incorporated. +* ---------------------------------------------------------------------------- */ + +#include "arm_math.h" + +/** + * @ingroup groupStats + */ + +/** + * @defgroup RMS Root mean square (RMS) + * + * + * Calculates the Root Mean Sqaure of the elements in the input vector. + * The underlying algorithm is used: + * + *
    
+ * 	Result = sqrt(((pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] * pSrc[blockSize-1]) / blockSize));    
+ * 
+ * + * There are separate functions for floating point, Q31, and Q15 data types. + */ + +/** + * @addtogroup RMS + * @{ + */ + + +/** + * @brief Root Mean Square of the elements of a floating-point vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult rms value returned here + * @return none. + * + */ + +void arm_rms_f32( + float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult) +{ + float32_t sum = 0.0f; /* Accumulator */ + float32_t in; /* Tempoprary variable to store input value */ + uint32_t blkCnt; /* loop counter */ + +#ifndef ARM_MATH_CM0 + + /* Run the below code for Cortex-M4 and Cortex-M3 */ + + /* loop Unrolling */ + blkCnt = blockSize >> 2u; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while(blkCnt > 0u) + { + /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */ + /* Compute sum of the squares and then store the result in a temporary variable, sum */ + in = *pSrc++; + sum += in * in; + in = *pSrc++; + sum += in * in; + in = *pSrc++; + sum += in * in; + in = *pSrc++; + sum += in * in; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* If the blockSize is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = blockSize % 0x4u; + +#else + + /* Run the below code for Cortex-M0 */ + + /* Loop over blockSize number of values */ + blkCnt = blockSize; + +#endif /* #ifndef ARM_MATH_CM0 */ + + while(blkCnt > 0u) + { + /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */ + /* Compute sum of the squares and then store the results in a temporary variable, sum */ + in = *pSrc++; + sum += in * in; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Compute Rms and store the result in the destination */ + arm_sqrt_f32(sum / (float32_t) blockSize, pResult); +} + +/** + * @} end of RMS group + */ diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_rms_q15.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_rms_q15.c new file mode 100644 index 000000000..491c652e7 --- /dev/null +++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_rms_q15.c @@ -0,0 +1,153 @@ +/* ---------------------------------------------------------------------- +* Copyright (C) 2010 ARM Limited. All rights reserved. +* +* $Date: 15. February 2012 +* $Revision: V1.1.0 +* +* Project: CMSIS DSP Library +* Title: arm_rms_q15.c +* +* Description: Root Mean Square of the elements of a Q15 vector. +* +* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 +* +* Version 1.1.0 2012/02/15 +* Updated with more optimizations, bug fixes and minor API changes. +* +* Version 1.0.10 2011/7/15 +* Big Endian support added and Merged M0 and M3/M4 Source code. +* +* Version 1.0.3 2010/11/29 +* Re-organized the CMSIS folders and updated documentation. +* +* Version 1.0.2 2010/11/11 +* Documentation updated. +* +* Version 1.0.1 2010/10/05 +* Production release and review comments incorporated. +* +* Version 1.0.0 2010/09/20 +* Production release and review comments incorporated. +* ---------------------------------------------------------------------------- */ + +#include "arm_math.h" + +/** + * @addtogroup RMS + * @{ + */ + +/** + * @brief Root Mean Square of the elements of a Q15 vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult rms value returned here + * @return none. + * + * @details + * Scaling and Overflow Behavior: + * + * \par + * The function is implemented using a 64-bit internal accumulator. + * The input is represented in 1.15 format. + * Intermediate multiplication yields a 2.30 format, and this + * result is added without saturation to a 64-bit accumulator in 34.30 format. + * With 33 guard bits in the accumulator, there is no risk of overflow, and the + * full precision of the intermediate multiplication is preserved. + * Finally, the 34.30 result is truncated to 34.15 format by discarding the lower + * 15 bits, and then saturated to yield a result in 1.15 format. + * + */ + +void arm_rms_q15( + q15_t * pSrc, + uint32_t blockSize, + q15_t * pResult) +{ + q63_t sum = 0; /* accumulator */ + +#ifndef ARM_MATH_CM0 + + /* Run the below code for Cortex-M4 and Cortex-M3 */ + + q31_t in; /* temporary variable to store the input value */ + q15_t in1; /* temporary variable to store the input value */ + uint32_t blkCnt; /* loop counter */ + + /* loop Unrolling */ + blkCnt = blockSize >> 2u; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while(blkCnt > 0u) + { + /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ + /* Compute sum of the squares and then store the results in a temporary variable, sum */ + in = *__SIMD32(pSrc)++; + sum = __SMLALD(in, in, sum); + in = *__SIMD32(pSrc)++; + sum = __SMLALD(in, in, sum); + + /* Decrement the loop counter */ + blkCnt--; + } + + /* If the blockSize is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = blockSize % 0x4u; + + while(blkCnt > 0u) + { + /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ + /* Compute sum of the squares and then store the results in a temporary variable, sum */ + in1 = *pSrc++; + sum = __SMLALD(in1, in1, sum); + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Truncating and saturating the accumulator to 1.15 format */ + sum = __SSAT((q31_t) (sum >> 15), 16); + + in1 = (q15_t) (sum / blockSize); + + /* Store the result in the destination */ + arm_sqrt_q15(in1, pResult); + +#else + + /* Run the below code for Cortex-M0 */ + + q15_t in; /* temporary variable to store the input value */ + uint32_t blkCnt; /* loop counter */ + + /* Loop over blockSize number of values */ + blkCnt = blockSize; + + while(blkCnt > 0u) + { + /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ + /* Compute sum of the squares and then store the results in a temporary variable, sum */ + in = *pSrc++; + sum += ((q31_t) in * in); + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Truncating and saturating the accumulator to 1.15 format */ + sum = __SSAT((q31_t) (sum >> 15), 16); + + in = (q15_t) (sum / blockSize); + + /* Store the result in the destination */ + arm_sqrt_q15(in, pResult); + +#endif /* #ifndef ARM_MATH_CM0 */ + +} + +/** + * @} end of RMS group + */ diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_rms_q31.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_rms_q31.c new file mode 100644 index 000000000..54038eb12 --- /dev/null +++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_rms_q31.c @@ -0,0 +1,146 @@ +/* ---------------------------------------------------------------------- +* Copyright (C) 2010 ARM Limited. All rights reserved. +* +* $Date: 15. February 2012 +* $Revision: V1.1.0 +* +* Project: CMSIS DSP Library +* Title: arm_rms_q31.c +* +* Description: Root Mean Square of the elements of a Q31 vector. +* +* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 +* +* Version 1.1.0 2012/02/15 +* Updated with more optimizations, bug fixes and minor API changes. +* +* Version 1.0.10 2011/7/15 +* Big Endian support added and Merged M0 and M3/M4 Source code. +* +* Version 1.0.3 2010/11/29 +* Re-organized the CMSIS folders and updated documentation. +* +* Version 1.0.2 2010/11/11 +* Documentation updated. +* +* Version 1.0.1 2010/10/05 +* Production release and review comments incorporated. +* +* Version 1.0.0 2010/09/20 +* Production release and review comments incorporated. +* ---------------------------------------------------------------------------- */ + +#include "arm_math.h" + +/** + * @addtogroup RMS + * @{ + */ + + +/** + * @brief Root Mean Square of the elements of a Q31 vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult rms value returned here + * @return none. + * + * @details + * Scaling and Overflow Behavior: + * + *\par + * The function is implemented using an internal 64-bit accumulator. + * The input is represented in 1.31 format, and intermediate multiplication + * yields a 2.62 format. + * The accumulator maintains full precision of the intermediate multiplication results, + * but provides only a single guard bit. + * There is no saturation on intermediate additions. + * If the accumulator overflows, it wraps around and distorts the result. + * In order to avoid overflows completely, the input signal must be scaled down by + * log2(blockSize) bits, as a total of blockSize additions are performed internally. + * Finally, the 2.62 accumulator is right shifted by 31 bits to yield a 1.31 format value. + * + */ + +void arm_rms_q31( + q31_t * pSrc, + uint32_t blockSize, + q31_t * pResult) +{ + q63_t sum = 0; /* accumulator */ + q31_t in; /* Temporary variable to store the input */ + uint32_t blkCnt; /* loop counter */ + +#ifndef ARM_MATH_CM0 + + /* Run the below code for Cortex-M4 and Cortex-M3 */ + + q31_t in1, in2, in3, in4; /* Temporary input variables */ + + /*loop Unrolling */ + blkCnt = blockSize >> 2u; + + /* First part of the processing with loop unrolling. Compute 8 outputs at a time. + ** a second loop below computes the remaining 1 to 7 samples. */ + while(blkCnt > 0u) + { + /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */ + /* Compute sum of the squares and then store the result in a temporary variable, sum */ + /* read two samples from source buffer */ + in1 = pSrc[0]; + in2 = pSrc[1]; + + /* calculate power and accumulate to accumulator */ + sum += (q63_t) in1 *in1; + sum += (q63_t) in2 *in2; + + /* read two samples from source buffer */ + in3 = pSrc[2]; + in4 = pSrc[3]; + + /* calculate power and accumulate to accumulator */ + sum += (q63_t) in3 *in3; + sum += (q63_t) in4 *in4; + + + /* update source buffer to process next samples */ + pSrc += 4u; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* If the blockSize is not a multiple of 8, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = blockSize % 0x4u; + +#else + + /* Run the below code for Cortex-M0 */ + blkCnt = blockSize; + +#endif /* #ifndef ARM_MATH_CM0 */ + + while(blkCnt > 0u) + { + /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */ + /* Compute sum of the squares and then store the results in a temporary variable, sum */ + in = *pSrc++; + sum += (q63_t) in *in; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Convert data in 2.62 to 1.31 by 31 right shifts and saturate */ + + sum = __SSAT(sum >> 31, 31); + + + /* Compute Rms and store the result in the destination vector */ + arm_sqrt_q31((q31_t) ((q31_t) sum / (int32_t) blockSize), pResult); +} + +/** + * @} end of RMS group + */ diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_std_f32.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_std_f32.c new file mode 100644 index 000000000..6442d5c29 --- /dev/null +++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_std_f32.c @@ -0,0 +1,188 @@ +/* ---------------------------------------------------------------------- +* Copyright (C) 2010 ARM Limited. All rights reserved. +* +* $Date: 15. February 2012 +* $Revision: V1.1.0 +* +* Project: CMSIS DSP Library +* Title: arm_std_f32.c +* +* Description: Standard deviation of the elements of a floating-point vector. +* +* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 +* +* Version 1.1.0 2012/02/15 +* Updated with more optimizations, bug fixes and minor API changes. +* +* Version 1.0.10 2011/7/15 +* Big Endian support added and Merged M0 and M3/M4 Source code. +* +* Version 1.0.3 2010/11/29 +* Re-organized the CMSIS folders and updated documentation. +* +* Version 1.0.2 2010/11/11 +* Documentation updated. +* +* Version 1.0.1 2010/10/05 +* Production release and review comments incorporated. +* +* Version 1.0.0 2010/09/20 +* Production release and review comments incorporated. +* ---------------------------------------------------------------------------- */ + +#include "arm_math.h" + +/** + * @ingroup groupStats + */ + +/** + * @defgroup STD Standard deviation + * + * Calculates the standard deviation of the elements in the input vector. + * The underlying algorithm is used: + * + *
    
+ * 	Result = sqrt((sumOfSquares - sum2 / blockSize) / (blockSize - 1))   
+ *   
+ *	   where, sumOfSquares = pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] * pSrc[blockSize-1]   
+ *   
+ *	                   sum = pSrc[0] + pSrc[1] + pSrc[2] + ... + pSrc[blockSize-1]   
+ * 
+ * + * There are separate functions for floating point, Q31, and Q15 data types. + */ + +/** + * @addtogroup STD + * @{ + */ + + +/** + * @brief Standard deviation of the elements of a floating-point vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult standard deviation value returned here + * @return none. + * + */ + + +void arm_std_f32( + float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult) +{ + float32_t sum = 0.0f; /* Temporary result storage */ + float32_t sumOfSquares = 0.0f; /* Sum of squares */ + float32_t in; /* input value */ + uint32_t blkCnt; /* loop counter */ + +#ifndef ARM_MATH_CM0 + + /* Run the below code for Cortex-M4 and Cortex-M3 */ + + float32_t meanOfSquares, mean, squareOfMean; + + /*loop Unrolling */ + blkCnt = blockSize >> 2u; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while(blkCnt > 0u) + { + /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ + /* Compute Sum of squares of the input samples + * and then store the result in a temporary variable, sum. */ + in = *pSrc++; + sum += in; + sumOfSquares += in * in; + in = *pSrc++; + sum += in; + sumOfSquares += in * in; + in = *pSrc++; + sum += in; + sumOfSquares += in * in; + in = *pSrc++; + sum += in; + sumOfSquares += in * in; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* If the blockSize is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = blockSize % 0x4u; + + while(blkCnt > 0u) + { + /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ + /* Compute Sum of squares of the input samples + * and then store the result in a temporary variable, sum. */ + in = *pSrc++; + sum += in; + sumOfSquares += in * in; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Compute Mean of squares of the input samples + * and then store the result in a temporary variable, meanOfSquares. */ + meanOfSquares = sumOfSquares / ((float32_t) blockSize - 1.0f); + + /* Compute mean of all input values */ + mean = sum / (float32_t) blockSize; + + /* Compute square of mean */ + squareOfMean = (mean * mean) * (((float32_t) blockSize) / + ((float32_t) blockSize - 1.0f)); + + /* Compute standard deviation and then store the result to the destination */ + arm_sqrt_f32((meanOfSquares - squareOfMean), pResult); + +#else + + /* Run the below code for Cortex-M0 */ + + float32_t squareOfSum; /* Square of Sum */ + float32_t var; /* Temporary varaince storage */ + + /* Loop over blockSize number of values */ + blkCnt = blockSize; + + while(blkCnt > 0u) + { + /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ + /* Compute Sum of squares of the input samples + * and then store the result in a temporary variable, sumOfSquares. */ + in = *pSrc++; + sumOfSquares += in * in; + + /* C = (A[0] + A[1] + ... + A[blockSize-1]) */ + /* Compute Sum of the input samples + * and then store the result in a temporary variable, sum. */ + sum += in; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Compute the square of sum */ + squareOfSum = ((sum * sum) / (float32_t) blockSize); + + /* Compute the variance */ + var = ((sumOfSquares - squareOfSum) / (float32_t) (blockSize - 1.0f)); + + /* Compute standard deviation and then store the result to the destination */ + arm_sqrt_f32(var, pResult); + +#endif /* #ifndef ARM_MATH_CM0 */ + +} + +/** + * @} end of STD group + */ diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_std_q15.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_std_q15.c new file mode 100644 index 000000000..2d1a49a50 --- /dev/null +++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_std_q15.c @@ -0,0 +1,197 @@ +/* ---------------------------------------------------------------------- +* Copyright (C) 2010 ARM Limited. All rights reserved. +* +* $Date: 15. February 2012 +* $Revision: V1.1.0 +* +* Project: CMSIS DSP Library +* Title: arm_std_q15.c +* +* Description: Standard deviation of an array of Q15 type. +* +* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 +* +* Version 1.1.0 2012/02/15 +* Updated with more optimizations, bug fixes and minor API changes. +* +* Version 1.0.10 2011/7/15 +* Big Endian support added and Merged M0 and M3/M4 Source code. +* +* Version 1.0.3 2010/11/29 +* Re-organized the CMSIS folders and updated documentation. +* +* Version 1.0.2 2010/11/11 +* Documentation updated. +* +* Version 1.0.1 2010/10/05 +* Production release and review comments incorporated. +* +* Version 1.0.0 2010/09/20 +* Production release and review comments incorporated. +* -------------------------------------------------------------------- */ + +#include "arm_math.h" + +/** + * @ingroup groupStats + */ + +/** + * @addtogroup STD + * @{ + */ + +/** + * @brief Standard deviation of the elements of a Q15 vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult standard deviation value returned here + * @return none. + * + * @details + * Scaling and Overflow Behavior: + * + * \par + * The function is implemented using a 64-bit internal accumulator. + * The input is represented in 1.15 format. + * Intermediate multiplication yields a 2.30 format, and this + * result is added without saturation to a 64-bit accumulator in 34.30 format. + * With 33 guard bits in the accumulator, there is no risk of overflow, and the + * full precision of the intermediate multiplication is preserved. + * Finally, the 34.30 result is truncated to 34.15 format by discarding the lower + * 15 bits, and then saturated to yield a result in 1.15 format. + */ + +void arm_std_q15( + q15_t * pSrc, + uint32_t blockSize, + q15_t * pResult) +{ + q31_t sum = 0; /* Accumulator */ + q31_t meanOfSquares, squareOfMean; /* square of mean and mean of square */ + q15_t mean; /* mean */ + uint32_t blkCnt; /* loop counter */ + q15_t t; /* Temporary variable */ + q63_t sumOfSquares = 0; /* Accumulator */ + +#ifndef ARM_MATH_CM0 + + /* Run the below code for Cortex-M4 and Cortex-M3 */ + + q31_t in; /* input value */ + q15_t in1; /* input value */ + + /*loop Unrolling */ + blkCnt = blockSize >> 2u; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while(blkCnt > 0u) + { + /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ + /* Compute Sum of squares of the input samples + * and then store the result in a temporary variable, sum. */ + in = *__SIMD32(pSrc)++; + sum += ((in << 16) >> 16); + sum += (in >> 16); + sumOfSquares = __SMLALD(in, in, sumOfSquares); + in = *__SIMD32(pSrc)++; + sum += ((in << 16) >> 16); + sum += (in >> 16); + sumOfSquares = __SMLALD(in, in, sumOfSquares); + + /* Decrement the loop counter */ + blkCnt--; + } + + /* If the blockSize is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = blockSize % 0x4u; + + while(blkCnt > 0u) + { + /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ + /* Compute Sum of squares of the input samples + * and then store the result in a temporary variable, sum. */ + in1 = *pSrc++; + sumOfSquares = __SMLALD(in1, in1, sumOfSquares); + sum += in1; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Compute Mean of squares of the input samples + * and then store the result in a temporary variable, meanOfSquares. */ + t = (q15_t) ((1.0 / (blockSize - 1)) * 16384LL); + sumOfSquares = __SSAT((sumOfSquares >> 15u), 16u); + + meanOfSquares = (q31_t) ((sumOfSquares * t) >> 14u); + + /* Compute mean of all input values */ + t = (q15_t) ((1.0 / (blockSize * (blockSize - 1))) * 32768LL); + mean = (q15_t) __SSAT(sum, 16u); + + /* Compute square of mean */ + squareOfMean = ((q31_t) mean * mean) >> 15; + squareOfMean = (q31_t) (((q63_t) squareOfMean * t) >> 15); + + /* mean of the squares minus the square of the mean. */ + in1 = (q15_t) (meanOfSquares - squareOfMean); + + /* Compute standard deviation and store the result to the destination */ + arm_sqrt_q15(in1, pResult); + +#else + + /* Run the below code for Cortex-M0 */ + q15_t in; /* input value */ + + /* Loop over blockSize number of values */ + blkCnt = blockSize; + + while(blkCnt > 0u) + { + /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ + /* Compute Sum of squares of the input samples + * and then store the result in a temporary variable, sumOfSquares. */ + in = *pSrc++; + sumOfSquares += (in * in); + + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ + /* Compute sum of all input values and then store the result in a temporary variable, sum. */ + sum += in; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Compute Mean of squares of the input samples + * and then store the result in a temporary variable, meanOfSquares. */ + t = (q15_t) ((1.0 / (blockSize - 1)) * 16384LL); + sumOfSquares = __SSAT((sumOfSquares >> 15u), 16u); + meanOfSquares = (q31_t) ((sumOfSquares * t) >> 14u); + + /* Compute mean of all input values */ + mean = (q15_t) __SSAT(sum, 16u); + + /* Compute square of mean of the input samples + * and then store the result in a temporary variable, squareOfMean.*/ + t = (q15_t) ((1.0 / (blockSize * (blockSize - 1))) * 32768LL); + squareOfMean = ((q31_t) mean * mean) >> 15; + squareOfMean = (q31_t) (((q63_t) squareOfMean * t) >> 15); + + /* mean of the squares minus the square of the mean. */ + in = (q15_t) (meanOfSquares - squareOfMean); + + /* Compute standard deviation and store the result to the destination */ + arm_sqrt_q15(in, pResult); + +#endif /* #ifndef ARM_MATH_CM0 */ + + +} + +/** + * @} end of STD group + */ diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_std_q31.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_std_q31.c new file mode 100644 index 000000000..45bec01a1 --- /dev/null +++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_std_q31.c @@ -0,0 +1,184 @@ +/* ---------------------------------------------------------------------- +* Copyright (C) 2010 ARM Limited. All rights reserved. +* +* $Date: 15. February 2012 +* $Revision: V1.1.0 +* +* Project: CMSIS DSP Library +* Title: arm_std_q31.c +* +* Description: Standard deviation of an array of Q31 type. +* +* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 +* +* Version 1.1.0 2012/02/15 +* Updated with more optimizations, bug fixes and minor API changes. +* +* Version 1.0.10 2011/7/15 +* Big Endian support added and Merged M0 and M3/M4 Source code. +* +* Version 1.0.3 2010/11/29 +* Re-organized the CMSIS folders and updated documentation. +* +* Version 1.0.2 2010/11/11 +* Documentation updated. +* +* Version 1.0.1 2010/10/05 +* Production release and review comments incorporated. +* +* Version 1.0.0 2010/09/20 +* Production release and review comments incorporated. +* -------------------------------------------------------------------- */ + +#include "arm_math.h" + +/** + * @ingroup groupStats + */ + +/** + * @addtogroup STD + * @{ + */ + + +/** + * @brief Standard deviation of the elements of a Q31 vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult standard deviation value returned here + * @return none. + * @details + * Scaling and Overflow Behavior: + * + *\par + * The function is implemented using an internal 64-bit accumulator. + * The input is represented in 1.31 format, and intermediate multiplication + * yields a 2.62 format. + * The accumulator maintains full precision of the intermediate multiplication results, + * but provides only a single guard bit. + * There is no saturation on intermediate additions. + * If the accumulator overflows it wraps around and distorts the result. + * In order to avoid overflows completely the input signal must be scaled down by + * log2(blockSize) bits, as a total of blockSize additions are performed internally. + * Finally, the 2.62 accumulator is right shifted by 31 bits to yield a 1.31 format value. + * + */ + + +void arm_std_q31( + q31_t * pSrc, + uint32_t blockSize, + q31_t * pResult) +{ + q63_t sum = 0; /* Accumulator */ + q31_t meanOfSquares, squareOfMean; /* square of mean and mean of square */ + q31_t mean; /* mean */ + q31_t in; /* input value */ + q31_t t; /* Temporary variable */ + uint32_t blkCnt; /* loop counter */ + q63_t sumOfSquares = 0; /* Accumulator */ + +#ifndef ARM_MATH_CM0 + + /* Run the below code for Cortex-M4 and Cortex-M3 */ + + /*loop Unrolling */ + blkCnt = blockSize >> 2u; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while(blkCnt > 0u) + { + /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ + /* Compute Sum of squares of the input samples + * and then store the result in a temporary variable, sum. */ + in = *pSrc++; + sum += in; + sumOfSquares += ((q63_t) (in) * (in)); + in = *pSrc++; + sum += in; + sumOfSquares += ((q63_t) (in) * (in)); + in = *pSrc++; + sum += in; + sumOfSquares += ((q63_t) (in) * (in)); + in = *pSrc++; + sum += in; + sumOfSquares += ((q63_t) (in) * (in)); + + /* Decrement the loop counter */ + blkCnt--; + } + + /* If the blockSize is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = blockSize % 0x4u; + + while(blkCnt > 0u) + { + /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ + /* Compute Sum of squares of the input samples + * and then store the result in a temporary variable, sum. */ + in = *pSrc++; + sum += in; + sumOfSquares += ((q63_t) (in) * (in)); + + /* Decrement the loop counter */ + blkCnt--; + } + + t = (q31_t) ((1.0f / (float32_t) (blockSize - 1u)) * 1073741824.0f); + + /* Compute Mean of squares of the input samples + * and then store the result in a temporary variable, meanOfSquares. */ + sumOfSquares = (sumOfSquares >> 31); + meanOfSquares = (q31_t) ((sumOfSquares * t) >> 30); + +#else + + /* Run the below code for Cortex-M0 */ + + /* Loop over blockSize number of values */ + blkCnt = blockSize; + + while(blkCnt > 0u) + { + /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ + /* Compute Sum of squares of the input samples + * and then store the result in a temporary variable, sumOfSquares. */ + in = *pSrc++; + sumOfSquares += ((q63_t) (in) * (in)); + + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ + /* Compute sum of all input values and then store the result in a temporary variable, sum. */ + sum += in; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Compute Mean of squares of the input samples + * and then store the result in a temporary variable, meanOfSquares. */ + t = (q31_t) ((1.0f / (float32_t) (blockSize - 1u)) * 1073741824.0f); + sumOfSquares = (sumOfSquares >> 31); + meanOfSquares = (q31_t) ((sumOfSquares * t) >> 30); + +#endif /* #ifndef ARM_MATH_CM0 */ + + /* Compute mean of all input values */ + t = (q31_t) ((1.0f / (blockSize * (blockSize - 1u))) * 2147483648.0f); + mean = (q31_t) (sum); + + /* Compute square of mean */ + squareOfMean = (q31_t) (((q63_t) mean * mean) >> 31); + squareOfMean = (q31_t) (((q63_t) squareOfMean * t) >> 31); + + + /* Compute standard deviation and then store the result to the destination */ + arm_sqrt_q31(meanOfSquares - squareOfMean, pResult); + +} + +/** + * @} end of STD group + */ diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_var_f32.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_var_f32.c new file mode 100644 index 000000000..2adcfb443 --- /dev/null +++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_var_f32.c @@ -0,0 +1,184 @@ +/* ---------------------------------------------------------------------- +* Copyright (C) 2010 ARM Limited. All rights reserved. +* +* $Date: 15. February 2012 +* $Revision: V1.1.0 +* +* Project: CMSIS DSP Library +* Title: arm_var_f32.c +* +* Description: Variance of the elements of a floating-point vector. +* +* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 +* +* Version 1.1.0 2012/02/15 +* Updated with more optimizations, bug fixes and minor API changes. +* +* Version 1.0.10 2011/7/15 +* Big Endian support added and Merged M0 and M3/M4 Source code. +* +* Version 1.0.3 2010/11/29 +* Re-organized the CMSIS folders and updated documentation. +* +* Version 1.0.2 2010/11/11 +* Documentation updated. +* +* Version 1.0.1 2010/10/05 +* Production release and review comments incorporated. +* +* Version 1.0.0 2010/09/20 +* Production release and review comments incorporated. +* ---------------------------------------------------------------------------- */ + +#include "arm_math.h" + +/** + * @ingroup groupStats + */ + +/** + * @defgroup variance Variance + * + * Calculates the variance of the elements in the input vector. + * The underlying algorithm is used: + * + *
    
+ * 	Result = (sumOfSquares - sum2 / blockSize) / (blockSize - 1)   
+ *   
+ *	   where, sumOfSquares = pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] * pSrc[blockSize-1]   
+ *   
+ *	                   sum = pSrc[0] + pSrc[1] + pSrc[2] + ... + pSrc[blockSize-1]   
+ * 
+ * + * There are separate functions for floating point, Q31, and Q15 data types. + */ + +/** + * @addtogroup variance + * @{ + */ + + +/** + * @brief Variance of the elements of a floating-point vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult variance value returned here + * @return none. + * + */ + + +void arm_var_f32( + float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult) +{ + + float32_t sum = 0.0f; /* Temporary result storage */ + float32_t sumOfSquares = 0.0f; /* Sum of squares */ + float32_t in; /* input value */ + uint32_t blkCnt; /* loop counter */ + +#ifndef ARM_MATH_CM0 + + /* Run the below code for Cortex-M4 and Cortex-M3 */ + + float32_t meanOfSquares, mean, squareOfMean; /* Temporary variables */ + + /*loop Unrolling */ + blkCnt = blockSize >> 2u; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while(blkCnt > 0u) + { + /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ + /* Compute Sum of squares of the input samples + * and then store the result in a temporary variable, sum. */ + in = *pSrc++; + sum += in; + sumOfSquares += in * in; + in = *pSrc++; + sum += in; + sumOfSquares += in * in; + in = *pSrc++; + sum += in; + sumOfSquares += in * in; + in = *pSrc++; + sum += in; + sumOfSquares += in * in; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* If the blockSize is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = blockSize % 0x4u; + + while(blkCnt > 0u) + { + /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ + /* Compute Sum of squares of the input samples + * and then store the result in a temporary variable, sum. */ + in = *pSrc++; + sum += in; + sumOfSquares += in * in; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Compute Mean of squares of the input samples + * and then store the result in a temporary variable, meanOfSquares. */ + meanOfSquares = sumOfSquares / ((float32_t) blockSize - 1.0f); + + /* Compute mean of all input values */ + mean = sum / (float32_t) blockSize; + + /* Compute square of mean */ + squareOfMean = (mean * mean) * (((float32_t) blockSize) / + ((float32_t) blockSize - 1.0f)); + + /* Compute variance and then store the result to the destination */ + *pResult = meanOfSquares - squareOfMean; + +#else + + /* Run the below code for Cortex-M0 */ + float32_t squareOfSum; /* Square of Sum */ + + /* Loop over blockSize number of values */ + blkCnt = blockSize; + + while(blkCnt > 0u) + { + /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ + /* Compute Sum of squares of the input samples + * and then store the result in a temporary variable, sumOfSquares. */ + in = *pSrc++; + sumOfSquares += in * in; + + /* C = (A[0] + A[1] + ... + A[blockSize-1]) */ + /* Compute Sum of the input samples + * and then store the result in a temporary variable, sum. */ + sum += in; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Compute the square of sum */ + squareOfSum = ((sum * sum) / (float32_t) blockSize); + + /* Compute the variance */ + *pResult = ((sumOfSquares - squareOfSum) / (float32_t) (blockSize - 1.0f)); + +#endif /* #ifndef ARM_MATH_CM0 */ + +} + +/** + * @} end of variance group + */ diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_var_q15.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_var_q15.c new file mode 100644 index 000000000..08dbc6587 --- /dev/null +++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_var_q15.c @@ -0,0 +1,180 @@ +/* ---------------------------------------------------------------------- +* Copyright (C) 2010 ARM Limited. All rights reserved. +* +* $Date: 15. February 2012 +* $Revision: V1.1.0 +* +* Project: CMSIS DSP Library +* Title: arm_var_q15.c +* +* Description: Variance of an array of Q15 type. +* +* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 +* +* Version 1.1.0 2012/02/15 +* Updated with more optimizations, bug fixes and minor API changes. +* +* Version 1.0.10 2011/7/15 +* Big Endian support added and Merged M0 and M3/M4 Source code. +* +* Version 1.0.3 2010/11/29 +* Re-organized the CMSIS folders and updated documentation. +* +* Version 1.0.2 2010/11/11 +* Documentation updated. +* +* Version 1.0.1 2010/10/05 +* Production release and review comments incorporated. +* +* Version 1.0.0 2010/09/20 +* Production release and review comments incorporated. +* -------------------------------------------------------------------- */ + +#include "arm_math.h" + +/** + * @ingroup groupStats + */ + +/** + * @addtogroup variance + * @{ + */ + +/** + * @brief Variance of the elements of a Q15 vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult variance value returned here + * @return none. + * + * @details + * Scaling and Overflow Behavior: + * + * \par + * The function is implemented using a 64-bit internal accumulator. + * The input is represented in 1.15 format. + * Intermediate multiplication yields a 2.30 format, and this + * result is added without saturation to a 64-bit accumulator in 34.30 format. + * With 33 guard bits in the accumulator, there is no risk of overflow, and the + * full precision of the intermediate multiplication is preserved. + * Finally, the 34.30 result is truncated to 34.15 format by discarding the lower + * 15 bits, and then saturated to yield a result in 1.15 format. + * + */ + + +void arm_var_q15( + q15_t * pSrc, + uint32_t blockSize, + q31_t * pResult) +{ + q31_t sum = 0; /* Accumulator */ + q31_t meanOfSquares, squareOfMean; /* Mean of square and square of mean */ + q15_t mean; /* mean */ + uint32_t blkCnt; /* loop counter */ + q15_t t; /* Temporary variable */ + q63_t sumOfSquares = 0; /* Accumulator */ + +#ifndef ARM_MATH_CM0 + + /* Run the below code for Cortex-M4 and Cortex-M3 */ + + q31_t in; /* Input variable */ + q15_t in1; /* Temporary variable */ + + /*loop Unrolling */ + blkCnt = blockSize >> 2u; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while(blkCnt > 0u) + { + /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ + /* Compute Sum of squares of the input samples + * and then store the result in a temporary variable, sum. */ + in = *__SIMD32(pSrc)++; + sum += ((in << 16) >> 16); + sum += (in >> 16); + sumOfSquares = __SMLALD(in, in, sumOfSquares); + in = *__SIMD32(pSrc)++; + sum += ((in << 16) >> 16); + sum += (in >> 16); + sumOfSquares = __SMLALD(in, in, sumOfSquares); + + /* Decrement the loop counter */ + blkCnt--; + } + + /* If the blockSize is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = blockSize % 0x4u; + + while(blkCnt > 0u) + { + /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ + /* Compute Sum of squares of the input samples + * and then store the result in a temporary variable, sum. */ + in1 = *pSrc++; + sum += in1; + sumOfSquares = __SMLALD(in1, in1, sumOfSquares); + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Compute Mean of squares of the input samples + * and then store the result in a temporary variable, meanOfSquares. */ + t = (q15_t) ((1.0f / (float32_t) (blockSize - 1u)) * 16384); + sumOfSquares = __SSAT((sumOfSquares >> 15u), 16u); + + meanOfSquares = (q31_t) ((sumOfSquares * t) >> 14u); + +#else + + /* Run the below code for Cortex-M0 */ + + q15_t in; /* Temporary variable */ + /* Loop over blockSize number of values */ + blkCnt = blockSize; + + while(blkCnt > 0u) + { + /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ + /* Compute Sum of squares of the input samples + * and then store the result in a temporary variable, sumOfSquares. */ + in = *pSrc++; + sumOfSquares += (in * in); + + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ + /* Compute sum of all input values and then store the result in a temporary variable, sum. */ + sum += in; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Compute Mean of squares of the input samples + * and then store the result in a temporary variable, meanOfSquares. */ + t = (q15_t) ((1.0f / (float32_t) (blockSize - 1u)) * 16384); + sumOfSquares = __SSAT((sumOfSquares >> 15u), 16u); + meanOfSquares = (q31_t) ((sumOfSquares * t) >> 14u); + +#endif /* #ifndef ARM_MATH_CM0 */ + + /* Compute mean of all input values */ + t = (q15_t) ((1.0f / (float32_t) (blockSize * (blockSize - 1u))) * 32768); + mean = __SSAT(sum, 16u); + + /* Compute square of mean */ + squareOfMean = ((q31_t) mean * mean) >> 15; + squareOfMean = (q31_t) (((q63_t) squareOfMean * t) >> 15); + + /* Compute variance and then store the result to the destination */ + *pResult = (meanOfSquares - squareOfMean); + +} + +/** + * @} end of variance group + */ diff --git a/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_var_q31.c b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_var_q31.c new file mode 100644 index 000000000..13d6c15cb --- /dev/null +++ b/src/modules/mathlib/CMSIS/DSP_Lib/Source/StatisticsFunctions/arm_var_q31.c @@ -0,0 +1,170 @@ +/* ---------------------------------------------------------------------- +* Copyright (C) 2010 ARM Limited. All rights reserved. +* +* $Date: 15. February 2012 +* $Revision: V1.1.0 +* +* Project: CMSIS DSP Library +* Title: arm_var_q31.c +* +* Description: Variance of an array of Q31 type. +* +* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 +* +* Version 1.1.0 2012/02/15 +* Updated with more optimizations, bug fixes and minor API changes. +* +* Version 1.0.10 2011/7/15 +* Big Endian support added and Merged M0 and M3/M4 Source code. +* +* Version 1.0.3 2010/11/29 +* Re-organized the CMSIS folders and updated documentation. +* +* Version 1.0.2 2010/11/11 +* Documentation updated. +* +* Version 1.0.1 2010/10/05 +* Production release and review comments incorporated. +* +* Version 1.0.0 2010/09/20 +* Production release and review comments incorporated. +* -------------------------------------------------------------------- */ + +#include "arm_math.h" + +/** + * @ingroup groupStats + */ + +/** + * @addtogroup variance + * @{ + */ + +/** + * @brief Variance of the elements of a Q31 vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult variance value returned here + * @return none. + * + * @details + * Scaling and Overflow Behavior: + * + *\par + * The function is implemented using an internal 64-bit accumulator. + * The input is represented in 1.31 format, and intermediate multiplication + * yields a 2.62 format. + * The accumulator maintains full precision of the intermediate multiplication results, + * but provides only a single guard bit. + * There is no saturation on intermediate additions. + * If the accumulator overflows it wraps around and distorts the result. + * In order to avoid overflows completely the input signal must be scaled down by + * log2(blockSize) bits, as a total of blockSize additions are performed internally. + * Finally, the 2.62 accumulator is right shifted by 31 bits to yield a 1.31 format value. + * + */ + + +void arm_var_q31( + q31_t * pSrc, + uint32_t blockSize, + q63_t * pResult) +{ + q63_t sum = 0, sumSquare = 0; /* Accumulator */ + q31_t meanOfSquares, squareOfMean; /* square of mean and mean of square */ + q31_t mean; /* mean */ + q31_t in; /* input value */ + q31_t t; /* Temporary variable */ + uint32_t blkCnt; /* loop counter */ + +#ifndef ARM_MATH_CM0 + + /* Run the below code for Cortex-M4 and Cortex-M3 */ + q63_t sumSquare1 = 0; /* Accumulator */ + q31_t in1, in2, in3, in4; /* Temporary input variables */ + + /*loop Unrolling */ + blkCnt = blockSize >> 2u; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while(blkCnt > 0u) + { + /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ + /* Compute Sum of squares of the input samples + * and then store the result in a temporary variable, sum. */ + /* read input samples from source buffer */ + in1 = pSrc[0]; + in2 = pSrc[1]; + + /* calculate sum of inputs */ + sum += in1; + /* calculate sum of squares */ + sumSquare += ((q63_t) (in1) * (in1)); + in3 = pSrc[2]; + sum += in2; + sumSquare1 += ((q63_t) (in2) * (in2)); + in4 = pSrc[3]; + sum += in3; + sumSquare += ((q63_t) (in3) * (in3)); + sum += in4; + sumSquare1 += ((q63_t) (in4) * (in4)); + + /* update input pointer to process next samples */ + pSrc += 4u; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* add two accumulators */ + sumSquare = sumSquare + sumSquare1; + + /* If the blockSize is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = blockSize % 0x4u; + +#else + + /* Run the below code for Cortex-M0 */ + blkCnt = blockSize; + +#endif /* #ifndef ARM_MATH_CM0 */ + + while(blkCnt > 0u) + { + /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ + /* Compute Sum of squares of the input samples + * and then store the result in a temporary variable, sum. */ + in = *pSrc++; + sumSquare += ((q63_t) (in) * (in)); + sum += in; + + /* Decrement the loop counter */ + blkCnt--; + } + + t = (q31_t) ((1.0f / (float32_t) (blockSize - 1u)) * 1073741824.0f); + + /* Compute Mean of squares of the input samples + * and then store the result in a temporary variable, meanOfSquares. */ + sumSquare = (sumSquare >> 31); + meanOfSquares = (q31_t) ((sumSquare * t) >> 30); + + /* Compute mean of all input values */ + t = (q31_t) ((1.0f / (blockSize * (blockSize - 1u))) * 2147483648.0f); + mean = (q31_t) (sum); + + /* Compute square of mean */ + squareOfMean = (q31_t) (((q63_t) mean * mean) >> 31); + squareOfMean = (q31_t) (((q63_t) squareOfMean * t) >> 31); + + /* Compute variance and then store the result to the destination */ + *pResult = (q63_t) meanOfSquares - squareOfMean; + +} + +/** + * @} end of variance group + */ -- cgit v1.2.3