diff options
Diffstat (limited to 'DSP/Source/StatisticsFunctions')
25 files changed, 3780 insertions, 0 deletions
diff --git a/DSP/Source/StatisticsFunctions/arm_max_f32.c b/DSP/Source/StatisticsFunctions/arm_max_f32.c new file mode 100644 index 0000000..a0a68ac --- /dev/null +++ b/DSP/Source/StatisticsFunctions/arm_max_f32.c @@ -0,0 +1,170 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_max_f32.c + * Description: Maximum value of a floating-point vector + * + * $Date: 27. January 2017 + * $Revision: V.1.5.1 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arm_math.h" + +/** + * @ingroup groupStats + */ + +/** + * @defgroup Max Maximum + * + * Computes the maximum value of an array of data. + * The function returns both the maximum value and its position within the array. + * There are separate functions for floating-point, Q31, Q15, and Q7 data types. + */ + +/** + * @addtogroup Max + * @{ + */ + + +/** + * @brief Maximum value of a floating-point vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult maximum value returned here + * @param[out] *pIndex index of maximum value returned here + * @return none. + */ + +void arm_max_f32( + float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult, + uint32_t * pIndex) +{ +#if defined (ARM_MATH_DSP) + /* Run the below code for Cortex-M4 and Cortex-M3 */ + + float32_t maxVal1, maxVal2, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex, count; /* loop counter */ + + /* Initialise the count value. */ + count = 0U; + /* Initialise the index value to zero. */ + outIndex = 0U; + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + + /* Loop unrolling */ + blkCnt = (blockSize - 1U) >> 2U; + + while (blkCnt > 0U) + { + /* Initialize maxVal to the next consecutive values one by one */ + maxVal1 = *pSrc++; + maxVal2 = *pSrc++; + + /* compare for the maximum value */ + if (out < maxVal1) + { + /* Update the maximum value and its index */ + out = maxVal1; + outIndex = count + 1U; + } + + /* compare for the maximum value */ + if (out < maxVal2) + { + /* Update the maximum value and its index */ + out = maxVal2; + outIndex = count + 2U; + } + + /* Initialize maxVal to the next consecutive values one by one */ + maxVal1 = *pSrc++; + maxVal2 = *pSrc++; + + /* compare for the maximum value */ + if (out < maxVal1) + { + /* Update the maximum value and its index */ + out = maxVal1; + outIndex = count + 3U; + } + + /* compare for the maximum value */ + if (out < maxVal2) + { + /* Update the maximum value and its index */ + out = maxVal2; + outIndex = count + 4U; + } + + count += 4U; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* if (blockSize - 1U) is not multiple of 4 */ + blkCnt = (blockSize - 1U) % 4U; + +#else + /* Run the below code for Cortex-M0 */ + + float32_t maxVal1, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex; /* loop counter */ + + /* Initialise the index value to zero. */ + outIndex = 0U; + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + + blkCnt = (blockSize - 1U); + +#endif /* #if defined (ARM_MATH_DSP) */ + + while (blkCnt > 0U) + { + /* Initialize maxVal to the next consecutive values one by one */ + maxVal1 = *pSrc++; + + /* compare for the maximum value */ + if (out < maxVal1) + { + /* Update the maximum value and it's index */ + out = maxVal1; + outIndex = blockSize - blkCnt; + } + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Store the maximum value and it's index into destination pointers */ + *pResult = out; + *pIndex = outIndex; +} + +/** + * @} end of Max group + */ diff --git a/DSP/Source/StatisticsFunctions/arm_max_q15.c b/DSP/Source/StatisticsFunctions/arm_max_q15.c new file mode 100644 index 0000000..67d5e34 --- /dev/null +++ b/DSP/Source/StatisticsFunctions/arm_max_q15.c @@ -0,0 +1,162 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_max_q15.c + * Description: Maximum value of a Q15 vector + * + * $Date: 27. January 2017 + * $Revision: V.1.5.1 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arm_math.h" + +/** + * @ingroup groupStats + */ + +/** + * @addtogroup Max + * @{ + */ + + +/** + * @brief Maximum value of a Q15 vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult maximum value returned here + * @param[out] *pIndex index of maximum value returned here + * @return none. + */ + +void arm_max_q15( + q15_t * pSrc, + uint32_t blockSize, + q15_t * pResult, + uint32_t * pIndex) +{ +#if defined (ARM_MATH_DSP) + /* Run the below code for Cortex-M4 and Cortex-M3 */ + + q15_t maxVal1, maxVal2, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex, count; /* loop counter */ + + /* Initialise the count value. */ + count = 0U; + /* Initialise the index value to zero. */ + outIndex = 0U; + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + + /* Loop unrolling */ + blkCnt = (blockSize - 1U) >> 2U; + + while (blkCnt > 0U) + { + /* Initialize maxVal to the next consecutive values one by one */ + maxVal1 = *pSrc++; + maxVal2 = *pSrc++; + + /* compare for the maximum value */ + if (out < maxVal1) + { + /* Update the maximum value and its index */ + out = maxVal1; + outIndex = count + 1U; + } + + /* compare for the maximum value */ + if (out < maxVal2) + { + /* Update the maximum value and its index */ + out = maxVal2; + outIndex = count + 2U; + } + + /* Initialize maxVal to the next consecutive values one by one */ + maxVal1 = *pSrc++; + maxVal2 = *pSrc++; + + /* compare for the maximum value */ + if (out < maxVal1) + { + /* Update the maximum value and its index */ + out = maxVal1; + outIndex = count + 3U; + } + + /* compare for the maximum value */ + if (out < maxVal2) + { + /* Update the maximum value and its index */ + out = maxVal2; + outIndex = count + 4U; + } + + count += 4U; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* if (blockSize - 1U) is not multiple of 4 */ + blkCnt = (blockSize - 1U) % 4U; + +#else + /* Run the below code for Cortex-M0 */ + + q15_t maxVal1, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex; /* loop counter */ + + /* Initialise the index value to zero. */ + outIndex = 0U; + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + + blkCnt = (blockSize - 1U); + +#endif /* #if defined (ARM_MATH_DSP) */ + + while (blkCnt > 0U) + { + /* Initialize maxVal to the next consecutive values one by one */ + maxVal1 = *pSrc++; + + /* compare for the maximum value */ + if (out < maxVal1) + { + /* Update the maximum value and it's index */ + out = maxVal1; + outIndex = blockSize - blkCnt; + } + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Store the maximum value and it's index into destination pointers */ + *pResult = out; + *pIndex = outIndex; +} + +/** + * @} end of Max group + */ diff --git a/DSP/Source/StatisticsFunctions/arm_max_q31.c b/DSP/Source/StatisticsFunctions/arm_max_q31.c new file mode 100644 index 0000000..5d34bbd --- /dev/null +++ b/DSP/Source/StatisticsFunctions/arm_max_q31.c @@ -0,0 +1,162 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_max_q31.c + * Description: Maximum value of a Q31 vector + * + * $Date: 27. January 2017 + * $Revision: V.1.5.1 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arm_math.h" + +/** + * @ingroup groupStats + */ + +/** + * @addtogroup Max + * @{ + */ + + +/** + * @brief Maximum value of a Q31 vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult maximum value returned here + * @param[out] *pIndex index of maximum value returned here + * @return none. + */ + +void arm_max_q31( + q31_t * pSrc, + uint32_t blockSize, + q31_t * pResult, + uint32_t * pIndex) +{ +#if defined (ARM_MATH_DSP) + /* Run the below code for Cortex-M4 and Cortex-M3 */ + + q31_t maxVal1, maxVal2, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex, count; /* loop counter */ + + /* Initialise the count value. */ + count = 0U; + /* Initialise the index value to zero. */ + outIndex = 0U; + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + + /* Loop unrolling */ + blkCnt = (blockSize - 1U) >> 2U; + + while (blkCnt > 0U) + { + /* Initialize maxVal to the next consecutive values one by one */ + maxVal1 = *pSrc++; + maxVal2 = *pSrc++; + + /* compare for the maximum value */ + if (out < maxVal1) + { + /* Update the maximum value and its index */ + out = maxVal1; + outIndex = count + 1U; + } + + /* compare for the maximum value */ + if (out < maxVal2) + { + /* Update the maximum value and its index */ + out = maxVal2; + outIndex = count + 2U; + } + + /* Initialize maxVal to the next consecutive values one by one */ + maxVal1 = *pSrc++; + maxVal2 = *pSrc++; + + /* compare for the maximum value */ + if (out < maxVal1) + { + /* Update the maximum value and its index */ + out = maxVal1; + outIndex = count + 3U; + } + + /* compare for the maximum value */ + if (out < maxVal2) + { + /* Update the maximum value and its index */ + out = maxVal2; + outIndex = count + 4U; + } + + count += 4U; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* if (blockSize - 1U) is not multiple of 4 */ + blkCnt = (blockSize - 1U) % 4U; + +#else + /* Run the below code for Cortex-M0 */ + + q31_t maxVal1, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex; /* loop counter */ + + /* Initialise the index value to zero. */ + outIndex = 0U; + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + + blkCnt = (blockSize - 1U); + +#endif /* #if defined (ARM_MATH_DSP) */ + + while (blkCnt > 0U) + { + /* Initialize maxVal to the next consecutive values one by one */ + maxVal1 = *pSrc++; + + /* compare for the maximum value */ + if (out < maxVal1) + { + /* Update the maximum value and it's index */ + out = maxVal1; + outIndex = blockSize - blkCnt; + } + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Store the maximum value and it's index into destination pointers */ + *pResult = out; + *pIndex = outIndex; +} + +/** + * @} end of Max group + */ diff --git a/DSP/Source/StatisticsFunctions/arm_max_q7.c b/DSP/Source/StatisticsFunctions/arm_max_q7.c new file mode 100644 index 0000000..72f6e5e --- /dev/null +++ b/DSP/Source/StatisticsFunctions/arm_max_q7.c @@ -0,0 +1,162 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_max_q7.c + * Description: Maximum value of a Q7 vector + * + * $Date: 27. January 2017 + * $Revision: V.1.5.1 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arm_math.h" + +/** + * @ingroup groupStats + */ + +/** + * @addtogroup Max + * @{ + */ + + +/** + * @brief Maximum value of a Q7 vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult maximum value returned here + * @param[out] *pIndex index of maximum value returned here + * @return none. + */ + +void arm_max_q7( + q7_t * pSrc, + uint32_t blockSize, + q7_t * pResult, + uint32_t * pIndex) +{ +#if defined (ARM_MATH_DSP) + /* Run the below code for Cortex-M4 and Cortex-M3 */ + + q7_t maxVal1, maxVal2, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex, count; /* loop counter */ + + /* Initialise the count value. */ + count = 0U; + /* Initialise the index value to zero. */ + outIndex = 0U; + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + + /* Loop unrolling */ + blkCnt = (blockSize - 1U) >> 2U; + + while (blkCnt > 0U) + { + /* Initialize maxVal to the next consecutive values one by one */ + maxVal1 = *pSrc++; + maxVal2 = *pSrc++; + + /* compare for the maximum value */ + if (out < maxVal1) + { + /* Update the maximum value and its index */ + out = maxVal1; + outIndex = count + 1U; + } + + /* compare for the maximum value */ + if (out < maxVal2) + { + /* Update the maximum value and its index */ + out = maxVal2; + outIndex = count + 2U; + } + + /* Initialize maxVal to the next consecutive values one by one */ + maxVal1 = *pSrc++; + maxVal2 = *pSrc++; + + /* compare for the maximum value */ + if (out < maxVal1) + { + /* Update the maximum value and its index */ + out = maxVal1; + outIndex = count + 3U; + } + + /* compare for the maximum value */ + if (out < maxVal2) + { + /* Update the maximum value and its index */ + out = maxVal2; + outIndex = count + 4U; + } + + count += 4U; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* if (blockSize - 1U) is not multiple of 4 */ + blkCnt = (blockSize - 1U) % 4U; + +#else + /* Run the below code for Cortex-M0 */ + + q7_t maxVal1, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex; /* loop counter */ + + /* Initialise the index value to zero. */ + outIndex = 0U; + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + + blkCnt = (blockSize - 1U); + +#endif /* #if defined (ARM_MATH_DSP) */ + + while (blkCnt > 0U) + { + /* Initialize maxVal to the next consecutive values one by one */ + maxVal1 = *pSrc++; + + /* compare for the maximum value */ + if (out < maxVal1) + { + /* Update the maximum value and it's index */ + out = maxVal1; + outIndex = blockSize - blkCnt; + } + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Store the maximum value and it's index into destination pointers */ + *pResult = out; + *pIndex = outIndex; +} + +/** + * @} end of Max group + */ diff --git a/DSP/Source/StatisticsFunctions/arm_mean_f32.c b/DSP/Source/StatisticsFunctions/arm_mean_f32.c new file mode 100644 index 0000000..85a3b16 --- /dev/null +++ b/DSP/Source/StatisticsFunctions/arm_mean_f32.c @@ -0,0 +1,125 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_mean_f32.c + * Description: Mean value of a floating-point vector + * + * $Date: 27. January 2017 + * $Revision: V.1.5.1 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arm_math.h" + +/** + * @ingroup groupStats + */ + +/** + * @defgroup mean Mean + * + * Calculates the mean of the input vector. Mean is defined as the average of the elements in the vector. + * The underlying algorithm is used: + * + * <pre> + * Result = (pSrc[0] + pSrc[1] + pSrc[2] + ... + pSrc[blockSize-1]) / blockSize; + * </pre> + * + * There are separate functions for floating-point, Q31, Q15, and Q7 data types. + */ + +/** + * @addtogroup mean + * @{ + */ + + +/** + * @brief Mean value of a floating-point vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult mean value returned here + * @return none. + */ + +void arm_mean_f32( + float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult) +{ + float32_t sum = 0.0f; /* Temporary result storage */ + uint32_t blkCnt; /* loop counter */ + +#if defined (ARM_MATH_DSP) + /* Run the below code for Cortex-M4 and Cortex-M3 */ + + float32_t in1, in2, in3, in4; + + /*loop Unrolling */ + blkCnt = blockSize >> 2U; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while (blkCnt > 0U) + { + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ + in1 = *pSrc++; + in2 = *pSrc++; + in3 = *pSrc++; + in4 = *pSrc++; + + sum += in1; + sum += in2; + sum += in3; + sum += in4; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* If the blockSize is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = blockSize % 0x4U; + +#else + /* Run the below code for Cortex-M0 */ + + /* Loop over blockSize number of values */ + blkCnt = blockSize; + +#endif /* #if defined (ARM_MATH_DSP) */ + + while (blkCnt > 0U) + { + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ + sum += *pSrc++; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */ + /* Store the result to the destination */ + *pResult = sum / (float32_t) blockSize; +} + +/** + * @} end of mean group + */ diff --git a/DSP/Source/StatisticsFunctions/arm_mean_q15.c b/DSP/Source/StatisticsFunctions/arm_mean_q15.c new file mode 100644 index 0000000..7bf55c2 --- /dev/null +++ b/DSP/Source/StatisticsFunctions/arm_mean_q15.c @@ -0,0 +1,120 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_mean_q15.c + * Description: Mean value of a Q15 vector + * + * $Date: 27. January 2017 + * $Revision: V.1.5.1 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arm_math.h" + +/** + * @ingroup groupStats + */ + +/** + * @addtogroup mean + * @{ + */ + + +/** + * @brief Mean value of a Q15 vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult mean value returned here + * @return none. + * + * @details + * <b>Scaling and Overflow Behavior:</b> + * \par + * The function is implemented using a 32-bit internal accumulator. + * The input is represented in 1.15 format and is accumulated in a 32-bit + * accumulator in 17.15 format. + * There is no risk of internal overflow with this approach, and the + * full precision of intermediate result is preserved. + * Finally, the accumulator is saturated and truncated to yield a result of 1.15 format. + * + */ + +void arm_mean_q15( + q15_t * pSrc, + uint32_t blockSize, + q15_t * pResult) +{ + q31_t sum = 0; /* Temporary result storage */ + uint32_t blkCnt; /* loop counter */ + +#if defined (ARM_MATH_DSP) + /* Run the below code for Cortex-M4 and Cortex-M3 */ + + q31_t in; + + /*loop Unrolling */ + blkCnt = blockSize >> 2U; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while (blkCnt > 0U) + { + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ + in = *__SIMD32(pSrc)++; + sum += ((in << 16U) >> 16U); + sum += (in >> 16U); + in = *__SIMD32(pSrc)++; + sum += ((in << 16U) >> 16U); + sum += (in >> 16U); + + /* Decrement the loop counter */ + blkCnt--; + } + + /* If the blockSize is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = blockSize % 0x4U; + +#else + /* Run the below code for Cortex-M0 */ + + /* Loop over blockSize number of values */ + blkCnt = blockSize; + +#endif /* #if defined (ARM_MATH_DSP) */ + + while (blkCnt > 0U) + { + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ + sum += *pSrc++; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */ + /* Store the result to the destination */ + *pResult = (q15_t) (sum / (q31_t)blockSize); +} + +/** + * @} end of mean group + */ diff --git a/DSP/Source/StatisticsFunctions/arm_mean_q31.c b/DSP/Source/StatisticsFunctions/arm_mean_q31.c new file mode 100644 index 0000000..ea83ced --- /dev/null +++ b/DSP/Source/StatisticsFunctions/arm_mean_q31.c @@ -0,0 +1,123 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_mean_q31.c + * Description: Mean value of a Q31 vector + * + * $Date: 27. January 2017 + * $Revision: V.1.5.1 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arm_math.h" + +/** + * @ingroup groupStats + */ + +/** + * @addtogroup mean + * @{ + */ + + +/** + * @brief Mean value of a Q31 vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult mean value returned here + * @return none. + * + * @details + * <b>Scaling and Overflow Behavior:</b> + *\par + * The function is implemented using a 64-bit internal accumulator. + * The input is represented in 1.31 format and is accumulated in a 64-bit + * accumulator in 33.31 format. + * There is no risk of internal overflow with this approach, and the + * full precision of intermediate result is preserved. + * Finally, the accumulator is truncated to yield a result of 1.31 format. + * + */ + +void arm_mean_q31( + q31_t * pSrc, + uint32_t blockSize, + q31_t * pResult) +{ + q63_t sum = 0; /* Temporary result storage */ + uint32_t blkCnt; /* loop counter */ + +#if defined (ARM_MATH_DSP) + /* Run the below code for Cortex-M4 and Cortex-M3 */ + + q31_t in1, in2, in3, in4; + + /*loop Unrolling */ + blkCnt = blockSize >> 2U; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while (blkCnt > 0U) + { + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ + in1 = *pSrc++; + in2 = *pSrc++; + in3 = *pSrc++; + in4 = *pSrc++; + + sum += in1; + sum += in2; + sum += in3; + sum += in4; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* If the blockSize is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = blockSize % 0x4U; + +#else + /* Run the below code for Cortex-M0 */ + + /* Loop over blockSize number of values */ + blkCnt = blockSize; + +#endif /* #if defined (ARM_MATH_DSP) */ + + while (blkCnt > 0U) + { + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ + sum += *pSrc++; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */ + /* Store the result to the destination */ + *pResult = (q31_t) (sum / (int32_t) blockSize); +} + +/** + * @} end of mean group + */ diff --git a/DSP/Source/StatisticsFunctions/arm_mean_q7.c b/DSP/Source/StatisticsFunctions/arm_mean_q7.c new file mode 100644 index 0000000..a7bdfb8 --- /dev/null +++ b/DSP/Source/StatisticsFunctions/arm_mean_q7.c @@ -0,0 +1,120 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_mean_q7.c + * Description: Mean value of a Q7 vector + * + * $Date: 27. January 2017 + * $Revision: V.1.5.1 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arm_math.h" + +/** + * @ingroup groupStats + */ + +/** + * @addtogroup mean + * @{ + */ + + +/** + * @brief Mean value of a Q7 vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult mean value returned here + * @return none. + * + * @details + * <b>Scaling and Overflow Behavior:</b> + * \par + * The function is implemented using a 32-bit internal accumulator. + * The input is represented in 1.7 format and is accumulated in a 32-bit + * accumulator in 25.7 format. + * There is no risk of internal overflow with this approach, and the + * full precision of intermediate result is preserved. + * Finally, the accumulator is truncated to yield a result of 1.7 format. + * + */ + +void arm_mean_q7( + q7_t * pSrc, + uint32_t blockSize, + q7_t * pResult) +{ + q31_t sum = 0; /* Temporary result storage */ + uint32_t blkCnt; /* loop counter */ + +#if defined (ARM_MATH_DSP) + /* Run the below code for Cortex-M4 and Cortex-M3 */ + + q31_t in; + + /*loop Unrolling */ + blkCnt = blockSize >> 2U; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while (blkCnt > 0U) + { + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ + in = *__SIMD32(pSrc)++; + + sum += ((in << 24U) >> 24U); + sum += ((in << 16U) >> 24U); + sum += ((in << 8U) >> 24U); + sum += (in >> 24U); + + /* Decrement the loop counter */ + blkCnt--; + } + + /* If the blockSize is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = blockSize % 0x4U; + +#else + /* Run the below code for Cortex-M0 */ + + /* Loop over blockSize number of values */ + blkCnt = blockSize; + +#endif /* #if defined (ARM_MATH_DSP) */ + + while (blkCnt > 0U) + { + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ + sum += *pSrc++; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */ + /* Store the result to the destination */ + *pResult = (q7_t) (sum / (int32_t) blockSize); +} + +/** + * @} end of mean group + */ diff --git a/DSP/Source/StatisticsFunctions/arm_min_f32.c b/DSP/Source/StatisticsFunctions/arm_min_f32.c new file mode 100644 index 0000000..858b0a2 --- /dev/null +++ b/DSP/Source/StatisticsFunctions/arm_min_f32.c @@ -0,0 +1,170 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_min_f32.c + * Description: Minimum value of a floating-point vector + * + * $Date: 27. January 2017 + * $Revision: V.1.5.1 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arm_math.h" + +/** + * @ingroup groupStats + */ + +/** + * @defgroup Min Minimum + * + * Computes the minimum value of an array of data. + * The function returns both the minimum value and its position within the array. + * There are separate functions for floating-point, Q31, Q15, and Q7 data types. + */ + +/** + * @addtogroup Min + * @{ + */ + + +/** + * @brief Minimum value of a floating-point vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult minimum value returned here + * @param[out] *pIndex index of minimum value returned here + * @return none. + */ + +void arm_min_f32( + float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult, + uint32_t * pIndex) +{ +#if defined (ARM_MATH_DSP) + /* Run the below code for Cortex-M4 and Cortex-M3 */ + + float32_t minVal1, minVal2, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex, count; /* loop counter */ + + /* Initialise the count value. */ + count = 0U; + /* Initialise the index value to zero. */ + outIndex = 0U; + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + + /* Loop unrolling */ + blkCnt = (blockSize - 1U) >> 2U; + + while (blkCnt > 0U) + { + /* Initialize minVal to the next consecutive values one by one */ + minVal1 = *pSrc++; + minVal2 = *pSrc++; + + /* compare for the minimum value */ + if (out > minVal1) + { + /* Update the minimum value and its index */ + out = minVal1; + outIndex = count + 1U; + } + + /* compare for the minimum value */ + if (out > minVal2) + { + /* Update the minimum value and its index */ + out = minVal2; + outIndex = count + 2U; + } + + /* Initialize minVal to the next consecutive values one by one */ + minVal1 = *pSrc++; + minVal2 = *pSrc++; + + /* compare for the minimum value */ + if (out > minVal1) + { + /* Update the minimum value and its index */ + out = minVal1; + outIndex = count + 3U; + } + + /* compare for the minimum value */ + if (out > minVal2) + { + /* Update the minimum value and its index */ + out = minVal2; + outIndex = count + 4U; + } + + count += 4U; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* if (blockSize - 1U) is not multiple of 4 */ + blkCnt = (blockSize - 1U) % 4U; + +#else + /* Run the below code for Cortex-M0 */ + + float32_t minVal1, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex; /* loop counter */ + + /* Initialise the index value to zero. */ + outIndex = 0U; + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + + blkCnt = (blockSize - 1U); + +#endif /* #if defined (ARM_MATH_DSP) */ + + while (blkCnt > 0U) + { + /* Initialize minVal to the next consecutive values one by one */ + minVal1 = *pSrc++; + + /* compare for the minimum value */ + if (out > minVal1) + { + /* Update the minimum value and it's index */ + out = minVal1; + outIndex = blockSize - blkCnt; + } + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Store the minimum value and it's index into destination pointers */ + *pResult = out; + *pIndex = outIndex; +} + +/** + * @} end of Min group + */ diff --git a/DSP/Source/StatisticsFunctions/arm_min_q15.c b/DSP/Source/StatisticsFunctions/arm_min_q15.c new file mode 100644 index 0000000..fdc32b7 --- /dev/null +++ b/DSP/Source/StatisticsFunctions/arm_min_q15.c @@ -0,0 +1,163 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_min_q15.c + * Description: Minimum value of a Q15 vector + * + * $Date: 27. January 2017 + * $Revision: V.1.5.1 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arm_math.h" + +/** + * @ingroup groupStats + */ + + +/** + * @addtogroup Min + * @{ + */ + + +/** + * @brief Minimum value of a Q15 vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult minimum value returned here + * @param[out] *pIndex index of minimum value returned here + * @return none. + */ + +void arm_min_q15( + q15_t * pSrc, + uint32_t blockSize, + q15_t * pResult, + uint32_t * pIndex) +{ +#if defined (ARM_MATH_DSP) + /* Run the below code for Cortex-M4 and Cortex-M3 */ + + q15_t minVal1, minVal2, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex, count; /* loop counter */ + + /* Initialise the count value. */ + count = 0U; + /* Initialise the index value to zero. */ + outIndex = 0U; + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + + /* Loop unrolling */ + blkCnt = (blockSize - 1U) >> 2U; + + while (blkCnt > 0U) + { + /* Initialize minVal to the next consecutive values one by one */ + minVal1 = *pSrc++; + minVal2 = *pSrc++; + + /* compare for the minimum value */ + if (out > minVal1) + { + /* Update the minimum value and its index */ + out = minVal1; + outIndex = count + 1U; + } + + /* compare for the minimum value */ + if (out > minVal2) + { + /* Update the minimum value and its index */ + out = minVal2; + outIndex = count + 2U; + } + + /* Initialize minVal to the next consecutive values one by one */ + minVal1 = *pSrc++; + minVal2 = *pSrc++; + + /* compare for the minimum value */ + if (out > minVal1) + { + /* Update the minimum value and its index */ + out = minVal1; + outIndex = count + 3U; + } + + /* compare for the minimum value */ + if (out > minVal2) + { + /* Update the minimum value and its index */ + out = minVal2; + outIndex = count + 4U; + } + + count += 4U; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* if (blockSize - 1U) is not multiple of 4 */ + blkCnt = (blockSize - 1U) % 4U; + +#else + /* Run the below code for Cortex-M0 */ + + q15_t minVal1, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex; /* loop counter */ + + /* Initialise the index value to zero. */ + outIndex = 0U; + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + + blkCnt = (blockSize - 1U); + +#endif /* #if defined (ARM_MATH_DSP) */ + + while (blkCnt > 0U) + { + /* Initialize minVal to the next consecutive values one by one */ + minVal1 = *pSrc++; + + /* compare for the minimum value */ + if (out > minVal1) + { + /* Update the minimum value and it's index */ + out = minVal1; + outIndex = blockSize - blkCnt; + } + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Store the minimum value and it's index into destination pointers */ + *pResult = out; + *pIndex = outIndex; +} + +/** + * @} end of Min group + */ diff --git a/DSP/Source/StatisticsFunctions/arm_min_q31.c b/DSP/Source/StatisticsFunctions/arm_min_q31.c new file mode 100644 index 0000000..fc4c155 --- /dev/null +++ b/DSP/Source/StatisticsFunctions/arm_min_q31.c @@ -0,0 +1,163 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_min_q31.c + * Description: Minimum value of a Q31 vector + * + * $Date: 27. January 2017 + * $Revision: V.1.5.1 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arm_math.h" + +/** + * @ingroup groupStats + */ + + +/** + * @addtogroup Min + * @{ + */ + + +/** + * @brief Minimum value of a Q31 vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult minimum value returned here + * @param[out] *pIndex index of minimum value returned here + * @return none. + */ + +void arm_min_q31( + q31_t * pSrc, + uint32_t blockSize, + q31_t * pResult, + uint32_t * pIndex) +{ +#if defined (ARM_MATH_DSP) + /* Run the below code for Cortex-M4 and Cortex-M3 */ + + q31_t minVal1, minVal2, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex, count; /* loop counter */ + + /* Initialise the count value. */ + count = 0U; + /* Initialise the index value to zero. */ + outIndex = 0U; + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + + /* Loop unrolling */ + blkCnt = (blockSize - 1U) >> 2U; + + while (blkCnt > 0U) + { + /* Initialize minVal to the next consecutive values one by one */ + minVal1 = *pSrc++; + minVal2 = *pSrc++; + + /* compare for the minimum value */ + if (out > minVal1) + { + /* Update the minimum value and its index */ + out = minVal1; + outIndex = count + 1U; + } + + /* compare for the minimum value */ + if (out > minVal2) + { + /* Update the minimum value and its index */ + out = minVal2; + outIndex = count + 2U; + } + + /* Initialize minVal to the next consecutive values one by one */ + minVal1 = *pSrc++; + minVal2 = *pSrc++; + + /* compare for the minimum value */ + if (out > minVal1) + { + /* Update the minimum value and its index */ + out = minVal1; + outIndex = count + 3U; + } + + /* compare for the minimum value */ + if (out > minVal2) + { + /* Update the minimum value and its index */ + out = minVal2; + outIndex = count + 4U; + } + + count += 4U; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* if (blockSize - 1U) is not multiple of 4 */ + blkCnt = (blockSize - 1U) % 4U; + +#else + /* Run the below code for Cortex-M0 */ + + q31_t minVal1, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex; /* loop counter */ + + /* Initialise the index value to zero. */ + outIndex = 0U; + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + + blkCnt = (blockSize - 1U); + +#endif /* #if defined (ARM_MATH_DSP) */ + + while (blkCnt > 0U) + { + /* Initialize minVal to the next consecutive values one by one */ + minVal1 = *pSrc++; + + /* compare for the minimum value */ + if (out > minVal1) + { + /* Update the minimum value and it's index */ + out = minVal1; + outIndex = blockSize - blkCnt; + } + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Store the minimum value and it's index into destination pointers */ + *pResult = out; + *pIndex = outIndex; +} + +/** + * @} end of Min group + */ diff --git a/DSP/Source/StatisticsFunctions/arm_min_q7.c b/DSP/Source/StatisticsFunctions/arm_min_q7.c new file mode 100644 index 0000000..50362e6 --- /dev/null +++ b/DSP/Source/StatisticsFunctions/arm_min_q7.c @@ -0,0 +1,163 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_min_q7.c + * Description: Minimum value of a Q7 vector + * + * $Date: 27. January 2017 + * $Revision: V.1.5.1 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arm_math.h" + +/** + * @ingroup groupStats + */ + + +/** + * @addtogroup Min + * @{ + */ + + +/** + * @brief Minimum value of a Q7 vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult minimum value returned here + * @param[out] *pIndex index of minimum value returned here + * @return none. + */ + +void arm_min_q7( + q7_t * pSrc, + uint32_t blockSize, + q7_t * pResult, + uint32_t * pIndex) +{ +#if defined (ARM_MATH_DSP) + /* Run the below code for Cortex-M4 and Cortex-M3 */ + + q7_t minVal1, minVal2, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex, count; /* loop counter */ + + /* Initialise the count value. */ + count = 0U; + /* Initialise the index value to zero. */ + outIndex = 0U; + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + + /* Loop unrolling */ + blkCnt = (blockSize - 1U) >> 2U; + + while (blkCnt > 0U) + { + /* Initialize minVal to the next consecutive values one by one */ + minVal1 = *pSrc++; + minVal2 = *pSrc++; + + /* compare for the minimum value */ + if (out > minVal1) + { + /* Update the minimum value and its index */ + out = minVal1; + outIndex = count + 1U; + } + + /* compare for the minimum value */ + if (out > minVal2) + { + /* Update the minimum value and its index */ + out = minVal2; + outIndex = count + 2U; + } + + /* Initialize minVal to the next consecutive values one by one */ + minVal1 = *pSrc++; + minVal2 = *pSrc++; + + /* compare for the minimum value */ + if (out > minVal1) + { + /* Update the minimum value and its index */ + out = minVal1; + outIndex = count + 3U; + } + + /* compare for the minimum value */ + if (out > minVal2) + { + /* Update the minimum value and its index */ + out = minVal2; + outIndex = count + 4U; + } + + count += 4U; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* if (blockSize - 1U) is not multiple of 4 */ + blkCnt = (blockSize - 1U) % 4U; + +#else + /* Run the below code for Cortex-M0 */ + + q7_t minVal1, out; /* Temporary variables to store the output value. */ + uint32_t blkCnt, outIndex; /* loop counter */ + + /* Initialise the index value to zero. */ + outIndex = 0U; + /* Load first input value that act as reference value for comparision */ + out = *pSrc++; + + blkCnt = (blockSize - 1U); + +#endif /* #if defined (ARM_MATH_DSP) */ + + while (blkCnt > 0U) + { + /* Initialize minVal to the next consecutive values one by one */ + minVal1 = *pSrc++; + + /* compare for the minimum value */ + if (out > minVal1) + { + /* Update the minimum value and it's index */ + out = minVal1; + outIndex = blockSize - blkCnt; + } + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Store the minimum value and it's index into destination pointers */ + *pResult = out; + *pIndex = outIndex; +} + +/** + * @} end of Min group + */ diff --git a/DSP/Source/StatisticsFunctions/arm_power_f32.c b/DSP/Source/StatisticsFunctions/arm_power_f32.c new file mode 100644 index 0000000..1426735 --- /dev/null +++ b/DSP/Source/StatisticsFunctions/arm_power_f32.c @@ -0,0 +1,129 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_power_f32.c + * Description: Sum of the squares of the elements of a floating-point vector + * + * $Date: 27. January 2017 + * $Revision: V.1.5.1 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arm_math.h" + +/** + * @ingroup groupStats + */ + +/** + * @defgroup power Power + * + * Calculates the sum of the squares of the elements in the input vector. + * The underlying algorithm is used: + * + * <pre> + * Result = pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + pSrc[2] * pSrc[2] + ... + pSrc[blockSize-1] * pSrc[blockSize-1]; + * </pre> + * + * There are separate functions for floating point, Q31, Q15, and Q7 data types. + */ + +/** + * @addtogroup power + * @{ + */ + + +/** + * @brief Sum of the squares of the elements of a floating-point vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult sum of the squares value returned here + * @return none. + * + */ + + +void arm_power_f32( + float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult) +{ + float32_t sum = 0.0f; /* accumulator */ + float32_t in; /* Temporary variable to store input value */ + uint32_t blkCnt; /* loop counter */ + +#if defined (ARM_MATH_DSP) + /* Run the below code for Cortex-M4 and Cortex-M3 */ + + /*loop Unrolling */ + blkCnt = blockSize >> 2U; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */ + /* Compute Power and then store the result in a temporary variable, sum. */ + in = *pSrc++; + sum += in * in; + in = *pSrc++; + sum += in * in; + in = *pSrc++; + sum += in * in; + in = *pSrc++; + sum += in * in; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* If the blockSize is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = blockSize % 0x4U; + + +#else + /* Run the below code for Cortex-M0 */ + + /* Loop over blockSize number of values */ + blkCnt = blockSize; + +#endif /* #if defined (ARM_MATH_DSP) */ + + + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */ + /* compute power and then store the result in a temporary variable, sum. */ + in = *pSrc++; + sum += in * in; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Store the result to the destination */ + *pResult = sum; +} + +/** + * @} end of power group + */ diff --git a/DSP/Source/StatisticsFunctions/arm_power_q15.c b/DSP/Source/StatisticsFunctions/arm_power_q15.c new file mode 100644 index 0000000..6d95f4d --- /dev/null +++ b/DSP/Source/StatisticsFunctions/arm_power_q15.c @@ -0,0 +1,138 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_power_q15.c + * Description: Sum of the squares of the elements of a Q15 vector + * + * $Date: 27. January 2017 + * $Revision: V.1.5.1 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arm_math.h" + +/** + * @ingroup groupStats + */ + +/** + * @addtogroup power + * @{ + */ + +/** + * @brief Sum of the squares of the elements of a Q15 vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult sum of the squares value returned here + * @return none. + * + * @details + * <b>Scaling and Overflow Behavior:</b> + * + * \par + * The function is implemented using a 64-bit internal accumulator. + * The input is represented in 1.15 format. + * Intermediate multiplication yields a 2.30 format, and this + * result is added without saturation to a 64-bit accumulator in 34.30 format. + * With 33 guard bits in the accumulator, there is no risk of overflow, and the + * full precision of the intermediate multiplication is preserved. + * Finally, the return result is in 34.30 format. + * + */ + +void arm_power_q15( + q15_t * pSrc, + uint32_t blockSize, + q63_t * pResult) +{ + q63_t sum = 0; /* Temporary result storage */ + +#if defined (ARM_MATH_DSP) + /* Run the below code for Cortex-M4 and Cortex-M3 */ + + q31_t in32; /* Temporary variable to store input value */ + q15_t in16; /* Temporary variable to store input value */ + uint32_t blkCnt; /* loop counter */ + + + /* loop Unrolling */ + blkCnt = blockSize >> 2U; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */ + /* Compute Power and then store the result in a temporary variable, sum. */ + in32 = *__SIMD32(pSrc)++; + sum = __SMLALD(in32, in32, sum); + in32 = *__SIMD32(pSrc)++; + sum = __SMLALD(in32, in32, sum); + + /* Decrement the loop counter */ + blkCnt--; + } + + /* If the blockSize is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = blockSize % 0x4U; + + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */ + /* Compute Power and then store the result in a temporary variable, sum. */ + in16 = *pSrc++; + sum = __SMLALD(in16, in16, sum); + + /* Decrement the loop counter */ + blkCnt--; + } + +#else + /* Run the below code for Cortex-M0 */ + + q15_t in; /* Temporary variable to store input value */ + uint32_t blkCnt; /* loop counter */ + + + /* Loop over blockSize number of values */ + blkCnt = blockSize; + + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */ + /* Compute Power and then store the result in a temporary variable, sum. */ + in = *pSrc++; + sum += ((q31_t) in * in); + + /* Decrement the loop counter */ + blkCnt--; + } + +#endif /* #if defined (ARM_MATH_DSP) */ + + /* Store the results in 34.30 format */ + *pResult = sum; +} + +/** + * @} end of power group + */ diff --git a/DSP/Source/StatisticsFunctions/arm_power_q31.c b/DSP/Source/StatisticsFunctions/arm_power_q31.c new file mode 100644 index 0000000..16be249 --- /dev/null +++ b/DSP/Source/StatisticsFunctions/arm_power_q31.c @@ -0,0 +1,129 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_power_q31.c + * Description: Sum of the squares of the elements of a Q31 vector + * + * $Date: 27. January 2017 + * $Revision: V.1.5.1 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arm_math.h" + +/** + * @ingroup groupStats + */ + +/** + * @addtogroup power + * @{ + */ + +/** + * @brief Sum of the squares of the elements of a Q31 vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult sum of the squares value returned here + * @return none. + * + * @details + * <b>Scaling and Overflow Behavior:</b> + * + * \par + * The function is implemented using a 64-bit internal accumulator. + * The input is represented in 1.31 format. + * Intermediate multiplication yields a 2.62 format, and this + * result is truncated to 2.48 format by discarding the lower 14 bits. + * The 2.48 result is then added without saturation to a 64-bit accumulator in 16.48 format. + * With 15 guard bits in the accumulator, there is no risk of overflow, and the + * full precision of the intermediate multiplication is preserved. + * Finally, the return result is in 16.48 format. + * + */ + +void arm_power_q31( + q31_t * pSrc, + uint32_t blockSize, + q63_t * pResult) +{ + q63_t sum = 0; /* Temporary result storage */ + q31_t in; + uint32_t blkCnt; /* loop counter */ + + +#if defined (ARM_MATH_DSP) + /* Run the below code for Cortex-M4 and Cortex-M3 */ + + /*loop Unrolling */ + blkCnt = blockSize >> 2U; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */ + /* Compute Power then shift intermediate results by 14 bits to maintain 16.48 format and then store the result in a temporary variable sum, providing 15 guard bits. */ + in = *pSrc++; + sum += ((q63_t) in * in) >> 14U; + + in = *pSrc++; + sum += ((q63_t) in * in) >> 14U; + + in = *pSrc++; + sum += ((q63_t) in * in) >> 14U; + + in = *pSrc++; + sum += ((q63_t) in * in) >> 14U; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* If the blockSize is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = blockSize % 0x4U; + +#else + /* Run the below code for Cortex-M0 */ + + /* Loop over blockSize number of values */ + blkCnt = blockSize; + +#endif /* #if defined (ARM_MATH_DSP) */ + + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */ + /* Compute Power and then store the result in a temporary variable, sum. */ + in = *pSrc++; + sum += ((q63_t) in * in) >> 14U; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Store the results in 16.48 format */ + *pResult = sum; +} + +/** + * @} end of power group + */ diff --git a/DSP/Source/StatisticsFunctions/arm_power_q7.c b/DSP/Source/StatisticsFunctions/arm_power_q7.c new file mode 100644 index 0000000..24306cd --- /dev/null +++ b/DSP/Source/StatisticsFunctions/arm_power_q7.c @@ -0,0 +1,127 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_power_q7.c + * Description: Sum of the squares of the elements of a Q7 vector + * + * $Date: 27. January 2017 + * $Revision: V.1.5.1 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arm_math.h" + +/** + * @ingroup groupStats + */ + +/** + * @addtogroup power + * @{ + */ + +/** + * @brief Sum of the squares of the elements of a Q7 vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult sum of the squares value returned here + * @return none. + * + * @details + * <b>Scaling and Overflow Behavior:</b> + * + * \par + * The function is implemented using a 32-bit internal accumulator. + * The input is represented in 1.7 format. + * Intermediate multiplication yields a 2.14 format, and this + * result is added without saturation to an accumulator in 18.14 format. + * With 17 guard bits in the accumulator, there is no risk of overflow, and the + * full precision of the intermediate multiplication is preserved. + * Finally, the return result is in 18.14 format. + * + */ + +void arm_power_q7( + q7_t * pSrc, + uint32_t blockSize, + q31_t * pResult) +{ + q31_t sum = 0; /* Temporary result storage */ + q7_t in; /* Temporary variable to store input */ + uint32_t blkCnt; /* loop counter */ + +#if defined (ARM_MATH_DSP) + /* Run the below code for Cortex-M4 and Cortex-M3 */ + + q31_t input1; /* Temporary variable to store packed input */ + q31_t in1, in2; /* Temporary variables to store input */ + + /*loop Unrolling */ + blkCnt = blockSize >> 2U; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while (blkCnt > 0U) + { + /* Reading two inputs of pSrc vector and packing */ + input1 = *__SIMD32(pSrc)++; + + in1 = __SXTB16(__ROR(input1, 8)); + in2 = __SXTB16(input1); + + /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */ + /* calculate power and accumulate to accumulator */ + sum = __SMLAD(in1, in1, sum); + sum = __SMLAD(in2, in2, sum); + + /* Decrement the loop counter */ + blkCnt--; + } + + /* If the blockSize is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = blockSize % 0x4U; + +#else + /* Run the below code for Cortex-M0 */ + + /* Loop over blockSize number of values */ + blkCnt = blockSize; + +#endif /* #if defined (ARM_MATH_DSP) */ + + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */ + /* Compute Power and then store the result in a temporary variable, sum. */ + in = *pSrc++; + sum += ((q15_t) in * in); + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Store the result in 18.14 format */ + *pResult = sum; +} + +/** + * @} end of power group + */ diff --git a/DSP/Source/StatisticsFunctions/arm_rms_f32.c b/DSP/Source/StatisticsFunctions/arm_rms_f32.c new file mode 100644 index 0000000..8d1b708 --- /dev/null +++ b/DSP/Source/StatisticsFunctions/arm_rms_f32.c @@ -0,0 +1,127 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_rms_f32.c + * Description: Root mean square value of an array of F32 type + * + * $Date: 27. January 2017 + * $Revision: V.1.5.1 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arm_math.h" + +/** + * @ingroup groupStats + */ + +/** + * @defgroup RMS Root mean square (RMS) + * + * + * Calculates the Root Mean Sqaure of the elements in the input vector. + * The underlying algorithm is used: + * + * <pre> + * Result = sqrt(((pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] * pSrc[blockSize-1]) / blockSize)); + * </pre> + * + * There are separate functions for floating point, Q31, and Q15 data types. + */ + +/** + * @addtogroup RMS + * @{ + */ + + +/** + * @brief Root Mean Square of the elements of a floating-point vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult rms value returned here + * @return none. + * + */ + +void arm_rms_f32( + float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult) +{ + float32_t sum = 0.0f; /* Accumulator */ + float32_t in; /* Tempoprary variable to store input value */ + uint32_t blkCnt; /* loop counter */ + +#if defined (ARM_MATH_DSP) + /* Run the below code for Cortex-M4 and Cortex-M3 */ + + /* loop Unrolling */ + blkCnt = blockSize >> 2U; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */ + /* Compute sum of the squares and then store the result in a temporary variable, sum */ + in = *pSrc++; + sum += in * in; + in = *pSrc++; + sum += in * in; + in = *pSrc++; + sum += in * in; + in = *pSrc++; + sum += in * in; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* If the blockSize is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = blockSize % 0x4U; + +#else + /* Run the below code for Cortex-M0 */ + + /* Loop over blockSize number of values */ + blkCnt = blockSize; + +#endif /* #if defined (ARM_MATH_DSP) */ + + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */ + /* Compute sum of the squares and then store the results in a temporary variable, sum */ + in = *pSrc++; + sum += in * in; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Compute Rms and store the result in the destination */ + arm_sqrt_f32(sum / (float32_t) blockSize, pResult); +} + +/** + * @} end of RMS group + */ diff --git a/DSP/Source/StatisticsFunctions/arm_rms_q15.c b/DSP/Source/StatisticsFunctions/arm_rms_q15.c new file mode 100644 index 0000000..d0e61ca --- /dev/null +++ b/DSP/Source/StatisticsFunctions/arm_rms_q15.c @@ -0,0 +1,139 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_rms_q15.c + * Description: Root Mean Square of the elements of a Q15 vector + * + * $Date: 27. January 2017 + * $Revision: V.1.5.1 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arm_math.h" + +/** + * @addtogroup RMS + * @{ + */ + +/** + * @brief Root Mean Square of the elements of a Q15 vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult rms value returned here + * @return none. + * + * @details + * <b>Scaling and Overflow Behavior:</b> + * + * \par + * The function is implemented using a 64-bit internal accumulator. + * The input is represented in 1.15 format. + * Intermediate multiplication yields a 2.30 format, and this + * result is added without saturation to a 64-bit accumulator in 34.30 format. + * With 33 guard bits in the accumulator, there is no risk of overflow, and the + * full precision of the intermediate multiplication is preserved. + * Finally, the 34.30 result is truncated to 34.15 format by discarding the lower + * 15 bits, and then saturated to yield a result in 1.15 format. + * + */ + +void arm_rms_q15( + q15_t * pSrc, + uint32_t blockSize, + q15_t * pResult) +{ + q63_t sum = 0; /* accumulator */ + +#if defined (ARM_MATH_DSP) + /* Run the below code for Cortex-M4 and Cortex-M3 */ + + q31_t in; /* temporary variable to store the input value */ + q15_t in1; /* temporary variable to store the input value */ + uint32_t blkCnt; /* loop counter */ + + /* loop Unrolling */ + blkCnt = blockSize >> 2U; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while (blkCnt > 0U) + { + /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ + /* Compute sum of the squares and then store the results in a temporary variable, sum */ + in = *__SIMD32(pSrc)++; + sum = __SMLALD(in, in, sum); + in = *__SIMD32(pSrc)++; + sum = __SMLALD(in, in, sum); + + /* Decrement the loop counter */ + blkCnt--; + } + + /* If the blockSize is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = blockSize % 0x4U; + + while (blkCnt > 0U) + { + /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ + /* Compute sum of the squares and then store the results in a temporary variable, sum */ + in1 = *pSrc++; + sum = __SMLALD(in1, in1, sum); + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Truncating and saturating the accumulator to 1.15 format */ + /* Store the result in the destination */ + arm_sqrt_q15(__SSAT((sum / (q63_t)blockSize) >> 15, 16), pResult); + +#else + /* Run the below code for Cortex-M0 */ + + q15_t in; /* temporary variable to store the input value */ + uint32_t blkCnt; /* loop counter */ + + /* Loop over blockSize number of values */ + blkCnt = blockSize; + + while (blkCnt > 0U) + { + /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ + /* Compute sum of the squares and then store the results in a temporary variable, sum */ + in = *pSrc++; + sum += ((q31_t) in * in); + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Truncating and saturating the accumulator to 1.15 format */ + /* Store the result in the destination */ + arm_sqrt_q15(__SSAT((sum / (q63_t)blockSize) >> 15, 16), pResult); + +#endif /* #if defined (ARM_MATH_DSP) */ + +} + +/** + * @} end of RMS group + */ diff --git a/DSP/Source/StatisticsFunctions/arm_rms_q31.c b/DSP/Source/StatisticsFunctions/arm_rms_q31.c new file mode 100644 index 0000000..cb3c58e --- /dev/null +++ b/DSP/Source/StatisticsFunctions/arm_rms_q31.c @@ -0,0 +1,137 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_rms_q31.c + * Description: Root Mean Square of the elements of a Q31 vector + * + * $Date: 27. January 2017 + * $Revision: V.1.5.1 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arm_math.h" + +/** + * @addtogroup RMS + * @{ + */ + + +/** + * @brief Root Mean Square of the elements of a Q31 vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult rms value returned here + * @return none. + * + * @details + * <b>Scaling and Overflow Behavior:</b> + * + *\par + * The function is implemented using an internal 64-bit accumulator. + * The input is represented in 1.31 format, and intermediate multiplication + * yields a 2.62 format. + * The accumulator maintains full precision of the intermediate multiplication results, + * but provides only a single guard bit. + * There is no saturation on intermediate additions. + * If the accumulator overflows, it wraps around and distorts the result. + * In order to avoid overflows completely, the input signal must be scaled down by + * log2(blockSize) bits, as a total of blockSize additions are performed internally. + * Finally, the 2.62 accumulator is right shifted by 31 bits to yield a 1.31 format value. + * + */ + +void arm_rms_q31( + q31_t * pSrc, + uint32_t blockSize, + q31_t * pResult) +{ + q63_t sum = 0; /* accumulator */ + q31_t in; /* Temporary variable to store the input */ + uint32_t blkCnt; /* loop counter */ + +#if defined (ARM_MATH_DSP) + /* Run the below code for Cortex-M4 and Cortex-M3 */ + + q31_t in1, in2, in3, in4; /* Temporary input variables */ + + /*loop Unrolling */ + blkCnt = blockSize >> 2U; + + /* First part of the processing with loop unrolling. Compute 8 outputs at a time. + ** a second loop below computes the remaining 1 to 7 samples. */ + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */ + /* Compute sum of the squares and then store the result in a temporary variable, sum */ + /* read two samples from source buffer */ + in1 = pSrc[0]; + in2 = pSrc[1]; + + /* calculate power and accumulate to accumulator */ + sum += (q63_t) in1 *in1; + sum += (q63_t) in2 *in2; + + /* read two samples from source buffer */ + in3 = pSrc[2]; + in4 = pSrc[3]; + + /* calculate power and accumulate to accumulator */ + sum += (q63_t) in3 *in3; + sum += (q63_t) in4 *in4; + + + /* update source buffer to process next samples */ + pSrc += 4U; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* If the blockSize is not a multiple of 8, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = blockSize % 0x4U; + +#else + /* Run the below code for Cortex-M0 */ + + blkCnt = blockSize; + +#endif /* #if defined (ARM_MATH_DSP) */ + + while (blkCnt > 0U) + { + /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */ + /* Compute sum of the squares and then store the results in a temporary variable, sum */ + in = *pSrc++; + sum += (q63_t) in *in; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Convert data in 2.62 to 1.31 by 31 right shifts and saturate */ + /* Compute Rms and store the result in the destination vector */ + arm_sqrt_q31(clip_q63_to_q31((sum / (q63_t) blockSize) >> 31), pResult); +} + +/** + * @} end of RMS group + */ diff --git a/DSP/Source/StatisticsFunctions/arm_std_f32.c b/DSP/Source/StatisticsFunctions/arm_std_f32.c new file mode 100644 index 0000000..9750b88 --- /dev/null +++ b/DSP/Source/StatisticsFunctions/arm_std_f32.c @@ -0,0 +1,186 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_std_f32.c + * Description: Standard deviation of the elements of a floating-point vector + * + * $Date: 27. January 2017 + * $Revision: V.1.5.1 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arm_math.h" + +/** + * @ingroup groupStats + */ + +/** + * @defgroup STD Standard deviation + * + * Calculates the standard deviation of the elements in the input vector. + * The underlying algorithm is used: + * + * <pre> + * Result = sqrt((sumOfSquares - sum<sup>2</sup> / blockSize) / (blockSize - 1)) + * + * where, sumOfSquares = pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] * pSrc[blockSize-1] + * + * sum = pSrc[0] + pSrc[1] + pSrc[2] + ... + pSrc[blockSize-1] + * </pre> + * + * There are separate functions for floating point, Q31, and Q15 data types. + */ + +/** + * @addtogroup STD + * @{ + */ + + +/** + * @brief Standard deviation of the elements of a floating-point vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult standard deviation value returned here + * @return none. + */ + +void arm_std_f32( + float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult) +{ + float32_t sum = 0.0f; /* Temporary result storage */ + float32_t sumOfSquares = 0.0f; /* Sum of squares */ + float32_t in; /* input value */ + uint32_t blkCnt; /* loop counter */ +#if defined (ARM_MATH_DSP) + float32_t meanOfSquares, mean, squareOfMean; /* Temporary variables */ +#else + float32_t squareOfSum; /* Square of Sum */ + float32_t var; /* Temporary varaince storage */ +#endif + + if (blockSize == 1U) + { + *pResult = 0; + return; + } + +#if defined (ARM_MATH_DSP) + /* Run the below code for Cortex-M4 and Cortex-M3 */ + + /*loop Unrolling */ + blkCnt = blockSize >> 2U; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while (blkCnt > 0U) + { + /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ + /* Compute Sum of squares of the input samples + * and then store the result in a temporary variable, sum. */ + in = *pSrc++; + sum += in; + sumOfSquares += in * in; + in = *pSrc++; + sum += in; + sumOfSquares += in * in; + in = *pSrc++; + sum += in; + sumOfSquares += in * in; + in = *pSrc++; + sum += in; + sumOfSquares += in * in; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* If the blockSize is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = blockSize % 0x4U; + + while (blkCnt > 0U) + { + /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ + /* Compute Sum of squares of the input samples + * and then store the result in a temporary variable, sum. */ + in = *pSrc++; + sum += in; + sumOfSquares += in * in; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Compute Mean of squares of the input samples + * and then store the result in a temporary variable, meanOfSquares. */ + meanOfSquares = sumOfSquares / ((float32_t) blockSize - 1.0f); + + /* Compute mean of all input values */ + mean = sum / (float32_t) blockSize; + + /* Compute square of mean */ + squareOfMean = (mean * mean) * (((float32_t) blockSize) / + ((float32_t) blockSize - 1.0f)); + + /* Compute standard deviation and then store the result to the destination */ + arm_sqrt_f32((meanOfSquares - squareOfMean), pResult); + +#else + /* Run the below code for Cortex-M0 */ + + /* Loop over blockSize number of values */ + blkCnt = blockSize; + + while (blkCnt > 0U) + { + /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ + /* Compute Sum of squares of the input samples + * and then store the result in a temporary variable, sumOfSquares. */ + in = *pSrc++; + sumOfSquares += in * in; + + /* C = (A[0] + A[1] + ... + A[blockSize-1]) */ + /* Compute Sum of the input samples + * and then store the result in a temporary variable, sum. */ + sum += in; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Compute the square of sum */ + squareOfSum = ((sum * sum) / (float32_t) blockSize); + + /* Compute the variance */ + var = ((sumOfSquares - squareOfSum) / (float32_t) (blockSize - 1.0f)); + + /* Compute standard deviation and then store the result to the destination */ + arm_sqrt_f32(var, pResult); + +#endif /* #if defined (ARM_MATH_DSP) */ +} + +/** + * @} end of STD group + */ diff --git a/DSP/Source/StatisticsFunctions/arm_std_q15.c b/DSP/Source/StatisticsFunctions/arm_std_q15.c new file mode 100644 index 0000000..2f2f52e --- /dev/null +++ b/DSP/Source/StatisticsFunctions/arm_std_q15.c @@ -0,0 +1,174 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_std_q15.c + * Description: Standard deviation of an array of Q15 vector + * + * $Date: 27. January 2017 + * $Revision: V.1.5.1 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arm_math.h" + +/** + * @ingroup groupStats + */ + +/** + * @addtogroup STD + * @{ + */ + +/** + * @brief Standard deviation of the elements of a Q15 vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult standard deviation value returned here + * @return none. + * @details + * <b>Scaling and Overflow Behavior:</b> + * + * \par + * The function is implemented using a 64-bit internal accumulator. + * The input is represented in 1.15 format. + * Intermediate multiplication yields a 2.30 format, and this + * result is added without saturation to a 64-bit accumulator in 34.30 format. + * With 33 guard bits in the accumulator, there is no risk of overflow, and the + * full precision of the intermediate multiplication is preserved. + * Finally, the 34.30 result is truncated to 34.15 format by discarding the lower + * 15 bits, and then saturated to yield a result in 1.15 format. + */ + +void arm_std_q15( + q15_t * pSrc, + uint32_t blockSize, + q15_t * pResult) +{ + q31_t sum = 0; /* Accumulator */ + q31_t meanOfSquares, squareOfMean; /* square of mean and mean of square */ + uint32_t blkCnt; /* loop counter */ + q63_t sumOfSquares = 0; /* Accumulator */ +#if defined (ARM_MATH_DSP) + q31_t in; /* input value */ + q15_t in1; /* input value */ +#else + q15_t in; /* input value */ +#endif + + if (blockSize == 1U) + { + *pResult = 0; + return; + } + +#if defined (ARM_MATH_DSP) + /* Run the below code for Cortex-M4 and Cortex-M3 */ + + /*loop Unrolling */ + blkCnt = blockSize >> 2U; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while (blkCnt > 0U) + { + /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ + /* Compute Sum of squares of the input samples + * and then store the result in a temporary variable, sum. */ + in = *__SIMD32(pSrc)++; + sum += ((in << 16U) >> 16U); + sum += (in >> 16U); + sumOfSquares = __SMLALD(in, in, sumOfSquares); + in = *__SIMD32(pSrc)++; + sum += ((in << 16U) >> 16U); + sum += (in >> 16U); + sumOfSquares = __SMLALD(in, in, sumOfSquares); + + /* Decrement the loop counter */ + blkCnt--; + } + + /* If the blockSize is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = blockSize % 0x4U; + + while (blkCnt > 0U) + { + /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ + /* Compute Sum of squares of the input samples + * and then store the result in a temporary variable, sum. */ + in1 = *pSrc++; + sumOfSquares = __SMLALD(in1, in1, sumOfSquares); + sum += in1; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Compute Mean of squares of the input samples + * and then store the result in a temporary variable, meanOfSquares. */ + meanOfSquares = (q31_t)(sumOfSquares / (q63_t)(blockSize - 1U)); + + /* Compute square of mean */ + squareOfMean = (q31_t)((q63_t)sum * sum / (q63_t)(blockSize * (blockSize - 1U))); + + /* mean of the squares minus the square of the mean. */ + /* Compute standard deviation and store the result to the destination */ + arm_sqrt_q15(__SSAT((meanOfSquares - squareOfMean) >> 15U, 16U), pResult); + +#else + /* Run the below code for Cortex-M0 */ + + /* Loop over blockSize number of values */ + blkCnt = blockSize; + + while (blkCnt > 0U) + { + /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ + /* Compute Sum of squares of the input samples + * and then store the result in a temporary variable, sumOfSquares. */ + in = *pSrc++; + sumOfSquares += (in * in); + + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ + /* Compute sum of all input values and then store the result in a temporary variable, sum. */ + sum += in; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Compute Mean of squares of the input samples + * and then store the result in a temporary variable, meanOfSquares. */ + meanOfSquares = (q31_t)(sumOfSquares / (q63_t)(blockSize - 1U)); + + /* Compute square of mean */ + squareOfMean = (q31_t)((q63_t)sum * sum / (q63_t)(blockSize * (blockSize - 1U))); + + /* mean of the squares minus the square of the mean. */ + /* Compute standard deviation and store the result to the destination */ + arm_sqrt_q15(__SSAT((meanOfSquares - squareOfMean) >> 15U, 16U), pResult); + +#endif /* #if defined (ARM_MATH_DSP) */ +} + +/** + * @} end of STD group + */ diff --git a/DSP/Source/StatisticsFunctions/arm_std_q31.c b/DSP/Source/StatisticsFunctions/arm_std_q31.c new file mode 100644 index 0000000..f02cbdd --- /dev/null +++ b/DSP/Source/StatisticsFunctions/arm_std_q31.c @@ -0,0 +1,169 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_std_q31.c + * Description: Standard deviation of an array of Q31 type. + * + * $Date: 27. January 2017 + * $Revision: V.1.5.1 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arm_math.h" + +/** + * @ingroup groupStats + */ + +/** + * @addtogroup STD + * @{ + */ + +/** + * @brief Standard deviation of the elements of a Q31 vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult standard deviation value returned here + * @return none. + * @details + * <b>Scaling and Overflow Behavior:</b> + * + *\par + * The function is implemented using an internal 64-bit accumulator. + * The input is represented in 1.31 format, which is then downshifted by 8 bits + * which yields 1.23, and intermediate multiplication yields a 2.46 format. + * The accumulator maintains full precision of the intermediate multiplication results, + * but provides only a 16 guard bits. + * There is no saturation on intermediate additions. + * If the accumulator overflows it wraps around and distorts the result. + * In order to avoid overflows completely the input signal must be scaled down by + * log2(blockSize)-8 bits, as a total of blockSize additions are performed internally. + * After division, internal variables should be Q18.46 + * Finally, the 18.46 accumulator is right shifted by 15 bits to yield a 1.31 format value. + * + */ + +void arm_std_q31( + q31_t * pSrc, + uint32_t blockSize, + q31_t * pResult) +{ + q63_t sum = 0; /* Accumulator */ + q63_t meanOfSquares, squareOfMean; /* square of mean and mean of square */ + q31_t in; /* input value */ + uint32_t blkCnt; /* loop counter */ + q63_t sumOfSquares = 0; /* Accumulator */ + + if (blockSize == 1U) + { + *pResult = 0; + return; + } + +#if defined (ARM_MATH_DSP) + /* Run the below code for Cortex-M4 and Cortex-M3 */ + + /*loop Unrolling */ + blkCnt = blockSize >> 2U; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while (blkCnt > 0U) + { + /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ + /* Compute Sum of squares of the input samples + * and then store the result in a temporary variable, sum. */ + in = *pSrc++ >> 8U; + sum += in; + sumOfSquares += ((q63_t) (in) * (in)); + in = *pSrc++ >> 8U; + sum += in; + sumOfSquares += ((q63_t) (in) * (in)); + in = *pSrc++ >> 8U; + sum += in; + sumOfSquares += ((q63_t) (in) * (in)); + in = *pSrc++ >> 8U; + sum += in; + sumOfSquares += ((q63_t) (in) * (in)); + + /* Decrement the loop counter */ + blkCnt--; + } + + /* If the blockSize is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = blockSize % 0x4U; + + while (blkCnt > 0U) + { + /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ + /* Compute Sum of squares of the input samples + * and then store the result in a temporary variable, sum. */ + in = *pSrc++ >> 8U; + sum += in; + sumOfSquares += ((q63_t) (in) * (in)); + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Compute Mean of squares of the input samples + * and then store the result in a temporary variable, meanOfSquares. */ + meanOfSquares = sumOfSquares / (q63_t)(blockSize - 1U); + +#else + /* Run the below code for Cortex-M0 */ + + /* Loop over blockSize number of values */ + blkCnt = blockSize; + + while (blkCnt > 0U) + { + /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ + /* Compute Sum of squares of the input samples + * and then store the result in a temporary variable, sumOfSquares. */ + in = *pSrc++ >> 8U; + sumOfSquares += ((q63_t) (in) * (in)); + + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ + /* Compute sum of all input values and then store the result in a temporary variable, sum. */ + sum += in; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Compute Mean of squares of the input samples + * and then store the result in a temporary variable, meanOfSquares. */ + meanOfSquares = sumOfSquares / (q63_t)(blockSize - 1U); + +#endif /* #if defined (ARM_MATH_DSP) */ + + /* Compute square of mean */ + squareOfMean = sum * sum / (q63_t)(blockSize * (blockSize - 1U)); + + /* Compute standard deviation and then store the result to the destination */ + arm_sqrt_q31((meanOfSquares - squareOfMean) >> 15U, pResult); +} + +/** + * @} end of STD group + */ diff --git a/DSP/Source/StatisticsFunctions/arm_var_f32.c b/DSP/Source/StatisticsFunctions/arm_var_f32.c new file mode 100644 index 0000000..c0f731d --- /dev/null +++ b/DSP/Source/StatisticsFunctions/arm_var_f32.c @@ -0,0 +1,181 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_var_f32.c + * Description: Variance of the elements of a floating-point vector + * + * $Date: 27. January 2017 + * $Revision: V.1.5.1 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arm_math.h" + +/** + * @ingroup groupStats + */ + +/** + * @defgroup variance Variance + * + * Calculates the variance of the elements in the input vector. + * The underlying algorithm used is the direct method sometimes referred to as the two-pass method: + * + * <pre> + * Result = sum(element - meanOfElements)^2) / numElement - 1 + * + * where, meanOfElements = ( pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] ) / blockSize + * + * </pre> + * + * There are separate functions for floating point, Q31, and Q15 data types. + */ + +/** + * @addtogroup variance + * @{ + */ + + +/** + * @brief Variance of the elements of a floating-point vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult variance value returned here + * @return none. + */ + +void arm_var_f32( + float32_t * pSrc, + uint32_t blockSize, + float32_t * pResult) +{ + float32_t fMean, fValue; + uint32_t blkCnt; /* loop counter */ + float32_t * pInput = pSrc; + float32_t sum = 0.0f; + float32_t fSum = 0.0f; + #if defined(ARM_MATH_DSP) + float32_t in1, in2, in3, in4; + #endif + + if (blockSize <= 1U) + { + *pResult = 0; + return; + } + + #if defined(ARM_MATH_DSP) + /* Run the below code for Cortex-M4 and Cortex-M7 */ + + /*loop Unrolling */ + blkCnt = blockSize >> 2U; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while (blkCnt > 0U) + { + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ + in1 = *pInput++; + in2 = *pInput++; + in3 = *pInput++; + in4 = *pInput++; + + sum += in1; + sum += in2; + sum += in3; + sum += in4; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* If the blockSize is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = blockSize % 0x4U; + + #else + /* Run the below code for Cortex-M0 or Cortex-M3 */ + + /* Loop over blockSize number of values */ + blkCnt = blockSize; + + #endif + + while (blkCnt > 0U) + { + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ + sum += *pInput++; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */ + fMean = sum / (float32_t) blockSize; + + pInput = pSrc; + + #if defined(ARM_MATH_DSP) + + /*loop Unrolling */ + blkCnt = blockSize >> 2U; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while (blkCnt > 0U) + { + fValue = *pInput++ - fMean; + fSum += fValue * fValue; + fValue = *pInput++ - fMean; + fSum += fValue * fValue; + fValue = *pInput++ - fMean; + fSum += fValue * fValue; + fValue = *pInput++ - fMean; + fSum += fValue * fValue; + + /* Decrement the loop counter */ + blkCnt--; + } + + blkCnt = blockSize % 0x4U; + #else + /* Run the below code for Cortex-M0 or Cortex-M3 */ + + /* Loop over blockSize number of values */ + blkCnt = blockSize; + #endif + + while (blkCnt > 0U) + { + fValue = *pInput++ - fMean; + fSum += fValue * fValue; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Variance */ + *pResult = fSum / (float32_t)(blockSize - 1.0f); +} + +/** + * @} end of variance group + */ diff --git a/DSP/Source/StatisticsFunctions/arm_var_q15.c b/DSP/Source/StatisticsFunctions/arm_var_q15.c new file mode 100644 index 0000000..5ba61f7 --- /dev/null +++ b/DSP/Source/StatisticsFunctions/arm_var_q15.c @@ -0,0 +1,172 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_var_q15.c + * Description: Variance of an array of Q15 type + * + * $Date: 27. January 2017 + * $Revision: V.1.5.1 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arm_math.h" + +/** + * @ingroup groupStats + */ + +/** + * @addtogroup variance + * @{ + */ + +/** + * @brief Variance of the elements of a Q15 vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult variance value returned here + * @return none. + * @details + * <b>Scaling and Overflow Behavior:</b> + * + * \par + * The function is implemented using a 64-bit internal accumulator. + * The input is represented in 1.15 format. + * Intermediate multiplication yields a 2.30 format, and this + * result is added without saturation to a 64-bit accumulator in 34.30 format. + * With 33 guard bits in the accumulator, there is no risk of overflow, and the + * full precision of the intermediate multiplication is preserved. + * Finally, the 34.30 result is truncated to 34.15 format by discarding the lower + * 15 bits, and then saturated to yield a result in 1.15 format. + */ + +void arm_var_q15( + q15_t * pSrc, + uint32_t blockSize, + q15_t * pResult) +{ + q31_t sum = 0; /* Accumulator */ + q31_t meanOfSquares, squareOfMean; /* square of mean and mean of square */ + uint32_t blkCnt; /* loop counter */ + q63_t sumOfSquares = 0; /* Accumulator */ +#if defined (ARM_MATH_DSP) + q31_t in; /* input value */ + q15_t in1; /* input value */ +#else + q15_t in; /* input value */ +#endif + + if (blockSize == 1U) + { + *pResult = 0; + return; + } + +#if defined (ARM_MATH_DSP) + /* Run the below code for Cortex-M4 and Cortex-M3 */ + + /*loop Unrolling */ + blkCnt = blockSize >> 2U; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while (blkCnt > 0U) + { + /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ + /* Compute Sum of squares of the input samples + * and then store the result in a temporary variable, sum. */ + in = *__SIMD32(pSrc)++; + sum += ((in << 16U) >> 16U); + sum += (in >> 16U); + sumOfSquares = __SMLALD(in, in, sumOfSquares); + in = *__SIMD32(pSrc)++; + sum += ((in << 16U) >> 16U); + sum += (in >> 16U); + sumOfSquares = __SMLALD(in, in, sumOfSquares); + + /* Decrement the loop counter */ + blkCnt--; + } + + /* If the blockSize is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = blockSize % 0x4U; + + while (blkCnt > 0U) + { + /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ + /* Compute Sum of squares of the input samples + * and then store the result in a temporary variable, sum. */ + in1 = *pSrc++; + sumOfSquares = __SMLALD(in1, in1, sumOfSquares); + sum += in1; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Compute Mean of squares of the input samples + * and then store the result in a temporary variable, meanOfSquares. */ + meanOfSquares = (q31_t)(sumOfSquares / (q63_t)(blockSize - 1U)); + + /* Compute square of mean */ + squareOfMean = (q31_t)((q63_t)sum * sum / (q63_t)(blockSize * (blockSize - 1U))); + + /* mean of the squares minus the square of the mean. */ + *pResult = (meanOfSquares - squareOfMean) >> 15U; + +#else + /* Run the below code for Cortex-M0 */ + + /* Loop over blockSize number of values */ + blkCnt = blockSize; + + while (blkCnt > 0U) + { + /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ + /* Compute Sum of squares of the input samples + * and then store the result in a temporary variable, sumOfSquares. */ + in = *pSrc++; + sumOfSquares += (in * in); + + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ + /* Compute sum of all input values and then store the result in a temporary variable, sum. */ + sum += in; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Compute Mean of squares of the input samples + * and then store the result in a temporary variable, meanOfSquares. */ + meanOfSquares = (q31_t)(sumOfSquares / (q63_t)(blockSize - 1U)); + + /* Compute square of mean */ + squareOfMean = (q31_t)((q63_t)sum * sum / (q63_t)(blockSize * (blockSize - 1U))); + + /* mean of the squares minus the square of the mean. */ + *pResult = (meanOfSquares - squareOfMean) >> 15; + +#endif /* #if defined (ARM_MATH_DSP) */ +} + +/** + * @} end of variance group + */ diff --git a/DSP/Source/StatisticsFunctions/arm_var_q31.c b/DSP/Source/StatisticsFunctions/arm_var_q31.c new file mode 100644 index 0000000..526c6cd --- /dev/null +++ b/DSP/Source/StatisticsFunctions/arm_var_q31.c @@ -0,0 +1,169 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_var_q31.c + * Description: Variance of an array of Q31 type + * + * $Date: 27. January 2017 + * $Revision: V.1.5.1 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arm_math.h" + +/** + * @ingroup groupStats + */ + +/** + * @addtogroup variance + * @{ + */ + +/** + * @brief Variance of the elements of a Q31 vector. + * @param[in] *pSrc points to the input vector + * @param[in] blockSize length of the input vector + * @param[out] *pResult variance value returned here + * @return none. + * @details + * <b>Scaling and Overflow Behavior:</b> + * + *\par + * The function is implemented using an internal 64-bit accumulator. + * The input is represented in 1.31 format, which is then downshifted by 8 bits + * which yields 1.23, and intermediate multiplication yields a 2.46 format. + * The accumulator maintains full precision of the intermediate multiplication results, + * but provides only a 16 guard bits. + * There is no saturation on intermediate additions. + * If the accumulator overflows it wraps around and distorts the result. + * In order to avoid overflows completely the input signal must be scaled down by + * log2(blockSize)-8 bits, as a total of blockSize additions are performed internally. + * After division, internal variables should be Q18.46 + * Finally, the 18.46 accumulator is right shifted by 15 bits to yield a 1.31 format value. + * + */ + +void arm_var_q31( + q31_t * pSrc, + uint32_t blockSize, + q31_t * pResult) +{ + q63_t sum = 0; /* Accumulator */ + q63_t meanOfSquares, squareOfMean; /* square of mean and mean of square */ + q31_t in; /* input value */ + uint32_t blkCnt; /* loop counter */ + q63_t sumOfSquares = 0; /* Accumulator */ + + if (blockSize == 1U) + { + *pResult = 0; + return; + } + +#if defined (ARM_MATH_DSP) + /* Run the below code for Cortex-M4 and Cortex-M3 */ + + /*loop Unrolling */ + blkCnt = blockSize >> 2U; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while (blkCnt > 0U) + { + /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ + /* Compute Sum of squares of the input samples + * and then store the result in a temporary variable, sum. */ + in = *pSrc++ >> 8U; + sum += in; + sumOfSquares += ((q63_t) (in) * (in)); + in = *pSrc++ >> 8U; + sum += in; + sumOfSquares += ((q63_t) (in) * (in)); + in = *pSrc++ >> 8U; + sum += in; + sumOfSquares += ((q63_t) (in) * (in)); + in = *pSrc++ >> 8U; + sum += in; + sumOfSquares += ((q63_t) (in) * (in)); + + /* Decrement the loop counter */ + blkCnt--; + } + + /* If the blockSize is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = blockSize % 0x4U; + + while (blkCnt > 0U) + { + /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ + /* Compute Sum of squares of the input samples + * and then store the result in a temporary variable, sum. */ + in = *pSrc++ >> 8U; + sum += in; + sumOfSquares += ((q63_t) (in) * (in)); + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Compute Mean of squares of the input samples + * and then store the result in a temporary variable, meanOfSquares. */ + meanOfSquares = sumOfSquares / (q63_t)(blockSize - 1U); + +#else + /* Run the below code for Cortex-M0 */ + + /* Loop over blockSize number of values */ + blkCnt = blockSize; + + while (blkCnt > 0U) + { + /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ + /* Compute Sum of squares of the input samples + * and then store the result in a temporary variable, sumOfSquares. */ + in = *pSrc++ >> 8U; + sumOfSquares += ((q63_t) (in) * (in)); + + /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */ + /* Compute sum of all input values and then store the result in a temporary variable, sum. */ + sum += in; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* Compute Mean of squares of the input samples + * and then store the result in a temporary variable, meanOfSquares. */ + meanOfSquares = sumOfSquares / (q63_t)(blockSize - 1U); + +#endif /* #if defined (ARM_MATH_DSP) */ + + /* Compute square of mean */ + squareOfMean = sum * sum / (q63_t)(blockSize * (blockSize - 1U)); + + /* Compute standard deviation and then store the result to the destination */ + *pResult = (meanOfSquares - squareOfMean) >> 15U; +} + +/** + * @} end of variance group + */ |