summaryrefslogtreecommitdiff
path: root/DSP/Source/StatisticsFunctions
diff options
context:
space:
mode:
Diffstat (limited to 'DSP/Source/StatisticsFunctions')
-rw-r--r--DSP/Source/StatisticsFunctions/arm_max_f32.c170
-rw-r--r--DSP/Source/StatisticsFunctions/arm_max_q15.c162
-rw-r--r--DSP/Source/StatisticsFunctions/arm_max_q31.c162
-rw-r--r--DSP/Source/StatisticsFunctions/arm_max_q7.c162
-rw-r--r--DSP/Source/StatisticsFunctions/arm_mean_f32.c125
-rw-r--r--DSP/Source/StatisticsFunctions/arm_mean_q15.c120
-rw-r--r--DSP/Source/StatisticsFunctions/arm_mean_q31.c123
-rw-r--r--DSP/Source/StatisticsFunctions/arm_mean_q7.c120
-rw-r--r--DSP/Source/StatisticsFunctions/arm_min_f32.c170
-rw-r--r--DSP/Source/StatisticsFunctions/arm_min_q15.c163
-rw-r--r--DSP/Source/StatisticsFunctions/arm_min_q31.c163
-rw-r--r--DSP/Source/StatisticsFunctions/arm_min_q7.c163
-rw-r--r--DSP/Source/StatisticsFunctions/arm_power_f32.c129
-rw-r--r--DSP/Source/StatisticsFunctions/arm_power_q15.c138
-rw-r--r--DSP/Source/StatisticsFunctions/arm_power_q31.c129
-rw-r--r--DSP/Source/StatisticsFunctions/arm_power_q7.c127
-rw-r--r--DSP/Source/StatisticsFunctions/arm_rms_f32.c127
-rw-r--r--DSP/Source/StatisticsFunctions/arm_rms_q15.c139
-rw-r--r--DSP/Source/StatisticsFunctions/arm_rms_q31.c137
-rw-r--r--DSP/Source/StatisticsFunctions/arm_std_f32.c186
-rw-r--r--DSP/Source/StatisticsFunctions/arm_std_q15.c174
-rw-r--r--DSP/Source/StatisticsFunctions/arm_std_q31.c169
-rw-r--r--DSP/Source/StatisticsFunctions/arm_var_f32.c181
-rw-r--r--DSP/Source/StatisticsFunctions/arm_var_q15.c172
-rw-r--r--DSP/Source/StatisticsFunctions/arm_var_q31.c169
25 files changed, 3780 insertions, 0 deletions
diff --git a/DSP/Source/StatisticsFunctions/arm_max_f32.c b/DSP/Source/StatisticsFunctions/arm_max_f32.c
new file mode 100644
index 0000000..a0a68ac
--- /dev/null
+++ b/DSP/Source/StatisticsFunctions/arm_max_f32.c
@@ -0,0 +1,170 @@
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_max_f32.c
+ * Description: Maximum value of a floating-point vector
+ *
+ * $Date: 27. January 2017
+ * $Revision: V.1.5.1
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupStats
+ */
+
+/**
+ * @defgroup Max Maximum
+ *
+ * Computes the maximum value of an array of data.
+ * The function returns both the maximum value and its position within the array.
+ * There are separate functions for floating-point, Q31, Q15, and Q7 data types.
+ */
+
+/**
+ * @addtogroup Max
+ * @{
+ */
+
+
+/**
+ * @brief Maximum value of a floating-point vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult maximum value returned here
+ * @param[out] *pIndex index of maximum value returned here
+ * @return none.
+ */
+
+void arm_max_f32(
+ float32_t * pSrc,
+ uint32_t blockSize,
+ float32_t * pResult,
+ uint32_t * pIndex)
+{
+#if defined (ARM_MATH_DSP)
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+ float32_t maxVal1, maxVal2, out; /* Temporary variables to store the output value. */
+ uint32_t blkCnt, outIndex, count; /* loop counter */
+
+ /* Initialise the count value. */
+ count = 0U;
+ /* Initialise the index value to zero. */
+ outIndex = 0U;
+ /* Load first input value that act as reference value for comparision */
+ out = *pSrc++;
+
+ /* Loop unrolling */
+ blkCnt = (blockSize - 1U) >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* Initialize maxVal to the next consecutive values one by one */
+ maxVal1 = *pSrc++;
+ maxVal2 = *pSrc++;
+
+ /* compare for the maximum value */
+ if (out < maxVal1)
+ {
+ /* Update the maximum value and its index */
+ out = maxVal1;
+ outIndex = count + 1U;
+ }
+
+ /* compare for the maximum value */
+ if (out < maxVal2)
+ {
+ /* Update the maximum value and its index */
+ out = maxVal2;
+ outIndex = count + 2U;
+ }
+
+ /* Initialize maxVal to the next consecutive values one by one */
+ maxVal1 = *pSrc++;
+ maxVal2 = *pSrc++;
+
+ /* compare for the maximum value */
+ if (out < maxVal1)
+ {
+ /* Update the maximum value and its index */
+ out = maxVal1;
+ outIndex = count + 3U;
+ }
+
+ /* compare for the maximum value */
+ if (out < maxVal2)
+ {
+ /* Update the maximum value and its index */
+ out = maxVal2;
+ outIndex = count + 4U;
+ }
+
+ count += 4U;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* if (blockSize - 1U) is not multiple of 4 */
+ blkCnt = (blockSize - 1U) % 4U;
+
+#else
+ /* Run the below code for Cortex-M0 */
+
+ float32_t maxVal1, out; /* Temporary variables to store the output value. */
+ uint32_t blkCnt, outIndex; /* loop counter */
+
+ /* Initialise the index value to zero. */
+ outIndex = 0U;
+ /* Load first input value that act as reference value for comparision */
+ out = *pSrc++;
+
+ blkCnt = (blockSize - 1U);
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+ while (blkCnt > 0U)
+ {
+ /* Initialize maxVal to the next consecutive values one by one */
+ maxVal1 = *pSrc++;
+
+ /* compare for the maximum value */
+ if (out < maxVal1)
+ {
+ /* Update the maximum value and it's index */
+ out = maxVal1;
+ outIndex = blockSize - blkCnt;
+ }
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Store the maximum value and it's index into destination pointers */
+ *pResult = out;
+ *pIndex = outIndex;
+}
+
+/**
+ * @} end of Max group
+ */
diff --git a/DSP/Source/StatisticsFunctions/arm_max_q15.c b/DSP/Source/StatisticsFunctions/arm_max_q15.c
new file mode 100644
index 0000000..67d5e34
--- /dev/null
+++ b/DSP/Source/StatisticsFunctions/arm_max_q15.c
@@ -0,0 +1,162 @@
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_max_q15.c
+ * Description: Maximum value of a Q15 vector
+ *
+ * $Date: 27. January 2017
+ * $Revision: V.1.5.1
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupStats
+ */
+
+/**
+ * @addtogroup Max
+ * @{
+ */
+
+
+/**
+ * @brief Maximum value of a Q15 vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult maximum value returned here
+ * @param[out] *pIndex index of maximum value returned here
+ * @return none.
+ */
+
+void arm_max_q15(
+ q15_t * pSrc,
+ uint32_t blockSize,
+ q15_t * pResult,
+ uint32_t * pIndex)
+{
+#if defined (ARM_MATH_DSP)
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+ q15_t maxVal1, maxVal2, out; /* Temporary variables to store the output value. */
+ uint32_t blkCnt, outIndex, count; /* loop counter */
+
+ /* Initialise the count value. */
+ count = 0U;
+ /* Initialise the index value to zero. */
+ outIndex = 0U;
+ /* Load first input value that act as reference value for comparision */
+ out = *pSrc++;
+
+ /* Loop unrolling */
+ blkCnt = (blockSize - 1U) >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* Initialize maxVal to the next consecutive values one by one */
+ maxVal1 = *pSrc++;
+ maxVal2 = *pSrc++;
+
+ /* compare for the maximum value */
+ if (out < maxVal1)
+ {
+ /* Update the maximum value and its index */
+ out = maxVal1;
+ outIndex = count + 1U;
+ }
+
+ /* compare for the maximum value */
+ if (out < maxVal2)
+ {
+ /* Update the maximum value and its index */
+ out = maxVal2;
+ outIndex = count + 2U;
+ }
+
+ /* Initialize maxVal to the next consecutive values one by one */
+ maxVal1 = *pSrc++;
+ maxVal2 = *pSrc++;
+
+ /* compare for the maximum value */
+ if (out < maxVal1)
+ {
+ /* Update the maximum value and its index */
+ out = maxVal1;
+ outIndex = count + 3U;
+ }
+
+ /* compare for the maximum value */
+ if (out < maxVal2)
+ {
+ /* Update the maximum value and its index */
+ out = maxVal2;
+ outIndex = count + 4U;
+ }
+
+ count += 4U;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* if (blockSize - 1U) is not multiple of 4 */
+ blkCnt = (blockSize - 1U) % 4U;
+
+#else
+ /* Run the below code for Cortex-M0 */
+
+ q15_t maxVal1, out; /* Temporary variables to store the output value. */
+ uint32_t blkCnt, outIndex; /* loop counter */
+
+ /* Initialise the index value to zero. */
+ outIndex = 0U;
+ /* Load first input value that act as reference value for comparision */
+ out = *pSrc++;
+
+ blkCnt = (blockSize - 1U);
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+ while (blkCnt > 0U)
+ {
+ /* Initialize maxVal to the next consecutive values one by one */
+ maxVal1 = *pSrc++;
+
+ /* compare for the maximum value */
+ if (out < maxVal1)
+ {
+ /* Update the maximum value and it's index */
+ out = maxVal1;
+ outIndex = blockSize - blkCnt;
+ }
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Store the maximum value and it's index into destination pointers */
+ *pResult = out;
+ *pIndex = outIndex;
+}
+
+/**
+ * @} end of Max group
+ */
diff --git a/DSP/Source/StatisticsFunctions/arm_max_q31.c b/DSP/Source/StatisticsFunctions/arm_max_q31.c
new file mode 100644
index 0000000..5d34bbd
--- /dev/null
+++ b/DSP/Source/StatisticsFunctions/arm_max_q31.c
@@ -0,0 +1,162 @@
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_max_q31.c
+ * Description: Maximum value of a Q31 vector
+ *
+ * $Date: 27. January 2017
+ * $Revision: V.1.5.1
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupStats
+ */
+
+/**
+ * @addtogroup Max
+ * @{
+ */
+
+
+/**
+ * @brief Maximum value of a Q31 vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult maximum value returned here
+ * @param[out] *pIndex index of maximum value returned here
+ * @return none.
+ */
+
+void arm_max_q31(
+ q31_t * pSrc,
+ uint32_t blockSize,
+ q31_t * pResult,
+ uint32_t * pIndex)
+{
+#if defined (ARM_MATH_DSP)
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+ q31_t maxVal1, maxVal2, out; /* Temporary variables to store the output value. */
+ uint32_t blkCnt, outIndex, count; /* loop counter */
+
+ /* Initialise the count value. */
+ count = 0U;
+ /* Initialise the index value to zero. */
+ outIndex = 0U;
+ /* Load first input value that act as reference value for comparision */
+ out = *pSrc++;
+
+ /* Loop unrolling */
+ blkCnt = (blockSize - 1U) >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* Initialize maxVal to the next consecutive values one by one */
+ maxVal1 = *pSrc++;
+ maxVal2 = *pSrc++;
+
+ /* compare for the maximum value */
+ if (out < maxVal1)
+ {
+ /* Update the maximum value and its index */
+ out = maxVal1;
+ outIndex = count + 1U;
+ }
+
+ /* compare for the maximum value */
+ if (out < maxVal2)
+ {
+ /* Update the maximum value and its index */
+ out = maxVal2;
+ outIndex = count + 2U;
+ }
+
+ /* Initialize maxVal to the next consecutive values one by one */
+ maxVal1 = *pSrc++;
+ maxVal2 = *pSrc++;
+
+ /* compare for the maximum value */
+ if (out < maxVal1)
+ {
+ /* Update the maximum value and its index */
+ out = maxVal1;
+ outIndex = count + 3U;
+ }
+
+ /* compare for the maximum value */
+ if (out < maxVal2)
+ {
+ /* Update the maximum value and its index */
+ out = maxVal2;
+ outIndex = count + 4U;
+ }
+
+ count += 4U;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* if (blockSize - 1U) is not multiple of 4 */
+ blkCnt = (blockSize - 1U) % 4U;
+
+#else
+ /* Run the below code for Cortex-M0 */
+
+ q31_t maxVal1, out; /* Temporary variables to store the output value. */
+ uint32_t blkCnt, outIndex; /* loop counter */
+
+ /* Initialise the index value to zero. */
+ outIndex = 0U;
+ /* Load first input value that act as reference value for comparision */
+ out = *pSrc++;
+
+ blkCnt = (blockSize - 1U);
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+ while (blkCnt > 0U)
+ {
+ /* Initialize maxVal to the next consecutive values one by one */
+ maxVal1 = *pSrc++;
+
+ /* compare for the maximum value */
+ if (out < maxVal1)
+ {
+ /* Update the maximum value and it's index */
+ out = maxVal1;
+ outIndex = blockSize - blkCnt;
+ }
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Store the maximum value and it's index into destination pointers */
+ *pResult = out;
+ *pIndex = outIndex;
+}
+
+/**
+ * @} end of Max group
+ */
diff --git a/DSP/Source/StatisticsFunctions/arm_max_q7.c b/DSP/Source/StatisticsFunctions/arm_max_q7.c
new file mode 100644
index 0000000..72f6e5e
--- /dev/null
+++ b/DSP/Source/StatisticsFunctions/arm_max_q7.c
@@ -0,0 +1,162 @@
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_max_q7.c
+ * Description: Maximum value of a Q7 vector
+ *
+ * $Date: 27. January 2017
+ * $Revision: V.1.5.1
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupStats
+ */
+
+/**
+ * @addtogroup Max
+ * @{
+ */
+
+
+/**
+ * @brief Maximum value of a Q7 vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult maximum value returned here
+ * @param[out] *pIndex index of maximum value returned here
+ * @return none.
+ */
+
+void arm_max_q7(
+ q7_t * pSrc,
+ uint32_t blockSize,
+ q7_t * pResult,
+ uint32_t * pIndex)
+{
+#if defined (ARM_MATH_DSP)
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+ q7_t maxVal1, maxVal2, out; /* Temporary variables to store the output value. */
+ uint32_t blkCnt, outIndex, count; /* loop counter */
+
+ /* Initialise the count value. */
+ count = 0U;
+ /* Initialise the index value to zero. */
+ outIndex = 0U;
+ /* Load first input value that act as reference value for comparision */
+ out = *pSrc++;
+
+ /* Loop unrolling */
+ blkCnt = (blockSize - 1U) >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* Initialize maxVal to the next consecutive values one by one */
+ maxVal1 = *pSrc++;
+ maxVal2 = *pSrc++;
+
+ /* compare for the maximum value */
+ if (out < maxVal1)
+ {
+ /* Update the maximum value and its index */
+ out = maxVal1;
+ outIndex = count + 1U;
+ }
+
+ /* compare for the maximum value */
+ if (out < maxVal2)
+ {
+ /* Update the maximum value and its index */
+ out = maxVal2;
+ outIndex = count + 2U;
+ }
+
+ /* Initialize maxVal to the next consecutive values one by one */
+ maxVal1 = *pSrc++;
+ maxVal2 = *pSrc++;
+
+ /* compare for the maximum value */
+ if (out < maxVal1)
+ {
+ /* Update the maximum value and its index */
+ out = maxVal1;
+ outIndex = count + 3U;
+ }
+
+ /* compare for the maximum value */
+ if (out < maxVal2)
+ {
+ /* Update the maximum value and its index */
+ out = maxVal2;
+ outIndex = count + 4U;
+ }
+
+ count += 4U;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* if (blockSize - 1U) is not multiple of 4 */
+ blkCnt = (blockSize - 1U) % 4U;
+
+#else
+ /* Run the below code for Cortex-M0 */
+
+ q7_t maxVal1, out; /* Temporary variables to store the output value. */
+ uint32_t blkCnt, outIndex; /* loop counter */
+
+ /* Initialise the index value to zero. */
+ outIndex = 0U;
+ /* Load first input value that act as reference value for comparision */
+ out = *pSrc++;
+
+ blkCnt = (blockSize - 1U);
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+ while (blkCnt > 0U)
+ {
+ /* Initialize maxVal to the next consecutive values one by one */
+ maxVal1 = *pSrc++;
+
+ /* compare for the maximum value */
+ if (out < maxVal1)
+ {
+ /* Update the maximum value and it's index */
+ out = maxVal1;
+ outIndex = blockSize - blkCnt;
+ }
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Store the maximum value and it's index into destination pointers */
+ *pResult = out;
+ *pIndex = outIndex;
+}
+
+/**
+ * @} end of Max group
+ */
diff --git a/DSP/Source/StatisticsFunctions/arm_mean_f32.c b/DSP/Source/StatisticsFunctions/arm_mean_f32.c
new file mode 100644
index 0000000..85a3b16
--- /dev/null
+++ b/DSP/Source/StatisticsFunctions/arm_mean_f32.c
@@ -0,0 +1,125 @@
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_mean_f32.c
+ * Description: Mean value of a floating-point vector
+ *
+ * $Date: 27. January 2017
+ * $Revision: V.1.5.1
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupStats
+ */
+
+/**
+ * @defgroup mean Mean
+ *
+ * Calculates the mean of the input vector. Mean is defined as the average of the elements in the vector.
+ * The underlying algorithm is used:
+ *
+ * <pre>
+ * Result = (pSrc[0] + pSrc[1] + pSrc[2] + ... + pSrc[blockSize-1]) / blockSize;
+ * </pre>
+ *
+ * There are separate functions for floating-point, Q31, Q15, and Q7 data types.
+ */
+
+/**
+ * @addtogroup mean
+ * @{
+ */
+
+
+/**
+ * @brief Mean value of a floating-point vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult mean value returned here
+ * @return none.
+ */
+
+void arm_mean_f32(
+ float32_t * pSrc,
+ uint32_t blockSize,
+ float32_t * pResult)
+{
+ float32_t sum = 0.0f; /* Temporary result storage */
+ uint32_t blkCnt; /* loop counter */
+
+#if defined (ARM_MATH_DSP)
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+ float32_t in1, in2, in3, in4;
+
+ /*loop Unrolling */
+ blkCnt = blockSize >> 2U;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** a second loop below computes the remaining 1 to 3 samples. */
+ while (blkCnt > 0U)
+ {
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+ in1 = *pSrc++;
+ in2 = *pSrc++;
+ in3 = *pSrc++;
+ in4 = *pSrc++;
+
+ sum += in1;
+ sum += in2;
+ sum += in3;
+ sum += in4;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+ ** No loop unrolling is used. */
+ blkCnt = blockSize % 0x4U;
+
+#else
+ /* Run the below code for Cortex-M0 */
+
+ /* Loop over blockSize number of values */
+ blkCnt = blockSize;
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+ while (blkCnt > 0U)
+ {
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+ sum += *pSrc++;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */
+ /* Store the result to the destination */
+ *pResult = sum / (float32_t) blockSize;
+}
+
+/**
+ * @} end of mean group
+ */
diff --git a/DSP/Source/StatisticsFunctions/arm_mean_q15.c b/DSP/Source/StatisticsFunctions/arm_mean_q15.c
new file mode 100644
index 0000000..7bf55c2
--- /dev/null
+++ b/DSP/Source/StatisticsFunctions/arm_mean_q15.c
@@ -0,0 +1,120 @@
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_mean_q15.c
+ * Description: Mean value of a Q15 vector
+ *
+ * $Date: 27. January 2017
+ * $Revision: V.1.5.1
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupStats
+ */
+
+/**
+ * @addtogroup mean
+ * @{
+ */
+
+
+/**
+ * @brief Mean value of a Q15 vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult mean value returned here
+ * @return none.
+ *
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ * \par
+ * The function is implemented using a 32-bit internal accumulator.
+ * The input is represented in 1.15 format and is accumulated in a 32-bit
+ * accumulator in 17.15 format.
+ * There is no risk of internal overflow with this approach, and the
+ * full precision of intermediate result is preserved.
+ * Finally, the accumulator is saturated and truncated to yield a result of 1.15 format.
+ *
+ */
+
+void arm_mean_q15(
+ q15_t * pSrc,
+ uint32_t blockSize,
+ q15_t * pResult)
+{
+ q31_t sum = 0; /* Temporary result storage */
+ uint32_t blkCnt; /* loop counter */
+
+#if defined (ARM_MATH_DSP)
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+ q31_t in;
+
+ /*loop Unrolling */
+ blkCnt = blockSize >> 2U;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** a second loop below computes the remaining 1 to 3 samples. */
+ while (blkCnt > 0U)
+ {
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+ in = *__SIMD32(pSrc)++;
+ sum += ((in << 16U) >> 16U);
+ sum += (in >> 16U);
+ in = *__SIMD32(pSrc)++;
+ sum += ((in << 16U) >> 16U);
+ sum += (in >> 16U);
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+ ** No loop unrolling is used. */
+ blkCnt = blockSize % 0x4U;
+
+#else
+ /* Run the below code for Cortex-M0 */
+
+ /* Loop over blockSize number of values */
+ blkCnt = blockSize;
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+ while (blkCnt > 0U)
+ {
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+ sum += *pSrc++;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */
+ /* Store the result to the destination */
+ *pResult = (q15_t) (sum / (q31_t)blockSize);
+}
+
+/**
+ * @} end of mean group
+ */
diff --git a/DSP/Source/StatisticsFunctions/arm_mean_q31.c b/DSP/Source/StatisticsFunctions/arm_mean_q31.c
new file mode 100644
index 0000000..ea83ced
--- /dev/null
+++ b/DSP/Source/StatisticsFunctions/arm_mean_q31.c
@@ -0,0 +1,123 @@
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_mean_q31.c
+ * Description: Mean value of a Q31 vector
+ *
+ * $Date: 27. January 2017
+ * $Revision: V.1.5.1
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupStats
+ */
+
+/**
+ * @addtogroup mean
+ * @{
+ */
+
+
+/**
+ * @brief Mean value of a Q31 vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult mean value returned here
+ * @return none.
+ *
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ *\par
+ * The function is implemented using a 64-bit internal accumulator.
+ * The input is represented in 1.31 format and is accumulated in a 64-bit
+ * accumulator in 33.31 format.
+ * There is no risk of internal overflow with this approach, and the
+ * full precision of intermediate result is preserved.
+ * Finally, the accumulator is truncated to yield a result of 1.31 format.
+ *
+ */
+
+void arm_mean_q31(
+ q31_t * pSrc,
+ uint32_t blockSize,
+ q31_t * pResult)
+{
+ q63_t sum = 0; /* Temporary result storage */
+ uint32_t blkCnt; /* loop counter */
+
+#if defined (ARM_MATH_DSP)
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+ q31_t in1, in2, in3, in4;
+
+ /*loop Unrolling */
+ blkCnt = blockSize >> 2U;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** a second loop below computes the remaining 1 to 3 samples. */
+ while (blkCnt > 0U)
+ {
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+ in1 = *pSrc++;
+ in2 = *pSrc++;
+ in3 = *pSrc++;
+ in4 = *pSrc++;
+
+ sum += in1;
+ sum += in2;
+ sum += in3;
+ sum += in4;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+ ** No loop unrolling is used. */
+ blkCnt = blockSize % 0x4U;
+
+#else
+ /* Run the below code for Cortex-M0 */
+
+ /* Loop over blockSize number of values */
+ blkCnt = blockSize;
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+ while (blkCnt > 0U)
+ {
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+ sum += *pSrc++;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */
+ /* Store the result to the destination */
+ *pResult = (q31_t) (sum / (int32_t) blockSize);
+}
+
+/**
+ * @} end of mean group
+ */
diff --git a/DSP/Source/StatisticsFunctions/arm_mean_q7.c b/DSP/Source/StatisticsFunctions/arm_mean_q7.c
new file mode 100644
index 0000000..a7bdfb8
--- /dev/null
+++ b/DSP/Source/StatisticsFunctions/arm_mean_q7.c
@@ -0,0 +1,120 @@
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_mean_q7.c
+ * Description: Mean value of a Q7 vector
+ *
+ * $Date: 27. January 2017
+ * $Revision: V.1.5.1
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupStats
+ */
+
+/**
+ * @addtogroup mean
+ * @{
+ */
+
+
+/**
+ * @brief Mean value of a Q7 vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult mean value returned here
+ * @return none.
+ *
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ * \par
+ * The function is implemented using a 32-bit internal accumulator.
+ * The input is represented in 1.7 format and is accumulated in a 32-bit
+ * accumulator in 25.7 format.
+ * There is no risk of internal overflow with this approach, and the
+ * full precision of intermediate result is preserved.
+ * Finally, the accumulator is truncated to yield a result of 1.7 format.
+ *
+ */
+
+void arm_mean_q7(
+ q7_t * pSrc,
+ uint32_t blockSize,
+ q7_t * pResult)
+{
+ q31_t sum = 0; /* Temporary result storage */
+ uint32_t blkCnt; /* loop counter */
+
+#if defined (ARM_MATH_DSP)
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+ q31_t in;
+
+ /*loop Unrolling */
+ blkCnt = blockSize >> 2U;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** a second loop below computes the remaining 1 to 3 samples. */
+ while (blkCnt > 0U)
+ {
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+ in = *__SIMD32(pSrc)++;
+
+ sum += ((in << 24U) >> 24U);
+ sum += ((in << 16U) >> 24U);
+ sum += ((in << 8U) >> 24U);
+ sum += (in >> 24U);
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+ ** No loop unrolling is used. */
+ blkCnt = blockSize % 0x4U;
+
+#else
+ /* Run the below code for Cortex-M0 */
+
+ /* Loop over blockSize number of values */
+ blkCnt = blockSize;
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+ while (blkCnt > 0U)
+ {
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+ sum += *pSrc++;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */
+ /* Store the result to the destination */
+ *pResult = (q7_t) (sum / (int32_t) blockSize);
+}
+
+/**
+ * @} end of mean group
+ */
diff --git a/DSP/Source/StatisticsFunctions/arm_min_f32.c b/DSP/Source/StatisticsFunctions/arm_min_f32.c
new file mode 100644
index 0000000..858b0a2
--- /dev/null
+++ b/DSP/Source/StatisticsFunctions/arm_min_f32.c
@@ -0,0 +1,170 @@
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_min_f32.c
+ * Description: Minimum value of a floating-point vector
+ *
+ * $Date: 27. January 2017
+ * $Revision: V.1.5.1
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupStats
+ */
+
+/**
+ * @defgroup Min Minimum
+ *
+ * Computes the minimum value of an array of data.
+ * The function returns both the minimum value and its position within the array.
+ * There are separate functions for floating-point, Q31, Q15, and Q7 data types.
+ */
+
+/**
+ * @addtogroup Min
+ * @{
+ */
+
+
+/**
+ * @brief Minimum value of a floating-point vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult minimum value returned here
+ * @param[out] *pIndex index of minimum value returned here
+ * @return none.
+ */
+
+void arm_min_f32(
+ float32_t * pSrc,
+ uint32_t blockSize,
+ float32_t * pResult,
+ uint32_t * pIndex)
+{
+#if defined (ARM_MATH_DSP)
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+ float32_t minVal1, minVal2, out; /* Temporary variables to store the output value. */
+ uint32_t blkCnt, outIndex, count; /* loop counter */
+
+ /* Initialise the count value. */
+ count = 0U;
+ /* Initialise the index value to zero. */
+ outIndex = 0U;
+ /* Load first input value that act as reference value for comparision */
+ out = *pSrc++;
+
+ /* Loop unrolling */
+ blkCnt = (blockSize - 1U) >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* Initialize minVal to the next consecutive values one by one */
+ minVal1 = *pSrc++;
+ minVal2 = *pSrc++;
+
+ /* compare for the minimum value */
+ if (out > minVal1)
+ {
+ /* Update the minimum value and its index */
+ out = minVal1;
+ outIndex = count + 1U;
+ }
+
+ /* compare for the minimum value */
+ if (out > minVal2)
+ {
+ /* Update the minimum value and its index */
+ out = minVal2;
+ outIndex = count + 2U;
+ }
+
+ /* Initialize minVal to the next consecutive values one by one */
+ minVal1 = *pSrc++;
+ minVal2 = *pSrc++;
+
+ /* compare for the minimum value */
+ if (out > minVal1)
+ {
+ /* Update the minimum value and its index */
+ out = minVal1;
+ outIndex = count + 3U;
+ }
+
+ /* compare for the minimum value */
+ if (out > minVal2)
+ {
+ /* Update the minimum value and its index */
+ out = minVal2;
+ outIndex = count + 4U;
+ }
+
+ count += 4U;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* if (blockSize - 1U) is not multiple of 4 */
+ blkCnt = (blockSize - 1U) % 4U;
+
+#else
+ /* Run the below code for Cortex-M0 */
+
+ float32_t minVal1, out; /* Temporary variables to store the output value. */
+ uint32_t blkCnt, outIndex; /* loop counter */
+
+ /* Initialise the index value to zero. */
+ outIndex = 0U;
+ /* Load first input value that act as reference value for comparision */
+ out = *pSrc++;
+
+ blkCnt = (blockSize - 1U);
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+ while (blkCnt > 0U)
+ {
+ /* Initialize minVal to the next consecutive values one by one */
+ minVal1 = *pSrc++;
+
+ /* compare for the minimum value */
+ if (out > minVal1)
+ {
+ /* Update the minimum value and it's index */
+ out = minVal1;
+ outIndex = blockSize - blkCnt;
+ }
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Store the minimum value and it's index into destination pointers */
+ *pResult = out;
+ *pIndex = outIndex;
+}
+
+/**
+ * @} end of Min group
+ */
diff --git a/DSP/Source/StatisticsFunctions/arm_min_q15.c b/DSP/Source/StatisticsFunctions/arm_min_q15.c
new file mode 100644
index 0000000..fdc32b7
--- /dev/null
+++ b/DSP/Source/StatisticsFunctions/arm_min_q15.c
@@ -0,0 +1,163 @@
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_min_q15.c
+ * Description: Minimum value of a Q15 vector
+ *
+ * $Date: 27. January 2017
+ * $Revision: V.1.5.1
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupStats
+ */
+
+
+/**
+ * @addtogroup Min
+ * @{
+ */
+
+
+/**
+ * @brief Minimum value of a Q15 vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult minimum value returned here
+ * @param[out] *pIndex index of minimum value returned here
+ * @return none.
+ */
+
+void arm_min_q15(
+ q15_t * pSrc,
+ uint32_t blockSize,
+ q15_t * pResult,
+ uint32_t * pIndex)
+{
+#if defined (ARM_MATH_DSP)
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+ q15_t minVal1, minVal2, out; /* Temporary variables to store the output value. */
+ uint32_t blkCnt, outIndex, count; /* loop counter */
+
+ /* Initialise the count value. */
+ count = 0U;
+ /* Initialise the index value to zero. */
+ outIndex = 0U;
+ /* Load first input value that act as reference value for comparision */
+ out = *pSrc++;
+
+ /* Loop unrolling */
+ blkCnt = (blockSize - 1U) >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* Initialize minVal to the next consecutive values one by one */
+ minVal1 = *pSrc++;
+ minVal2 = *pSrc++;
+
+ /* compare for the minimum value */
+ if (out > minVal1)
+ {
+ /* Update the minimum value and its index */
+ out = minVal1;
+ outIndex = count + 1U;
+ }
+
+ /* compare for the minimum value */
+ if (out > minVal2)
+ {
+ /* Update the minimum value and its index */
+ out = minVal2;
+ outIndex = count + 2U;
+ }
+
+ /* Initialize minVal to the next consecutive values one by one */
+ minVal1 = *pSrc++;
+ minVal2 = *pSrc++;
+
+ /* compare for the minimum value */
+ if (out > minVal1)
+ {
+ /* Update the minimum value and its index */
+ out = minVal1;
+ outIndex = count + 3U;
+ }
+
+ /* compare for the minimum value */
+ if (out > minVal2)
+ {
+ /* Update the minimum value and its index */
+ out = minVal2;
+ outIndex = count + 4U;
+ }
+
+ count += 4U;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* if (blockSize - 1U) is not multiple of 4 */
+ blkCnt = (blockSize - 1U) % 4U;
+
+#else
+ /* Run the below code for Cortex-M0 */
+
+ q15_t minVal1, out; /* Temporary variables to store the output value. */
+ uint32_t blkCnt, outIndex; /* loop counter */
+
+ /* Initialise the index value to zero. */
+ outIndex = 0U;
+ /* Load first input value that act as reference value for comparision */
+ out = *pSrc++;
+
+ blkCnt = (blockSize - 1U);
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+ while (blkCnt > 0U)
+ {
+ /* Initialize minVal to the next consecutive values one by one */
+ minVal1 = *pSrc++;
+
+ /* compare for the minimum value */
+ if (out > minVal1)
+ {
+ /* Update the minimum value and it's index */
+ out = minVal1;
+ outIndex = blockSize - blkCnt;
+ }
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Store the minimum value and it's index into destination pointers */
+ *pResult = out;
+ *pIndex = outIndex;
+}
+
+/**
+ * @} end of Min group
+ */
diff --git a/DSP/Source/StatisticsFunctions/arm_min_q31.c b/DSP/Source/StatisticsFunctions/arm_min_q31.c
new file mode 100644
index 0000000..fc4c155
--- /dev/null
+++ b/DSP/Source/StatisticsFunctions/arm_min_q31.c
@@ -0,0 +1,163 @@
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_min_q31.c
+ * Description: Minimum value of a Q31 vector
+ *
+ * $Date: 27. January 2017
+ * $Revision: V.1.5.1
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupStats
+ */
+
+
+/**
+ * @addtogroup Min
+ * @{
+ */
+
+
+/**
+ * @brief Minimum value of a Q31 vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult minimum value returned here
+ * @param[out] *pIndex index of minimum value returned here
+ * @return none.
+ */
+
+void arm_min_q31(
+ q31_t * pSrc,
+ uint32_t blockSize,
+ q31_t * pResult,
+ uint32_t * pIndex)
+{
+#if defined (ARM_MATH_DSP)
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+ q31_t minVal1, minVal2, out; /* Temporary variables to store the output value. */
+ uint32_t blkCnt, outIndex, count; /* loop counter */
+
+ /* Initialise the count value. */
+ count = 0U;
+ /* Initialise the index value to zero. */
+ outIndex = 0U;
+ /* Load first input value that act as reference value for comparision */
+ out = *pSrc++;
+
+ /* Loop unrolling */
+ blkCnt = (blockSize - 1U) >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* Initialize minVal to the next consecutive values one by one */
+ minVal1 = *pSrc++;
+ minVal2 = *pSrc++;
+
+ /* compare for the minimum value */
+ if (out > minVal1)
+ {
+ /* Update the minimum value and its index */
+ out = minVal1;
+ outIndex = count + 1U;
+ }
+
+ /* compare for the minimum value */
+ if (out > minVal2)
+ {
+ /* Update the minimum value and its index */
+ out = minVal2;
+ outIndex = count + 2U;
+ }
+
+ /* Initialize minVal to the next consecutive values one by one */
+ minVal1 = *pSrc++;
+ minVal2 = *pSrc++;
+
+ /* compare for the minimum value */
+ if (out > minVal1)
+ {
+ /* Update the minimum value and its index */
+ out = minVal1;
+ outIndex = count + 3U;
+ }
+
+ /* compare for the minimum value */
+ if (out > minVal2)
+ {
+ /* Update the minimum value and its index */
+ out = minVal2;
+ outIndex = count + 4U;
+ }
+
+ count += 4U;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* if (blockSize - 1U) is not multiple of 4 */
+ blkCnt = (blockSize - 1U) % 4U;
+
+#else
+ /* Run the below code for Cortex-M0 */
+
+ q31_t minVal1, out; /* Temporary variables to store the output value. */
+ uint32_t blkCnt, outIndex; /* loop counter */
+
+ /* Initialise the index value to zero. */
+ outIndex = 0U;
+ /* Load first input value that act as reference value for comparision */
+ out = *pSrc++;
+
+ blkCnt = (blockSize - 1U);
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+ while (blkCnt > 0U)
+ {
+ /* Initialize minVal to the next consecutive values one by one */
+ minVal1 = *pSrc++;
+
+ /* compare for the minimum value */
+ if (out > minVal1)
+ {
+ /* Update the minimum value and it's index */
+ out = minVal1;
+ outIndex = blockSize - blkCnt;
+ }
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Store the minimum value and it's index into destination pointers */
+ *pResult = out;
+ *pIndex = outIndex;
+}
+
+/**
+ * @} end of Min group
+ */
diff --git a/DSP/Source/StatisticsFunctions/arm_min_q7.c b/DSP/Source/StatisticsFunctions/arm_min_q7.c
new file mode 100644
index 0000000..50362e6
--- /dev/null
+++ b/DSP/Source/StatisticsFunctions/arm_min_q7.c
@@ -0,0 +1,163 @@
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_min_q7.c
+ * Description: Minimum value of a Q7 vector
+ *
+ * $Date: 27. January 2017
+ * $Revision: V.1.5.1
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupStats
+ */
+
+
+/**
+ * @addtogroup Min
+ * @{
+ */
+
+
+/**
+ * @brief Minimum value of a Q7 vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult minimum value returned here
+ * @param[out] *pIndex index of minimum value returned here
+ * @return none.
+ */
+
+void arm_min_q7(
+ q7_t * pSrc,
+ uint32_t blockSize,
+ q7_t * pResult,
+ uint32_t * pIndex)
+{
+#if defined (ARM_MATH_DSP)
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+ q7_t minVal1, minVal2, out; /* Temporary variables to store the output value. */
+ uint32_t blkCnt, outIndex, count; /* loop counter */
+
+ /* Initialise the count value. */
+ count = 0U;
+ /* Initialise the index value to zero. */
+ outIndex = 0U;
+ /* Load first input value that act as reference value for comparision */
+ out = *pSrc++;
+
+ /* Loop unrolling */
+ blkCnt = (blockSize - 1U) >> 2U;
+
+ while (blkCnt > 0U)
+ {
+ /* Initialize minVal to the next consecutive values one by one */
+ minVal1 = *pSrc++;
+ minVal2 = *pSrc++;
+
+ /* compare for the minimum value */
+ if (out > minVal1)
+ {
+ /* Update the minimum value and its index */
+ out = minVal1;
+ outIndex = count + 1U;
+ }
+
+ /* compare for the minimum value */
+ if (out > minVal2)
+ {
+ /* Update the minimum value and its index */
+ out = minVal2;
+ outIndex = count + 2U;
+ }
+
+ /* Initialize minVal to the next consecutive values one by one */
+ minVal1 = *pSrc++;
+ minVal2 = *pSrc++;
+
+ /* compare for the minimum value */
+ if (out > minVal1)
+ {
+ /* Update the minimum value and its index */
+ out = minVal1;
+ outIndex = count + 3U;
+ }
+
+ /* compare for the minimum value */
+ if (out > minVal2)
+ {
+ /* Update the minimum value and its index */
+ out = minVal2;
+ outIndex = count + 4U;
+ }
+
+ count += 4U;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* if (blockSize - 1U) is not multiple of 4 */
+ blkCnt = (blockSize - 1U) % 4U;
+
+#else
+ /* Run the below code for Cortex-M0 */
+
+ q7_t minVal1, out; /* Temporary variables to store the output value. */
+ uint32_t blkCnt, outIndex; /* loop counter */
+
+ /* Initialise the index value to zero. */
+ outIndex = 0U;
+ /* Load first input value that act as reference value for comparision */
+ out = *pSrc++;
+
+ blkCnt = (blockSize - 1U);
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+ while (blkCnt > 0U)
+ {
+ /* Initialize minVal to the next consecutive values one by one */
+ minVal1 = *pSrc++;
+
+ /* compare for the minimum value */
+ if (out > minVal1)
+ {
+ /* Update the minimum value and it's index */
+ out = minVal1;
+ outIndex = blockSize - blkCnt;
+ }
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Store the minimum value and it's index into destination pointers */
+ *pResult = out;
+ *pIndex = outIndex;
+}
+
+/**
+ * @} end of Min group
+ */
diff --git a/DSP/Source/StatisticsFunctions/arm_power_f32.c b/DSP/Source/StatisticsFunctions/arm_power_f32.c
new file mode 100644
index 0000000..1426735
--- /dev/null
+++ b/DSP/Source/StatisticsFunctions/arm_power_f32.c
@@ -0,0 +1,129 @@
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_power_f32.c
+ * Description: Sum of the squares of the elements of a floating-point vector
+ *
+ * $Date: 27. January 2017
+ * $Revision: V.1.5.1
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupStats
+ */
+
+/**
+ * @defgroup power Power
+ *
+ * Calculates the sum of the squares of the elements in the input vector.
+ * The underlying algorithm is used:
+ *
+ * <pre>
+ * Result = pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + pSrc[2] * pSrc[2] + ... + pSrc[blockSize-1] * pSrc[blockSize-1];
+ * </pre>
+ *
+ * There are separate functions for floating point, Q31, Q15, and Q7 data types.
+ */
+
+/**
+ * @addtogroup power
+ * @{
+ */
+
+
+/**
+ * @brief Sum of the squares of the elements of a floating-point vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult sum of the squares value returned here
+ * @return none.
+ *
+ */
+
+
+void arm_power_f32(
+ float32_t * pSrc,
+ uint32_t blockSize,
+ float32_t * pResult)
+{
+ float32_t sum = 0.0f; /* accumulator */
+ float32_t in; /* Temporary variable to store input value */
+ uint32_t blkCnt; /* loop counter */
+
+#if defined (ARM_MATH_DSP)
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+ /*loop Unrolling */
+ blkCnt = blockSize >> 2U;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** a second loop below computes the remaining 1 to 3 samples. */
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+ /* Compute Power and then store the result in a temporary variable, sum. */
+ in = *pSrc++;
+ sum += in * in;
+ in = *pSrc++;
+ sum += in * in;
+ in = *pSrc++;
+ sum += in * in;
+ in = *pSrc++;
+ sum += in * in;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+ ** No loop unrolling is used. */
+ blkCnt = blockSize % 0x4U;
+
+
+#else
+ /* Run the below code for Cortex-M0 */
+
+ /* Loop over blockSize number of values */
+ blkCnt = blockSize;
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+ /* compute power and then store the result in a temporary variable, sum. */
+ in = *pSrc++;
+ sum += in * in;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Store the result to the destination */
+ *pResult = sum;
+}
+
+/**
+ * @} end of power group
+ */
diff --git a/DSP/Source/StatisticsFunctions/arm_power_q15.c b/DSP/Source/StatisticsFunctions/arm_power_q15.c
new file mode 100644
index 0000000..6d95f4d
--- /dev/null
+++ b/DSP/Source/StatisticsFunctions/arm_power_q15.c
@@ -0,0 +1,138 @@
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_power_q15.c
+ * Description: Sum of the squares of the elements of a Q15 vector
+ *
+ * $Date: 27. January 2017
+ * $Revision: V.1.5.1
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupStats
+ */
+
+/**
+ * @addtogroup power
+ * @{
+ */
+
+/**
+ * @brief Sum of the squares of the elements of a Q15 vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult sum of the squares value returned here
+ * @return none.
+ *
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ *
+ * \par
+ * The function is implemented using a 64-bit internal accumulator.
+ * The input is represented in 1.15 format.
+ * Intermediate multiplication yields a 2.30 format, and this
+ * result is added without saturation to a 64-bit accumulator in 34.30 format.
+ * With 33 guard bits in the accumulator, there is no risk of overflow, and the
+ * full precision of the intermediate multiplication is preserved.
+ * Finally, the return result is in 34.30 format.
+ *
+ */
+
+void arm_power_q15(
+ q15_t * pSrc,
+ uint32_t blockSize,
+ q63_t * pResult)
+{
+ q63_t sum = 0; /* Temporary result storage */
+
+#if defined (ARM_MATH_DSP)
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+ q31_t in32; /* Temporary variable to store input value */
+ q15_t in16; /* Temporary variable to store input value */
+ uint32_t blkCnt; /* loop counter */
+
+
+ /* loop Unrolling */
+ blkCnt = blockSize >> 2U;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** a second loop below computes the remaining 1 to 3 samples. */
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+ /* Compute Power and then store the result in a temporary variable, sum. */
+ in32 = *__SIMD32(pSrc)++;
+ sum = __SMLALD(in32, in32, sum);
+ in32 = *__SIMD32(pSrc)++;
+ sum = __SMLALD(in32, in32, sum);
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+ ** No loop unrolling is used. */
+ blkCnt = blockSize % 0x4U;
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+ /* Compute Power and then store the result in a temporary variable, sum. */
+ in16 = *pSrc++;
+ sum = __SMLALD(in16, in16, sum);
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+#else
+ /* Run the below code for Cortex-M0 */
+
+ q15_t in; /* Temporary variable to store input value */
+ uint32_t blkCnt; /* loop counter */
+
+
+ /* Loop over blockSize number of values */
+ blkCnt = blockSize;
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+ /* Compute Power and then store the result in a temporary variable, sum. */
+ in = *pSrc++;
+ sum += ((q31_t) in * in);
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+ /* Store the results in 34.30 format */
+ *pResult = sum;
+}
+
+/**
+ * @} end of power group
+ */
diff --git a/DSP/Source/StatisticsFunctions/arm_power_q31.c b/DSP/Source/StatisticsFunctions/arm_power_q31.c
new file mode 100644
index 0000000..16be249
--- /dev/null
+++ b/DSP/Source/StatisticsFunctions/arm_power_q31.c
@@ -0,0 +1,129 @@
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_power_q31.c
+ * Description: Sum of the squares of the elements of a Q31 vector
+ *
+ * $Date: 27. January 2017
+ * $Revision: V.1.5.1
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupStats
+ */
+
+/**
+ * @addtogroup power
+ * @{
+ */
+
+/**
+ * @brief Sum of the squares of the elements of a Q31 vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult sum of the squares value returned here
+ * @return none.
+ *
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ *
+ * \par
+ * The function is implemented using a 64-bit internal accumulator.
+ * The input is represented in 1.31 format.
+ * Intermediate multiplication yields a 2.62 format, and this
+ * result is truncated to 2.48 format by discarding the lower 14 bits.
+ * The 2.48 result is then added without saturation to a 64-bit accumulator in 16.48 format.
+ * With 15 guard bits in the accumulator, there is no risk of overflow, and the
+ * full precision of the intermediate multiplication is preserved.
+ * Finally, the return result is in 16.48 format.
+ *
+ */
+
+void arm_power_q31(
+ q31_t * pSrc,
+ uint32_t blockSize,
+ q63_t * pResult)
+{
+ q63_t sum = 0; /* Temporary result storage */
+ q31_t in;
+ uint32_t blkCnt; /* loop counter */
+
+
+#if defined (ARM_MATH_DSP)
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+ /*loop Unrolling */
+ blkCnt = blockSize >> 2U;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** a second loop below computes the remaining 1 to 3 samples. */
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+ /* Compute Power then shift intermediate results by 14 bits to maintain 16.48 format and then store the result in a temporary variable sum, providing 15 guard bits. */
+ in = *pSrc++;
+ sum += ((q63_t) in * in) >> 14U;
+
+ in = *pSrc++;
+ sum += ((q63_t) in * in) >> 14U;
+
+ in = *pSrc++;
+ sum += ((q63_t) in * in) >> 14U;
+
+ in = *pSrc++;
+ sum += ((q63_t) in * in) >> 14U;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+ ** No loop unrolling is used. */
+ blkCnt = blockSize % 0x4U;
+
+#else
+ /* Run the below code for Cortex-M0 */
+
+ /* Loop over blockSize number of values */
+ blkCnt = blockSize;
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+ /* Compute Power and then store the result in a temporary variable, sum. */
+ in = *pSrc++;
+ sum += ((q63_t) in * in) >> 14U;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Store the results in 16.48 format */
+ *pResult = sum;
+}
+
+/**
+ * @} end of power group
+ */
diff --git a/DSP/Source/StatisticsFunctions/arm_power_q7.c b/DSP/Source/StatisticsFunctions/arm_power_q7.c
new file mode 100644
index 0000000..24306cd
--- /dev/null
+++ b/DSP/Source/StatisticsFunctions/arm_power_q7.c
@@ -0,0 +1,127 @@
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_power_q7.c
+ * Description: Sum of the squares of the elements of a Q7 vector
+ *
+ * $Date: 27. January 2017
+ * $Revision: V.1.5.1
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupStats
+ */
+
+/**
+ * @addtogroup power
+ * @{
+ */
+
+/**
+ * @brief Sum of the squares of the elements of a Q7 vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult sum of the squares value returned here
+ * @return none.
+ *
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ *
+ * \par
+ * The function is implemented using a 32-bit internal accumulator.
+ * The input is represented in 1.7 format.
+ * Intermediate multiplication yields a 2.14 format, and this
+ * result is added without saturation to an accumulator in 18.14 format.
+ * With 17 guard bits in the accumulator, there is no risk of overflow, and the
+ * full precision of the intermediate multiplication is preserved.
+ * Finally, the return result is in 18.14 format.
+ *
+ */
+
+void arm_power_q7(
+ q7_t * pSrc,
+ uint32_t blockSize,
+ q31_t * pResult)
+{
+ q31_t sum = 0; /* Temporary result storage */
+ q7_t in; /* Temporary variable to store input */
+ uint32_t blkCnt; /* loop counter */
+
+#if defined (ARM_MATH_DSP)
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+ q31_t input1; /* Temporary variable to store packed input */
+ q31_t in1, in2; /* Temporary variables to store input */
+
+ /*loop Unrolling */
+ blkCnt = blockSize >> 2U;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** a second loop below computes the remaining 1 to 3 samples. */
+ while (blkCnt > 0U)
+ {
+ /* Reading two inputs of pSrc vector and packing */
+ input1 = *__SIMD32(pSrc)++;
+
+ in1 = __SXTB16(__ROR(input1, 8));
+ in2 = __SXTB16(input1);
+
+ /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+ /* calculate power and accumulate to accumulator */
+ sum = __SMLAD(in1, in1, sum);
+ sum = __SMLAD(in2, in2, sum);
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+ ** No loop unrolling is used. */
+ blkCnt = blockSize % 0x4U;
+
+#else
+ /* Run the below code for Cortex-M0 */
+
+ /* Loop over blockSize number of values */
+ blkCnt = blockSize;
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+ /* Compute Power and then store the result in a temporary variable, sum. */
+ in = *pSrc++;
+ sum += ((q15_t) in * in);
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Store the result in 18.14 format */
+ *pResult = sum;
+}
+
+/**
+ * @} end of power group
+ */
diff --git a/DSP/Source/StatisticsFunctions/arm_rms_f32.c b/DSP/Source/StatisticsFunctions/arm_rms_f32.c
new file mode 100644
index 0000000..8d1b708
--- /dev/null
+++ b/DSP/Source/StatisticsFunctions/arm_rms_f32.c
@@ -0,0 +1,127 @@
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_rms_f32.c
+ * Description: Root mean square value of an array of F32 type
+ *
+ * $Date: 27. January 2017
+ * $Revision: V.1.5.1
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupStats
+ */
+
+/**
+ * @defgroup RMS Root mean square (RMS)
+ *
+ *
+ * Calculates the Root Mean Sqaure of the elements in the input vector.
+ * The underlying algorithm is used:
+ *
+ * <pre>
+ * Result = sqrt(((pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] * pSrc[blockSize-1]) / blockSize));
+ * </pre>
+ *
+ * There are separate functions for floating point, Q31, and Q15 data types.
+ */
+
+/**
+ * @addtogroup RMS
+ * @{
+ */
+
+
+/**
+ * @brief Root Mean Square of the elements of a floating-point vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult rms value returned here
+ * @return none.
+ *
+ */
+
+void arm_rms_f32(
+ float32_t * pSrc,
+ uint32_t blockSize,
+ float32_t * pResult)
+{
+ float32_t sum = 0.0f; /* Accumulator */
+ float32_t in; /* Tempoprary variable to store input value */
+ uint32_t blkCnt; /* loop counter */
+
+#if defined (ARM_MATH_DSP)
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+ /* loop Unrolling */
+ blkCnt = blockSize >> 2U;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** a second loop below computes the remaining 1 to 3 samples. */
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+ /* Compute sum of the squares and then store the result in a temporary variable, sum */
+ in = *pSrc++;
+ sum += in * in;
+ in = *pSrc++;
+ sum += in * in;
+ in = *pSrc++;
+ sum += in * in;
+ in = *pSrc++;
+ sum += in * in;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+ ** No loop unrolling is used. */
+ blkCnt = blockSize % 0x4U;
+
+#else
+ /* Run the below code for Cortex-M0 */
+
+ /* Loop over blockSize number of values */
+ blkCnt = blockSize;
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+ /* Compute sum of the squares and then store the results in a temporary variable, sum */
+ in = *pSrc++;
+ sum += in * in;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Compute Rms and store the result in the destination */
+ arm_sqrt_f32(sum / (float32_t) blockSize, pResult);
+}
+
+/**
+ * @} end of RMS group
+ */
diff --git a/DSP/Source/StatisticsFunctions/arm_rms_q15.c b/DSP/Source/StatisticsFunctions/arm_rms_q15.c
new file mode 100644
index 0000000..d0e61ca
--- /dev/null
+++ b/DSP/Source/StatisticsFunctions/arm_rms_q15.c
@@ -0,0 +1,139 @@
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_rms_q15.c
+ * Description: Root Mean Square of the elements of a Q15 vector
+ *
+ * $Date: 27. January 2017
+ * $Revision: V.1.5.1
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ * @addtogroup RMS
+ * @{
+ */
+
+/**
+ * @brief Root Mean Square of the elements of a Q15 vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult rms value returned here
+ * @return none.
+ *
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ *
+ * \par
+ * The function is implemented using a 64-bit internal accumulator.
+ * The input is represented in 1.15 format.
+ * Intermediate multiplication yields a 2.30 format, and this
+ * result is added without saturation to a 64-bit accumulator in 34.30 format.
+ * With 33 guard bits in the accumulator, there is no risk of overflow, and the
+ * full precision of the intermediate multiplication is preserved.
+ * Finally, the 34.30 result is truncated to 34.15 format by discarding the lower
+ * 15 bits, and then saturated to yield a result in 1.15 format.
+ *
+ */
+
+void arm_rms_q15(
+ q15_t * pSrc,
+ uint32_t blockSize,
+ q15_t * pResult)
+{
+ q63_t sum = 0; /* accumulator */
+
+#if defined (ARM_MATH_DSP)
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+ q31_t in; /* temporary variable to store the input value */
+ q15_t in1; /* temporary variable to store the input value */
+ uint32_t blkCnt; /* loop counter */
+
+ /* loop Unrolling */
+ blkCnt = blockSize >> 2U;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** a second loop below computes the remaining 1 to 3 samples. */
+ while (blkCnt > 0U)
+ {
+ /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+ /* Compute sum of the squares and then store the results in a temporary variable, sum */
+ in = *__SIMD32(pSrc)++;
+ sum = __SMLALD(in, in, sum);
+ in = *__SIMD32(pSrc)++;
+ sum = __SMLALD(in, in, sum);
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+ ** No loop unrolling is used. */
+ blkCnt = blockSize % 0x4U;
+
+ while (blkCnt > 0U)
+ {
+ /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+ /* Compute sum of the squares and then store the results in a temporary variable, sum */
+ in1 = *pSrc++;
+ sum = __SMLALD(in1, in1, sum);
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Truncating and saturating the accumulator to 1.15 format */
+ /* Store the result in the destination */
+ arm_sqrt_q15(__SSAT((sum / (q63_t)blockSize) >> 15, 16), pResult);
+
+#else
+ /* Run the below code for Cortex-M0 */
+
+ q15_t in; /* temporary variable to store the input value */
+ uint32_t blkCnt; /* loop counter */
+
+ /* Loop over blockSize number of values */
+ blkCnt = blockSize;
+
+ while (blkCnt > 0U)
+ {
+ /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+ /* Compute sum of the squares and then store the results in a temporary variable, sum */
+ in = *pSrc++;
+ sum += ((q31_t) in * in);
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Truncating and saturating the accumulator to 1.15 format */
+ /* Store the result in the destination */
+ arm_sqrt_q15(__SSAT((sum / (q63_t)blockSize) >> 15, 16), pResult);
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+}
+
+/**
+ * @} end of RMS group
+ */
diff --git a/DSP/Source/StatisticsFunctions/arm_rms_q31.c b/DSP/Source/StatisticsFunctions/arm_rms_q31.c
new file mode 100644
index 0000000..cb3c58e
--- /dev/null
+++ b/DSP/Source/StatisticsFunctions/arm_rms_q31.c
@@ -0,0 +1,137 @@
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_rms_q31.c
+ * Description: Root Mean Square of the elements of a Q31 vector
+ *
+ * $Date: 27. January 2017
+ * $Revision: V.1.5.1
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ * @addtogroup RMS
+ * @{
+ */
+
+
+/**
+ * @brief Root Mean Square of the elements of a Q31 vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult rms value returned here
+ * @return none.
+ *
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ *
+ *\par
+ * The function is implemented using an internal 64-bit accumulator.
+ * The input is represented in 1.31 format, and intermediate multiplication
+ * yields a 2.62 format.
+ * The accumulator maintains full precision of the intermediate multiplication results,
+ * but provides only a single guard bit.
+ * There is no saturation on intermediate additions.
+ * If the accumulator overflows, it wraps around and distorts the result.
+ * In order to avoid overflows completely, the input signal must be scaled down by
+ * log2(blockSize) bits, as a total of blockSize additions are performed internally.
+ * Finally, the 2.62 accumulator is right shifted by 31 bits to yield a 1.31 format value.
+ *
+ */
+
+void arm_rms_q31(
+ q31_t * pSrc,
+ uint32_t blockSize,
+ q31_t * pResult)
+{
+ q63_t sum = 0; /* accumulator */
+ q31_t in; /* Temporary variable to store the input */
+ uint32_t blkCnt; /* loop counter */
+
+#if defined (ARM_MATH_DSP)
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+ q31_t in1, in2, in3, in4; /* Temporary input variables */
+
+ /*loop Unrolling */
+ blkCnt = blockSize >> 2U;
+
+ /* First part of the processing with loop unrolling. Compute 8 outputs at a time.
+ ** a second loop below computes the remaining 1 to 7 samples. */
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+ /* Compute sum of the squares and then store the result in a temporary variable, sum */
+ /* read two samples from source buffer */
+ in1 = pSrc[0];
+ in2 = pSrc[1];
+
+ /* calculate power and accumulate to accumulator */
+ sum += (q63_t) in1 *in1;
+ sum += (q63_t) in2 *in2;
+
+ /* read two samples from source buffer */
+ in3 = pSrc[2];
+ in4 = pSrc[3];
+
+ /* calculate power and accumulate to accumulator */
+ sum += (q63_t) in3 *in3;
+ sum += (q63_t) in4 *in4;
+
+
+ /* update source buffer to process next samples */
+ pSrc += 4U;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* If the blockSize is not a multiple of 8, compute any remaining output samples here.
+ ** No loop unrolling is used. */
+ blkCnt = blockSize % 0x4U;
+
+#else
+ /* Run the below code for Cortex-M0 */
+
+ blkCnt = blockSize;
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+ while (blkCnt > 0U)
+ {
+ /* C = A[0] * A[0] + A[1] * A[1] + A[2] * A[2] + ... + A[blockSize-1] * A[blockSize-1] */
+ /* Compute sum of the squares and then store the results in a temporary variable, sum */
+ in = *pSrc++;
+ sum += (q63_t) in *in;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Convert data in 2.62 to 1.31 by 31 right shifts and saturate */
+ /* Compute Rms and store the result in the destination vector */
+ arm_sqrt_q31(clip_q63_to_q31((sum / (q63_t) blockSize) >> 31), pResult);
+}
+
+/**
+ * @} end of RMS group
+ */
diff --git a/DSP/Source/StatisticsFunctions/arm_std_f32.c b/DSP/Source/StatisticsFunctions/arm_std_f32.c
new file mode 100644
index 0000000..9750b88
--- /dev/null
+++ b/DSP/Source/StatisticsFunctions/arm_std_f32.c
@@ -0,0 +1,186 @@
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_std_f32.c
+ * Description: Standard deviation of the elements of a floating-point vector
+ *
+ * $Date: 27. January 2017
+ * $Revision: V.1.5.1
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupStats
+ */
+
+/**
+ * @defgroup STD Standard deviation
+ *
+ * Calculates the standard deviation of the elements in the input vector.
+ * The underlying algorithm is used:
+ *
+ * <pre>
+ * Result = sqrt((sumOfSquares - sum<sup>2</sup> / blockSize) / (blockSize - 1))
+ *
+ * where, sumOfSquares = pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] * pSrc[blockSize-1]
+ *
+ * sum = pSrc[0] + pSrc[1] + pSrc[2] + ... + pSrc[blockSize-1]
+ * </pre>
+ *
+ * There are separate functions for floating point, Q31, and Q15 data types.
+ */
+
+/**
+ * @addtogroup STD
+ * @{
+ */
+
+
+/**
+ * @brief Standard deviation of the elements of a floating-point vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult standard deviation value returned here
+ * @return none.
+ */
+
+void arm_std_f32(
+ float32_t * pSrc,
+ uint32_t blockSize,
+ float32_t * pResult)
+{
+ float32_t sum = 0.0f; /* Temporary result storage */
+ float32_t sumOfSquares = 0.0f; /* Sum of squares */
+ float32_t in; /* input value */
+ uint32_t blkCnt; /* loop counter */
+#if defined (ARM_MATH_DSP)
+ float32_t meanOfSquares, mean, squareOfMean; /* Temporary variables */
+#else
+ float32_t squareOfSum; /* Square of Sum */
+ float32_t var; /* Temporary varaince storage */
+#endif
+
+ if (blockSize == 1U)
+ {
+ *pResult = 0;
+ return;
+ }
+
+#if defined (ARM_MATH_DSP)
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+ /*loop Unrolling */
+ blkCnt = blockSize >> 2U;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** a second loop below computes the remaining 1 to 3 samples. */
+ while (blkCnt > 0U)
+ {
+ /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+ /* Compute Sum of squares of the input samples
+ * and then store the result in a temporary variable, sum. */
+ in = *pSrc++;
+ sum += in;
+ sumOfSquares += in * in;
+ in = *pSrc++;
+ sum += in;
+ sumOfSquares += in * in;
+ in = *pSrc++;
+ sum += in;
+ sumOfSquares += in * in;
+ in = *pSrc++;
+ sum += in;
+ sumOfSquares += in * in;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+ ** No loop unrolling is used. */
+ blkCnt = blockSize % 0x4U;
+
+ while (blkCnt > 0U)
+ {
+ /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+ /* Compute Sum of squares of the input samples
+ * and then store the result in a temporary variable, sum. */
+ in = *pSrc++;
+ sum += in;
+ sumOfSquares += in * in;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Compute Mean of squares of the input samples
+ * and then store the result in a temporary variable, meanOfSquares. */
+ meanOfSquares = sumOfSquares / ((float32_t) blockSize - 1.0f);
+
+ /* Compute mean of all input values */
+ mean = sum / (float32_t) blockSize;
+
+ /* Compute square of mean */
+ squareOfMean = (mean * mean) * (((float32_t) blockSize) /
+ ((float32_t) blockSize - 1.0f));
+
+ /* Compute standard deviation and then store the result to the destination */
+ arm_sqrt_f32((meanOfSquares - squareOfMean), pResult);
+
+#else
+ /* Run the below code for Cortex-M0 */
+
+ /* Loop over blockSize number of values */
+ blkCnt = blockSize;
+
+ while (blkCnt > 0U)
+ {
+ /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+ /* Compute Sum of squares of the input samples
+ * and then store the result in a temporary variable, sumOfSquares. */
+ in = *pSrc++;
+ sumOfSquares += in * in;
+
+ /* C = (A[0] + A[1] + ... + A[blockSize-1]) */
+ /* Compute Sum of the input samples
+ * and then store the result in a temporary variable, sum. */
+ sum += in;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Compute the square of sum */
+ squareOfSum = ((sum * sum) / (float32_t) blockSize);
+
+ /* Compute the variance */
+ var = ((sumOfSquares - squareOfSum) / (float32_t) (blockSize - 1.0f));
+
+ /* Compute standard deviation and then store the result to the destination */
+ arm_sqrt_f32(var, pResult);
+
+#endif /* #if defined (ARM_MATH_DSP) */
+}
+
+/**
+ * @} end of STD group
+ */
diff --git a/DSP/Source/StatisticsFunctions/arm_std_q15.c b/DSP/Source/StatisticsFunctions/arm_std_q15.c
new file mode 100644
index 0000000..2f2f52e
--- /dev/null
+++ b/DSP/Source/StatisticsFunctions/arm_std_q15.c
@@ -0,0 +1,174 @@
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_std_q15.c
+ * Description: Standard deviation of an array of Q15 vector
+ *
+ * $Date: 27. January 2017
+ * $Revision: V.1.5.1
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupStats
+ */
+
+/**
+ * @addtogroup STD
+ * @{
+ */
+
+/**
+ * @brief Standard deviation of the elements of a Q15 vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult standard deviation value returned here
+ * @return none.
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ *
+ * \par
+ * The function is implemented using a 64-bit internal accumulator.
+ * The input is represented in 1.15 format.
+ * Intermediate multiplication yields a 2.30 format, and this
+ * result is added without saturation to a 64-bit accumulator in 34.30 format.
+ * With 33 guard bits in the accumulator, there is no risk of overflow, and the
+ * full precision of the intermediate multiplication is preserved.
+ * Finally, the 34.30 result is truncated to 34.15 format by discarding the lower
+ * 15 bits, and then saturated to yield a result in 1.15 format.
+ */
+
+void arm_std_q15(
+ q15_t * pSrc,
+ uint32_t blockSize,
+ q15_t * pResult)
+{
+ q31_t sum = 0; /* Accumulator */
+ q31_t meanOfSquares, squareOfMean; /* square of mean and mean of square */
+ uint32_t blkCnt; /* loop counter */
+ q63_t sumOfSquares = 0; /* Accumulator */
+#if defined (ARM_MATH_DSP)
+ q31_t in; /* input value */
+ q15_t in1; /* input value */
+#else
+ q15_t in; /* input value */
+#endif
+
+ if (blockSize == 1U)
+ {
+ *pResult = 0;
+ return;
+ }
+
+#if defined (ARM_MATH_DSP)
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+ /*loop Unrolling */
+ blkCnt = blockSize >> 2U;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** a second loop below computes the remaining 1 to 3 samples. */
+ while (blkCnt > 0U)
+ {
+ /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+ /* Compute Sum of squares of the input samples
+ * and then store the result in a temporary variable, sum. */
+ in = *__SIMD32(pSrc)++;
+ sum += ((in << 16U) >> 16U);
+ sum += (in >> 16U);
+ sumOfSquares = __SMLALD(in, in, sumOfSquares);
+ in = *__SIMD32(pSrc)++;
+ sum += ((in << 16U) >> 16U);
+ sum += (in >> 16U);
+ sumOfSquares = __SMLALD(in, in, sumOfSquares);
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+ ** No loop unrolling is used. */
+ blkCnt = blockSize % 0x4U;
+
+ while (blkCnt > 0U)
+ {
+ /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+ /* Compute Sum of squares of the input samples
+ * and then store the result in a temporary variable, sum. */
+ in1 = *pSrc++;
+ sumOfSquares = __SMLALD(in1, in1, sumOfSquares);
+ sum += in1;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Compute Mean of squares of the input samples
+ * and then store the result in a temporary variable, meanOfSquares. */
+ meanOfSquares = (q31_t)(sumOfSquares / (q63_t)(blockSize - 1U));
+
+ /* Compute square of mean */
+ squareOfMean = (q31_t)((q63_t)sum * sum / (q63_t)(blockSize * (blockSize - 1U)));
+
+ /* mean of the squares minus the square of the mean. */
+ /* Compute standard deviation and store the result to the destination */
+ arm_sqrt_q15(__SSAT((meanOfSquares - squareOfMean) >> 15U, 16U), pResult);
+
+#else
+ /* Run the below code for Cortex-M0 */
+
+ /* Loop over blockSize number of values */
+ blkCnt = blockSize;
+
+ while (blkCnt > 0U)
+ {
+ /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+ /* Compute Sum of squares of the input samples
+ * and then store the result in a temporary variable, sumOfSquares. */
+ in = *pSrc++;
+ sumOfSquares += (in * in);
+
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+ /* Compute sum of all input values and then store the result in a temporary variable, sum. */
+ sum += in;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Compute Mean of squares of the input samples
+ * and then store the result in a temporary variable, meanOfSquares. */
+ meanOfSquares = (q31_t)(sumOfSquares / (q63_t)(blockSize - 1U));
+
+ /* Compute square of mean */
+ squareOfMean = (q31_t)((q63_t)sum * sum / (q63_t)(blockSize * (blockSize - 1U)));
+
+ /* mean of the squares minus the square of the mean. */
+ /* Compute standard deviation and store the result to the destination */
+ arm_sqrt_q15(__SSAT((meanOfSquares - squareOfMean) >> 15U, 16U), pResult);
+
+#endif /* #if defined (ARM_MATH_DSP) */
+}
+
+/**
+ * @} end of STD group
+ */
diff --git a/DSP/Source/StatisticsFunctions/arm_std_q31.c b/DSP/Source/StatisticsFunctions/arm_std_q31.c
new file mode 100644
index 0000000..f02cbdd
--- /dev/null
+++ b/DSP/Source/StatisticsFunctions/arm_std_q31.c
@@ -0,0 +1,169 @@
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_std_q31.c
+ * Description: Standard deviation of an array of Q31 type.
+ *
+ * $Date: 27. January 2017
+ * $Revision: V.1.5.1
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupStats
+ */
+
+/**
+ * @addtogroup STD
+ * @{
+ */
+
+/**
+ * @brief Standard deviation of the elements of a Q31 vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult standard deviation value returned here
+ * @return none.
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ *
+ *\par
+ * The function is implemented using an internal 64-bit accumulator.
+ * The input is represented in 1.31 format, which is then downshifted by 8 bits
+ * which yields 1.23, and intermediate multiplication yields a 2.46 format.
+ * The accumulator maintains full precision of the intermediate multiplication results,
+ * but provides only a 16 guard bits.
+ * There is no saturation on intermediate additions.
+ * If the accumulator overflows it wraps around and distorts the result.
+ * In order to avoid overflows completely the input signal must be scaled down by
+ * log2(blockSize)-8 bits, as a total of blockSize additions are performed internally.
+ * After division, internal variables should be Q18.46
+ * Finally, the 18.46 accumulator is right shifted by 15 bits to yield a 1.31 format value.
+ *
+ */
+
+void arm_std_q31(
+ q31_t * pSrc,
+ uint32_t blockSize,
+ q31_t * pResult)
+{
+ q63_t sum = 0; /* Accumulator */
+ q63_t meanOfSquares, squareOfMean; /* square of mean and mean of square */
+ q31_t in; /* input value */
+ uint32_t blkCnt; /* loop counter */
+ q63_t sumOfSquares = 0; /* Accumulator */
+
+ if (blockSize == 1U)
+ {
+ *pResult = 0;
+ return;
+ }
+
+#if defined (ARM_MATH_DSP)
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+ /*loop Unrolling */
+ blkCnt = blockSize >> 2U;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** a second loop below computes the remaining 1 to 3 samples. */
+ while (blkCnt > 0U)
+ {
+ /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+ /* Compute Sum of squares of the input samples
+ * and then store the result in a temporary variable, sum. */
+ in = *pSrc++ >> 8U;
+ sum += in;
+ sumOfSquares += ((q63_t) (in) * (in));
+ in = *pSrc++ >> 8U;
+ sum += in;
+ sumOfSquares += ((q63_t) (in) * (in));
+ in = *pSrc++ >> 8U;
+ sum += in;
+ sumOfSquares += ((q63_t) (in) * (in));
+ in = *pSrc++ >> 8U;
+ sum += in;
+ sumOfSquares += ((q63_t) (in) * (in));
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+ ** No loop unrolling is used. */
+ blkCnt = blockSize % 0x4U;
+
+ while (blkCnt > 0U)
+ {
+ /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+ /* Compute Sum of squares of the input samples
+ * and then store the result in a temporary variable, sum. */
+ in = *pSrc++ >> 8U;
+ sum += in;
+ sumOfSquares += ((q63_t) (in) * (in));
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Compute Mean of squares of the input samples
+ * and then store the result in a temporary variable, meanOfSquares. */
+ meanOfSquares = sumOfSquares / (q63_t)(blockSize - 1U);
+
+#else
+ /* Run the below code for Cortex-M0 */
+
+ /* Loop over blockSize number of values */
+ blkCnt = blockSize;
+
+ while (blkCnt > 0U)
+ {
+ /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+ /* Compute Sum of squares of the input samples
+ * and then store the result in a temporary variable, sumOfSquares. */
+ in = *pSrc++ >> 8U;
+ sumOfSquares += ((q63_t) (in) * (in));
+
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+ /* Compute sum of all input values and then store the result in a temporary variable, sum. */
+ sum += in;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Compute Mean of squares of the input samples
+ * and then store the result in a temporary variable, meanOfSquares. */
+ meanOfSquares = sumOfSquares / (q63_t)(blockSize - 1U);
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+ /* Compute square of mean */
+ squareOfMean = sum * sum / (q63_t)(blockSize * (blockSize - 1U));
+
+ /* Compute standard deviation and then store the result to the destination */
+ arm_sqrt_q31((meanOfSquares - squareOfMean) >> 15U, pResult);
+}
+
+/**
+ * @} end of STD group
+ */
diff --git a/DSP/Source/StatisticsFunctions/arm_var_f32.c b/DSP/Source/StatisticsFunctions/arm_var_f32.c
new file mode 100644
index 0000000..c0f731d
--- /dev/null
+++ b/DSP/Source/StatisticsFunctions/arm_var_f32.c
@@ -0,0 +1,181 @@
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_var_f32.c
+ * Description: Variance of the elements of a floating-point vector
+ *
+ * $Date: 27. January 2017
+ * $Revision: V.1.5.1
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupStats
+ */
+
+/**
+ * @defgroup variance Variance
+ *
+ * Calculates the variance of the elements in the input vector.
+ * The underlying algorithm used is the direct method sometimes referred to as the two-pass method:
+ *
+ * <pre>
+ * Result = sum(element - meanOfElements)^2) / numElement - 1
+ *
+ * where, meanOfElements = ( pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1] + ... + pSrc[blockSize-1] ) / blockSize
+ *
+ * </pre>
+ *
+ * There are separate functions for floating point, Q31, and Q15 data types.
+ */
+
+/**
+ * @addtogroup variance
+ * @{
+ */
+
+
+/**
+ * @brief Variance of the elements of a floating-point vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult variance value returned here
+ * @return none.
+ */
+
+void arm_var_f32(
+ float32_t * pSrc,
+ uint32_t blockSize,
+ float32_t * pResult)
+{
+ float32_t fMean, fValue;
+ uint32_t blkCnt; /* loop counter */
+ float32_t * pInput = pSrc;
+ float32_t sum = 0.0f;
+ float32_t fSum = 0.0f;
+ #if defined(ARM_MATH_DSP)
+ float32_t in1, in2, in3, in4;
+ #endif
+
+ if (blockSize <= 1U)
+ {
+ *pResult = 0;
+ return;
+ }
+
+ #if defined(ARM_MATH_DSP)
+ /* Run the below code for Cortex-M4 and Cortex-M7 */
+
+ /*loop Unrolling */
+ blkCnt = blockSize >> 2U;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** a second loop below computes the remaining 1 to 3 samples. */
+ while (blkCnt > 0U)
+ {
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+ in1 = *pInput++;
+ in2 = *pInput++;
+ in3 = *pInput++;
+ in4 = *pInput++;
+
+ sum += in1;
+ sum += in2;
+ sum += in3;
+ sum += in4;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+ ** No loop unrolling is used. */
+ blkCnt = blockSize % 0x4U;
+
+ #else
+ /* Run the below code for Cortex-M0 or Cortex-M3 */
+
+ /* Loop over blockSize number of values */
+ blkCnt = blockSize;
+
+ #endif
+
+ while (blkCnt > 0U)
+ {
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+ sum += *pInput++;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) / blockSize */
+ fMean = sum / (float32_t) blockSize;
+
+ pInput = pSrc;
+
+ #if defined(ARM_MATH_DSP)
+
+ /*loop Unrolling */
+ blkCnt = blockSize >> 2U;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** a second loop below computes the remaining 1 to 3 samples. */
+ while (blkCnt > 0U)
+ {
+ fValue = *pInput++ - fMean;
+ fSum += fValue * fValue;
+ fValue = *pInput++ - fMean;
+ fSum += fValue * fValue;
+ fValue = *pInput++ - fMean;
+ fSum += fValue * fValue;
+ fValue = *pInput++ - fMean;
+ fSum += fValue * fValue;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ blkCnt = blockSize % 0x4U;
+ #else
+ /* Run the below code for Cortex-M0 or Cortex-M3 */
+
+ /* Loop over blockSize number of values */
+ blkCnt = blockSize;
+ #endif
+
+ while (blkCnt > 0U)
+ {
+ fValue = *pInput++ - fMean;
+ fSum += fValue * fValue;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Variance */
+ *pResult = fSum / (float32_t)(blockSize - 1.0f);
+}
+
+/**
+ * @} end of variance group
+ */
diff --git a/DSP/Source/StatisticsFunctions/arm_var_q15.c b/DSP/Source/StatisticsFunctions/arm_var_q15.c
new file mode 100644
index 0000000..5ba61f7
--- /dev/null
+++ b/DSP/Source/StatisticsFunctions/arm_var_q15.c
@@ -0,0 +1,172 @@
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_var_q15.c
+ * Description: Variance of an array of Q15 type
+ *
+ * $Date: 27. January 2017
+ * $Revision: V.1.5.1
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupStats
+ */
+
+/**
+ * @addtogroup variance
+ * @{
+ */
+
+/**
+ * @brief Variance of the elements of a Q15 vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult variance value returned here
+ * @return none.
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ *
+ * \par
+ * The function is implemented using a 64-bit internal accumulator.
+ * The input is represented in 1.15 format.
+ * Intermediate multiplication yields a 2.30 format, and this
+ * result is added without saturation to a 64-bit accumulator in 34.30 format.
+ * With 33 guard bits in the accumulator, there is no risk of overflow, and the
+ * full precision of the intermediate multiplication is preserved.
+ * Finally, the 34.30 result is truncated to 34.15 format by discarding the lower
+ * 15 bits, and then saturated to yield a result in 1.15 format.
+ */
+
+void arm_var_q15(
+ q15_t * pSrc,
+ uint32_t blockSize,
+ q15_t * pResult)
+{
+ q31_t sum = 0; /* Accumulator */
+ q31_t meanOfSquares, squareOfMean; /* square of mean and mean of square */
+ uint32_t blkCnt; /* loop counter */
+ q63_t sumOfSquares = 0; /* Accumulator */
+#if defined (ARM_MATH_DSP)
+ q31_t in; /* input value */
+ q15_t in1; /* input value */
+#else
+ q15_t in; /* input value */
+#endif
+
+ if (blockSize == 1U)
+ {
+ *pResult = 0;
+ return;
+ }
+
+#if defined (ARM_MATH_DSP)
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+ /*loop Unrolling */
+ blkCnt = blockSize >> 2U;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** a second loop below computes the remaining 1 to 3 samples. */
+ while (blkCnt > 0U)
+ {
+ /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+ /* Compute Sum of squares of the input samples
+ * and then store the result in a temporary variable, sum. */
+ in = *__SIMD32(pSrc)++;
+ sum += ((in << 16U) >> 16U);
+ sum += (in >> 16U);
+ sumOfSquares = __SMLALD(in, in, sumOfSquares);
+ in = *__SIMD32(pSrc)++;
+ sum += ((in << 16U) >> 16U);
+ sum += (in >> 16U);
+ sumOfSquares = __SMLALD(in, in, sumOfSquares);
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+ ** No loop unrolling is used. */
+ blkCnt = blockSize % 0x4U;
+
+ while (blkCnt > 0U)
+ {
+ /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+ /* Compute Sum of squares of the input samples
+ * and then store the result in a temporary variable, sum. */
+ in1 = *pSrc++;
+ sumOfSquares = __SMLALD(in1, in1, sumOfSquares);
+ sum += in1;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Compute Mean of squares of the input samples
+ * and then store the result in a temporary variable, meanOfSquares. */
+ meanOfSquares = (q31_t)(sumOfSquares / (q63_t)(blockSize - 1U));
+
+ /* Compute square of mean */
+ squareOfMean = (q31_t)((q63_t)sum * sum / (q63_t)(blockSize * (blockSize - 1U)));
+
+ /* mean of the squares minus the square of the mean. */
+ *pResult = (meanOfSquares - squareOfMean) >> 15U;
+
+#else
+ /* Run the below code for Cortex-M0 */
+
+ /* Loop over blockSize number of values */
+ blkCnt = blockSize;
+
+ while (blkCnt > 0U)
+ {
+ /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+ /* Compute Sum of squares of the input samples
+ * and then store the result in a temporary variable, sumOfSquares. */
+ in = *pSrc++;
+ sumOfSquares += (in * in);
+
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+ /* Compute sum of all input values and then store the result in a temporary variable, sum. */
+ sum += in;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Compute Mean of squares of the input samples
+ * and then store the result in a temporary variable, meanOfSquares. */
+ meanOfSquares = (q31_t)(sumOfSquares / (q63_t)(blockSize - 1U));
+
+ /* Compute square of mean */
+ squareOfMean = (q31_t)((q63_t)sum * sum / (q63_t)(blockSize * (blockSize - 1U)));
+
+ /* mean of the squares minus the square of the mean. */
+ *pResult = (meanOfSquares - squareOfMean) >> 15;
+
+#endif /* #if defined (ARM_MATH_DSP) */
+}
+
+/**
+ * @} end of variance group
+ */
diff --git a/DSP/Source/StatisticsFunctions/arm_var_q31.c b/DSP/Source/StatisticsFunctions/arm_var_q31.c
new file mode 100644
index 0000000..526c6cd
--- /dev/null
+++ b/DSP/Source/StatisticsFunctions/arm_var_q31.c
@@ -0,0 +1,169 @@
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_var_q31.c
+ * Description: Variance of an array of Q31 type
+ *
+ * $Date: 27. January 2017
+ * $Revision: V.1.5.1
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupStats
+ */
+
+/**
+ * @addtogroup variance
+ * @{
+ */
+
+/**
+ * @brief Variance of the elements of a Q31 vector.
+ * @param[in] *pSrc points to the input vector
+ * @param[in] blockSize length of the input vector
+ * @param[out] *pResult variance value returned here
+ * @return none.
+ * @details
+ * <b>Scaling and Overflow Behavior:</b>
+ *
+ *\par
+ * The function is implemented using an internal 64-bit accumulator.
+ * The input is represented in 1.31 format, which is then downshifted by 8 bits
+ * which yields 1.23, and intermediate multiplication yields a 2.46 format.
+ * The accumulator maintains full precision of the intermediate multiplication results,
+ * but provides only a 16 guard bits.
+ * There is no saturation on intermediate additions.
+ * If the accumulator overflows it wraps around and distorts the result.
+ * In order to avoid overflows completely the input signal must be scaled down by
+ * log2(blockSize)-8 bits, as a total of blockSize additions are performed internally.
+ * After division, internal variables should be Q18.46
+ * Finally, the 18.46 accumulator is right shifted by 15 bits to yield a 1.31 format value.
+ *
+ */
+
+void arm_var_q31(
+ q31_t * pSrc,
+ uint32_t blockSize,
+ q31_t * pResult)
+{
+ q63_t sum = 0; /* Accumulator */
+ q63_t meanOfSquares, squareOfMean; /* square of mean and mean of square */
+ q31_t in; /* input value */
+ uint32_t blkCnt; /* loop counter */
+ q63_t sumOfSquares = 0; /* Accumulator */
+
+ if (blockSize == 1U)
+ {
+ *pResult = 0;
+ return;
+ }
+
+#if defined (ARM_MATH_DSP)
+ /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+ /*loop Unrolling */
+ blkCnt = blockSize >> 2U;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** a second loop below computes the remaining 1 to 3 samples. */
+ while (blkCnt > 0U)
+ {
+ /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+ /* Compute Sum of squares of the input samples
+ * and then store the result in a temporary variable, sum. */
+ in = *pSrc++ >> 8U;
+ sum += in;
+ sumOfSquares += ((q63_t) (in) * (in));
+ in = *pSrc++ >> 8U;
+ sum += in;
+ sumOfSquares += ((q63_t) (in) * (in));
+ in = *pSrc++ >> 8U;
+ sum += in;
+ sumOfSquares += ((q63_t) (in) * (in));
+ in = *pSrc++ >> 8U;
+ sum += in;
+ sumOfSquares += ((q63_t) (in) * (in));
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
+ ** No loop unrolling is used. */
+ blkCnt = blockSize % 0x4U;
+
+ while (blkCnt > 0U)
+ {
+ /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+ /* Compute Sum of squares of the input samples
+ * and then store the result in a temporary variable, sum. */
+ in = *pSrc++ >> 8U;
+ sum += in;
+ sumOfSquares += ((q63_t) (in) * (in));
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Compute Mean of squares of the input samples
+ * and then store the result in a temporary variable, meanOfSquares. */
+ meanOfSquares = sumOfSquares / (q63_t)(blockSize - 1U);
+
+#else
+ /* Run the below code for Cortex-M0 */
+
+ /* Loop over blockSize number of values */
+ blkCnt = blockSize;
+
+ while (blkCnt > 0U)
+ {
+ /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
+ /* Compute Sum of squares of the input samples
+ * and then store the result in a temporary variable, sumOfSquares. */
+ in = *pSrc++ >> 8U;
+ sumOfSquares += ((q63_t) (in) * (in));
+
+ /* C = (A[0] + A[1] + A[2] + ... + A[blockSize-1]) */
+ /* Compute sum of all input values and then store the result in a temporary variable, sum. */
+ sum += in;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* Compute Mean of squares of the input samples
+ * and then store the result in a temporary variable, meanOfSquares. */
+ meanOfSquares = sumOfSquares / (q63_t)(blockSize - 1U);
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+ /* Compute square of mean */
+ squareOfMean = sum * sum / (q63_t)(blockSize * (blockSize - 1U));
+
+ /* Compute standard deviation and then store the result to the destination */
+ *pResult = (meanOfSquares - squareOfMean) >> 15U;
+}
+
+/**
+ * @} end of variance group
+ */