summaryrefslogtreecommitdiff
path: root/DSP/Source/ComplexMathFunctions
diff options
context:
space:
mode:
authorAli Labbene <ali.labbene@st.com>2019-12-11 08:59:21 +0100
committerAli Labbene <ali.labbene@st.com>2019-12-16 16:35:24 +0100
commit9f95ff5b6ba01db09552b84a0ab79607060a2666 (patch)
tree8a6e0dda832555c692307869aed49d07ee7facfe /DSP/Source/ComplexMathFunctions
parent76177aa280494bb36d7a0bcbda1078d4db717020 (diff)
downloadst-cmsis-core-lowfat-9f95ff5b6ba01db09552b84a0ab79607060a2666.tar.gz
st-cmsis-core-lowfat-9f95ff5b6ba01db09552b84a0ab79607060a2666.tar.bz2
st-cmsis-core-lowfat-9f95ff5b6ba01db09552b84a0ab79607060a2666.zip
Official ARM version: v5.4.0
Add CMSIS V5.4.0, please refer to index.html available under \docs folder. Note: content of \CMSIS\Core\Include has been copied under \Include to keep the same structure used in existing projects, and thus avoid projects mass update Note: the following components have been removed from ARM original delivery (as not used in ST packages) - CMSIS_EW2018.pdf - .gitattributes - .gitignore - \Device - \CMSIS - \CoreValidation - \DAP - \Documentation - \DoxyGen - \Driver - \Pack - \RTOS\CMSIS_RTOS_Tutorial.pdf - \RTOS\RTX - \RTOS\Template - \RTOS2\RTX - \Utilities - All ARM/GCC projects files are deleted from \DSP, \RTOS and \RTOS2 Change-Id: Ia026c3f0f0d016627a4fb5a9032852c33d24b4d3
Diffstat (limited to 'DSP/Source/ComplexMathFunctions')
-rw-r--r--DSP/Source/ComplexMathFunctions/arm_cmplx_conj_f32.c171
-rw-r--r--DSP/Source/ComplexMathFunctions/arm_cmplx_conj_q15.c149
-rw-r--r--DSP/Source/ComplexMathFunctions/arm_cmplx_conj_q31.c169
-rw-r--r--DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_f32.c191
-rw-r--r--DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_q15.c177
-rw-r--r--DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_q31.c175
-rw-r--r--DSP/Source/ComplexMathFunctions/arm_cmplx_mag_f32.c153
-rw-r--r--DSP/Source/ComplexMathFunctions/arm_cmplx_mag_q15.c141
-rw-r--r--DSP/Source/ComplexMathFunctions/arm_cmplx_mag_q31.c173
-rw-r--r--DSP/Source/ComplexMathFunctions/arm_cmplx_mag_squared_f32.c204
-rw-r--r--DSP/Source/ComplexMathFunctions/arm_cmplx_mag_squared_q15.c136
-rw-r--r--DSP/Source/ComplexMathFunctions/arm_cmplx_mag_squared_q31.c149
-rw-r--r--DSP/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_f32.c196
-rw-r--r--DSP/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_q15.c181
-rw-r--r--DSP/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_q31.c314
-rw-r--r--DSP/Source/ComplexMathFunctions/arm_cmplx_mult_real_f32.c213
-rw-r--r--DSP/Source/ComplexMathFunctions/arm_cmplx_mult_real_q15.c191
-rw-r--r--DSP/Source/ComplexMathFunctions/arm_cmplx_mult_real_q31.c211
18 files changed, 3294 insertions, 0 deletions
diff --git a/DSP/Source/ComplexMathFunctions/arm_cmplx_conj_f32.c b/DSP/Source/ComplexMathFunctions/arm_cmplx_conj_f32.c
new file mode 100644
index 0000000..cfb6f1f
--- /dev/null
+++ b/DSP/Source/ComplexMathFunctions/arm_cmplx_conj_f32.c
@@ -0,0 +1,171 @@
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_cmplx_conj_f32.c
+ * Description: Floating-point complex conjugate
+ *
+ * $Date: 27. January 2017
+ * $Revision: V.1.5.1
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupCmplxMath
+ */
+
+/**
+ * @defgroup cmplx_conj Complex Conjugate
+ *
+ * Conjugates the elements of a complex data vector.
+ *
+ * The <code>pSrc</code> points to the source data and
+ * <code>pDst</code> points to where the result should be written.
+ * <code>numSamples</code> specifies the number of complex samples
+ * and the data in each array is stored in an interleaved fashion
+ * (real, imag, real, imag, ...).
+ * Each array has a total of <code>2*numSamples</code> values.
+ * The underlying algorithm is used:
+ *
+ * <pre>
+ * for(n=0; n<numSamples; n++) {
+ * pDst[(2*n)+0] = pSrc[(2*n)+0]; // real part
+ * pDst[(2*n)+1] = -pSrc[(2*n)+1]; // imag part
+ * }
+ * </pre>
+ *
+ * There are separate functions for floating-point, Q15, and Q31 data types.
+ */
+
+/**
+ * @addtogroup cmplx_conj
+ * @{
+ */
+
+/**
+ * @brief Floating-point complex conjugate.
+ *
+ * Copies the real part of every complex sample unchanged and writes the
+ * negated imaginary part. Both vectors are interleaved (real, imag, ...),
+ * so 2*numSamples values are read through pSrc and 2*numSamples values
+ * are written through pDst.
+ *
+ * @param *pSrc points to the input vector
+ * @param *pDst points to the output vector
+ * @param numSamples number of complex samples in each vector
+ * @return none.
+ */
+
+void arm_cmplx_conj_f32(
+ float32_t * pSrc,
+ float32_t * pDst,
+ uint32_t numSamples)
+{
+ uint32_t blkCnt; /* iterations left in the loop currently running */
+
+#if defined (ARM_MATH_DSP)
+
+ /* Unrolled path, compiled only when ARM_MATH_DSP is defined
+ ** (the original note names Cortex-M4 and Cortex-M3). */
+ float32_t inR1, inR2, inR3, inR4;
+ float32_t inI1, inI2, inI3, inI4;
+
+ /* Loop unrolling: each iteration handles four complex samples */
+ blkCnt = numSamples >> 2U;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** The loop after the #endif computes the remaining 1 to 3 samples. */
+ while (blkCnt > 0U)
+ {
+ /* C[0]+jC[1] = A[0]+ j (-1) A[1] */
+ /* Calculate Complex Conjugate and then store the results in the destination buffer.
+ ** The loads, negations and stores of the four samples are interleaved
+ ** below; the statement order is left exactly as written. */
+ /* read real input samples (even offsets 0, 2, 4, 6) */
+ inR1 = pSrc[0];
+ /* store real samples to destination unchanged */
+ pDst[0] = inR1;
+ inR2 = pSrc[2];
+ pDst[2] = inR2;
+ inR3 = pSrc[4];
+ pDst[4] = inR3;
+ inR4 = pSrc[6];
+ pDst[6] = inR4;
+
+ /* read imaginary input samples 1 and 2 (odd offsets) */
+ inI1 = pSrc[1];
+ inI2 = pSrc[3];
+
+ /* conjugate: negate imaginary part of sample 1 */
+ inI1 = -inI1;
+
+ /* read imaginary input sample 3 */
+ inI3 = pSrc[5];
+
+ /* conjugate: negate imaginary part of sample 2 */
+ inI2 = -inI2;
+
+ /* read imaginary input sample 4 */
+ inI4 = pSrc[7];
+
+ /* conjugate: negate imaginary part of sample 3 */
+ inI3 = -inI3;
+
+ /* store negated imaginary parts of samples 1 and 2 */
+ pDst[1] = inI1;
+ pDst[3] = inI2;
+
+ /* conjugate: negate imaginary part of sample 4 */
+ inI4 = -inI4;
+
+ /* store negated imaginary part of sample 3 */
+ pDst[5] = inI3;
+
+ /* increment source pointer by 8 to process next samples */
+ pSrc += 8U;
+
+ /* store negated imaginary part of sample 4
+ ** (pDst has not been advanced yet, so offset 7 is still this block) */
+ pDst[7] = inI4;
+
+ /* increment destination pointer by 8 to store next samples */
+ pDst += 8U;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* If numSamples is not a multiple of 4, the leftover samples are
+ ** handled one at a time by the loop after the #endif. */
+ blkCnt = numSamples % 0x4U;
+
+#else
+
+ /* Without ARM_MATH_DSP (the original note names Cortex-M0) every
+ ** sample goes through the one-at-a-time loop below. */
+ blkCnt = numSamples;
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+ while (blkCnt > 0U)
+ {
+ /* realOut + j (imagOut) = realIn + j (-1) imagIn */
+ /* Copy the real value, then write the negated imaginary value; both pointers advance by two per sample. */
+ *pDst++ = *pSrc++;
+ *pDst++ = -*pSrc++;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+}
+
+/**
+ * @} end of cmplx_conj group
+ */
diff --git a/DSP/Source/ComplexMathFunctions/arm_cmplx_conj_q15.c b/DSP/Source/ComplexMathFunctions/arm_cmplx_conj_q15.c
new file mode 100644
index 0000000..7950229
--- /dev/null
+++ b/DSP/Source/ComplexMathFunctions/arm_cmplx_conj_q15.c
@@ -0,0 +1,149 @@
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_cmplx_conj_q15.c
+ * Description: Q15 complex conjugate
+ *
+ * $Date: 27. January 2017
+ * $Revision: V.1.5.1
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupCmplxMath
+ */
+
+/**
+ * @addtogroup cmplx_conj
+ * @{
+ */
+
+/**
+ * @brief Q15 complex conjugate.
+ *
+ * Copies the real part of every complex sample unchanged and writes the
+ * negated imaginary part. Both vectors are interleaved (real, imag, ...).
+ *
+ * @param *pSrc points to the input vector
+ * @param *pDst points to the output vector
+ * @param numSamples number of complex samples in each vector
+ * @return none.
+ *
+ * <b>Scaling and Overflow Behavior:</b>
+ * \par
+ * The function uses saturating arithmetic.
+ * The Q15 value -1 (0x8000) will be saturated to the maximum allowable positive value 0x7FFF.
+ * The non-DSP loop does this with an explicit compare; the DSP loops rely on
+ * the __QASX / __QSAX / __SSAT intrinsics, which are defined outside this file.
+ */
+
+void arm_cmplx_conj_q15(
+ q15_t * pSrc,
+ q15_t * pDst,
+ uint32_t numSamples)
+{
+
+#if defined (ARM_MATH_DSP)
+
+ /* Path compiled only when ARM_MATH_DSP is defined
+ ** (the original note names Cortex-M4 and Cortex-M3). */
+ uint32_t blkCnt; /* iterations left in the loop currently running */
+ q31_t in1, in2, in3, in4; /* four 32-bit words, each loaded through __SIMD32 */
+ q31_t zero = 0; /* first operand handed to __QASX / __QSAX */
+
+ /* Loop unrolling: each iteration handles four complex samples */
+ blkCnt = numSamples >> 2U;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** a second loop below computes the remaining 1 to 3 samples. */
+ while (blkCnt > 0U)
+ {
+ /* C[0]+jC[1] = A[0]+ j (-1) A[1] */
+ /* Calculate Complex Conjugate and then store the results in the destination buffer.
+ ** NOTE(review): __SIMD32 is defined outside this file; presumably each
+ ** load below fetches one (real, imag) q15 pair as a 32-bit word and
+ ** advances pSrc past it - confirm against arm_math.h. */
+ in1 = *__SIMD32(pSrc)++;
+ in2 = *__SIMD32(pSrc)++;
+ in3 = *__SIMD32(pSrc)++;
+ in4 = *__SIMD32(pSrc)++;
+
+#ifndef ARM_MATH_BIG_ENDIAN
+
+ in1 = __QASX(zero, in1); /* NOTE(review): presumably saturating-negates the imaginary half and leaves the halves exchanged - confirm against the intrinsic reference */
+ in2 = __QASX(zero, in2);
+ in3 = __QASX(zero, in3);
+ in4 = __QASX(zero, in4);
+
+#else
+
+ in1 = __QSAX(zero, in1); /* big-endian build uses the mirrored intrinsic (halves are in the opposite positions) */
+ in2 = __QSAX(zero, in2);
+ in3 = __QSAX(zero, in3);
+ in4 = __QSAX(zero, in4);
+
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
+
+ in1 = ((uint32_t) in1 >> 16) | ((uint32_t) in1 << 16); /* exchange the upper and lower 16-bit halves of the word */
+ in2 = ((uint32_t) in2 >> 16) | ((uint32_t) in2 << 16);
+ in3 = ((uint32_t) in3 >> 16) | ((uint32_t) in3 << 16);
+ in4 = ((uint32_t) in4 >> 16) | ((uint32_t) in4 << 16);
+
+ *__SIMD32(pDst)++ = in1; /* write the four processed words back through the same 32-bit access macro */
+ *__SIMD32(pDst)++ = in2;
+ *__SIMD32(pDst)++ = in3;
+ *__SIMD32(pDst)++ = in4;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* If the numSamples is not a multiple of 4, compute any remaining output samples here.
+ ** No loop unrolling is used. */
+ blkCnt = numSamples % 0x4U;
+
+ while (blkCnt > 0U)
+ {
+ /* C[0]+jC[1] = A[0]+ j (-1) A[1] */
+ /* Copy the real value, then write the negated imaginary value passed through __SSAT(..., 16). */
+ *pDst++ = *pSrc++;
+ *pDst++ = __SSAT(-*pSrc++, 16);
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+#else
+
+ q15_t in; /* imaginary input value of the current sample */
+
+ /* Without ARM_MATH_DSP (the original note names Cortex-M0):
+ ** plain C loop that counts numSamples itself down to zero. */
+
+ while (numSamples > 0U)
+ {
+ /* realOut + j (imagOut) = realIn+ j (-1) imagIn */
+ /* Copy the real value; negate the imaginary value, mapping 0x8000 to 0x7fff because its negation is not representable in q15. */
+ *pDst++ = *pSrc++;
+ in = *pSrc++;
+ *pDst++ = (in == (q15_t) 0x8000) ? 0x7fff : -in;
+
+ /* Decrement the loop counter */
+ numSamples--;
+ }
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+}
+
+/**
+ * @} end of cmplx_conj group
+ */
diff --git a/DSP/Source/ComplexMathFunctions/arm_cmplx_conj_q31.c b/DSP/Source/ComplexMathFunctions/arm_cmplx_conj_q31.c
new file mode 100644
index 0000000..709ce0e
--- /dev/null
+++ b/DSP/Source/ComplexMathFunctions/arm_cmplx_conj_q31.c
@@ -0,0 +1,169 @@
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_cmplx_conj_q31.c
+ * Description: Q31 complex conjugate
+ *
+ * $Date: 27. January 2017
+ * $Revision: V.1.5.1
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupCmplxMath
+ */
+
+/**
+ * @addtogroup cmplx_conj
+ * @{
+ */
+
+/**
+ * @brief Q31 complex conjugate.
+ *
+ * Copies the real part of every complex sample unchanged and writes the
+ * negated imaginary part. Both vectors are interleaved (real, imag, ...).
+ *
+ * @param *pSrc points to the input vector
+ * @param *pDst points to the output vector
+ * @param numSamples number of complex samples in each vector
+ * @return none.
+ *
+ * <b>Scaling and Overflow Behavior:</b>
+ * \par
+ * The function uses saturating arithmetic.
+ * The Q31 value -1 (0x80000000) will be saturated to the maximum allowable positive value 0x7FFFFFFF.
+ * The one-at-a-time loop does this with an explicit INT32_MIN check; the
+ * unrolled loop negates through __QSUB(0, x), which is defined outside this file.
+ */
+
+void arm_cmplx_conj_q31(
+ q31_t * pSrc,
+ q31_t * pDst,
+ uint32_t numSamples)
+{
+ uint32_t blkCnt; /* iterations left in the loop currently running */
+ q31_t in; /* imaginary input value used by the one-at-a-time loop */
+
+#if defined (ARM_MATH_DSP)
+
+ /* Unrolled path, compiled only when ARM_MATH_DSP is defined
+ ** (the original note names Cortex-M4 and Cortex-M3). */
+ q31_t inR1, inR2, inR3, inR4; /* Temporary real variables */
+ q31_t inI1, inI2, inI3, inI4; /* Temporary imaginary variables */
+
+ /* Loop unrolling: each iteration handles four complex samples */
+ blkCnt = numSamples >> 2U;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** The loop after the #endif computes the remaining 1 to 3 samples. */
+ while (blkCnt > 0U)
+ {
+ /* C[0]+jC[1] = A[0]+ j (-1) A[1] */
+ /* Calculate Complex Conjugate and then store the results in the destination buffer.
+ ** Loads, negations and stores of the four samples are interleaved
+ ** below; the statement order is left exactly as written. */
+ /* Saturated to 0x7fffffff if the input is -1(0x80000000)
+ ** NOTE(review): this relies on __QSUB being a saturating subtract - confirm against the intrinsic reference. */
+ /* read real input sample 1 */
+ inR1 = pSrc[0];
+ /* store real sample 1 unchanged */
+ pDst[0] = inR1;
+
+ /* read imaginary input sample 1 */
+ inI1 = pSrc[1];
+
+ /* read real input sample 2 */
+ inR2 = pSrc[2];
+ /* store real sample 2 unchanged */
+ pDst[2] = inR2;
+
+ /* read imaginary input sample 2 */
+ inI2 = pSrc[3];
+
+ /* negate imaginary input sample 1 */
+ inI1 = __QSUB(0, inI1);
+
+ /* read real input sample 3 */
+ inR3 = pSrc[4];
+ /* store real sample 3 unchanged */
+ pDst[4] = inR3;
+
+ /* read imaginary input sample 3 */
+ inI3 = pSrc[5];
+
+ /* negate imaginary input sample 2 */
+ inI2 = __QSUB(0, inI2);
+
+ /* read real input sample 4 */
+ inR4 = pSrc[6];
+ /* store real sample 4 unchanged */
+ pDst[6] = inR4;
+
+ /* negate imaginary input sample 3 */
+ inI3 = __QSUB(0, inI3);
+
+ /* read imaginary input sample 4 */
+ inI4 = pSrc[7];
+
+ /* store negated imaginary sample 1 */
+ pDst[1] = inI1;
+
+ /* negate imaginary input sample 4 */
+ inI4 = __QSUB(0, inI4);
+
+ /* store negated imaginary sample 2 */
+ pDst[3] = inI2;
+
+ /* increment source pointer by 8 to process next samples */
+ pSrc += 8U;
+
+ /* store negated imaginary samples 3 and 4
+ ** (pDst has not been advanced yet, so offsets 5 and 7 are still this block) */
+ pDst[5] = inI3;
+ pDst[7] = inI4;
+
+ /* increment destination pointer by 8 to process next samples */
+ pDst += 8U;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* If numSamples is not a multiple of 4, the leftover samples are
+ ** handled one at a time by the loop after the #endif. */
+ blkCnt = numSamples % 0x4U;
+
+#else
+
+ /* Without ARM_MATH_DSP (the original note names Cortex-M0) every
+ ** sample goes through the one-at-a-time loop below. */
+ blkCnt = numSamples;
+
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+ while (blkCnt > 0U)
+ {
+ /* C[0]+jC[1] = A[0]+ j (-1) A[1] */
+ /* Copy the real value, then write the negated imaginary value. */
+ /* INT32_MIN has no representable negation, so it is mapped to INT32_MAX instead */
+ *pDst++ = *pSrc++;
+ in = *pSrc++;
+ *pDst++ = (in == INT32_MIN) ? INT32_MAX : -in;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+}
+
+/**
+ * @} end of cmplx_conj group
+ */
diff --git a/DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_f32.c b/DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_f32.c
new file mode 100644
index 0000000..bfc352b
--- /dev/null
+++ b/DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_f32.c
@@ -0,0 +1,191 @@
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_cmplx_dot_prod_f32.c
+ * Description: Floating-point complex dot product
+ *
+ * $Date: 27. January 2017
+ * $Revision: V.1.5.1
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupCmplxMath
+ */
+
+/**
+ * @defgroup cmplx_dot_prod Complex Dot Product
+ *
+ * Computes the dot product of two complex vectors.
+ * The vectors are multiplied element-by-element and then summed.
+ *
+ * The <code>pSrcA</code> points to the first complex input vector and
+ * <code>pSrcB</code> points to the second complex input vector.
+ * <code>numSamples</code> specifies the number of complex samples
+ * and the data in each array is stored in an interleaved fashion
+ * (real, imag, real, imag, ...).
+ * Each array has a total of <code>2*numSamples</code> values.
+ *
+ * The underlying algorithm is used:
+ * <pre>
+ * realResult=0;
+ * imagResult=0;
+ * for(n=0; n<numSamples; n++) {
+ * realResult += pSrcA[(2*n)+0]*pSrcB[(2*n)+0] - pSrcA[(2*n)+1]*pSrcB[(2*n)+1];
+ * imagResult += pSrcA[(2*n)+0]*pSrcB[(2*n)+1] + pSrcA[(2*n)+1]*pSrcB[(2*n)+0];
+ * }
+ * </pre>
+ *
+ * There are separate functions for floating-point, Q15, and Q31 data types.
+ */
+
+/**
+ * @addtogroup cmplx_dot_prod
+ * @{
+ */
+
+/**
+ * @brief Floating-point complex dot product
+ *
+ * Accumulates the complex products A[n] * B[n] over all numSamples
+ * interleaved (real, imag) samples and writes the two sums out once,
+ * after the last sample.
+ *
+ * @param *pSrcA points to the first input vector
+ * @param *pSrcB points to the second input vector
+ * @param numSamples number of complex samples in each vector
+ * @param *realResult real part of the result returned here
+ * @param *imagResult imaginary part of the result returned here
+ * @return none.
+ */
+
+void arm_cmplx_dot_prod_f32(
+  float32_t * pSrcA,
+  float32_t * pSrcB,
+  uint32_t numSamples,
+  float32_t * realResult,
+  float32_t * imagResult)
+{
+  float32_t accRe = 0.0f;        /* running real sum */
+  float32_t accIm = 0.0f;        /* running imaginary sum */
+  float32_t aRe, aIm, bRe, bIm;  /* current sample of A and of B */
+  uint32_t remaining;            /* samples left for the one-at-a-time loop */
+
+#if defined (ARM_MATH_DSP)
+
+  uint32_t groups;               /* blocks of four samples still to process */
+
+  /* Unrolled by four. Every product is added to (or subtracted from) its
+  ** accumulator in its own statement, in sample order, so the rounding
+  ** sequence of each sum is the same as in a plain per-sample loop. */
+  for (groups = numSamples >> 2U; groups > 0U; groups--)
+  {
+    /* sample 0 of the block */
+    aRe = pSrcA[0];
+    aIm = pSrcA[1];
+    bRe = pSrcB[0];
+    bIm = pSrcB[1];
+
+    accRe += aRe * bRe;
+    accIm += aRe * bIm;
+    accRe -= aIm * bIm;
+    accIm += aIm * bRe;
+
+    /* sample 1 of the block */
+    aRe = pSrcA[2];
+    aIm = pSrcA[3];
+    bRe = pSrcB[2];
+    bIm = pSrcB[3];
+
+    accRe += aRe * bRe;
+    accIm += aRe * bIm;
+    accRe -= aIm * bIm;
+    accIm += aIm * bRe;
+
+    /* sample 2 of the block */
+    aRe = pSrcA[4];
+    aIm = pSrcA[5];
+    bRe = pSrcB[4];
+    bIm = pSrcB[5];
+
+    accRe += aRe * bRe;
+    accIm += aRe * bIm;
+    accRe -= aIm * bIm;
+    accIm += aIm * bRe;
+
+    /* sample 3 of the block */
+    aRe = pSrcA[6];
+    aIm = pSrcA[7];
+    bRe = pSrcB[6];
+    bIm = pSrcB[7];
+
+    accRe += aRe * bRe;
+    accIm += aRe * bIm;
+    accRe -= aIm * bIm;
+    accIm += aIm * bRe;
+
+    /* step both inputs past the four complex samples just consumed */
+    pSrcA += 8;
+    pSrcB += 8;
+  }
+
+  /* 0 to 3 samples are left when numSamples is not a multiple of 4 */
+  remaining = numSamples & 0x3U;
+
+#else
+
+  /* No unrolling in this configuration: every sample takes the loop below */
+  remaining = numSamples;
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+  for (; remaining > 0U; remaining--)
+  {
+    aRe = *pSrcA++;
+    aIm = *pSrcA++;
+    bRe = *pSrcB++;
+    bIm = *pSrcB++;
+
+    accRe += aRe * bRe;
+    accIm += aRe * bIm;
+    accRe -= aIm * bIm;
+    accIm += aIm * bRe;
+  }
+
+  /* Store the real and imaginary results in the destination buffers */
+  *realResult = accRe;
+  *imagResult = accIm;
+}
+
+/**
+ * @} end of cmplx_dot_prod group
+ */
diff --git a/DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_q15.c b/DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_q15.c
new file mode 100644
index 0000000..9e23a01
--- /dev/null
+++ b/DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_q15.c
@@ -0,0 +1,177 @@
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_cmplx_dot_prod_q15.c
+ * Description: Processing function for the Q15 Complex Dot product
+ *
+ * $Date: 27. January 2017
+ * $Revision: V.1.5.1
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupCmplxMath
+ */
+
+/**
+ * @addtogroup cmplx_dot_prod
+ * @{
+ */
+
+/**
+ * @brief Q15 complex dot product
+ *
+ * Accumulates the complex products A[n] * B[n] over all numSamples
+ * interleaved (real, imag) samples.
+ *
+ * @param *pSrcA points to the first input vector
+ * @param *pSrcB points to the second input vector
+ * @param numSamples number of complex samples in each vector
+ * @param *realResult real part of the result returned here
+ * @param *imagResult imaginary part of the result returned here
+ * @return none.
+ *
+ * <b>Scaling and Overflow Behavior:</b>
+ * \par
+ * The function is implemented using an internal 64-bit accumulator.
+ * The intermediate 1.15 by 1.15 multiplications are performed with full precision and yield a 2.30 result.
+ * These are accumulated in a 64-bit accumulator with 34.30 precision.
+ * As a final step, the accumulators are converted to 8.24 format.
+ * The return results <code>realResult</code> and <code>imagResult</code> are in 8.24 format.
+ */
+
+void arm_cmplx_dot_prod_q15(
+  q15_t * pSrcA,
+  q15_t * pSrcB,
+  uint32_t numSamples,
+  q31_t * realResult,
+  q31_t * imagResult)
+{
+  q63_t real_sum = 0, imag_sum = 0;   /* 64-bit accumulators (34.30) */
+  q15_t a0, b0, c0, d0;               /* A real, A imag, B real, B imag */
+  uint32_t blkCnt;                    /* iterations left in the loop currently running */
+
+#if defined (ARM_MATH_DSP)
+
+  /* Loop unrolling: each iteration handles four complex samples */
+  blkCnt = numSamples >> 2U;
+
+  /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+  ** The loop after the #endif computes the remaining 1 to 3 samples. */
+  while (blkCnt > 0U)
+  {
+    a0 = *pSrcA++;
+    b0 = *pSrcA++;
+    c0 = *pSrcB++;
+    d0 = *pSrcB++;
+
+    real_sum += (q31_t)a0 * c0;
+    imag_sum += (q31_t)a0 * d0;
+    real_sum -= (q31_t)b0 * d0;
+    imag_sum += (q31_t)b0 * c0;
+
+    a0 = *pSrcA++;
+    b0 = *pSrcA++;
+    c0 = *pSrcB++;
+    d0 = *pSrcB++;
+
+    real_sum += (q31_t)a0 * c0;
+    imag_sum += (q31_t)a0 * d0;
+    real_sum -= (q31_t)b0 * d0;
+    imag_sum += (q31_t)b0 * c0;
+
+    a0 = *pSrcA++;
+    b0 = *pSrcA++;
+    c0 = *pSrcB++;
+    d0 = *pSrcB++;
+
+    real_sum += (q31_t)a0 * c0;
+    imag_sum += (q31_t)a0 * d0;
+    real_sum -= (q31_t)b0 * d0;
+    imag_sum += (q31_t)b0 * c0;
+
+    a0 = *pSrcA++;
+    b0 = *pSrcA++;
+    c0 = *pSrcB++;
+    d0 = *pSrcB++;
+
+    real_sum += (q31_t)a0 * c0;
+    imag_sum += (q31_t)a0 * d0;
+    real_sum -= (q31_t)b0 * d0;
+    imag_sum += (q31_t)b0 * c0;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* 0 to 3 samples are left when numSamples is not a multiple of 4 */
+  blkCnt = numSamples % 0x4U;
+
+#else
+
+  /* No unrolling in this configuration: every sample takes the loop below */
+  blkCnt = numSamples;
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+  /* One loop serves both configurations, so every q15 product is widened
+  ** to q31_t explicitly instead of depending on the width of int. */
+  while (blkCnt > 0U)
+  {
+    a0 = *pSrcA++;
+    b0 = *pSrcA++;
+    c0 = *pSrcB++;
+    d0 = *pSrcB++;
+
+    real_sum += (q31_t)a0 * c0;
+    imag_sum += (q31_t)a0 * d0;
+    real_sum -= (q31_t)b0 * d0;
+    imag_sum += (q31_t)b0 * c0;
+
+    /* Decrement the loop counter */
+    blkCnt--;
+  }
+
+  /* Store the real and imaginary results in 8.24 format */
+  /* Convert real data in 34.30 to 8.24 by 6 right shifts */
+  *realResult = (q31_t) (real_sum >> 6);
+  /* Convert imaginary data in 34.30 to 8.24 by 6 right shifts */
+  *imagResult = (q31_t) (imag_sum >> 6);
+}
+
+/**
+ * @} end of cmplx_dot_prod group
+ */
diff --git a/DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_q31.c b/DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_q31.c
new file mode 100644
index 0000000..6eb5b6e
--- /dev/null
+++ b/DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_q31.c
@@ -0,0 +1,175 @@
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_cmplx_dot_prod_q31.c
+ * Description: Q31 complex dot product
+ *
+ * $Date: 27. January 2017
+ * $Revision: V.1.5.1
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupCmplxMath
+ */
+
+/**
+ * @addtogroup cmplx_dot_prod
+ * @{
+ */
+
+/**
+ * @brief Q31 complex dot product
+ *
+ * Accumulates the complex products A[n] * B[n] over all numSamples
+ * interleaved (real, imag) samples and writes the two 64-bit sums out
+ * once, after the last sample.
+ *
+ * @param *pSrcA points to the first input vector
+ * @param *pSrcB points to the second input vector
+ * @param numSamples number of complex samples in each vector
+ * @param *realResult real part of the result returned here
+ * @param *imagResult imaginary part of the result returned here
+ * @return none.
+ *
+ * <b>Scaling and Overflow Behavior:</b>
+ * \par
+ * The function is implemented using an internal 64-bit accumulator.
+ * The intermediate 1.31 by 1.31 multiplications are performed with 64-bit precision and then shifted to 16.48 format.
+ * The internal real and imaginary accumulators are in 16.48 format and provide 15 guard bits.
+ * Additions are nonsaturating and no overflow will occur as long as <code>numSamples</code> is less than 32768.
+ * The return results <code>realResult</code> and <code>imagResult</code> are in 16.48 format.
+ * Input down scaling is not required.
+ */
+
+void arm_cmplx_dot_prod_q31(
+  q31_t * pSrcA,
+  q31_t * pSrcB,
+  uint32_t numSamples,
+  q63_t * realResult,
+  q63_t * imagResult)
+{
+  q63_t accRe = 0;           /* running real sum, 16.48 */
+  q63_t accIm = 0;           /* running imaginary sum, 16.48 */
+  q31_t aRe, aIm, bRe, bIm;  /* current sample of A and of B */
+  uint32_t remaining;        /* samples left for the one-at-a-time loop */
+
+#if defined (ARM_MATH_DSP)
+
+  uint32_t groups;           /* blocks of four samples still to process */
+
+  /* Unrolled by four. Each 64-bit product is shifted right by 14 on its
+  ** own before it is accumulated, exactly one term per statement. */
+  for (groups = numSamples >> 2U; groups > 0U; groups--)
+  {
+    /* sample 0 of the block */
+    aRe = pSrcA[0];
+    aIm = pSrcA[1];
+    bRe = pSrcB[0];
+    bIm = pSrcB[1];
+
+    accRe += ((q63_t)aRe * bRe) >> 14;
+    accIm += ((q63_t)aRe * bIm) >> 14;
+    accRe -= ((q63_t)aIm * bIm) >> 14;
+    accIm += ((q63_t)aIm * bRe) >> 14;
+
+    /* sample 1 of the block */
+    aRe = pSrcA[2];
+    aIm = pSrcA[3];
+    bRe = pSrcB[2];
+    bIm = pSrcB[3];
+
+    accRe += ((q63_t)aRe * bRe) >> 14;
+    accIm += ((q63_t)aRe * bIm) >> 14;
+    accRe -= ((q63_t)aIm * bIm) >> 14;
+    accIm += ((q63_t)aIm * bRe) >> 14;
+
+    /* sample 2 of the block */
+    aRe = pSrcA[4];
+    aIm = pSrcA[5];
+    bRe = pSrcB[4];
+    bIm = pSrcB[5];
+
+    accRe += ((q63_t)aRe * bRe) >> 14;
+    accIm += ((q63_t)aRe * bIm) >> 14;
+    accRe -= ((q63_t)aIm * bIm) >> 14;
+    accIm += ((q63_t)aIm * bRe) >> 14;
+
+    /* sample 3 of the block */
+    aRe = pSrcA[6];
+    aIm = pSrcA[7];
+    bRe = pSrcB[6];
+    bIm = pSrcB[7];
+
+    accRe += ((q63_t)aRe * bRe) >> 14;
+    accIm += ((q63_t)aRe * bIm) >> 14;
+    accRe -= ((q63_t)aIm * bIm) >> 14;
+    accIm += ((q63_t)aIm * bRe) >> 14;
+
+    /* step both inputs past the four complex samples just consumed */
+    pSrcA += 8;
+    pSrcB += 8;
+  }
+
+  /* 0 to 3 samples are left when numSamples is not a multiple of 4 */
+  remaining = numSamples % 0x4U;
+
+#else
+
+  /* No unrolling in this configuration: every sample takes the loop below */
+  remaining = numSamples;
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+  for (; remaining > 0U; remaining--)
+  {
+    aRe = *pSrcA++;
+    aIm = *pSrcA++;
+    bRe = *pSrcB++;
+    bIm = *pSrcB++;
+
+    accRe += ((q63_t)aRe * bRe) >> 14;
+    accIm += ((q63_t)aRe * bIm) >> 14;
+    accRe -= ((q63_t)aIm * bIm) >> 14;
+    accIm += ((q63_t)aIm * bRe) >> 14;
+  }
+
+  /* Store the real and imaginary results in 16.48 format */
+  *realResult = accRe;
+  *imagResult = accIm;
+}
+
+/**
+ * @} end of cmplx_dot_prod group
+ */
diff --git a/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_f32.c b/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_f32.c
new file mode 100644
index 0000000..95aaf1e
--- /dev/null
+++ b/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_f32.c
@@ -0,0 +1,153 @@
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_cmplx_mag_f32.c
+ * Description: Floating-point complex magnitude
+ *
+ * $Date: 27. January 2017
+ * $Revision: V.1.5.1
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupCmplxMath
+ */
+
+/**
+ * @defgroup cmplx_mag Complex Magnitude
+ *
+ * Computes the magnitude of the elements of a complex data vector.
+ *
+ * The <code>pSrc</code> points to the source data and
+ * <code>pDst</code> points to where the result should be written.
+ * <code>numSamples</code> specifies the number of complex samples
+ * in the input array and the data is stored in an interleaved fashion
+ * (real, imag, real, imag, ...).
+ * The input array has a total of <code>2*numSamples</code> values;
+ * the output array has a total of <code>numSamples</code> values.
+ * The underlying algorithm is:
+ *
+ * <pre>
+ * for(n=0; n<numSamples; n++) {
+ * pDst[n] = sqrt(pSrc[(2*n)+0]^2 + pSrc[(2*n)+1]^2);
+ * }
+ * </pre>
+ *
+ * There are separate functions for floating-point, Q15, and Q31 data types.
+ */
+
+/**
+ * @addtogroup cmplx_mag
+ * @{
+ */
+/**
+ * @brief Floating-point complex magnitude: one sqrt(real^2 + imag^2) per complex input sample.
+ * @param[in] *pSrc points to complex input buffer; values are read as interleaved (real, imag) pairs
+ * @param[out] *pDst points to real output buffer; one value is produced per complex sample
+ * @param[in] numSamples number of complex samples in the input vector
+ * @return none.
+ * Each output is produced by arm_sqrt_f32(), which is handed the sum of squares and the current pDst position.
+ */
+
+
+void arm_cmplx_mag_f32(
+ float32_t * pSrc,
+ float32_t * pDst,
+ uint32_t numSamples)
+{
+ float32_t realIn, imagIn; /* Real and imaginary parts of the sample currently being processed */
+
+#if defined (ARM_MATH_DSP)
+
+ /* Unrolled implementation: compiled only when ARM_MATH_DSP is defined */
+ uint32_t blkCnt; /* loop counter */
+
+ /* Loop unrolling: blkCnt is the number of complete groups of 4 complex samples */
+ blkCnt = numSamples >> 2U;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** A second loop below computes the remaining 0 to 3 samples. */
+ while (blkCnt > 0U)
+ {
+
+ /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
+ realIn = *pSrc++;
+ imagIn = *pSrc++;
+ /* Pass the sum of squares to arm_sqrt_f32 together with the current output position. */
+ arm_sqrt_f32((realIn * realIn) + (imagIn * imagIn), pDst++);
+
+ realIn = *pSrc++;
+ imagIn = *pSrc++;
+ arm_sqrt_f32((realIn * realIn) + (imagIn * imagIn), pDst++);
+
+ realIn = *pSrc++;
+ imagIn = *pSrc++;
+ arm_sqrt_f32((realIn * realIn) + (imagIn * imagIn), pDst++);
+
+ realIn = *pSrc++;
+ imagIn = *pSrc++;
+ arm_sqrt_f32((realIn * realIn) + (imagIn * imagIn), pDst++);
+
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* If numSamples is not a multiple of 4, compute the remaining output samples here.
+ ** No loop unrolling is used. */
+ blkCnt = numSamples % 0x4U;
+
+ while (blkCnt > 0U)
+ {
+ /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
+ realIn = *pSrc++;
+ imagIn = *pSrc++;
+ /* Pass the sum of squares to arm_sqrt_f32 together with the current output position. */
+ arm_sqrt_f32((realIn * realIn) + (imagIn * imagIn), pDst++);
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+#else
+
+ /* Plain one-sample-per-iteration implementation: compiled when ARM_MATH_DSP is not defined */
+
+ while (numSamples > 0U)
+ {
+ /* out = sqrt((real * real) + (imag * imag)) */
+ realIn = *pSrc++;
+ imagIn = *pSrc++;
+ /* Pass the sum of squares to arm_sqrt_f32 together with the current output position. */
+ arm_sqrt_f32((realIn * realIn) + (imagIn * imagIn), pDst++);
+
+ /* Decrement the loop counter (the numSamples parameter itself is counted down) */
+ numSamples--;
+ }
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+}
+
+/**
+ * @} end of cmplx_mag group
+ */
diff --git a/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_q15.c b/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_q15.c
new file mode 100644
index 0000000..03d9b2a
--- /dev/null
+++ b/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_q15.c
@@ -0,0 +1,141 @@
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_cmplx_mag_q15.c
+ * Description: Q15 complex magnitude
+ *
+ * $Date: 27. January 2017
+ * $Revision: V.1.5.1
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupCmplxMath
+ */
+
+/**
+ * @addtogroup cmplx_mag
+ * @{
+ */
+
+
+/**
+ * @brief Q15 complex magnitude
+ * @param[in] *pSrc points to the complex input vector (interleaved real, imag values)
+ * @param[out] *pDst points to the real output vector (one value per complex sample)
+ * @param[in] numSamples number of complex samples in the input vector
+ * @return none.
+ *
+ * <b>Scaling and Overflow Behavior:</b>
+ * \par
+ * The function implements 1.15 by 1.15 multiplications and finally output is converted into 2.14 format.
+ */
+
+void arm_cmplx_mag_q15(
+ q15_t * pSrc,
+ q15_t * pDst,
+ uint32_t numSamples)
+{
+ q31_t acc0, acc1; /* Accumulators */
+
+#if defined (ARM_MATH_DSP)
+
+ /* Unrolled implementation: compiled only when ARM_MATH_DSP is defined */
+ uint32_t blkCnt; /* loop counter */
+ q31_t in1, in2, in3, in4; /* One 32-bit word per complex sample, loaded through __SIMD32 */
+ q31_t acc2, acc3; /* Additional accumulators for the 3rd and 4th sample of a group */
+
+
+ /* Loop unrolling: blkCnt is the number of complete groups of 4 complex samples */
+ blkCnt = numSamples >> 2U;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** A second loop below computes the remaining 0 to 3 samples. */
+ while (blkCnt > 0U)
+ {
+
+ /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]); presumably each __SIMD32 load fetches one packed (real, imag) pair and advances pSrc past it - confirm against the macro definition */
+ in1 = *__SIMD32(pSrc)++;
+ in2 = *__SIMD32(pSrc)++;
+ in3 = *__SIMD32(pSrc)++;
+ in4 = *__SIMD32(pSrc)++;
+
+ acc0 = __SMUAD(in1, in1); /* NOTE(review): expected to yield real*real + imag*imag of the packed pair - confirm intrinsic semantics and its behaviour when both halves are -32768 */
+ acc1 = __SMUAD(in2, in2);
+ acc2 = __SMUAD(in3, in3);
+ acc3 = __SMUAD(in4, in4);
+
+ /* Shift each sum right by 17, narrow to q15_t and take the square root; result is stored in 2.14 format in the destination buffer. */
+ arm_sqrt_q15((q15_t) ((acc0) >> 17), pDst++);
+ arm_sqrt_q15((q15_t) ((acc1) >> 17), pDst++);
+ arm_sqrt_q15((q15_t) ((acc2) >> 17), pDst++);
+ arm_sqrt_q15((q15_t) ((acc3) >> 17), pDst++);
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* If numSamples is not a multiple of 4, compute the remaining output samples here.
+ ** No loop unrolling is used. */
+ blkCnt = numSamples % 0x4U;
+
+ while (blkCnt > 0U)
+ {
+ /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
+ in1 = *__SIMD32(pSrc)++;
+ acc0 = __SMUAD(in1, in1);
+
+ /* store the result in 2.14 format in the destination buffer. */
+ arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++);
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+#else
+
+ /* Plain one-sample-per-iteration implementation: compiled when ARM_MATH_DSP is not defined */
+ q15_t real, imag; /* Real and imaginary parts of the current input sample */
+
+ while (numSamples > 0U)
+ {
+ /* out = sqrt(real * real + imag * imag) */
+ real = *pSrc++;
+ imag = *pSrc++;
+
+ acc0 = (real * real);
+ acc1 = (imag * imag);
+
+ /* The two squares are added as q63_t, shifted right by 17 and narrowed to q15_t; result is stored in 2.14 format in the destination buffer. */
+ arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++);
+
+ /* Decrement the loop counter (the numSamples parameter itself is counted down) */
+ numSamples--;
+ }
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+}
+
+/**
+ * @} end of cmplx_mag group
+ */
diff --git a/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_q31.c b/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_q31.c
new file mode 100644
index 0000000..830ecb9
--- /dev/null
+++ b/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_q31.c
@@ -0,0 +1,173 @@
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_cmplx_mag_q31.c
+ * Description: Q31 complex magnitude
+ *
+ * $Date: 27. January 2017
+ * $Revision: V.1.5.1
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupCmplxMath
+ */
+
+/**
+ * @addtogroup cmplx_mag
+ * @{
+ */
+
+/**
+ * @brief Q31 complex magnitude
+ * @param[in] *pSrc points to the complex input vector (interleaved real, imag values)
+ * @param[out] *pDst points to the real output vector (one value per complex sample)
+ * @param[in] numSamples number of complex samples in the input vector
+ * @return none.
+ *
+ * <b>Scaling and Overflow Behavior:</b>
+ * \par
+ * The function implements 1.31 by 1.31 multiplications and finally output is converted into 2.30 format.
+ * Input down scaling is not required.
+ */
+
+void arm_cmplx_mag_q31(
+ q31_t * pSrc,
+ q31_t * pDst,
+ uint32_t numSamples)
+{
+ q31_t real, imag; /* Real and imaginary parts used by the one-sample-at-a-time loop */
+ q31_t acc0, acc1; /* Scaled squares of real and imag in the one-sample-at-a-time loop */
+ uint32_t blkCnt; /* loop counter */
+
+#if defined (ARM_MATH_DSP)
+
+ /* Unrolled implementation: compiled only when ARM_MATH_DSP is defined */
+ q31_t real1, real2, imag1, imag2; /* Inputs for two complex samples at a time */
+ q31_t out1, out2, out3, out4; /* Scaled squares, then per-sample sums (out1 and out3) */
+ q63_t mul1, mul2, mul3, mul4; /* 64-bit squares of the inputs */
+
+
+ /* Loop unrolling: blkCnt is the number of complete groups of 4 complex samples */
+ blkCnt = numSamples >> 2U;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** The shared loop after the #endif computes the remaining 0 to 3 samples. */
+ while (blkCnt > 0U)
+ {
+ /* read complex samples 0 and 1 of the group from the source buffer */
+ real1 = pSrc[0];
+ imag1 = pSrc[1];
+ real2 = pSrc[2];
+ imag2 = pSrc[3];
+
+ /* calculate power of input values (operands widened to q63_t before multiplying) */
+ mul1 = (q63_t) real1 *real1;
+ mul2 = (q63_t) imag1 *imag1;
+ mul3 = (q63_t) real2 *real2;
+ mul4 = (q63_t) imag2 *imag2;
+
+ /* get the result to 3.29 format */
+ out1 = (q31_t) (mul1 >> 33);
+ out2 = (q31_t) (mul2 >> 33);
+ out3 = (q31_t) (mul3 >> 33);
+ out4 = (q31_t) (mul4 >> 33);
+
+ /* add real and imaginary powers: out1 belongs to sample 0, out3 to sample 1 */
+ out1 = out1 + out2;
+ out3 = out3 + out4;
+
+ /* read complex samples 2 and 3 of the group from the source buffer */
+ real1 = pSrc[4];
+ imag1 = pSrc[5];
+ real2 = pSrc[6];
+ imag2 = pSrc[7];
+
+ /* calculate square root for sample 0 */
+ arm_sqrt_q31(out1, &pDst[0]);
+
+ /* calculate power of input values */
+ mul1 = (q63_t) real1 *real1;
+
+ /* calculate square root for sample 1 */
+ arm_sqrt_q31(out3, &pDst[1]);
+
+ /* calculate power of input values */
+ mul2 = (q63_t) imag1 *imag1;
+ mul3 = (q63_t) real2 *real2;
+ mul4 = (q63_t) imag2 *imag2;
+
+ /* get the result to 3.29 format */
+ out1 = (q31_t) (mul1 >> 33);
+ out2 = (q31_t) (mul2 >> 33);
+ out3 = (q31_t) (mul3 >> 33);
+ out4 = (q31_t) (mul4 >> 33);
+
+ /* add real and imaginary powers: out1 belongs to sample 2, out3 to sample 3 */
+ out1 = out1 + out2;
+ out3 = out3 + out4;
+
+ /* calculate square root for sample 2 */
+ arm_sqrt_q31(out1, &pDst[2]);
+
+ /* advance the source pointer by 8 values (4 complex samples) */
+ pSrc += 8U;
+
+ /* calculate square root for sample 3 */
+ arm_sqrt_q31(out3, &pDst[3]);
+
+ /* advance the destination pointer by 4 outputs */
+ pDst += 4U;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* If numSamples is not a multiple of 4, the remaining output samples are computed
+ ** by the shared loop below. No loop unrolling is used there. */
+ blkCnt = numSamples % 0x4U;
+
+#else
+
+ /* ARM_MATH_DSP not defined: the shared loop below processes every sample */
+ blkCnt = numSamples;
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+ while (blkCnt > 0U)
+ {
+ /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
+ real = *pSrc++;
+ imag = *pSrc++;
+ acc0 = (q31_t) (((q63_t) real * real) >> 33);
+ acc1 = (q31_t) (((q63_t) imag * imag) >> 33);
+ /* store the result in 2.30 format in the destination buffer. */
+ arm_sqrt_q31(acc0 + acc1, pDst++);
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+}
+
+/**
+ * @} end of cmplx_mag group
+ */
diff --git a/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_squared_f32.c b/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_squared_f32.c
new file mode 100644
index 0000000..59127a2
--- /dev/null
+++ b/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_squared_f32.c
@@ -0,0 +1,204 @@
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_cmplx_mag_squared_f32.c
+ * Description: Floating-point complex magnitude squared
+ *
+ * $Date: 27. January 2017
+ * $Revision: V.1.5.1
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupCmplxMath
+ */
+
+/**
+ * @defgroup cmplx_mag_squared Complex Magnitude Squared
+ *
+ * Computes the magnitude squared of the elements of a complex data vector.
+ *
+ * The <code>pSrc</code> points to the source data and
+ * <code>pDst</code> points to where the result should be written.
+ * <code>numSamples</code> specifies the number of complex samples
+ * in the input array and the data is stored in an interleaved fashion
+ * (real, imag, real, imag, ...).
+ * The input array has a total of <code>2*numSamples</code> values;
+ * the output array has a total of <code>numSamples</code> values.
+ *
+ * The underlying algorithm is:
+ *
+ * <pre>
+ * for(n=0; n<numSamples; n++) {
+ * pDst[n] = pSrc[(2*n)+0]^2 + pSrc[(2*n)+1]^2;
+ * }
+ * </pre>
+ *
+ * There are separate functions for floating-point, Q15, and Q31 data types.
+ */
+
+/**
+ * @addtogroup cmplx_mag_squared
+ * @{
+ */
+
+
+/**
+ * @brief Floating-point complex magnitude squared: one (real^2 + imag^2) per complex input sample.
+ * @param[in] *pSrc points to the complex input vector (interleaved real, imag values)
+ * @param[out] *pDst points to the real output vector (one value per complex sample)
+ * @param[in] numSamples number of complex samples in the input vector
+ * @return none.
+ */
+
+void arm_cmplx_mag_squared_f32(
+ float32_t * pSrc,
+ float32_t * pDst,
+ uint32_t numSamples)
+{
+ float32_t real, imag; /* Real and imaginary parts used by the one-sample-at-a-time loop */
+ uint32_t blkCnt; /* loop counter */
+
+#if defined (ARM_MATH_DSP)
+ float32_t real1, real2, real3, real4; /* Real parts of the 4 samples in a group */
+ float32_t imag1, imag2, imag3, imag4; /* Imaginary parts of the 4 samples in a group */
+ float32_t mul1, mul2, mul3, mul4; /* Squares for samples 0 and 1 (real, imag, real, imag) */
+ float32_t mul5, mul6, mul7, mul8; /* Squares for samples 2 and 3 (real, imag, real, imag) */
+ float32_t out1, out2, out3, out4; /* Magnitude squared of samples 0 to 3 */
+
+ /* Loop unrolling: blkCnt is the number of complete groups of 4 complex samples */
+ blkCnt = numSamples >> 2U;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** The shared loop after the #endif computes the remaining 0 to 3 samples. */
+ while (blkCnt > 0U)
+ {
+ /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
+ /* read real input sample from source buffer */
+ real1 = pSrc[0];
+ /* read imaginary input sample from source buffer */
+ imag1 = pSrc[1];
+
+ /* calculate power of real value */
+ mul1 = real1 * real1;
+
+ /* read real input sample from source buffer */
+ real2 = pSrc[2];
+
+ /* calculate power of imaginary value */
+ mul2 = imag1 * imag1;
+
+ /* read imaginary input sample from source buffer */
+ imag2 = pSrc[3];
+
+ /* calculate power of real value */
+ mul3 = real2 * real2;
+
+ /* read real input sample from source buffer */
+ real3 = pSrc[4];
+
+ /* calculate power of imaginary value */
+ mul4 = imag2 * imag2;
+
+ /* read imaginary input sample from source buffer */
+ imag3 = pSrc[5];
+
+ /* calculate power of real value */
+ mul5 = real3 * real3;
+ /* calculate power of imaginary value */
+ mul6 = imag3 * imag3;
+
+ /* read real input sample from source buffer */
+ real4 = pSrc[6];
+
+ /* accumulate real and imaginary powers of sample 0 */
+ out1 = mul1 + mul2;
+
+ /* read imaginary input sample from source buffer */
+ imag4 = pSrc[7];
+
+ /* accumulate real and imaginary powers of sample 1 */
+ out2 = mul3 + mul4;
+
+ /* calculate power of real value */
+ mul7 = real4 * real4;
+ /* calculate power of imaginary value */
+ mul8 = imag4 * imag4;
+
+ /* store output to destination */
+ pDst[0] = out1;
+
+ /* accumulate real and imaginary powers of sample 2 */
+ out3 = mul5 + mul6;
+
+ /* store output to destination */
+ pDst[1] = out2;
+
+ /* accumulate real and imaginary powers of sample 3 */
+ out4 = mul7 + mul8;
+
+ /* store output to destination */
+ pDst[2] = out3;
+
+ /* advance the source pointer by 8 values (4 complex samples) */
+ pSrc += 8U;
+
+ /* store output to destination */
+ pDst[3] = out4;
+
+ /* advance the destination pointer by 4 outputs */
+ pDst += 4U;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* If numSamples is not a multiple of 4, the remaining output samples are computed
+ ** by the shared loop below. No loop unrolling is used there. */
+ blkCnt = numSamples % 0x4U;
+
+#else
+
+ /* ARM_MATH_DSP not defined: the shared loop below processes every sample */
+
+ blkCnt = numSamples;
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+ while (blkCnt > 0U)
+ {
+ /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
+ real = *pSrc++;
+ imag = *pSrc++;
+
+ /* out = (real * real) + (imag * imag) */
+ /* store the result in the destination buffer. */
+ *pDst++ = (real * real) + (imag * imag);
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+}
+
+/**
+ * @} end of cmplx_mag_squared group
+ */
diff --git a/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_squared_q15.c b/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_squared_q15.c
new file mode 100644
index 0000000..3f740c3
--- /dev/null
+++ b/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_squared_q15.c
@@ -0,0 +1,136 @@
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_cmplx_mag_squared_q15.c
+ * Description: Q15 complex magnitude squared
+ *
+ * $Date: 27. January 2017
+ * $Revision: V.1.5.1
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupCmplxMath
+ */
+
+/**
+ * @addtogroup cmplx_mag_squared
+ * @{
+ */
+
+/**
+ * @brief Q15 complex magnitude squared
+ * @param[in] *pSrc points to the complex input vector (interleaved real, imag values)
+ * @param[out] *pDst points to the real output vector (one value per complex sample)
+ * @param[in] numSamples number of complex samples in the input vector
+ * @return none.
+ *
+ * <b>Scaling and Overflow Behavior:</b>
+ * \par
+ * The function implements 1.15 by 1.15 multiplications and finally output is converted into 3.13 format.
+ */
+
+void arm_cmplx_mag_squared_q15(
+ q15_t * pSrc,
+ q15_t * pDst,
+ uint32_t numSamples)
+{
+ q31_t acc0, acc1; /* Accumulators */
+
+#if defined (ARM_MATH_DSP)
+
+ /* Unrolled implementation: compiled only when ARM_MATH_DSP is defined */
+ uint32_t blkCnt; /* loop counter */
+ q31_t in1, in2, in3, in4; /* One 32-bit word per complex sample, loaded through __SIMD32 */
+ q31_t acc2, acc3; /* Additional accumulators for the 3rd and 4th sample of a group */
+
+ /* Loop unrolling: blkCnt is the number of complete groups of 4 complex samples */
+ blkCnt = numSamples >> 2U;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** A second loop below computes the remaining 0 to 3 samples. */
+ while (blkCnt > 0U)
+ {
+ /* C[0] = (A[0] * A[0] + A[1] * A[1]); presumably each __SIMD32 load fetches one packed (real, imag) pair and advances pSrc past it - confirm against the macro definition */
+ in1 = *__SIMD32(pSrc)++;
+ in2 = *__SIMD32(pSrc)++;
+ in3 = *__SIMD32(pSrc)++;
+ in4 = *__SIMD32(pSrc)++;
+
+ acc0 = __SMUAD(in1, in1); /* NOTE(review): expected to yield real*real + imag*imag of the packed pair - confirm intrinsic semantics and its behaviour when both halves are -32768 */
+ acc1 = __SMUAD(in2, in2);
+ acc2 = __SMUAD(in3, in3);
+ acc3 = __SMUAD(in4, in4);
+
+ /* Shift each sum right by 17 and narrow to q15_t; result is stored in 3.13 format in the destination buffer. */
+ *pDst++ = (q15_t) (acc0 >> 17);
+ *pDst++ = (q15_t) (acc1 >> 17);
+ *pDst++ = (q15_t) (acc2 >> 17);
+ *pDst++ = (q15_t) (acc3 >> 17);
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* If numSamples is not a multiple of 4, compute the remaining output samples here.
+ ** No loop unrolling is used. */
+ blkCnt = numSamples % 0x4U;
+
+ while (blkCnt > 0U)
+ {
+ /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
+ in1 = *__SIMD32(pSrc)++;
+ acc0 = __SMUAD(in1, in1);
+
+ /* store the result in 3.13 format in the destination buffer. */
+ *pDst++ = (q15_t) (acc0 >> 17);
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+#else
+
+ /* Plain one-sample-per-iteration implementation: compiled when ARM_MATH_DSP is not defined */
+ q15_t real, imag; /* Real and imaginary parts of the current input sample */
+
+ while (numSamples > 0U)
+ {
+ /* out = ((real * real) + (imag * imag)) */
+ real = *pSrc++;
+ imag = *pSrc++;
+ acc0 = (real * real);
+ acc1 = (imag * imag);
+ /* The two squares are added as q63_t, shifted right by 17 and narrowed to q15_t; result is stored in 3.13 format in the destination buffer. */
+ *pDst++ = (q15_t) (((q63_t) acc0 + acc1) >> 17);
+
+ /* Decrement the loop counter (the numSamples parameter itself is counted down) */
+ numSamples--;
+ }
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+}
+
+/**
+ * @} end of cmplx_mag_squared group
+ */
diff --git a/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_squared_q31.c b/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_squared_q31.c
new file mode 100644
index 0000000..c2b2c50
--- /dev/null
+++ b/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_squared_q31.c
@@ -0,0 +1,149 @@
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_cmplx_mag_squared_q31.c
+ * Description: Q31 complex magnitude squared
+ *
+ * $Date: 27. January 2017
+ * $Revision: V.1.5.1
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupCmplxMath
+ */
+
+/**
+ * @addtogroup cmplx_mag_squared
+ * @{
+ */
+
+
+/**
+ * @brief Q31 complex magnitude squared
+ * @param[in] *pSrc points to the complex input vector (interleaved real, imag values)
+ * @param[out] *pDst points to the real output vector (one value per complex sample)
+ * @param[in] numSamples number of complex samples in the input vector
+ * @return none.
+ *
+ * <b>Scaling and Overflow Behavior:</b>
+ * \par
+ * The function implements 1.31 by 1.31 multiplications and finally output is converted into 3.29 format.
+ * Input down scaling is not required.
+ */
+
+void arm_cmplx_mag_squared_q31(
+ q31_t * pSrc,
+ q31_t * pDst,
+ uint32_t numSamples)
+{
+ q31_t real, imag; /* Real and imaginary parts of the current input sample */
+ q31_t acc0, acc1; /* Scaled squares of real and imag */
+
+#if defined (ARM_MATH_DSP)
+
+ /* Unrolled implementation: compiled only when ARM_MATH_DSP is defined */
+ uint32_t blkCnt; /* loop counter */
+
+ /* Loop unrolling: blkCnt is the number of complete groups of 4 complex samples */
+ blkCnt = numSamples >> 2U;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** A second loop below computes the remaining 0 to 3 samples. */
+ while (blkCnt > 0U)
+ {
+ /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
+ real = *pSrc++;
+ imag = *pSrc++;
+ acc0 = (q31_t) (((q63_t) real * real) >> 33); /* square in q63_t, shift right by 33, narrow to q31_t */
+ acc1 = (q31_t) (((q63_t) imag * imag) >> 33);
+ /* store the result in 3.29 format in the destination buffer. */
+ *pDst++ = acc0 + acc1;
+
+ real = *pSrc++;
+ imag = *pSrc++;
+ acc0 = (q31_t) (((q63_t) real * real) >> 33);
+ acc1 = (q31_t) (((q63_t) imag * imag) >> 33);
+ /* store the result in 3.29 format in the destination buffer. */
+ *pDst++ = acc0 + acc1;
+
+ real = *pSrc++;
+ imag = *pSrc++;
+ acc0 = (q31_t) (((q63_t) real * real) >> 33);
+ acc1 = (q31_t) (((q63_t) imag * imag) >> 33);
+ /* store the result in 3.29 format in the destination buffer. */
+ *pDst++ = acc0 + acc1;
+
+ real = *pSrc++;
+ imag = *pSrc++;
+ acc0 = (q31_t) (((q63_t) real * real) >> 33);
+ acc1 = (q31_t) (((q63_t) imag * imag) >> 33);
+ /* store the result in 3.29 format in the destination buffer. */
+ *pDst++ = acc0 + acc1;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* If numSamples is not a multiple of 4, compute the remaining output samples here.
+ ** No loop unrolling is used. */
+ blkCnt = numSamples % 0x4U;
+
+ while (blkCnt > 0U)
+ {
+ /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
+ real = *pSrc++;
+ imag = *pSrc++;
+ acc0 = (q31_t) (((q63_t) real * real) >> 33);
+ acc1 = (q31_t) (((q63_t) imag * imag) >> 33);
+ /* store the result in 3.29 format in the destination buffer. */
+ *pDst++ = acc0 + acc1;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+#else
+
+ /* Plain one-sample-per-iteration implementation: compiled when ARM_MATH_DSP is not defined */
+
+ while (numSamples > 0U)
+ {
+ /* out = ((real * real) + (imag * imag)) */
+ real = *pSrc++;
+ imag = *pSrc++;
+ acc0 = (q31_t) (((q63_t) real * real) >> 33);
+ acc1 = (q31_t) (((q63_t) imag * imag) >> 33);
+ /* store the result in 3.29 format in the destination buffer. */
+ *pDst++ = acc0 + acc1;
+
+ /* Decrement the loop counter (the numSamples parameter itself is counted down) */
+ numSamples--;
+ }
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+}
+
+/**
+ * @} end of cmplx_mag_squared group
+ */
diff --git a/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_f32.c b/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_f32.c
new file mode 100644
index 0000000..3717591
--- /dev/null
+++ b/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_f32.c
@@ -0,0 +1,196 @@
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_cmplx_mult_cmplx_f32.c
+ * Description: Floating-point complex-by-complex multiplication
+ *
+ * $Date: 27. January 2017
+ * $Revision: V.1.5.1
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupCmplxMath
+ */
+
+/**
+ * @defgroup CmplxByCmplxMult Complex-by-Complex Multiplication
+ *
+ * Multiplies a complex vector by another complex vector and generates a complex result.
+ * The data in the complex arrays is stored in an interleaved fashion
+ * (real, imag, real, imag, ...).
+ * The parameter <code>numSamples</code> represents the number of complex
+ * samples processed. The complex arrays have a total of <code>2*numSamples</code>
+ * real values.
+ *
+ * The underlying algorithm is:
+ *
+ * <pre>
+ * for(n=0; n<numSamples; n++) {
+ * pDst[(2*n)+0] = pSrcA[(2*n)+0] * pSrcB[(2*n)+0] - pSrcA[(2*n)+1] * pSrcB[(2*n)+1];
+ * pDst[(2*n)+1] = pSrcA[(2*n)+0] * pSrcB[(2*n)+1] + pSrcA[(2*n)+1] * pSrcB[(2*n)+0];
+ * }
+ * </pre>
+ *
+ * There are separate functions for floating-point, Q15, and Q31 data types.
+ */
+
+/**
+ * @addtogroup CmplxByCmplxMult
+ * @{
+ */
+
+
+/**
+ * @brief Floating-point complex-by-complex multiplication
+ * @param[in] *pSrcA points to the first input vector (interleaved real, imag values)
+ * @param[in] *pSrcB points to the second input vector (interleaved real, imag values)
+ * @param[out] *pDst points to the output vector (interleaved real, imag values)
+ * @param[in] numSamples number of complex samples in each vector
+ * @return none.
+ */
+
+void arm_cmplx_mult_cmplx_f32(
+ float32_t * pSrcA,
+ float32_t * pSrcB,
+ float32_t * pDst,
+ uint32_t numSamples)
+{
+ float32_t a1, b1, c1, d1; /* a/b = real/imag of A, c/d = real/imag of B */
+ uint32_t blkCnt; /* loop counter */
+
+#if defined (ARM_MATH_DSP)
+
+ /* Unrolled implementation: compiled only when ARM_MATH_DSP is defined */
+ float32_t a2, b2, c2, d2; /* Second set of inputs: a/b = real/imag of A, c/d = real/imag of B */
+ float32_t acc1, acc2, acc3, acc4; /* acc1/acc3 build real outputs, acc2/acc4 build imaginary outputs */
+
+
+ /* Loop unrolling: blkCnt is the number of complete groups of 4 complex samples */
+ blkCnt = numSamples >> 2U;
+
+ /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
+ ** The shared loop after the #endif computes the remaining 0 to 3 samples. */
+ while (blkCnt > 0U)
+ {
+ /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
+ /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
+ a1 = *pSrcA; /* A[2 * i] */
+ c1 = *pSrcB; /* B[2 * i] */
+
+ b1 = *(pSrcA + 1); /* A[2 * i + 1] */
+ acc1 = a1 * c1; /* acc1 = A[2 * i] * B[2 * i] */
+
+ a2 = *(pSrcA + 2); /* A[2 * i + 2] */
+ acc2 = (b1 * c1); /* acc2 = A[2 * i + 1] * B[2 * i] */
+
+ d1 = *(pSrcB + 1); /* B[2 * i + 1] */
+ c2 = *(pSrcB + 2); /* B[2 * i + 2] */
+ acc1 -= b1 * d1; /* acc1 = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1] */
+
+ d2 = *(pSrcB + 3); /* B[2 * i + 3] */
+ acc3 = a2 * c2; /* acc3 = A[2 * i + 2] * B[2 * i + 2] */
+
+ b2 = *(pSrcA + 3); /* A[2 * i + 3] */
+ acc2 += (a1 * d1); /* acc2 = A[2 * i + 1] * B[2 * i] + A[2 * i] * B[2 * i + 1] */
+
+ a1 = *(pSrcA + 4); /* A[2 * i + 4] */
+ acc4 = (a2 * d2); /* acc4 = A[2 * i + 2] * B[2 * i + 3] */
+
+ c1 = *(pSrcB + 4); /* B[2 * i + 4] */
+ acc3 -= (b2 * d2); /* acc3 = A[2 * i + 2] * B[2 * i + 2] - A[2 * i + 3] * B[2 * i + 3] */
+ *pDst = acc1; /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1] */
+
+ b1 = *(pSrcA + 5); /* A[2 * i + 5] */
+ acc4 += b2 * c2; /* acc4 = A[2 * i + 2] * B[2 * i + 3] + A[2 * i + 3] * B[2 * i + 2] */
+
+ *(pDst + 1) = acc2; /* C[2 * i + 1] = A[2 * i + 1] * B[2 * i] + A[2 * i] * B[2 * i + 1] */
+ acc1 = (a1 * c1); /* acc1 = A[2 * i + 4] * B[2 * i + 4] */
+
+ d1 = *(pSrcB + 5); /* B[2 * i + 5] */
+ acc2 = (b1 * c1); /* acc2 = A[2 * i + 5] * B[2 * i + 4] */
+
+ *(pDst + 2) = acc3; /* C[2 * i + 2] */
+ *(pDst + 3) = acc4; /* C[2 * i + 3] */
+
+ a2 = *(pSrcA + 6); /* A[2 * i + 6] */
+ acc1 -= (b1 * d1); /* acc1 = A[2 * i + 4] * B[2 * i + 4] - A[2 * i + 5] * B[2 * i + 5] */
+
+ c2 = *(pSrcB + 6); /* B[2 * i + 6] */
+ acc2 += (a1 * d1); /* acc2 = A[2 * i + 5] * B[2 * i + 4] + A[2 * i + 4] * B[2 * i + 5] */
+
+ b2 = *(pSrcA + 7); /* A[2 * i + 7] */
+ acc3 = (a2 * c2); /* acc3 = A[2 * i + 6] * B[2 * i + 6] */
+
+ d2 = *(pSrcB + 7); /* B[2 * i + 7] */
+ acc4 = (b2 * c2); /* acc4 = A[2 * i + 7] * B[2 * i + 6] */
+
+ *(pDst + 4) = acc1; /* C[2 * i + 4] */
+ pSrcA += 8U; /* advance past the 4 complex samples just read from A */
+
+ acc3 -= (b2 * d2); /* acc3 = A[2 * i + 6] * B[2 * i + 6] - A[2 * i + 7] * B[2 * i + 7] */
+ acc4 += (a2 * d2); /* acc4 = A[2 * i + 7] * B[2 * i + 6] + A[2 * i + 6] * B[2 * i + 7] */
+
+ *(pDst + 5) = acc2; /* C[2 * i + 5] */
+ pSrcB += 8U; /* advance past the 4 complex samples just read from B */
+
+ *(pDst + 6) = acc3; /* C[2 * i + 6] */
+ *(pDst + 7) = acc4; /* C[2 * i + 7] */
+
+ pDst += 8U; /* advance past the 4 complex results just written */
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* If numSamples is not a multiple of 4, the remaining output samples are computed
+ ** by the shared loop below. No loop unrolling is used there. */
+ blkCnt = numSamples % 0x4U;
+
+#else
+
+ /* ARM_MATH_DSP not defined: the shared loop below processes every sample */
+ blkCnt = numSamples;
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+ while (blkCnt > 0U)
+ {
+ /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
+ /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
+ a1 = *pSrcA++;
+ b1 = *pSrcA++;
+ c1 = *pSrcB++;
+ d1 = *pSrcB++;
+
+ /* store the real part, then the imaginary part, in the destination buffer. */
+ *pDst++ = (a1 * c1) - (b1 * d1);
+ *pDst++ = (a1 * d1) + (b1 * c1);
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+}
+
+/**
+ * @} end of CmplxByCmplxMult group
+ */
diff --git a/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_q15.c b/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_q15.c
new file mode 100644
index 0000000..2869837
--- /dev/null
+++ b/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_q15.c
@@ -0,0 +1,181 @@
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_cmplx_mult_cmplx_q15.c
+ * Description: Q15 complex-by-complex multiplication
+ *
+ * $Date: 27. January 2017
+ * $Revision: V.1.5.1
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupCmplxMath
+ */
+
+/**
+ * @addtogroup CmplxByCmplxMult
+ * @{
+ */
+
+/**
+ * @brief Q15 complex-by-complex multiplication
+ * @param[in] *pSrcA points to the first input vector
+ * @param[in] *pSrcB points to the second input vector
+ * @param[out] *pDst points to the output vector
+ * @param[in] numSamples number of complex samples in each vector
+ * @return none.
+ *
+ * <b>Scaling and Overflow Behavior:</b>
+ * \par
+ * The function implements 1.15 by 1.15 multiplications, and the final output is converted into 3.13 format.
+ */
+
+void arm_cmplx_mult_cmplx_q15(
+ q15_t * pSrcA,
+ q15_t * pSrcB,
+ q15_t * pDst,
+ uint32_t numSamples)
+{
+ q15_t a, b, c, d; /* a, b: real and imaginary parts read from pSrcA; c, d: real and imaginary parts read from pSrcB */
+
+#if defined (ARM_MATH_DSP)
+
+ /* Compiled when ARM_MATH_DSP is defined: four complex samples are processed per loop iteration */
+ uint32_t blkCnt; /* Loop counter */
+
+ /* Number of complete groups of four complex samples */
+ blkCnt = numSamples >> 2U;
+
+ /* First part of the processing with loop unrolling. Compute 4 complex outputs at a time.
+ ** A second loop below computes the remaining 0 to 3 samples. */
+ while (blkCnt > 0U)
+ {
+ /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
+ /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
+ a = *pSrcA++;
+ b = *pSrcA++;
+ c = *pSrcB++;
+ d = *pSrcB++;
+
+ /* Real part, stored in 3.13 format: each product is shifted right by 17 before the subtraction. The (q15_t)(q31_t) casts bind to the first term only; the difference is narrowed to q15_t by the assignment. */
+ *pDst++ =
+ (q15_t) (q31_t) (((q31_t) a * c) >> 17) - (((q31_t) b * d) >> 17);
+ /* Imaginary part, stored in 3.13 format: each product is shifted right by 17 before the addition. */
+ *pDst++ =
+ (q15_t) (q31_t) (((q31_t) a * d) >> 17) + (((q31_t) b * c) >> 17);
+
+ a = *pSrcA++;
+ b = *pSrcA++;
+ c = *pSrcB++;
+ d = *pSrcB++;
+
+ /* Second sample of the group: real part in 3.13 format. */
+ *pDst++ =
+ (q15_t) (q31_t) (((q31_t) a * c) >> 17) - (((q31_t) b * d) >> 17);
+ /* Second sample of the group: imaginary part in 3.13 format. */
+ *pDst++ =
+ (q15_t) (q31_t) (((q31_t) a * d) >> 17) + (((q31_t) b * c) >> 17);
+
+ a = *pSrcA++;
+ b = *pSrcA++;
+ c = *pSrcB++;
+ d = *pSrcB++;
+
+ /* Third sample of the group: real part in 3.13 format. */
+ *pDst++ =
+ (q15_t) (q31_t) (((q31_t) a * c) >> 17) - (((q31_t) b * d) >> 17);
+ /* Third sample of the group: imaginary part in 3.13 format. */
+ *pDst++ =
+ (q15_t) (q31_t) (((q31_t) a * d) >> 17) + (((q31_t) b * c) >> 17);
+
+ a = *pSrcA++;
+ b = *pSrcA++;
+ c = *pSrcB++;
+ d = *pSrcB++;
+
+ /* Fourth sample of the group: real part in 3.13 format. */
+ *pDst++ =
+ (q15_t) (q31_t) (((q31_t) a * c) >> 17) - (((q31_t) b * d) >> 17);
+ /* Fourth sample of the group: imaginary part in 3.13 format. */
+ *pDst++ =
+ (q15_t) (q31_t) (((q31_t) a * d) >> 17) + (((q31_t) b * c) >> 17);
+
+ /* Decrement the unrolled-loop counter */
+ blkCnt--;
+ }
+
+ /* If numSamples is not a multiple of 4, compute the remaining output samples here.
+ ** No loop unrolling is used. */
+ blkCnt = numSamples % 0x4U;
+
+ while (blkCnt > 0U)
+ {
+ /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
+ /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
+ a = *pSrcA++;
+ b = *pSrcA++;
+ c = *pSrcB++;
+ d = *pSrcB++;
+
+ /* Store the real part in 3.13 format in the destination buffer. */
+ *pDst++ =
+ (q15_t) (q31_t) (((q31_t) a * c) >> 17) - (((q31_t) b * d) >> 17);
+ /* Store the imaginary part in 3.13 format in the destination buffer. */
+ *pDst++ =
+ (q15_t) (q31_t) (((q31_t) a * d) >> 17) + (((q31_t) b * c) >> 17);
+
+ /* Decrement the remaining-samples counter */
+ blkCnt--;
+ }
+
+#else
+
+ /* Compiled when ARM_MATH_DSP is not defined: one complex sample per iteration, counting numSamples down directly */
+
+ while (numSamples > 0U)
+ {
+ /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
+ /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
+ a = *pSrcA++;
+ b = *pSrcA++;
+ c = *pSrcB++;
+ d = *pSrcB++;
+
+ /* Store the real part in 3.13 format in the destination buffer. */
+ *pDst++ =
+ (q15_t) (q31_t) (((q31_t) a * c) >> 17) - (((q31_t) b * d) >> 17);
+ /* Store the imaginary part in 3.13 format in the destination buffer. */
+ *pDst++ =
+ (q15_t) (q31_t) (((q31_t) a * d) >> 17) + (((q31_t) b * c) >> 17);
+
+ /* Decrement the numSamples loop counter */
+ numSamples--;
+ }
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+}
+
+/**
+ * @} end of CmplxByCmplxMult group
+ */
diff --git a/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_q31.c b/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_q31.c
new file mode 100644
index 0000000..b01c4f6
--- /dev/null
+++ b/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_q31.c
@@ -0,0 +1,314 @@
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_cmplx_mult_cmplx_q31.c
+ * Description: Q31 complex-by-complex multiplication
+ *
+ * $Date: 27. January 2017
+ * $Revision: V.1.5.1
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupCmplxMath
+ */
+
+/**
+ * @addtogroup CmplxByCmplxMult
+ * @{
+ */
+
+
+/**
+ * @brief Q31 complex-by-complex multiplication
+ * @param[in] *pSrcA points to the first input vector
+ * @param[in] *pSrcB points to the second input vector
+ * @param[out] *pDst points to the output vector
+ * @param[in] numSamples number of complex samples in each vector
+ * @return none.
+ *
+ * <b>Scaling and Overflow Behavior:</b>
+ * \par
+ * The function implements 1.31 by 1.31 multiplications, and the final output is converted into 3.29 format.
+ * Input down scaling is not required.
+ */
+
+void arm_cmplx_mult_cmplx_q31(
+ q31_t * pSrcA,
+ q31_t * pSrcB,
+ q31_t * pDst,
+ uint32_t numSamples)
+{
+ q31_t a, b, c, d; /* a, b: real and imaginary parts read from pSrcA; c, d: real and imaginary parts read from pSrcB */
+ uint32_t blkCnt; /* Loop counter */
+ q31_t mul1, mul2, mul3, mul4; /* Scaled partial products a*c, b*d, a*d and b*c */
+ q31_t out1, out2; /* Real (mul1 - mul2) and imaginary (mul3 + mul4) results */
+
+#if defined (ARM_MATH_DSP)
+
+ /* Compiled when ARM_MATH_DSP is defined: four complex samples are processed per loop iteration */
+
+ /* Number of complete groups of four complex samples */
+ blkCnt = numSamples >> 2U;
+
+ /* First part of the processing with loop unrolling. Compute 4 complex outputs at a time.
+ ** A second loop below computes the remaining 0 to 3 samples. */
+ while (blkCnt > 0U)
+ {
+ /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
+ /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
+ a = *pSrcA++;
+ b = *pSrcA++;
+ c = *pSrcB++;
+ d = *pSrcB++;
+
+ mul1 = (q31_t) (((q63_t) a * c) >> 32); /* q63_t product shifted right by 32, then narrowed to q31_t */
+ mul2 = (q31_t) (((q63_t) b * d) >> 32);
+ mul3 = (q31_t) (((q63_t) a * d) >> 32);
+ mul4 = (q31_t) (((q63_t) b * c) >> 32);
+
+ mul1 = (mul1 >> 1); /* one further right shift per product (33 in total) before combining */
+ mul2 = (mul2 >> 1);
+ mul3 = (mul3 >> 1);
+ mul4 = (mul4 >> 1);
+
+ out1 = mul1 - mul2; /* real part */
+ out2 = mul3 + mul4; /* imaginary part */
+
+ /* store the real result in 3.29 format in the destination buffer. */
+ *pDst++ = out1;
+ /* store the imag result in 3.29 format in the destination buffer. */
+ *pDst++ = out2;
+
+ a = *pSrcA++;
+ b = *pSrcA++;
+ c = *pSrcB++;
+ d = *pSrcB++;
+
+ mul1 = (q31_t) (((q63_t) a * c) >> 32);
+ mul2 = (q31_t) (((q63_t) b * d) >> 32);
+ mul3 = (q31_t) (((q63_t) a * d) >> 32);
+ mul4 = (q31_t) (((q63_t) b * c) >> 32);
+
+ mul1 = (mul1 >> 1);
+ mul2 = (mul2 >> 1);
+ mul3 = (mul3 >> 1);
+ mul4 = (mul4 >> 1);
+
+ out1 = mul1 - mul2;
+ out2 = mul3 + mul4;
+
+ /* store the real result in 3.29 format in the destination buffer. */
+ *pDst++ = out1;
+ /* store the imag result in 3.29 format in the destination buffer. */
+ *pDst++ = out2;
+
+ a = *pSrcA++;
+ b = *pSrcA++;
+ c = *pSrcB++;
+ d = *pSrcB++;
+
+ mul1 = (q31_t) (((q63_t) a * c) >> 32);
+ mul2 = (q31_t) (((q63_t) b * d) >> 32);
+ mul3 = (q31_t) (((q63_t) a * d) >> 32);
+ mul4 = (q31_t) (((q63_t) b * c) >> 32);
+
+ mul1 = (mul1 >> 1);
+ mul2 = (mul2 >> 1);
+ mul3 = (mul3 >> 1);
+ mul4 = (mul4 >> 1);
+
+ out1 = mul1 - mul2;
+ out2 = mul3 + mul4;
+
+ /* store the real result in 3.29 format in the destination buffer. */
+ *pDst++ = out1;
+ /* store the imag result in 3.29 format in the destination buffer. */
+ *pDst++ = out2;
+
+ a = *pSrcA++;
+ b = *pSrcA++;
+ c = *pSrcB++;
+ d = *pSrcB++;
+
+ mul1 = (q31_t) (((q63_t) a * c) >> 32);
+ mul2 = (q31_t) (((q63_t) b * d) >> 32);
+ mul3 = (q31_t) (((q63_t) a * d) >> 32);
+ mul4 = (q31_t) (((q63_t) b * c) >> 32);
+
+ mul1 = (mul1 >> 1);
+ mul2 = (mul2 >> 1);
+ mul3 = (mul3 >> 1);
+ mul4 = (mul4 >> 1);
+
+ out1 = mul1 - mul2;
+ out2 = mul3 + mul4;
+
+ /* store the real result in 3.29 format in the destination buffer. */
+ *pDst++ = out1;
+ /* store the imag result in 3.29 format in the destination buffer. */
+ *pDst++ = out2;
+
+ /* Decrement the unrolled-loop counter */
+ blkCnt--;
+ }
+
+ /* If numSamples is not a multiple of 4, compute the remaining output samples here.
+ ** No loop unrolling is used. */
+ blkCnt = numSamples % 0x4U;
+
+ while (blkCnt > 0U)
+ {
+ /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
+ /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
+ a = *pSrcA++;
+ b = *pSrcA++;
+ c = *pSrcB++;
+ d = *pSrcB++;
+
+ mul1 = (q31_t) (((q63_t) a * c) >> 32);
+ mul2 = (q31_t) (((q63_t) b * d) >> 32);
+ mul3 = (q31_t) (((q63_t) a * d) >> 32);
+ mul4 = (q31_t) (((q63_t) b * c) >> 32);
+
+ mul1 = (mul1 >> 1);
+ mul2 = (mul2 >> 1);
+ mul3 = (mul3 >> 1);
+ mul4 = (mul4 >> 1);
+
+ out1 = mul1 - mul2;
+ out2 = mul3 + mul4;
+
+ /* store the real result in 3.29 format in the destination buffer. */
+ *pDst++ = out1;
+ /* store the imag result in 3.29 format in the destination buffer. */
+ *pDst++ = out2;
+
+ /* Decrement the remaining-samples counter */
+ blkCnt--;
+ }
+
+#else
+
+ /* Compiled when ARM_MATH_DSP is not defined: two complex samples are processed per loop iteration */
+
+ /* Number of complete pairs of complex samples */
+ blkCnt = numSamples >> 1U;
+
+ /* First part of the processing with loop unrolling. Compute 2 complex outputs at a time.
+ ** A second loop below computes the remaining sample, if there is one. */
+ while (blkCnt > 0U)
+ {
+ /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
+ /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
+ a = *pSrcA++;
+ b = *pSrcA++;
+ c = *pSrcB++;
+ d = *pSrcB++;
+
+ mul1 = (q31_t) (((q63_t) a * c) >> 32);
+ mul2 = (q31_t) (((q63_t) b * d) >> 32);
+ mul3 = (q31_t) (((q63_t) a * d) >> 32);
+ mul4 = (q31_t) (((q63_t) b * c) >> 32);
+
+ mul1 = (mul1 >> 1);
+ mul2 = (mul2 >> 1);
+ mul3 = (mul3 >> 1);
+ mul4 = (mul4 >> 1);
+
+ out1 = mul1 - mul2;
+ out2 = mul3 + mul4;
+
+ /* store the real result in 3.29 format in the destination buffer. */
+ *pDst++ = out1;
+ /* store the imag result in 3.29 format in the destination buffer. */
+ *pDst++ = out2;
+
+ a = *pSrcA++;
+ b = *pSrcA++;
+ c = *pSrcB++;
+ d = *pSrcB++;
+
+ mul1 = (q31_t) (((q63_t) a * c) >> 32);
+ mul2 = (q31_t) (((q63_t) b * d) >> 32);
+ mul3 = (q31_t) (((q63_t) a * d) >> 32);
+ mul4 = (q31_t) (((q63_t) b * c) >> 32);
+
+ mul1 = (mul1 >> 1);
+ mul2 = (mul2 >> 1);
+ mul3 = (mul3 >> 1);
+ mul4 = (mul4 >> 1);
+
+ out1 = mul1 - mul2;
+ out2 = mul3 + mul4;
+
+ /* store the real result in 3.29 format in the destination buffer. */
+ *pDst++ = out1;
+ /* store the imag result in 3.29 format in the destination buffer. */
+ *pDst++ = out2;
+
+ /* Decrement the unrolled-loop counter */
+ blkCnt--;
+ }
+
+ /* If numSamples is not a multiple of 2, compute the remaining output sample here.
+ ** No loop unrolling is used. */
+ blkCnt = numSamples % 0x2U;
+
+ while (blkCnt > 0U)
+ {
+ /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
+ /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
+ a = *pSrcA++;
+ b = *pSrcA++;
+ c = *pSrcB++;
+ d = *pSrcB++;
+
+ mul1 = (q31_t) (((q63_t) a * c) >> 32);
+ mul2 = (q31_t) (((q63_t) b * d) >> 32);
+ mul3 = (q31_t) (((q63_t) a * d) >> 32);
+ mul4 = (q31_t) (((q63_t) b * c) >> 32);
+
+ mul1 = (mul1 >> 1);
+ mul2 = (mul2 >> 1);
+ mul3 = (mul3 >> 1);
+ mul4 = (mul4 >> 1);
+
+ out1 = mul1 - mul2;
+ out2 = mul3 + mul4;
+
+ /* store the real result in 3.29 format in the destination buffer. */
+ *pDst++ = out1;
+ /* store the imag result in 3.29 format in the destination buffer. */
+ *pDst++ = out2;
+
+ /* Decrement the remaining-samples counter */
+ blkCnt--;
+ }
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+}
+
+/**
+ * @} end of CmplxByCmplxMult group
+ */
diff --git a/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_real_f32.c b/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_real_f32.c
new file mode 100644
index 0000000..8c7ca31
--- /dev/null
+++ b/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_real_f32.c
@@ -0,0 +1,213 @@
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_cmplx_mult_real_f32.c
+ * Description: Floating-point complex by real multiplication
+ *
+ * $Date: 27. January 2017
+ * $Revision: V.1.5.1
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupCmplxMath
+ */
+
+/**
+ * @defgroup CmplxByRealMult Complex-by-Real Multiplication
+ *
+ * Multiplies a complex vector by a real vector and generates a complex result.
+ * The data in the complex arrays is stored in an interleaved fashion
+ * (real, imag, real, imag, ...).
+ * The parameter <code>numSamples</code> represents the number of complex
+ * samples processed. The complex arrays have a total of <code>2*numSamples</code>
+ * real values while the real array has a total of <code>numSamples</code>
+ * real values.
+ *
+ * The underlying algorithm used is:
+ *
+ * <pre>
+ * for(n=0; n<numSamples; n++) {
+ * pCmplxDst[(2*n)+0] = pSrcCmplx[(2*n)+0] * pSrcReal[n];
+ * pCmplxDst[(2*n)+1] = pSrcCmplx[(2*n)+1] * pSrcReal[n];
+ * }
+ * </pre>
+ *
+ * There are separate functions for floating-point, Q15, and Q31 data types.
+ */
+
+/**
+ * @addtogroup CmplxByRealMult
+ * @{
+ */
+
+
+/**
+ * @brief Floating-point complex-by-real multiplication
+ * @param[in] *pSrcCmplx points to the complex input vector
+ * @param[in] *pSrcReal points to the real input vector
+ * @param[out] *pCmplxDst points to the complex output vector
+ * @param[in] numSamples number of samples in each vector
+ * @return none.
+ */
+
+void arm_cmplx_mult_real_f32(
+ float32_t * pSrcCmplx,
+ float32_t * pSrcReal,
+ float32_t * pCmplxDst,
+ uint32_t numSamples)
+{
+ float32_t in; /* Value read from pSrcReal in the non-unrolled loop at the end of the function */
+ uint32_t blkCnt; /* Loop counter */
+
+#if defined (ARM_MATH_DSP)
+
+ /* Compiled when ARM_MATH_DSP is defined: four complex samples are processed per loop iteration */
+ float32_t inA1, inA2, inA3, inA4; /* pSrcCmplx[0..3]: real/imaginary parts of the 1st and 2nd complex samples */
+ float32_t inA5, inA6, inA7, inA8; /* pSrcCmplx[4..7]: real/imaginary parts of the 3rd and 4th complex samples */
+ float32_t inB1, inB2, inB3, inB4; /* pSrcReal[0..3]: one real factor per complex sample */
+ float32_t out1, out2, out3, out4; /* Products written to pCmplxDst[0..3] */
+ float32_t out5, out6, out7, out8; /* Products written to pCmplxDst[4..7] */
+
+ /* Number of complete groups of four complex samples */
+ blkCnt = numSamples >> 2U;
+
+ /* First part of the processing with loop unrolling. Compute 4 complex outputs at a time.
+ ** The loop at the end of the function computes the remaining 0 to 3 samples. */
+ while (blkCnt > 0U)
+ {
+ /* C[2 * i] = A[2 * i] * B[i]. */
+ /* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
+ /* read input from complex input buffer */
+ inA1 = pSrcCmplx[0];
+ inA2 = pSrcCmplx[1];
+ /* read input from real input buffer */
+ inB1 = pSrcReal[0];
+
+ /* read input from complex input buffer */
+ inA3 = pSrcCmplx[2];
+
+ /* multiply complex buffer real input with real buffer input */
+ out1 = inA1 * inB1;
+
+ /* read input from complex input buffer */
+ inA4 = pSrcCmplx[3];
+
+ /* multiply complex buffer imaginary input with real buffer input */
+ out2 = inA2 * inB1;
+
+ /* read input from real input buffer */
+ inB2 = pSrcReal[1];
+ /* read input from complex input buffer */
+ inA5 = pSrcCmplx[4];
+
+ /* multiply complex buffer real input with real buffer input */
+ out3 = inA3 * inB2;
+
+ /* read input from complex input buffer */
+ inA6 = pSrcCmplx[5];
+ /* read input from real input buffer */
+ inB3 = pSrcReal[2];
+
+ /* multiply complex buffer imaginary input with real buffer input */
+ out4 = inA4 * inB2;
+
+ /* read input from complex input buffer */
+ inA7 = pSrcCmplx[6];
+
+ /* multiply complex buffer real input with real buffer input */
+ out5 = inA5 * inB3;
+
+ /* read input from complex input buffer */
+ inA8 = pSrcCmplx[7];
+
+ /* multiply complex buffer imaginary input with real buffer input */
+ out6 = inA6 * inB3;
+
+ /* read input from real input buffer */
+ inB4 = pSrcReal[3];
+
+ /* store result to destination buffer */
+ pCmplxDst[0] = out1;
+
+ /* multiply complex buffer real input with real buffer input */
+ out7 = inA7 * inB4;
+
+ /* store result to destination buffer */
+ pCmplxDst[1] = out2;
+
+ /* multiply complex buffer imaginary input with real buffer input */
+ out8 = inA8 * inB4;
+
+ /* store result to destination buffer */
+ pCmplxDst[2] = out3;
+ pCmplxDst[3] = out4;
+ pCmplxDst[4] = out5;
+
+ /* increment complex input buffer by 8 to process next samples */
+ pSrcCmplx += 8U;
+
+ /* store result to destination buffer */
+ pCmplxDst[5] = out6;
+
+ /* increment real input buffer by 4 to process next samples */
+ pSrcReal += 4U;
+
+ /* store result to destination buffer */
+ pCmplxDst[6] = out7;
+ pCmplxDst[7] = out8;
+
+ /* increment destination buffer by 8 to process next samples */
+ pCmplxDst += 8U;
+
+ /* Decrement the unrolled-loop counter */
+ blkCnt--;
+ }
+
+ /* If numSamples is not a multiple of 4, the remaining output samples are computed by the loop below.
+ ** No loop unrolling is used. */
+ blkCnt = numSamples % 0x4U;
+
+#else
+
+ /* Compiled when ARM_MATH_DSP is not defined: the loop below processes all numSamples samples */
+ blkCnt = numSamples;
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+ while (blkCnt > 0U)
+ {
+ /* C[2 * i] = A[2 * i] * B[i]. */
+ /* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
+ in = *pSrcReal++;
+ /* Multiply the real part, then the imaginary part, by the same real factor and store both. */
+ *pCmplxDst++ = (*pSrcCmplx++) * (in);
+ *pCmplxDst++ = (*pSrcCmplx++) * (in);
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+}
+
+/**
+ * @} end of CmplxByRealMult group
+ */
diff --git a/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_real_q15.c b/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_real_q15.c
new file mode 100644
index 0000000..340d852
--- /dev/null
+++ b/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_real_q15.c
@@ -0,0 +1,191 @@
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_cmplx_mult_real_q15.c
+ * Description: Q15 complex by real multiplication
+ *
+ * $Date: 27. January 2017
+ * $Revision: V.1.5.1
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupCmplxMath
+ */
+
+/**
+ * @addtogroup CmplxByRealMult
+ * @{
+ */
+
+
+/**
+ * @brief Q15 complex-by-real multiplication
+ * @param[in] *pSrcCmplx points to the complex input vector
+ * @param[in] *pSrcReal points to the real input vector
+ * @param[out] *pCmplxDst points to the complex output vector
+ * @param[in] numSamples number of samples in each vector
+ * @return none.
+ *
+ * <b>Scaling and Overflow Behavior:</b>
+ * \par
+ * The function uses saturating arithmetic.
+ * Results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated.
+ */
+
+void arm_cmplx_mult_real_q15(
+ q15_t * pSrcCmplx,
+ q15_t * pSrcReal,
+ q15_t * pCmplxDst,
+ uint32_t numSamples)
+{
+ q15_t in; /* Value read from pSrcReal in the non-unrolled loops */
+
+#if defined (ARM_MATH_DSP)
+
+ /* Compiled when ARM_MATH_DSP is defined: four complex samples are processed per loop iteration */
+ uint32_t blkCnt; /* Loop counter */
+ q31_t inA1, inA2; /* Words read from pSrcCmplx via __SIMD32; low and high halves are used as separate q15_t values */
+ q31_t inB1; /* Word read from pSrcReal via __SIMD32; its two halves are two real factors */
+ q15_t out1, out2, out3, out4; /* Products after the right shift by 15 and __SSAT */
+ q31_t mul1, mul2, mul3, mul4; /* Products before scaling */
+
+ /* Number of complete groups of four complex samples */
+ blkCnt = numSamples >> 2U;
+
+ /* First part of the processing with loop unrolling. Compute 4 complex outputs at a time.
+ ** A second loop below computes the remaining 0 to 3 samples. */
+ while (blkCnt > 0U)
+ {
+ /* C[2 * i] = A[2 * i] * B[i]. */
+ /* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
+ /* read complex number both real and imaginary from complex input buffer */
+ inA1 = *__SIMD32(pSrcCmplx)++;
+ /* read two real values at a time from real input buffer */
+ inB1 = *__SIMD32(pSrcReal)++;
+ /* read complex number both real and imaginary from complex input buffer */
+ inA2 = *__SIMD32(pSrcCmplx)++;
+
+ /* multiply each half of the complex words by the matching half of the real word; which half is which depends on ARM_MATH_BIG_ENDIAN */
+#ifndef ARM_MATH_BIG_ENDIAN
+
+ mul1 = (q31_t) ((q15_t) (inA1) * (q15_t) (inB1)); /* low half of inA1 * low half of inB1 */
+ mul2 = (q31_t) ((q15_t) (inA1 >> 16) * (q15_t) (inB1)); /* high half of inA1 * low half of inB1 */
+ mul3 = (q31_t) ((q15_t) (inA2) * (q15_t) (inB1 >> 16)); /* low half of inA2 * high half of inB1 */
+ mul4 = (q31_t) ((q15_t) (inA2 >> 16) * (q15_t) (inB1 >> 16)); /* high half of inA2 * high half of inB1 */
+
+#else
+
+ mul2 = (q31_t) ((q15_t) (inA1 >> 16) * (q15_t) (inB1 >> 16)); /* high half of inA1 * high half of inB1 */
+ mul1 = (q31_t) ((q15_t) inA1 * (q15_t) (inB1 >> 16)); /* low half of inA1 * high half of inB1 */
+ mul4 = (q31_t) ((q15_t) (inA2 >> 16) * (q15_t) inB1); /* high half of inA2 * low half of inB1 */
+ mul3 = (q31_t) ((q15_t) inA2 * (q15_t) inB1); /* low half of inA2 * low half of inB1 */
+
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
+
+ /* shift each product right by 15 and saturate it with __SSAT(..., 16) */
+ out1 = (q15_t) __SSAT(mul1 >> 15U, 16);
+ out2 = (q15_t) __SSAT(mul2 >> 15U, 16);
+ out3 = (q15_t) __SSAT(mul3 >> 15U, 16);
+ out4 = (q15_t) __SSAT(mul4 >> 15U, 16);
+
+ /* pack real and imaginary outputs and store them to destination */
+ *__SIMD32(pCmplxDst)++ = __PKHBT(out1, out2, 16);
+ *__SIMD32(pCmplxDst)++ = __PKHBT(out3, out4, 16);
+
+ inA1 = *__SIMD32(pSrcCmplx)++; /* 3rd complex sample of the group */
+ inB1 = *__SIMD32(pSrcReal)++; /* next two real factors */
+ inA2 = *__SIMD32(pSrcCmplx)++; /* 4th complex sample of the group */
+
+#ifndef ARM_MATH_BIG_ENDIAN
+
+ mul1 = (q31_t) ((q15_t) (inA1) * (q15_t) (inB1));
+ mul2 = (q31_t) ((q15_t) (inA1 >> 16) * (q15_t) (inB1));
+ mul3 = (q31_t) ((q15_t) (inA2) * (q15_t) (inB1 >> 16));
+ mul4 = (q31_t) ((q15_t) (inA2 >> 16) * (q15_t) (inB1 >> 16));
+
+#else
+
+ mul2 = (q31_t) ((q15_t) (inA1 >> 16) * (q15_t) (inB1 >> 16));
+ mul1 = (q31_t) ((q15_t) inA1 * (q15_t) (inB1 >> 16));
+ mul4 = (q31_t) ((q15_t) (inA2 >> 16) * (q15_t) inB1);
+ mul3 = (q31_t) ((q15_t) inA2 * (q15_t) inB1);
+
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
+
+ out1 = (q15_t) __SSAT(mul1 >> 15U, 16);
+ out2 = (q15_t) __SSAT(mul2 >> 15U, 16);
+ out3 = (q15_t) __SSAT(mul3 >> 15U, 16);
+ out4 = (q15_t) __SSAT(mul4 >> 15U, 16);
+
+ *__SIMD32(pCmplxDst)++ = __PKHBT(out1, out2, 16);
+ *__SIMD32(pCmplxDst)++ = __PKHBT(out3, out4, 16);
+
+ /* Decrement the unrolled-loop counter */
+ blkCnt--;
+ }
+
+ /* If numSamples is not a multiple of 4, compute the remaining output samples here.
+ ** No loop unrolling is used. */
+ blkCnt = numSamples % 0x4U;
+
+ while (blkCnt > 0U)
+ {
+ /* C[2 * i] = A[2 * i] * B[i]. */
+ /* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
+ in = *pSrcReal++;
+ /* Multiply the real part, then the imaginary part, by the same real factor; shift right by 15, saturate and store. */
+ *pCmplxDst++ =
+ (q15_t) __SSAT((((q31_t) (*pSrcCmplx++) * (in)) >> 15), 16);
+ *pCmplxDst++ =
+ (q15_t) __SSAT((((q31_t) (*pSrcCmplx++) * (in)) >> 15), 16);
+
+ /* Decrement the remaining-samples counter */
+ blkCnt--;
+ }
+
+#else
+
+ /* Compiled when ARM_MATH_DSP is not defined: one complex sample per iteration, counting numSamples down directly */
+
+ while (numSamples > 0U)
+ {
+ /* realOut = realA * realB. */
+ /* imagOut = imagA * realB. */
+ in = *pSrcReal++;
+ /* Multiply the real part, then the imaginary part, by the same real factor; shift right by 15, saturate and store. */
+ *pCmplxDst++ =
+ (q15_t) __SSAT((((q31_t) (*pSrcCmplx++) * (in)) >> 15), 16);
+ *pCmplxDst++ =
+ (q15_t) __SSAT((((q31_t) (*pSrcCmplx++) * (in)) >> 15), 16);
+
+ /* Decrement the numSamples loop counter */
+ numSamples--;
+ }
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+}
+
+/**
+ * @} end of CmplxByRealMult group
+ */
diff --git a/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_real_q31.c b/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_real_q31.c
new file mode 100644
index 0000000..19fc55b
--- /dev/null
+++ b/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_real_q31.c
@@ -0,0 +1,211 @@
+/* ----------------------------------------------------------------------
+ * Project: CMSIS DSP Library
+ * Title: arm_cmplx_mult_real_q31.c
+ * Description: Q31 complex by real multiplication
+ *
+ * $Date: 27. January 2017
+ * $Revision: V.1.5.1
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+
+/**
+ * @ingroup groupCmplxMath
+ */
+
+/**
+ * @addtogroup CmplxByRealMult
+ * @{
+ */
+
+
+/**
+ * @brief Q31 complex-by-real multiplication: each complex sample is multiplied by the matching real sample
+ * @param[in] *pSrcCmplx points to the complex input vector (interleaved real, imaginary; 2 * numSamples values are read)
+ * @param[in] *pSrcReal points to the real input vector (numSamples values are read)
+ * @param[out] *pCmplxDst points to the complex output vector (interleaved real, imaginary; 2 * numSamples values are written)
+ * @param[in] numSamples number of complex samples to process
+ * @return none.
+ *
+ * <b>Scaling and Overflow Behavior:</b>
+ * \par
+ * The function uses saturating arithmetic.
+ * Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] will be saturated.
+ */
+
+void arm_cmplx_mult_real_q31(
+ q31_t * pSrcCmplx,
+ q31_t * pSrcReal,
+ q31_t * pCmplxDst,
+ uint32_t numSamples)
+{
+ q31_t inA1; /* Input value: complex input in the ARM_MATH_DSP path, real input in the other path */
+
+#if defined (ARM_MATH_DSP)
+
+ /* Unrolled implementation, compiled only when ARM_MATH_DSP is defined */
+ uint32_t blkCnt; /* Loop counter */
+ q31_t inA2, inA3, inA4; /* Remaining complex input values (real / imaginary parts) */
+ q31_t inB1, inB2; /* Real input values */
+ q31_t out1, out2, out3, out4; /* Output values */
+
+ /* Number of full groups of 4 complex samples */
+ blkCnt = numSamples >> 2U;
+
+ /* Unrolled loop: each iteration processes 4 complex samples (8 output values) as two batches of 2.
+ ** A second loop below computes the remaining 0 to 3 samples. */
+ while (blkCnt > 0U)
+ {
+ /* C[2 * i] = A[2 * i] * B[i]. */
+ /* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
+ /* read first complex sample (real, then imaginary) */
+ inA1 = *pSrcCmplx++;
+ inA2 = *pSrcCmplx++;
+ /* read two values from the real input buffer */
+ inB1 = *pSrcReal++;
+ inB2 = *pSrcReal++;
+ /* read second complex sample (real, then imaginary) */
+ inA3 = *pSrcCmplx++;
+ inA4 = *pSrcCmplx++;
+
+ /* multiply complex input by real input and keep the upper 32 bits of the 64-bit product */
+ out1 = ((q63_t) inA1 * inB1) >> 32;
+ out2 = ((q63_t) inA2 * inB1) >> 32;
+ out3 = ((q63_t) inA3 * inB2) >> 32;
+ out4 = ((q63_t) inA4 * inB2) >> 32;
+
+ /* saturate the result to a signed 31-bit range ahead of the left shift below */
+ out1 = __SSAT(out1, 31);
+ out2 = __SSAT(out2, 31);
+ out3 = __SSAT(out3, 31);
+ out4 = __SSAT(out4, 31);
+
+ /* shift left by one to get the result in 1.31 format (bit 0 of the result is therefore always 0) */
+ out1 = out1 << 1;
+ out2 = out2 << 1;
+ out3 = out3 << 1;
+ out4 = out4 << 1;
+
+ /* store the results in the destination buffer */
+ *pCmplxDst++ = out1;
+ *pCmplxDst++ = out2;
+ *pCmplxDst++ = out3;
+ *pCmplxDst++ = out4;
+
+ /* second batch: read third complex sample (real, then imaginary) */
+ inA1 = *pSrcCmplx++;
+ inA2 = *pSrcCmplx++;
+ /* read two values from the real input buffer */
+ inB1 = *pSrcReal++;
+ inB2 = *pSrcReal++;
+ /* read fourth complex sample (real, then imaginary) */
+ inA3 = *pSrcCmplx++;
+ inA4 = *pSrcCmplx++;
+
+ /* multiply complex input by real input and keep the upper 32 bits of the 64-bit product */
+ out1 = ((q63_t) inA1 * inB1) >> 32;
+ out2 = ((q63_t) inA2 * inB1) >> 32;
+ out3 = ((q63_t) inA3 * inB2) >> 32;
+ out4 = ((q63_t) inA4 * inB2) >> 32;
+
+ /* saturate the result to a signed 31-bit range ahead of the left shift below */
+ out1 = __SSAT(out1, 31);
+ out2 = __SSAT(out2, 31);
+ out3 = __SSAT(out3, 31);
+ out4 = __SSAT(out4, 31);
+
+ /* shift left by one to get the result in 1.31 format (bit 0 of the result is therefore always 0) */
+ out1 = out1 << 1;
+ out2 = out2 << 1;
+ out3 = out3 << 1;
+ out4 = out4 << 1;
+
+ /* store the results in the destination buffer */
+ *pCmplxDst++ = out1;
+ *pCmplxDst++ = out2;
+ *pCmplxDst++ = out3;
+ *pCmplxDst++ = out4;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* If numSamples is not a multiple of 4, compute the remaining output samples here.
+ ** No loop unrolling is used: one complex sample per iteration. */
+ blkCnt = numSamples % 0x4U;
+
+ while (blkCnt > 0U)
+ {
+ /* C[2 * i] = A[2 * i] * B[i]. */
+ /* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
+ /* read one complex sample (real, then imaginary) */
+ inA1 = *pSrcCmplx++;
+ inA2 = *pSrcCmplx++;
+ /* read one value from the real input buffer */
+ inB1 = *pSrcReal++;
+
+ /* multiply complex input by real input and keep the upper 32 bits of the 64-bit product */
+ out1 = ((q63_t) inA1 * inB1) >> 32;
+ out2 = ((q63_t) inA2 * inB1) >> 32;
+
+ /* saturate the result to a signed 31-bit range ahead of the left shift below */
+ out1 = __SSAT(out1, 31);
+ out2 = __SSAT(out2, 31);
+
+ /* shift left by one to get the result in 1.31 format (bit 0 of the result is therefore always 0) */
+ out1 = out1 << 1;
+ out2 = out2 << 1;
+
+ /* store the results in the destination buffer */
+ *pCmplxDst++ = out1;
+ *pCmplxDst++ = out2;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+#else
+
+ /* Non-unrolled implementation, compiled when ARM_MATH_DSP is not defined */
+
+ while (numSamples > 0U)
+ {
+ /* realOut = realA * realB. */
+ /* imagOut = imagA * realB. */
+ inA1 = *pSrcReal++;
+ /* shift the 64-bit product right by 31, clip it to Q31 and store it in the destination buffer. */
+ *pCmplxDst++ =
+ (q31_t) clip_q63_to_q31(((q63_t) * pSrcCmplx++ * inA1) >> 31);
+ *pCmplxDst++ =
+ (q31_t) clip_q63_to_q31(((q63_t) * pSrcCmplx++ * inA1) >> 31);
+
+ /* Decrement the numSamples loop counter */
+ numSamples--;
+ }
+
+#endif /* #if defined (ARM_MATH_DSP) */
+
+}
+
+/**
+ * @} end of CmplxByRealMult group
+ */