diff options
author | Ali Labbene <ali.labbene@st.com> | 2019-12-11 08:59:21 +0100 |
---|---|---|
committer | Ali Labbene <ali.labbene@st.com> | 2019-12-16 16:35:24 +0100 |
commit | 9f95ff5b6ba01db09552b84a0ab79607060a2666 (patch) | |
tree | 8a6e0dda832555c692307869aed49d07ee7facfe /DSP/Source/ComplexMathFunctions | |
parent | 76177aa280494bb36d7a0bcbda1078d4db717020 (diff) | |
download | st-cmsis-core-lowfat-9f95ff5b6ba01db09552b84a0ab79607060a2666.tar.gz st-cmsis-core-lowfat-9f95ff5b6ba01db09552b84a0ab79607060a2666.tar.bz2 st-cmsis-core-lowfat-9f95ff5b6ba01db09552b84a0ab79607060a2666.zip |
Official ARM version: v5.4.0
Add CMSIS V5.4.0; please refer to index.html, available under the \docs folder.
Note: the content of \CMSIS\Core\Include has been copied under \Include to keep the same structure
used in existing projects, and thus avoid a mass update of projects.
Note: the following components have been removed from the original ARM delivery (as they are not used in ST packages):
- CMSIS_EW2018.pdf
- .gitattributes
- .gitignore
- \Device
- \CMSIS
- \CoreValidation
- \DAP
- \Documentation
- \DoxyGen
- \Driver
- \Pack
- \RTOS\CMSIS_RTOS_Tutorial.pdf
- \RTOS\RTX
- \RTOS\Template
- \RTOS2\RTX
- \Utilities
- All ARM/GCC project files have been deleted from \DSP, \RTOS and \RTOS2
Change-Id: Ia026c3f0f0d016627a4fb5a9032852c33d24b4d3
Diffstat (limited to 'DSP/Source/ComplexMathFunctions')
18 files changed, 3294 insertions, 0 deletions
diff --git a/DSP/Source/ComplexMathFunctions/arm_cmplx_conj_f32.c b/DSP/Source/ComplexMathFunctions/arm_cmplx_conj_f32.c new file mode 100644 index 0000000..cfb6f1f --- /dev/null +++ b/DSP/Source/ComplexMathFunctions/arm_cmplx_conj_f32.c @@ -0,0 +1,171 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_cmplx_conj_f32.c + * Description: Floating-point complex conjugate + * + * $Date: 27. January 2017 + * $Revision: V.1.5.1 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arm_math.h" + +/** + * @ingroup groupCmplxMath + */ + +/** + * @defgroup cmplx_conj Complex Conjugate + * + * Conjugates the elements of a complex data vector. + * + * The <code>pSrc</code> points to the source data and + * <code>pDst</code> points to the where the result should be written. + * <code>numSamples</code> specifies the number of complex samples + * and the data in each array is stored in an interleaved fashion + * (real, imag, real, imag, ...). + * Each array has a total of <code>2*numSamples</code> values. 
+ * The underlying algorithm is used: + * + * <pre> + * for(n=0; n<numSamples; n++) { + * pDst[(2*n)+0)] = pSrc[(2*n)+0]; // real part + * pDst[(2*n)+1)] = -pSrc[(2*n)+1]; // imag part + * } + * </pre> + * + * There are separate functions for floating-point, Q15, and Q31 data types. + */ + +/** + * @addtogroup cmplx_conj + * @{ + */ + +/** + * @brief Floating-point complex conjugate. + * @param *pSrc points to the input vector + * @param *pDst points to the output vector + * @param numSamples number of complex samples in each vector + * @return none. + */ + +void arm_cmplx_conj_f32( + float32_t * pSrc, + float32_t * pDst, + uint32_t numSamples) +{ + uint32_t blkCnt; /* loop counter */ + +#if defined (ARM_MATH_DSP) + + /* Run the below code for Cortex-M4 and Cortex-M3 */ + float32_t inR1, inR2, inR3, inR4; + float32_t inI1, inI2, inI3, inI4; + + /*loop Unrolling */ + blkCnt = numSamples >> 2U; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while (blkCnt > 0U) + { + /* C[0]+jC[1] = A[0]+ j (-1) A[1] */ + /* Calculate Complex Conjugate and then store the results in the destination buffer. 
*/ + /* read real input samples */ + inR1 = pSrc[0]; + /* store real samples to destination */ + pDst[0] = inR1; + inR2 = pSrc[2]; + pDst[2] = inR2; + inR3 = pSrc[4]; + pDst[4] = inR3; + inR4 = pSrc[6]; + pDst[6] = inR4; + + /* read imaginary input samples */ + inI1 = pSrc[1]; + inI2 = pSrc[3]; + + /* conjugate input */ + inI1 = -inI1; + + /* read imaginary input samples */ + inI3 = pSrc[5]; + + /* conjugate input */ + inI2 = -inI2; + + /* read imaginary input samples */ + inI4 = pSrc[7]; + + /* conjugate input */ + inI3 = -inI3; + + /* store imaginary samples to destination */ + pDst[1] = inI1; + pDst[3] = inI2; + + /* conjugate input */ + inI4 = -inI4; + + /* store imaginary samples to destination */ + pDst[5] = inI3; + + /* increment source pointer by 8 to process next sampels */ + pSrc += 8U; + + /* store imaginary sample to destination */ + pDst[7] = inI4; + + /* increment destination pointer by 8 to store next samples */ + pDst += 8U; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* If the numSamples is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = numSamples % 0x4U; + +#else + + /* Run the below code for Cortex-M0 */ + blkCnt = numSamples; + +#endif /* #if defined (ARM_MATH_DSP) */ + + while (blkCnt > 0U) + { + /* realOut + j (imagOut) = realIn + j (-1) imagIn */ + /* Calculate Complex Conjugate and then store the results in the destination buffer. 
*/ + *pDst++ = *pSrc++; + *pDst++ = -*pSrc++; + + /* Decrement the loop counter */ + blkCnt--; + } +} + +/** + * @} end of cmplx_conj group + */ diff --git a/DSP/Source/ComplexMathFunctions/arm_cmplx_conj_q15.c b/DSP/Source/ComplexMathFunctions/arm_cmplx_conj_q15.c new file mode 100644 index 0000000..7950229 --- /dev/null +++ b/DSP/Source/ComplexMathFunctions/arm_cmplx_conj_q15.c @@ -0,0 +1,149 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_cmplx_conj_q15.c + * Description: Q15 complex conjugate + * + * $Date: 27. January 2017 + * $Revision: V.1.5.1 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arm_math.h" + +/** + * @ingroup groupCmplxMath + */ + +/** + * @addtogroup cmplx_conj + * @{ + */ + +/** + * @brief Q15 complex conjugate. + * @param *pSrc points to the input vector + * @param *pDst points to the output vector + * @param numSamples number of complex samples in each vector + * @return none. + * + * <b>Scaling and Overflow Behavior:</b> + * \par + * The function uses saturating arithmetic. + * The Q15 value -1 (0x8000) will be saturated to the maximum allowable positive value 0x7FFF. 
+ */ + +void arm_cmplx_conj_q15( + q15_t * pSrc, + q15_t * pDst, + uint32_t numSamples) +{ + +#if defined (ARM_MATH_DSP) + + /* Run the below code for Cortex-M4 and Cortex-M3 */ + uint32_t blkCnt; /* loop counter */ + q31_t in1, in2, in3, in4; + q31_t zero = 0; + + /*loop Unrolling */ + blkCnt = numSamples >> 2U; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while (blkCnt > 0U) + { + /* C[0]+jC[1] = A[0]+ j (-1) A[1] */ + /* Calculate Complex Conjugate and then store the results in the destination buffer. */ + in1 = *__SIMD32(pSrc)++; + in2 = *__SIMD32(pSrc)++; + in3 = *__SIMD32(pSrc)++; + in4 = *__SIMD32(pSrc)++; + +#ifndef ARM_MATH_BIG_ENDIAN + + in1 = __QASX(zero, in1); + in2 = __QASX(zero, in2); + in3 = __QASX(zero, in3); + in4 = __QASX(zero, in4); + +#else + + in1 = __QSAX(zero, in1); + in2 = __QSAX(zero, in2); + in3 = __QSAX(zero, in3); + in4 = __QSAX(zero, in4); + +#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ + + in1 = ((uint32_t) in1 >> 16) | ((uint32_t) in1 << 16); + in2 = ((uint32_t) in2 >> 16) | ((uint32_t) in2 << 16); + in3 = ((uint32_t) in3 >> 16) | ((uint32_t) in3 << 16); + in4 = ((uint32_t) in4 >> 16) | ((uint32_t) in4 << 16); + + *__SIMD32(pDst)++ = in1; + *__SIMD32(pDst)++ = in2; + *__SIMD32(pDst)++ = in3; + *__SIMD32(pDst)++ = in4; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* If the numSamples is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = numSamples % 0x4U; + + while (blkCnt > 0U) + { + /* C[0]+jC[1] = A[0]+ j (-1) A[1] */ + /* Calculate Complex Conjugate and then store the results in the destination buffer. 
*/ + *pDst++ = *pSrc++; + *pDst++ = __SSAT(-*pSrc++, 16); + + /* Decrement the loop counter */ + blkCnt--; + } + +#else + + q15_t in; + + /* Run the below code for Cortex-M0 */ + + while (numSamples > 0U) + { + /* realOut + j (imagOut) = realIn+ j (-1) imagIn */ + /* Calculate Complex Conjugate and then store the results in the destination buffer. */ + *pDst++ = *pSrc++; + in = *pSrc++; + *pDst++ = (in == (q15_t) 0x8000) ? 0x7fff : -in; + + /* Decrement the loop counter */ + numSamples--; + } + +#endif /* #if defined (ARM_MATH_DSP) */ + +} + +/** + * @} end of cmplx_conj group + */ diff --git a/DSP/Source/ComplexMathFunctions/arm_cmplx_conj_q31.c b/DSP/Source/ComplexMathFunctions/arm_cmplx_conj_q31.c new file mode 100644 index 0000000..709ce0e --- /dev/null +++ b/DSP/Source/ComplexMathFunctions/arm_cmplx_conj_q31.c @@ -0,0 +1,169 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_cmplx_conj_q31.c + * Description: Q31 complex conjugate + * + * $Date: 27. January 2017 + * $Revision: V.1.5.1 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "arm_math.h" + +/** + * @ingroup groupCmplxMath + */ + +/** + * @addtogroup cmplx_conj + * @{ + */ + +/** + * @brief Q31 complex conjugate. + * @param *pSrc points to the input vector + * @param *pDst points to the output vector + * @param numSamples number of complex samples in each vector + * @return none. + * + * <b>Scaling and Overflow Behavior:</b> + * \par + * The function uses saturating arithmetic. + * The Q31 value -1 (0x80000000) will be saturated to the maximum allowable positive value 0x7FFFFFFF. + */ + +void arm_cmplx_conj_q31( + q31_t * pSrc, + q31_t * pDst, + uint32_t numSamples) +{ + uint32_t blkCnt; /* loop counter */ + q31_t in; /* Input value */ + +#if defined (ARM_MATH_DSP) + + /* Run the below code for Cortex-M4 and Cortex-M3 */ + q31_t inR1, inR2, inR3, inR4; /* Temporary real variables */ + q31_t inI1, inI2, inI3, inI4; /* Temporary imaginary variables */ + + /*loop Unrolling */ + blkCnt = numSamples >> 2U; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while (blkCnt > 0U) + { + /* C[0]+jC[1] = A[0]+ j (-1) A[1] */ + /* Calculate Complex Conjugate and then store the results in the destination buffer. 
*/ + /* Saturated to 0x7fffffff if the input is -1(0x80000000) */ + /* read real input sample */ + inR1 = pSrc[0]; + /* store real input sample */ + pDst[0] = inR1; + + /* read imaginary input sample */ + inI1 = pSrc[1]; + + /* read real input sample */ + inR2 = pSrc[2]; + /* store real input sample */ + pDst[2] = inR2; + + /* read imaginary input sample */ + inI2 = pSrc[3]; + + /* negate imaginary input sample */ + inI1 = __QSUB(0, inI1); + + /* read real input sample */ + inR3 = pSrc[4]; + /* store real input sample */ + pDst[4] = inR3; + + /* read imaginary input sample */ + inI3 = pSrc[5]; + + /* negate imaginary input sample */ + inI2 = __QSUB(0, inI2); + + /* read real input sample */ + inR4 = pSrc[6]; + /* store real input sample */ + pDst[6] = inR4; + + /* negate imaginary input sample */ + inI3 = __QSUB(0, inI3); + + /* store imaginary input sample */ + inI4 = pSrc[7]; + + /* store imaginary input samples */ + pDst[1] = inI1; + + /* negate imaginary input sample */ + inI4 = __QSUB(0, inI4); + + /* store imaginary input samples */ + pDst[3] = inI2; + + /* increment source pointer by 8 to proecess next samples */ + pSrc += 8U; + + /* store imaginary input samples */ + pDst[5] = inI3; + pDst[7] = inI4; + + /* increment destination pointer by 8 to process next samples */ + pDst += 8U; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* If the numSamples is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = numSamples % 0x4U; + +#else + + /* Run the below code for Cortex-M0 */ + blkCnt = numSamples; + + +#endif /* #if defined (ARM_MATH_DSP) */ + + while (blkCnt > 0U) + { + /* C[0]+jC[1] = A[0]+ j (-1) A[1] */ + /* Calculate Complex Conjugate and then store the results in the destination buffer. */ + /* Saturated to 0x7fffffff if the input is -1(0x80000000) */ + *pDst++ = *pSrc++; + in = *pSrc++; + *pDst++ = (in == INT32_MIN) ? 
INT32_MAX : -in; + + /* Decrement the loop counter */ + blkCnt--; + } +} + +/** + * @} end of cmplx_conj group + */ diff --git a/DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_f32.c b/DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_f32.c new file mode 100644 index 0000000..bfc352b --- /dev/null +++ b/DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_f32.c @@ -0,0 +1,191 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_cmplx_dot_prod_f32.c + * Description: Floating-point complex dot product + * + * $Date: 27. January 2017 + * $Revision: V.1.5.1 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arm_math.h" + +/** + * @ingroup groupCmplxMath + */ + +/** + * @defgroup cmplx_dot_prod Complex Dot Product + * + * Computes the dot product of two complex vectors. + * The vectors are multiplied element-by-element and then summed. + * + * The <code>pSrcA</code> points to the first complex input vector and + * <code>pSrcB</code> points to the second complex input vector. + * <code>numSamples</code> specifies the number of complex samples + * and the data in each array is stored in an interleaved fashion + * (real, imag, real, imag, ...). 
+ * Each array has a total of <code>2*numSamples</code> values. + * + * The underlying algorithm is used: + * <pre> + * realResult=0; + * imagResult=0; + * for(n=0; n<numSamples; n++) { + * realResult += pSrcA[(2*n)+0]*pSrcB[(2*n)+0] - pSrcA[(2*n)+1]*pSrcB[(2*n)+1]; + * imagResult += pSrcA[(2*n)+0]*pSrcB[(2*n)+1] + pSrcA[(2*n)+1]*pSrcB[(2*n)+0]; + * } + * </pre> + * + * There are separate functions for floating-point, Q15, and Q31 data types. + */ + +/** + * @addtogroup cmplx_dot_prod + * @{ + */ + +/** + * @brief Floating-point complex dot product + * @param *pSrcA points to the first input vector + * @param *pSrcB points to the second input vector + * @param numSamples number of complex samples in each vector + * @param *realResult real part of the result returned here + * @param *imagResult imaginary part of the result returned here + * @return none. + */ + +void arm_cmplx_dot_prod_f32( + float32_t * pSrcA, + float32_t * pSrcB, + uint32_t numSamples, + float32_t * realResult, + float32_t * imagResult) +{ + float32_t real_sum = 0.0f, imag_sum = 0.0f; /* Temporary result storage */ + float32_t a0,b0,c0,d0; + +#if defined (ARM_MATH_DSP) + + /* Run the below code for Cortex-M4 and Cortex-M3 */ + uint32_t blkCnt; /* loop counter */ + + /*loop Unrolling */ + blkCnt = numSamples >> 2U; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. 
*/ + while (blkCnt > 0U) + { + a0 = *pSrcA++; + b0 = *pSrcA++; + c0 = *pSrcB++; + d0 = *pSrcB++; + + real_sum += a0 * c0; + imag_sum += a0 * d0; + real_sum -= b0 * d0; + imag_sum += b0 * c0; + + a0 = *pSrcA++; + b0 = *pSrcA++; + c0 = *pSrcB++; + d0 = *pSrcB++; + + real_sum += a0 * c0; + imag_sum += a0 * d0; + real_sum -= b0 * d0; + imag_sum += b0 * c0; + + a0 = *pSrcA++; + b0 = *pSrcA++; + c0 = *pSrcB++; + d0 = *pSrcB++; + + real_sum += a0 * c0; + imag_sum += a0 * d0; + real_sum -= b0 * d0; + imag_sum += b0 * c0; + + a0 = *pSrcA++; + b0 = *pSrcA++; + c0 = *pSrcB++; + d0 = *pSrcB++; + + real_sum += a0 * c0; + imag_sum += a0 * d0; + real_sum -= b0 * d0; + imag_sum += b0 * c0; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* If the numSamples is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = numSamples & 0x3U; + + while (blkCnt > 0U) + { + a0 = *pSrcA++; + b0 = *pSrcA++; + c0 = *pSrcB++; + d0 = *pSrcB++; + + real_sum += a0 * c0; + imag_sum += a0 * d0; + real_sum -= b0 * d0; + imag_sum += b0 * c0; + + /* Decrement the loop counter */ + blkCnt--; + } + +#else + + /* Run the below code for Cortex-M0 */ + + while (numSamples > 0U) + { + a0 = *pSrcA++; + b0 = *pSrcA++; + c0 = *pSrcB++; + d0 = *pSrcB++; + + real_sum += a0 * c0; + imag_sum += a0 * d0; + real_sum -= b0 * d0; + imag_sum += b0 * c0; + + /* Decrement the loop counter */ + numSamples--; + } + +#endif /* #if defined (ARM_MATH_DSP) */ + + /* Store the real and imaginary results in the destination buffers */ + *realResult = real_sum; + *imagResult = imag_sum; +} + +/** + * @} end of cmplx_dot_prod group + */ diff --git a/DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_q15.c b/DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_q15.c new file mode 100644 index 0000000..9e23a01 --- /dev/null +++ b/DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_q15.c @@ -0,0 +1,177 @@ +/* 
---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_cmplx_dot_prod_q15.c + * Description: Processing function for the Q15 Complex Dot product + * + * $Date: 27. January 2017 + * $Revision: V.1.5.1 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arm_math.h" + +/** + * @ingroup groupCmplxMath + */ + +/** + * @addtogroup cmplx_dot_prod + * @{ + */ + +/** + * @brief Q15 complex dot product + * @param *pSrcA points to the first input vector + * @param *pSrcB points to the second input vector + * @param numSamples number of complex samples in each vector + * @param *realResult real part of the result returned here + * @param *imagResult imaginary part of the result returned here + * @return none. + * + * <b>Scaling and Overflow Behavior:</b> + * \par + * The function is implemented using an internal 64-bit accumulator. + * The intermediate 1.15 by 1.15 multiplications are performed with full precision and yield a 2.30 result. + * These are accumulated in a 64-bit accumulator with 34.30 precision. + * As a final step, the accumulators are converted to 8.24 format. 
+ * The return results <code>realResult</code> and <code>imagResult</code> are in 8.24 format. + */ + +void arm_cmplx_dot_prod_q15( + q15_t * pSrcA, + q15_t * pSrcB, + uint32_t numSamples, + q31_t * realResult, + q31_t * imagResult) +{ + q63_t real_sum = 0, imag_sum = 0; /* Temporary result storage */ + q15_t a0,b0,c0,d0; + +#if defined (ARM_MATH_DSP) + + /* Run the below code for Cortex-M4 and Cortex-M3 */ + uint32_t blkCnt; /* loop counter */ + + + /*loop Unrolling */ + blkCnt = numSamples >> 2U; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while (blkCnt > 0U) + { + a0 = *pSrcA++; + b0 = *pSrcA++; + c0 = *pSrcB++; + d0 = *pSrcB++; + + real_sum += (q31_t)a0 * c0; + imag_sum += (q31_t)a0 * d0; + real_sum -= (q31_t)b0 * d0; + imag_sum += (q31_t)b0 * c0; + + a0 = *pSrcA++; + b0 = *pSrcA++; + c0 = *pSrcB++; + d0 = *pSrcB++; + + real_sum += (q31_t)a0 * c0; + imag_sum += (q31_t)a0 * d0; + real_sum -= (q31_t)b0 * d0; + imag_sum += (q31_t)b0 * c0; + + a0 = *pSrcA++; + b0 = *pSrcA++; + c0 = *pSrcB++; + d0 = *pSrcB++; + + real_sum += (q31_t)a0 * c0; + imag_sum += (q31_t)a0 * d0; + real_sum -= (q31_t)b0 * d0; + imag_sum += (q31_t)b0 * c0; + + a0 = *pSrcA++; + b0 = *pSrcA++; + c0 = *pSrcB++; + d0 = *pSrcB++; + + real_sum += (q31_t)a0 * c0; + imag_sum += (q31_t)a0 * d0; + real_sum -= (q31_t)b0 * d0; + imag_sum += (q31_t)b0 * c0; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* If the numSamples is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. 
*/ + blkCnt = numSamples % 0x4U; + + while (blkCnt > 0U) + { + a0 = *pSrcA++; + b0 = *pSrcA++; + c0 = *pSrcB++; + d0 = *pSrcB++; + + real_sum += (q31_t)a0 * c0; + imag_sum += (q31_t)a0 * d0; + real_sum -= (q31_t)b0 * d0; + imag_sum += (q31_t)b0 * c0; + + /* Decrement the loop counter */ + blkCnt--; + } + +#else + + /* Run the below code for Cortex-M0 */ + + while (numSamples > 0U) + { + a0 = *pSrcA++; + b0 = *pSrcA++; + c0 = *pSrcB++; + d0 = *pSrcB++; + + real_sum += a0 * c0; + imag_sum += a0 * d0; + real_sum -= b0 * d0; + imag_sum += b0 * c0; + + + /* Decrement the loop counter */ + numSamples--; + } + +#endif /* #if defined (ARM_MATH_DSP) */ + + /* Store the real and imaginary results in 8.24 format */ + /* Convert real data in 34.30 to 8.24 by 6 right shifts */ + *realResult = (q31_t) (real_sum >> 6); + /* Convert imaginary data in 34.30 to 8.24 by 6 right shifts */ + *imagResult = (q31_t) (imag_sum >> 6); +} + +/** + * @} end of cmplx_dot_prod group + */ diff --git a/DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_q31.c b/DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_q31.c new file mode 100644 index 0000000..6eb5b6e --- /dev/null +++ b/DSP/Source/ComplexMathFunctions/arm_cmplx_dot_prod_q31.c @@ -0,0 +1,175 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_cmplx_dot_prod_q31.c + * Description: Q31 complex dot product + * + * $Date: 27. January 2017 + * $Revision: V.1.5.1 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arm_math.h" + +/** + * @ingroup groupCmplxMath + */ + +/** + * @addtogroup cmplx_dot_prod + * @{ + */ + +/** + * @brief Q31 complex dot product + * @param *pSrcA points to the first input vector + * @param *pSrcB points to the second input vector + * @param numSamples number of complex samples in each vector + * @param *realResult real part of the result returned here + * @param *imagResult imaginary part of the result returned here + * @return none. + * + * <b>Scaling and Overflow Behavior:</b> + * \par + * The function is implemented using an internal 64-bit accumulator. + * The intermediate 1.31 by 1.31 multiplications are performed with 64-bit precision and then shifted to 16.48 format. + * The internal real and imaginary accumulators are in 16.48 format and provide 15 guard bits. + * Additions are nonsaturating and no overflow will occur as long as <code>numSamples</code> is less than 32768. + * The return results <code>realResult</code> and <code>imagResult</code> are in 16.48 format. + * Input down scaling is not required. + */ + +void arm_cmplx_dot_prod_q31( + q31_t * pSrcA, + q31_t * pSrcB, + uint32_t numSamples, + q63_t * realResult, + q63_t * imagResult) +{ + q63_t real_sum = 0, imag_sum = 0; /* Temporary result storage */ + q31_t a0,b0,c0,d0; + +#if defined (ARM_MATH_DSP) + + /* Run the below code for Cortex-M4 and Cortex-M3 */ + uint32_t blkCnt; /* loop counter */ + + + /*loop Unrolling */ + blkCnt = numSamples >> 2U; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. 
+ ** a second loop below computes the remaining 1 to 3 samples. */ + while (blkCnt > 0U) + { + a0 = *pSrcA++; + b0 = *pSrcA++; + c0 = *pSrcB++; + d0 = *pSrcB++; + + real_sum += ((q63_t)a0 * c0) >> 14; + imag_sum += ((q63_t)a0 * d0) >> 14; + real_sum -= ((q63_t)b0 * d0) >> 14; + imag_sum += ((q63_t)b0 * c0) >> 14; + + a0 = *pSrcA++; + b0 = *pSrcA++; + c0 = *pSrcB++; + d0 = *pSrcB++; + + real_sum += ((q63_t)a0 * c0) >> 14; + imag_sum += ((q63_t)a0 * d0) >> 14; + real_sum -= ((q63_t)b0 * d0) >> 14; + imag_sum += ((q63_t)b0 * c0) >> 14; + + a0 = *pSrcA++; + b0 = *pSrcA++; + c0 = *pSrcB++; + d0 = *pSrcB++; + + real_sum += ((q63_t)a0 * c0) >> 14; + imag_sum += ((q63_t)a0 * d0) >> 14; + real_sum -= ((q63_t)b0 * d0) >> 14; + imag_sum += ((q63_t)b0 * c0) >> 14; + + a0 = *pSrcA++; + b0 = *pSrcA++; + c0 = *pSrcB++; + d0 = *pSrcB++; + + real_sum += ((q63_t)a0 * c0) >> 14; + imag_sum += ((q63_t)a0 * d0) >> 14; + real_sum -= ((q63_t)b0 * d0) >> 14; + imag_sum += ((q63_t)b0 * c0) >> 14; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* If the numSamples is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. 
*/ + blkCnt = numSamples % 0x4U; + + while (blkCnt > 0U) + { + a0 = *pSrcA++; + b0 = *pSrcA++; + c0 = *pSrcB++; + d0 = *pSrcB++; + + real_sum += ((q63_t)a0 * c0) >> 14; + imag_sum += ((q63_t)a0 * d0) >> 14; + real_sum -= ((q63_t)b0 * d0) >> 14; + imag_sum += ((q63_t)b0 * c0) >> 14; + + /* Decrement the loop counter */ + blkCnt--; + } + +#else + + /* Run the below code for Cortex-M0 */ + + while (numSamples > 0U) + { + a0 = *pSrcA++; + b0 = *pSrcA++; + c0 = *pSrcB++; + d0 = *pSrcB++; + + real_sum += ((q63_t)a0 * c0) >> 14; + imag_sum += ((q63_t)a0 * d0) >> 14; + real_sum -= ((q63_t)b0 * d0) >> 14; + imag_sum += ((q63_t)b0 * c0) >> 14; + + /* Decrement the loop counter */ + numSamples--; + } + +#endif /* #if defined (ARM_MATH_DSP) */ + + /* Store the real and imaginary results in 16.48 format */ + *realResult = real_sum; + *imagResult = imag_sum; +} + +/** + * @} end of cmplx_dot_prod group + */ diff --git a/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_f32.c b/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_f32.c new file mode 100644 index 0000000..95aaf1e --- /dev/null +++ b/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_f32.c @@ -0,0 +1,153 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_cmplx_mag_f32.c + * Description: Floating-point complex magnitude + * + * $Date: 27. January 2017 + * $Revision: V.1.5.1 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arm_math.h" + +/** + * @ingroup groupCmplxMath + */ + +/** + * @defgroup cmplx_mag Complex Magnitude + * + * Computes the magnitude of the elements of a complex data vector. + * + * The <code>pSrc</code> points to the source data and + * <code>pDst</code> points to the where the result should be written. + * <code>numSamples</code> specifies the number of complex samples + * in the input array and the data is stored in an interleaved fashion + * (real, imag, real, imag, ...). + * The input array has a total of <code>2*numSamples</code> values; + * the output array has a total of <code>numSamples</code> values. + * The underlying algorithm is used: + * + * <pre> + * for(n=0; n<numSamples; n++) { + * pDst[n] = sqrt(pSrc[(2*n)+0]^2 + pSrc[(2*n)+1]^2); + * } + * </pre> + * + * There are separate functions for floating-point, Q15, and Q31 data types. + */ + +/** + * @addtogroup cmplx_mag + * @{ + */ +/** + * @brief Floating-point complex magnitude. + * @param[in] *pSrc points to complex input buffer + * @param[out] *pDst points to real output buffer + * @param[in] numSamples number of complex samples in the input vector + * @return none. + * + */ + + +void arm_cmplx_mag_f32( + float32_t * pSrc, + float32_t * pDst, + uint32_t numSamples) +{ + float32_t realIn, imagIn; /* Temporary variables to hold input values */ + +#if defined (ARM_MATH_DSP) + + /* Run the below code for Cortex-M4 and Cortex-M3 */ + uint32_t blkCnt; /* loop counter */ + + /*loop Unrolling */ + blkCnt = numSamples >> 2U; + + /* First part of the processing with loop unrolling. 
Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while (blkCnt > 0U) + { + + /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */ + realIn = *pSrc++; + imagIn = *pSrc++; + /* store the result in the destination buffer. */ + arm_sqrt_f32((realIn * realIn) + (imagIn * imagIn), pDst++); + + realIn = *pSrc++; + imagIn = *pSrc++; + arm_sqrt_f32((realIn * realIn) + (imagIn * imagIn), pDst++); + + realIn = *pSrc++; + imagIn = *pSrc++; + arm_sqrt_f32((realIn * realIn) + (imagIn * imagIn), pDst++); + + realIn = *pSrc++; + imagIn = *pSrc++; + arm_sqrt_f32((realIn * realIn) + (imagIn * imagIn), pDst++); + + + /* Decrement the loop counter */ + blkCnt--; + } + + /* If the numSamples is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = numSamples % 0x4U; + + while (blkCnt > 0U) + { + /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */ + realIn = *pSrc++; + imagIn = *pSrc++; + /* store the result in the destination buffer. */ + arm_sqrt_f32((realIn * realIn) + (imagIn * imagIn), pDst++); + + /* Decrement the loop counter */ + blkCnt--; + } + +#else + + /* Run the below code for Cortex-M0 */ + + while (numSamples > 0U) + { + /* out = sqrt((real * real) + (imag * imag)) */ + realIn = *pSrc++; + imagIn = *pSrc++; + /* store the result in the destination buffer. 
*/ + arm_sqrt_f32((realIn * realIn) + (imagIn * imagIn), pDst++); + + /* Decrement the loop counter */ + numSamples--; + } + +#endif /* #if defined (ARM_MATH_DSP) */ + +} + +/** + * @} end of cmplx_mag group + */ diff --git a/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_q15.c b/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_q15.c new file mode 100644 index 0000000..03d9b2a --- /dev/null +++ b/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_q15.c @@ -0,0 +1,141 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_cmplx_mag_q15.c + * Description: Q15 complex magnitude + * + * $Date: 27. January 2017 + * $Revision: V.1.5.1 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arm_math.h" + +/** + * @ingroup groupCmplxMath + */ + +/** + * @addtogroup cmplx_mag + * @{ + */ + + +/** + * @brief Q15 complex magnitude + * @param *pSrc points to the complex input vector + * @param *pDst points to the real output vector + * @param numSamples number of complex samples in the input vector + * @return none. 
+ * + * <b>Scaling and Overflow Behavior:</b> + * \par + * The function implements 1.15 by 1.15 multiplications and finally output is converted into 2.14 format. + */ + +void arm_cmplx_mag_q15( + q15_t * pSrc, + q15_t * pDst, + uint32_t numSamples) +{ + q31_t acc0, acc1; /* Accumulators */ + +#if defined (ARM_MATH_DSP) + + /* Run the below code for Cortex-M4 and Cortex-M3 */ + uint32_t blkCnt; /* loop counter */ + q31_t in1, in2, in3, in4; + q31_t acc2, acc3; + + + /*loop Unrolling */ + blkCnt = numSamples >> 2U; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while (blkCnt > 0U) + { + + /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */ + in1 = *__SIMD32(pSrc)++; + in2 = *__SIMD32(pSrc)++; + in3 = *__SIMD32(pSrc)++; + in4 = *__SIMD32(pSrc)++; + + acc0 = __SMUAD(in1, in1); + acc1 = __SMUAD(in2, in2); + acc2 = __SMUAD(in3, in3); + acc3 = __SMUAD(in4, in4); + + /* store the result in 2.14 format in the destination buffer. */ + arm_sqrt_q15((q15_t) ((acc0) >> 17), pDst++); + arm_sqrt_q15((q15_t) ((acc1) >> 17), pDst++); + arm_sqrt_q15((q15_t) ((acc2) >> 17), pDst++); + arm_sqrt_q15((q15_t) ((acc3) >> 17), pDst++); + + /* Decrement the loop counter */ + blkCnt--; + } + + /* If the numSamples is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = numSamples % 0x4U; + + while (blkCnt > 0U) + { + /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */ + in1 = *__SIMD32(pSrc)++; + acc0 = __SMUAD(in1, in1); + + /* store the result in 2.14 format in the destination buffer. 
*/ + arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++); + + /* Decrement the loop counter */ + blkCnt--; + } + +#else + + /* Run the below code for Cortex-M0 */ + q15_t real, imag; /* Temporary variables to hold input values */ + + while (numSamples > 0U) + { + /* out = sqrt(real * real + imag * imag) */ + real = *pSrc++; + imag = *pSrc++; + + acc0 = (real * real); + acc1 = (imag * imag); + + /* store the result in 2.14 format in the destination buffer. */ + arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++); + + /* Decrement the loop counter */ + numSamples--; + } + +#endif /* #if defined (ARM_MATH_DSP) */ + +} + +/** + * @} end of cmplx_mag group + */ diff --git a/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_q31.c b/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_q31.c new file mode 100644 index 0000000..830ecb9 --- /dev/null +++ b/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_q31.c @@ -0,0 +1,173 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_cmplx_mag_q31.c + * Description: Q31 complex magnitude + * + * $Date: 27. January 2017 + * $Revision: V.1.5.1 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "arm_math.h" + +/** + * @ingroup groupCmplxMath + */ + +/** + * @addtogroup cmplx_mag + * @{ + */ + +/** + * @brief Q31 complex magnitude + * @param *pSrc points to the complex input vector + * @param *pDst points to the real output vector + * @param numSamples number of complex samples in the input vector + * @return none. + * + * <b>Scaling and Overflow Behavior:</b> + * \par + * The function implements 1.31 by 1.31 multiplications and finally output is converted into 2.30 format. + * Input down scaling is not required. + */ + +void arm_cmplx_mag_q31( + q31_t * pSrc, + q31_t * pDst, + uint32_t numSamples) +{ + q31_t real, imag; /* Temporary variables to hold input values */ + q31_t acc0, acc1; /* Accumulators */ + uint32_t blkCnt; /* loop counter */ + +#if defined (ARM_MATH_DSP) + + /* Run the below code for Cortex-M4 and Cortex-M3 */ + q31_t real1, real2, imag1, imag2; /* Temporary variables to hold input values */ + q31_t out1, out2, out3, out4; /* Accumulators */ + q63_t mul1, mul2, mul3, mul4; /* Temporary variables */ + + + /*loop Unrolling */ + blkCnt = numSamples >> 2U; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. 
*/ + while (blkCnt > 0U) + { + /* read complex input from source buffer */ + real1 = pSrc[0]; + imag1 = pSrc[1]; + real2 = pSrc[2]; + imag2 = pSrc[3]; + + /* calculate power of input values */ + mul1 = (q63_t) real1 *real1; + mul2 = (q63_t) imag1 *imag1; + mul3 = (q63_t) real2 *real2; + mul4 = (q63_t) imag2 *imag2; + + /* get the result to 3.29 format */ + out1 = (q31_t) (mul1 >> 33); + out2 = (q31_t) (mul2 >> 33); + out3 = (q31_t) (mul3 >> 33); + out4 = (q31_t) (mul4 >> 33); + + /* add real and imaginary accumulators */ + out1 = out1 + out2; + out3 = out3 + out4; + + /* read complex input from source buffer */ + real1 = pSrc[4]; + imag1 = pSrc[5]; + real2 = pSrc[6]; + imag2 = pSrc[7]; + + /* calculate square root */ + arm_sqrt_q31(out1, &pDst[0]); + + /* calculate power of input values */ + mul1 = (q63_t) real1 *real1; + + /* calculate square root */ + arm_sqrt_q31(out3, &pDst[1]); + + /* calculate power of input values */ + mul2 = (q63_t) imag1 *imag1; + mul3 = (q63_t) real2 *real2; + mul4 = (q63_t) imag2 *imag2; + + /* get the result to 3.29 format */ + out1 = (q31_t) (mul1 >> 33); + out2 = (q31_t) (mul2 >> 33); + out3 = (q31_t) (mul3 >> 33); + out4 = (q31_t) (mul4 >> 33); + + /* add real and imaginary accumulators */ + out1 = out1 + out2; + out3 = out3 + out4; + + /* calculate square root */ + arm_sqrt_q31(out1, &pDst[2]); + + /* increment destination by 8 to process next samples */ + pSrc += 8U; + + /* calculate square root */ + arm_sqrt_q31(out3, &pDst[3]); + + /* increment destination by 4 to process next samples */ + pDst += 4U; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* If the numSamples is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. 
*/ + blkCnt = numSamples % 0x4U; + +#else + + /* Run the below code for Cortex-M0 */ + blkCnt = numSamples; + +#endif /* #if defined (ARM_MATH_DSP) */ + + while (blkCnt > 0U) + { + /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */ + real = *pSrc++; + imag = *pSrc++; + acc0 = (q31_t) (((q63_t) real * real) >> 33); + acc1 = (q31_t) (((q63_t) imag * imag) >> 33); + /* store the result in 2.30 format in the destination buffer. */ + arm_sqrt_q31(acc0 + acc1, pDst++); + + /* Decrement the loop counter */ + blkCnt--; + } +} + +/** + * @} end of cmplx_mag group + */ diff --git a/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_squared_f32.c b/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_squared_f32.c new file mode 100644 index 0000000..59127a2 --- /dev/null +++ b/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_squared_f32.c @@ -0,0 +1,204 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_cmplx_mag_squared_f32.c + * Description: Floating-point complex magnitude squared + * + * $Date: 27. January 2017 + * $Revision: V.1.5.1 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "arm_math.h" + +/** + * @ingroup groupCmplxMath + */ + +/** + * @defgroup cmplx_mag_squared Complex Magnitude Squared + * + * Computes the magnitude squared of the elements of a complex data vector. + * + * The <code>pSrc</code> points to the source data and + * <code>pDst</code> points to the where the result should be written. + * <code>numSamples</code> specifies the number of complex samples + * in the input array and the data is stored in an interleaved fashion + * (real, imag, real, imag, ...). + * The input array has a total of <code>2*numSamples</code> values; + * the output array has a total of <code>numSamples</code> values. + * + * The underlying algorithm is used: + * + * <pre> + * for(n=0; n<numSamples; n++) { + * pDst[n] = pSrc[(2*n)+0]^2 + pSrc[(2*n)+1]^2; + * } + * </pre> + * + * There are separate functions for floating-point, Q15, and Q31 data types. + */ + +/** + * @addtogroup cmplx_mag_squared + * @{ + */ + + +/** + * @brief Floating-point complex magnitude squared + * @param[in] *pSrc points to the complex input vector + * @param[out] *pDst points to the real output vector + * @param[in] numSamples number of complex samples in the input vector + * @return none. + */ + +void arm_cmplx_mag_squared_f32( + float32_t * pSrc, + float32_t * pDst, + uint32_t numSamples) +{ + float32_t real, imag; /* Temporary variables to store real and imaginary values */ + uint32_t blkCnt; /* loop counter */ + +#if defined (ARM_MATH_DSP) + float32_t real1, real2, real3, real4; /* Temporary variables to hold real values */ + float32_t imag1, imag2, imag3, imag4; /* Temporary variables to hold imaginary values */ + float32_t mul1, mul2, mul3, mul4; /* Temporary variables */ + float32_t mul5, mul6, mul7, mul8; /* Temporary variables */ + float32_t out1, out2, out3, out4; /* Temporary variables to hold output values */ + + /*loop Unrolling */ + blkCnt = numSamples >> 2U; + + /* First part of the processing with loop unrolling. 
Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while (blkCnt > 0U) + { + /* C[0] = (A[0] * A[0] + A[1] * A[1]) */ + /* read real input sample from source buffer */ + real1 = pSrc[0]; + /* read imaginary input sample from source buffer */ + imag1 = pSrc[1]; + + /* calculate power of real value */ + mul1 = real1 * real1; + + /* read real input sample from source buffer */ + real2 = pSrc[2]; + + /* calculate power of imaginary value */ + mul2 = imag1 * imag1; + + /* read imaginary input sample from source buffer */ + imag2 = pSrc[3]; + + /* calculate power of real value */ + mul3 = real2 * real2; + + /* read real input sample from source buffer */ + real3 = pSrc[4]; + + /* calculate power of imaginary value */ + mul4 = imag2 * imag2; + + /* read imaginary input sample from source buffer */ + imag3 = pSrc[5]; + + /* calculate power of real value */ + mul5 = real3 * real3; + /* calculate power of imaginary value */ + mul6 = imag3 * imag3; + + /* read real input sample from source buffer */ + real4 = pSrc[6]; + + /* accumulate real and imaginary powers */ + out1 = mul1 + mul2; + + /* read imaginary input sample from source buffer */ + imag4 = pSrc[7]; + + /* accumulate real and imaginary powers */ + out2 = mul3 + mul4; + + /* calculate power of real value */ + mul7 = real4 * real4; + /* calculate power of imaginary value */ + mul8 = imag4 * imag4; + + /* store output to destination */ + pDst[0] = out1; + + /* accumulate real and imaginary powers */ + out3 = mul5 + mul6; + + /* store output to destination */ + pDst[1] = out2; + + /* accumulate real and imaginary powers */ + out4 = mul7 + mul8; + + /* store output to destination */ + pDst[2] = out3; + + /* increment destination pointer by 8 to process next samples */ + pSrc += 8U; + + /* store output to destination */ + pDst[3] = out4; + + /* increment destination pointer by 4 to process next samples */ + pDst += 4U; + + /* Decrement the loop counter */ + blkCnt--; + } + + 
/* If the numSamples is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = numSamples % 0x4U; + +#else + + /* Run the below code for Cortex-M0 */ + + blkCnt = numSamples; + +#endif /* #if defined (ARM_MATH_DSP) */ + + while (blkCnt > 0U) + { + /* C[0] = (A[0] * A[0] + A[1] * A[1]) */ + real = *pSrc++; + imag = *pSrc++; + + /* out = (real * real) + (imag * imag) */ + /* store the result in the destination buffer. */ + *pDst++ = (real * real) + (imag * imag); + + /* Decrement the loop counter */ + blkCnt--; + } +} + +/** + * @} end of cmplx_mag_squared group + */ diff --git a/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_squared_q15.c b/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_squared_q15.c new file mode 100644 index 0000000..3f740c3 --- /dev/null +++ b/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_squared_q15.c @@ -0,0 +1,136 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_cmplx_mag_squared_q15.c + * Description: Q15 complex magnitude squared + * + * $Date: 27. January 2017 + * $Revision: V.1.5.1 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "arm_math.h" + +/** + * @ingroup groupCmplxMath + */ + +/** + * @addtogroup cmplx_mag_squared + * @{ + */ + +/** + * @brief Q15 complex magnitude squared + * @param *pSrc points to the complex input vector + * @param *pDst points to the real output vector + * @param numSamples number of complex samples in the input vector + * @return none. + * + * <b>Scaling and Overflow Behavior:</b> + * \par + * The function implements 1.15 by 1.15 multiplications and finally output is converted into 3.13 format. + */ + +void arm_cmplx_mag_squared_q15( + q15_t * pSrc, + q15_t * pDst, + uint32_t numSamples) +{ + q31_t acc0, acc1; /* Accumulators */ + +#if defined (ARM_MATH_DSP) + + /* Run the below code for Cortex-M4 and Cortex-M3 */ + uint32_t blkCnt; /* loop counter */ + q31_t in1, in2, in3, in4; + q31_t acc2, acc3; + + /*loop Unrolling */ + blkCnt = numSamples >> 2U; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while (blkCnt > 0U) + { + /* C[0] = (A[0] * A[0] + A[1] * A[1]) */ + in1 = *__SIMD32(pSrc)++; + in2 = *__SIMD32(pSrc)++; + in3 = *__SIMD32(pSrc)++; + in4 = *__SIMD32(pSrc)++; + + acc0 = __SMUAD(in1, in1); + acc1 = __SMUAD(in2, in2); + acc2 = __SMUAD(in3, in3); + acc3 = __SMUAD(in4, in4); + + /* store the result in 3.13 format in the destination buffer. */ + *pDst++ = (q15_t) (acc0 >> 17); + *pDst++ = (q15_t) (acc1 >> 17); + *pDst++ = (q15_t) (acc2 >> 17); + *pDst++ = (q15_t) (acc3 >> 17); + + /* Decrement the loop counter */ + blkCnt--; + } + + /* If the numSamples is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = numSamples % 0x4U; + + while (blkCnt > 0U) + { + /* C[0] = (A[0] * A[0] + A[1] * A[1]) */ + in1 = *__SIMD32(pSrc)++; + acc0 = __SMUAD(in1, in1); + + /* store the result in 3.13 format in the destination buffer. 
*/ + *pDst++ = (q15_t) (acc0 >> 17); + + /* Decrement the loop counter */ + blkCnt--; + } + +#else + + /* Run the below code for Cortex-M0 */ + q15_t real, imag; /* Temporary variables to store real and imaginary values */ + + while (numSamples > 0U) + { + /* out = ((real * real) + (imag * imag)) */ + real = *pSrc++; + imag = *pSrc++; + acc0 = (real * real); + acc1 = (imag * imag); + /* store the result in 3.13 format in the destination buffer. */ + *pDst++ = (q15_t) (((q63_t) acc0 + acc1) >> 17); + + /* Decrement the loop counter */ + numSamples--; + } + +#endif /* #if defined (ARM_MATH_DSP) */ + +} + +/** + * @} end of cmplx_mag_squared group + */ diff --git a/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_squared_q31.c b/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_squared_q31.c new file mode 100644 index 0000000..c2b2c50 --- /dev/null +++ b/DSP/Source/ComplexMathFunctions/arm_cmplx_mag_squared_q31.c @@ -0,0 +1,149 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_cmplx_mag_squared_q31.c + * Description: Q31 complex magnitude squared + * + * $Date: 27. January 2017 + * $Revision: V.1.5.1 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "arm_math.h" + +/** + * @ingroup groupCmplxMath + */ + +/** + * @addtogroup cmplx_mag_squared + * @{ + */ + + +/** + * @brief Q31 complex magnitude squared + * @param *pSrc points to the complex input vector + * @param *pDst points to the real output vector + * @param numSamples number of complex samples in the input vector + * @return none. + * + * <b>Scaling and Overflow Behavior:</b> + * \par + * The function implements 1.31 by 1.31 multiplications and finally output is converted into 3.29 format. + * Input down scaling is not required. + */ + +void arm_cmplx_mag_squared_q31( + q31_t * pSrc, + q31_t * pDst, + uint32_t numSamples) +{ + q31_t real, imag; /* Temporary variables to store real and imaginary values */ + q31_t acc0, acc1; /* Accumulators */ + +#if defined (ARM_MATH_DSP) + + /* Run the below code for Cortex-M4 and Cortex-M3 */ + uint32_t blkCnt; /* loop counter */ + + /* loop Unrolling */ + blkCnt = numSamples >> 2U; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while (blkCnt > 0U) + { + /* C[0] = (A[0] * A[0] + A[1] * A[1]) */ + real = *pSrc++; + imag = *pSrc++; + acc0 = (q31_t) (((q63_t) real * real) >> 33); + acc1 = (q31_t) (((q63_t) imag * imag) >> 33); + /* store the result in 3.29 format in the destination buffer. */ + *pDst++ = acc0 + acc1; + + real = *pSrc++; + imag = *pSrc++; + acc0 = (q31_t) (((q63_t) real * real) >> 33); + acc1 = (q31_t) (((q63_t) imag * imag) >> 33); + /* store the result in 3.29 format in the destination buffer. */ + *pDst++ = acc0 + acc1; + + real = *pSrc++; + imag = *pSrc++; + acc0 = (q31_t) (((q63_t) real * real) >> 33); + acc1 = (q31_t) (((q63_t) imag * imag) >> 33); + /* store the result in 3.29 format in the destination buffer. 
*/ + *pDst++ = acc0 + acc1; + + real = *pSrc++; + imag = *pSrc++; + acc0 = (q31_t) (((q63_t) real * real) >> 33); + acc1 = (q31_t) (((q63_t) imag * imag) >> 33); + /* store the result in 3.29 format in the destination buffer. */ + *pDst++ = acc0 + acc1; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* If the numSamples is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = numSamples % 0x4U; + + while (blkCnt > 0U) + { + /* C[0] = (A[0] * A[0] + A[1] * A[1]) */ + real = *pSrc++; + imag = *pSrc++; + acc0 = (q31_t) (((q63_t) real * real) >> 33); + acc1 = (q31_t) (((q63_t) imag * imag) >> 33); + /* store the result in 3.29 format in the destination buffer. */ + *pDst++ = acc0 + acc1; + + /* Decrement the loop counter */ + blkCnt--; + } + +#else + + /* Run the below code for Cortex-M0 */ + + while (numSamples > 0U) + { + /* out = ((real * real) + (imag * imag)) */ + real = *pSrc++; + imag = *pSrc++; + acc0 = (q31_t) (((q63_t) real * real) >> 33); + acc1 = (q31_t) (((q63_t) imag * imag) >> 33); + /* store the result in 3.29 format in the destination buffer. */ + *pDst++ = acc0 + acc1; + + /* Decrement the loop counter */ + numSamples--; + } + +#endif /* #if defined (ARM_MATH_DSP) */ + +} + +/** + * @} end of cmplx_mag_squared group + */ diff --git a/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_f32.c b/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_f32.c new file mode 100644 index 0000000..3717591 --- /dev/null +++ b/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_f32.c @@ -0,0 +1,196 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_cmplx_mult_cmplx_f32.c + * Description: Floating-point complex-by-complex multiplication + * + * $Date: 27. 
January 2017 + * $Revision: V.1.5.1 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arm_math.h" + +/** + * @ingroup groupCmplxMath + */ + +/** + * @defgroup CmplxByCmplxMult Complex-by-Complex Multiplication + * + * Multiplies a complex vector by another complex vector and generates a complex result. + * The data in the complex arrays is stored in an interleaved fashion + * (real, imag, real, imag, ...). + * The parameter <code>numSamples</code> represents the number of complex + * samples processed. The complex arrays have a total of <code>2*numSamples</code> + * real values. + * + * The underlying algorithm is used: + * + * <pre> + * for(n=0; n<numSamples; n++) { + * pDst[(2*n)+0] = pSrcA[(2*n)+0] * pSrcB[(2*n)+0] - pSrcA[(2*n)+1] * pSrcB[(2*n)+1]; + * pDst[(2*n)+1] = pSrcA[(2*n)+0] * pSrcB[(2*n)+1] + pSrcA[(2*n)+1] * pSrcB[(2*n)+0]; + * } + * </pre> + * + * There are separate functions for floating-point, Q15, and Q31 data types. 
+ */ + +/** + * @addtogroup CmplxByCmplxMult + * @{ + */ + + +/** + * @brief Floating-point complex-by-complex multiplication + * @param[in] *pSrcA points to the first input vector + * @param[in] *pSrcB points to the second input vector + * @param[out] *pDst points to the output vector + * @param[in] numSamples number of complex samples in each vector + * @return none. + */ + +void arm_cmplx_mult_cmplx_f32( + float32_t * pSrcA, + float32_t * pSrcB, + float32_t * pDst, + uint32_t numSamples) +{ + float32_t a1, b1, c1, d1; /* Temporary variables to store real and imaginary values */ + uint32_t blkCnt; /* loop counters */ + +#if defined (ARM_MATH_DSP) + + /* Run the below code for Cortex-M4 and Cortex-M3 */ + float32_t a2, b2, c2, d2; /* Temporary variables to store real and imaginary values */ + float32_t acc1, acc2, acc3, acc4; + + + /* loop Unrolling */ + blkCnt = numSamples >> 2U; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while (blkCnt > 0U) + { + /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */ + /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. 
*/ + a1 = *pSrcA; /* A[2 * i] */ + c1 = *pSrcB; /* B[2 * i] */ + + b1 = *(pSrcA + 1); /* A[2 * i + 1] */ + acc1 = a1 * c1; /* acc1 = A[2 * i] * B[2 * i] */ + + a2 = *(pSrcA + 2); /* A[2 * i + 2] */ + acc2 = (b1 * c1); /* acc2 = A[2 * i + 1] * B[2 * i] */ + + d1 = *(pSrcB + 1); /* B[2 * i + 1] */ + c2 = *(pSrcB + 2); /* B[2 * i + 2] */ + acc1 -= b1 * d1; /* acc1 = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1] */ + + d2 = *(pSrcB + 3); /* B[2 * i + 3] */ + acc3 = a2 * c2; /* acc3 = A[2 * i + 2] * B[2 * i + 2] */ + + b2 = *(pSrcA + 3); /* A[2 * i + 3] */ + acc2 += (a1 * d1); /* acc2 = A[2 * i + 1] * B[2 * i] + A[2 * i] * B[2 * i + 1] */ + + a1 = *(pSrcA + 4); /* A[2 * i + 4] */ + acc4 = (a2 * d2); /* acc4 = A[2 * i + 2] * B[2 * i + 3] */ + + c1 = *(pSrcB + 4); /* B[2 * i + 4] */ + acc3 -= (b2 * d2); /* acc3 = A[2 * i + 2] * B[2 * i + 2] - A[2 * i + 3] * B[2 * i + 3] */ + *pDst = acc1; /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1] */ + + b1 = *(pSrcA + 5); /* A[2 * i + 5] */ + acc4 += b2 * c2; /* acc4 = A[2 * i + 2] * B[2 * i + 3] + A[2 * i + 3] * B[2 * i + 2] */ + + *(pDst + 1) = acc2; /* C[2 * i + 1] = A[2 * i + 1] * B[2 * i] + A[2 * i] * B[2 * i + 1] */ + acc1 = (a1 * c1); + + d1 = *(pSrcB + 5); + acc2 = (b1 * c1); + + *(pDst + 2) = acc3; + *(pDst + 3) = acc4; + + a2 = *(pSrcA + 6); + acc1 -= (b1 * d1); + + c2 = *(pSrcB + 6); + acc2 += (a1 * d1); + + b2 = *(pSrcA + 7); + acc3 = (a2 * c2); + + d2 = *(pSrcB + 7); + acc4 = (b2 * c2); + + *(pDst + 4) = acc1; + pSrcA += 8U; + + acc3 -= (b2 * d2); + acc4 += (a2 * d2); + + *(pDst + 5) = acc2; + pSrcB += 8U; + + *(pDst + 6) = acc3; + *(pDst + 7) = acc4; + + pDst += 8U; + + /* Decrement the numSamples loop counter */ + blkCnt--; + } + + /* If the numSamples is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. 
*/ + blkCnt = numSamples % 0x4U; + +#else + + /* Run the below code for Cortex-M0 */ + blkCnt = numSamples; + +#endif /* #if defined (ARM_MATH_DSP) */ + + while (blkCnt > 0U) + { + /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */ + /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */ + a1 = *pSrcA++; + b1 = *pSrcA++; + c1 = *pSrcB++; + d1 = *pSrcB++; + + /* store the result in the destination buffer. */ + *pDst++ = (a1 * c1) - (b1 * d1); + *pDst++ = (a1 * d1) + (b1 * c1); + + /* Decrement the numSamples loop counter */ + blkCnt--; + } +} + +/** + * @} end of CmplxByCmplxMult group + */ diff --git a/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_q15.c b/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_q15.c new file mode 100644 index 0000000..2869837 --- /dev/null +++ b/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_q15.c @@ -0,0 +1,181 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_cmplx_mult_cmplx_q15.c + * Description: Q15 complex-by-complex multiplication + * + * $Date: 27. January 2017 + * $Revision: V.1.5.1 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "arm_math.h" + +/** + * @ingroup groupCmplxMath + */ + +/** + * @addtogroup CmplxByCmplxMult + * @{ + */ + +/** + * @brief Q15 complex-by-complex multiplication + * @param[in] *pSrcA points to the first input vector + * @param[in] *pSrcB points to the second input vector + * @param[out] *pDst points to the output vector + * @param[in] numSamples number of complex samples in each vector + * @return none. + * + * <b>Scaling and Overflow Behavior:</b> + * \par + * The function implements 1.15 by 1.15 multiplications and finally output is converted into 3.13 format. + */ + +void arm_cmplx_mult_cmplx_q15( + q15_t * pSrcA, + q15_t * pSrcB, + q15_t * pDst, + uint32_t numSamples) +{ + q15_t a, b, c, d; /* Temporary variables to store real and imaginary values */ + +#if defined (ARM_MATH_DSP) + + /* Run the below code for Cortex-M4 and Cortex-M3 */ + uint32_t blkCnt; /* loop counters */ + + /* loop Unrolling */ + blkCnt = numSamples >> 2U; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while (blkCnt > 0U) + { + /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */ + /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */ + a = *pSrcA++; + b = *pSrcA++; + c = *pSrcB++; + d = *pSrcB++; + + /* store the result in 3.13 format in the destination buffer. */ + *pDst++ = + (q15_t) (q31_t) (((q31_t) a * c) >> 17) - (((q31_t) b * d) >> 17); + /* store the result in 3.13 format in the destination buffer. */ + *pDst++ = + (q15_t) (q31_t) (((q31_t) a * d) >> 17) + (((q31_t) b * c) >> 17); + + a = *pSrcA++; + b = *pSrcA++; + c = *pSrcB++; + d = *pSrcB++; + + /* store the result in 3.13 format in the destination buffer. */ + *pDst++ = + (q15_t) (q31_t) (((q31_t) a * c) >> 17) - (((q31_t) b * d) >> 17); + /* store the result in 3.13 format in the destination buffer. 
*/ + *pDst++ = + (q15_t) (q31_t) (((q31_t) a * d) >> 17) + (((q31_t) b * c) >> 17); + + a = *pSrcA++; + b = *pSrcA++; + c = *pSrcB++; + d = *pSrcB++; + + /* store the result in 3.13 format in the destination buffer. */ + *pDst++ = + (q15_t) (q31_t) (((q31_t) a * c) >> 17) - (((q31_t) b * d) >> 17); + /* store the result in 3.13 format in the destination buffer. */ + *pDst++ = + (q15_t) (q31_t) (((q31_t) a * d) >> 17) + (((q31_t) b * c) >> 17); + + a = *pSrcA++; + b = *pSrcA++; + c = *pSrcB++; + d = *pSrcB++; + + /* store the result in 3.13 format in the destination buffer. */ + *pDst++ = + (q15_t) (q31_t) (((q31_t) a * c) >> 17) - (((q31_t) b * d) >> 17); + /* store the result in 3.13 format in the destination buffer. */ + *pDst++ = + (q15_t) (q31_t) (((q31_t) a * d) >> 17) + (((q31_t) b * c) >> 17); + + /* Decrement the blockSize loop counter */ + blkCnt--; + } + + /* If the blockSize is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = numSamples % 0x4U; + + while (blkCnt > 0U) + { + /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */ + /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */ + a = *pSrcA++; + b = *pSrcA++; + c = *pSrcB++; + d = *pSrcB++; + + /* store the result in 3.13 format in the destination buffer. */ + *pDst++ = + (q15_t) (q31_t) (((q31_t) a * c) >> 17) - (((q31_t) b * d) >> 17); + /* store the result in 3.13 format in the destination buffer. */ + *pDst++ = + (q15_t) (q31_t) (((q31_t) a * d) >> 17) + (((q31_t) b * c) >> 17); + + /* Decrement the blockSize loop counter */ + blkCnt--; + } + +#else + + /* Run the below code for Cortex-M0 */ + + while (numSamples > 0U) + { + /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */ + /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */ + a = *pSrcA++; + b = *pSrcA++; + c = *pSrcB++; + d = *pSrcB++; + + /* store the result in 3.13 format in the destination buffer. 
*/ + *pDst++ = + (q15_t) (q31_t) (((q31_t) a * c) >> 17) - (((q31_t) b * d) >> 17); + /* store the result in 3.13 format in the destination buffer. */ + *pDst++ = + (q15_t) (q31_t) (((q31_t) a * d) >> 17) + (((q31_t) b * c) >> 17); + + /* Decrement the blockSize loop counter */ + numSamples--; + } + +#endif /* #if defined (ARM_MATH_DSP) */ + +} + +/** + * @} end of CmplxByCmplxMult group + */ diff --git a/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_q31.c b/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_q31.c new file mode 100644 index 0000000..b01c4f6 --- /dev/null +++ b/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_cmplx_q31.c @@ -0,0 +1,314 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_cmplx_mult_cmplx_q31.c + * Description: Q31 complex-by-complex multiplication + * + * $Date: 27. January 2017 + * $Revision: V.1.5.1 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "arm_math.h" + +/** + * @ingroup groupCmplxMath + */ + +/** + * @addtogroup CmplxByCmplxMult + * @{ + */ + + +/** + * @brief Q31 complex-by-complex multiplication + * @param[in] *pSrcA points to the first input vector + * @param[in] *pSrcB points to the second input vector + * @param[out] *pDst points to the output vector + * @param[in] numSamples number of complex samples in each vector + * @return none. + * + * <b>Scaling and Overflow Behavior:</b> + * \par + * The function implements 1.31 by 1.31 multiplications and finally output is converted into 3.29 format. + * Input down scaling is not required. + */ + +void arm_cmplx_mult_cmplx_q31( + q31_t * pSrcA, + q31_t * pSrcB, + q31_t * pDst, + uint32_t numSamples) +{ + q31_t a, b, c, d; /* Temporary variables to store real and imaginary values */ + uint32_t blkCnt; /* loop counters */ + q31_t mul1, mul2, mul3, mul4; + q31_t out1, out2; + +#if defined (ARM_MATH_DSP) + + /* Run the below code for Cortex-M4 and Cortex-M3 */ + + /* loop Unrolling */ + blkCnt = numSamples >> 2U; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while (blkCnt > 0U) + { + /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */ + /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */ + a = *pSrcA++; + b = *pSrcA++; + c = *pSrcB++; + d = *pSrcB++; + + mul1 = (q31_t) (((q63_t) a * c) >> 32); + mul2 = (q31_t) (((q63_t) b * d) >> 32); + mul3 = (q31_t) (((q63_t) a * d) >> 32); + mul4 = (q31_t) (((q63_t) b * c) >> 32); + + mul1 = (mul1 >> 1); + mul2 = (mul2 >> 1); + mul3 = (mul3 >> 1); + mul4 = (mul4 >> 1); + + out1 = mul1 - mul2; + out2 = mul3 + mul4; + + /* store the real result in 3.29 format in the destination buffer. */ + *pDst++ = out1; + /* store the imag result in 3.29 format in the destination buffer. 
*/ + *pDst++ = out2; + + a = *pSrcA++; + b = *pSrcA++; + c = *pSrcB++; + d = *pSrcB++; + + mul1 = (q31_t) (((q63_t) a * c) >> 32); + mul2 = (q31_t) (((q63_t) b * d) >> 32); + mul3 = (q31_t) (((q63_t) a * d) >> 32); + mul4 = (q31_t) (((q63_t) b * c) >> 32); + + mul1 = (mul1 >> 1); + mul2 = (mul2 >> 1); + mul3 = (mul3 >> 1); + mul4 = (mul4 >> 1); + + out1 = mul1 - mul2; + out2 = mul3 + mul4; + + /* store the real result in 3.29 format in the destination buffer. */ + *pDst++ = out1; + /* store the imag result in 3.29 format in the destination buffer. */ + *pDst++ = out2; + + a = *pSrcA++; + b = *pSrcA++; + c = *pSrcB++; + d = *pSrcB++; + + mul1 = (q31_t) (((q63_t) a * c) >> 32); + mul2 = (q31_t) (((q63_t) b * d) >> 32); + mul3 = (q31_t) (((q63_t) a * d) >> 32); + mul4 = (q31_t) (((q63_t) b * c) >> 32); + + mul1 = (mul1 >> 1); + mul2 = (mul2 >> 1); + mul3 = (mul3 >> 1); + mul4 = (mul4 >> 1); + + out1 = mul1 - mul2; + out2 = mul3 + mul4; + + /* store the real result in 3.29 format in the destination buffer. */ + *pDst++ = out1; + /* store the imag result in 3.29 format in the destination buffer. */ + *pDst++ = out2; + + a = *pSrcA++; + b = *pSrcA++; + c = *pSrcB++; + d = *pSrcB++; + + mul1 = (q31_t) (((q63_t) a * c) >> 32); + mul2 = (q31_t) (((q63_t) b * d) >> 32); + mul3 = (q31_t) (((q63_t) a * d) >> 32); + mul4 = (q31_t) (((q63_t) b * c) >> 32); + + mul1 = (mul1 >> 1); + mul2 = (mul2 >> 1); + mul3 = (mul3 >> 1); + mul4 = (mul4 >> 1); + + out1 = mul1 - mul2; + out2 = mul3 + mul4; + + /* store the real result in 3.29 format in the destination buffer. */ + *pDst++ = out1; + /* store the imag result in 3.29 format in the destination buffer. */ + *pDst++ = out2; + + /* Decrement the blockSize loop counter */ + blkCnt--; + } + + /* If the blockSize is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = numSamples % 0x4U; + + while (blkCnt > 0U) + { + /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. 
*/ + /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */ + a = *pSrcA++; + b = *pSrcA++; + c = *pSrcB++; + d = *pSrcB++; + + mul1 = (q31_t) (((q63_t) a * c) >> 32); + mul2 = (q31_t) (((q63_t) b * d) >> 32); + mul3 = (q31_t) (((q63_t) a * d) >> 32); + mul4 = (q31_t) (((q63_t) b * c) >> 32); + + mul1 = (mul1 >> 1); + mul2 = (mul2 >> 1); + mul3 = (mul3 >> 1); + mul4 = (mul4 >> 1); + + out1 = mul1 - mul2; + out2 = mul3 + mul4; + + /* store the real result in 3.29 format in the destination buffer. */ + *pDst++ = out1; + /* store the imag result in 3.29 format in the destination buffer. */ + *pDst++ = out2; + + /* Decrement the blockSize loop counter */ + blkCnt--; + } + +#else + + /* Run the below code for Cortex-M0 */ + + /* loop Unrolling */ + blkCnt = numSamples >> 1U; + + /* First part of the processing with loop unrolling. Compute 2 outputs at a time. + ** a second loop below computes the remaining 1 sample. */ + while (blkCnt > 0U) + { + /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */ + /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */ + a = *pSrcA++; + b = *pSrcA++; + c = *pSrcB++; + d = *pSrcB++; + + mul1 = (q31_t) (((q63_t) a * c) >> 32); + mul2 = (q31_t) (((q63_t) b * d) >> 32); + mul3 = (q31_t) (((q63_t) a * d) >> 32); + mul4 = (q31_t) (((q63_t) b * c) >> 32); + + mul1 = (mul1 >> 1); + mul2 = (mul2 >> 1); + mul3 = (mul3 >> 1); + mul4 = (mul4 >> 1); + + out1 = mul1 - mul2; + out2 = mul3 + mul4; + + /* store the real result in 3.29 format in the destination buffer. */ + *pDst++ = out1; + /* store the imag result in 3.29 format in the destination buffer. 
*/ + *pDst++ = out2; + + a = *pSrcA++; + b = *pSrcA++; + c = *pSrcB++; + d = *pSrcB++; + + mul1 = (q31_t) (((q63_t) a * c) >> 32); + mul2 = (q31_t) (((q63_t) b * d) >> 32); + mul3 = (q31_t) (((q63_t) a * d) >> 32); + mul4 = (q31_t) (((q63_t) b * c) >> 32); + + mul1 = (mul1 >> 1); + mul2 = (mul2 >> 1); + mul3 = (mul3 >> 1); + mul4 = (mul4 >> 1); + + out1 = mul1 - mul2; + out2 = mul3 + mul4; + + /* store the real result in 3.29 format in the destination buffer. */ + *pDst++ = out1; + /* store the imag result in 3.29 format in the destination buffer. */ + *pDst++ = out2; + + /* Decrement the blockSize loop counter */ + blkCnt--; + } + + /* If the blockSize is not a multiple of 2, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = numSamples % 0x2U; + + while (blkCnt > 0U) + { + /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */ + /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */ + a = *pSrcA++; + b = *pSrcA++; + c = *pSrcB++; + d = *pSrcB++; + + mul1 = (q31_t) (((q63_t) a * c) >> 32); + mul2 = (q31_t) (((q63_t) b * d) >> 32); + mul3 = (q31_t) (((q63_t) a * d) >> 32); + mul4 = (q31_t) (((q63_t) b * c) >> 32); + + mul1 = (mul1 >> 1); + mul2 = (mul2 >> 1); + mul3 = (mul3 >> 1); + mul4 = (mul4 >> 1); + + out1 = mul1 - mul2; + out2 = mul3 + mul4; + + /* store the real result in 3.29 format in the destination buffer. */ + *pDst++ = out1; + /* store the imag result in 3.29 format in the destination buffer. 
*/ + *pDst++ = out2; + + /* Decrement the blockSize loop counter */ + blkCnt--; + } + +#endif /* #if defined (ARM_MATH_DSP) */ + +} + +/** + * @} end of CmplxByCmplxMult group + */ diff --git a/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_real_f32.c b/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_real_f32.c new file mode 100644 index 0000000..8c7ca31 --- /dev/null +++ b/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_real_f32.c @@ -0,0 +1,213 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_cmplx_mult_real_f32.c + * Description: Floating-point complex by real multiplication + * + * $Date: 27. January 2017 + * $Revision: V.1.5.1 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arm_math.h" + +/** + * @ingroup groupCmplxMath + */ + +/** + * @defgroup CmplxByRealMult Complex-by-Real Multiplication + * + * Multiplies a complex vector by a real vector and generates a complex result. + * The data in the complex arrays is stored in an interleaved fashion + * (real, imag, real, imag, ...). + * The parameter <code>numSamples</code> represents the number of complex + * samples processed. 
The complex arrays have a total of <code>2*numSamples</code> + * real values while the real array has a total of <code>numSamples</code> + * real values. + * + * The underlying algorithm is used: + * + * <pre> + * for(n=0; n<numSamples; n++) { + * pCmplxDst[(2*n)+0] = pSrcCmplx[(2*n)+0] * pSrcReal[n]; + * pCmplxDst[(2*n)+1] = pSrcCmplx[(2*n)+1] * pSrcReal[n]; + * } + * </pre> + * + * There are separate functions for floating-point, Q15, and Q31 data types. + */ + +/** + * @addtogroup CmplxByRealMult + * @{ + */ + + +/** + * @brief Floating-point complex-by-real multiplication + * @param[in] *pSrcCmplx points to the complex input vector + * @param[in] *pSrcReal points to the real input vector + * @param[out] *pCmplxDst points to the complex output vector + * @param[in] numSamples number of samples in each vector + * @return none. + */ + +void arm_cmplx_mult_real_f32( + float32_t * pSrcCmplx, + float32_t * pSrcReal, + float32_t * pCmplxDst, + uint32_t numSamples) +{ + float32_t in; /* Temporary variable to store input value */ + uint32_t blkCnt; /* loop counters */ + +#if defined (ARM_MATH_DSP) + + /* Run the below code for Cortex-M4 and Cortex-M3 */ + float32_t inA1, inA2, inA3, inA4; /* Temporary variables to hold input data */ + float32_t inA5, inA6, inA7, inA8; /* Temporary variables to hold input data */ + float32_t inB1, inB2, inB3, inB4; /* Temporary variables to hold input data */ + float32_t out1, out2, out3, out4; /* Temporary variables to hold output data */ + float32_t out5, out6, out7, out8; /* Temporary variables to hold output data */ + + /* loop Unrolling */ + blkCnt = numSamples >> 2U; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while (blkCnt > 0U) + { + /* C[2 * i] = A[2 * i] * B[i]. */ + /* C[2 * i + 1] = A[2 * i + 1] * B[i]. 
*/ + /* read input from complex input buffer */ + inA1 = pSrcCmplx[0]; + inA2 = pSrcCmplx[1]; + /* read input from real input buffer */ + inB1 = pSrcReal[0]; + + /* read input from complex input buffer */ + inA3 = pSrcCmplx[2]; + + /* multiply complex buffer real input with real buffer input */ + out1 = inA1 * inB1; + + /* read input from complex input buffer */ + inA4 = pSrcCmplx[3]; + + /* multiply complex buffer imaginary input with real buffer input */ + out2 = inA2 * inB1; + + /* read input from real input buffer */ + inB2 = pSrcReal[1]; + /* read input from complex input buffer */ + inA5 = pSrcCmplx[4]; + + /* multiply complex buffer real input with real buffer input */ + out3 = inA3 * inB2; + + /* read input from complex input buffer */ + inA6 = pSrcCmplx[5]; + /* read input from real input buffer */ + inB3 = pSrcReal[2]; + + /* multiply complex buffer imaginary input with real buffer input */ + out4 = inA4 * inB2; + + /* read input from complex input buffer */ + inA7 = pSrcCmplx[6]; + + /* multiply complex buffer real input with real buffer input */ + out5 = inA5 * inB3; + + /* read input from complex input buffer */ + inA8 = pSrcCmplx[7]; + + /* multiply complex buffer imaginary input with real buffer input */ + out6 = inA6 * inB3; + + /* read input from real input buffer */ + inB4 = pSrcReal[3]; + + /* store result to destination bufer */ + pCmplxDst[0] = out1; + + /* multiply complex buffer real input with real buffer input */ + out7 = inA7 * inB4; + + /* store result to destination bufer */ + pCmplxDst[1] = out2; + + /* multiply complex buffer imaginary input with real buffer input */ + out8 = inA8 * inB4; + + /* store result to destination bufer */ + pCmplxDst[2] = out3; + pCmplxDst[3] = out4; + pCmplxDst[4] = out5; + + /* incremnet complex input buffer by 8 to process next samples */ + pSrcCmplx += 8U; + + /* store result to destination bufer */ + pCmplxDst[5] = out6; + + /* increment real input buffer by 4 to process next samples */ + pSrcReal += 4U; 
+ + /* store result to destination bufer */ + pCmplxDst[6] = out7; + pCmplxDst[7] = out8; + + /* increment destination buffer by 8 to process next sampels */ + pCmplxDst += 8U; + + /* Decrement the numSamples loop counter */ + blkCnt--; + } + + /* If the numSamples is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = numSamples % 0x4U; + +#else + + /* Run the below code for Cortex-M0 */ + blkCnt = numSamples; + +#endif /* #if defined (ARM_MATH_DSP) */ + + while (blkCnt > 0U) + { + /* C[2 * i] = A[2 * i] * B[i]. */ + /* C[2 * i + 1] = A[2 * i + 1] * B[i]. */ + in = *pSrcReal++; + /* store the result in the destination buffer. */ + *pCmplxDst++ = (*pSrcCmplx++) * (in); + *pCmplxDst++ = (*pSrcCmplx++) * (in); + + /* Decrement the numSamples loop counter */ + blkCnt--; + } +} + +/** + * @} end of CmplxByRealMult group + */ diff --git a/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_real_q15.c b/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_real_q15.c new file mode 100644 index 0000000..340d852 --- /dev/null +++ b/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_real_q15.c @@ -0,0 +1,191 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_cmplx_mult_real_q15.c + * Description: Q15 complex by real multiplication + * + * $Date: 27. January 2017 + * $Revision: V.1.5.1 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arm_math.h" + +/** + * @ingroup groupCmplxMath + */ + +/** + * @addtogroup CmplxByRealMult + * @{ + */ + + +/** + * @brief Q15 complex-by-real multiplication + * @param[in] *pSrcCmplx points to the complex input vector + * @param[in] *pSrcReal points to the real input vector + * @param[out] *pCmplxDst points to the complex output vector + * @param[in] numSamples number of samples in each vector + * @return none. + * + * <b>Scaling and Overflow Behavior:</b> + * \par + * The function uses saturating arithmetic. + * Results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated. + */ + +void arm_cmplx_mult_real_q15( + q15_t * pSrcCmplx, + q15_t * pSrcReal, + q15_t * pCmplxDst, + uint32_t numSamples) +{ + q15_t in; /* Temporary variable to store input value */ + +#if defined (ARM_MATH_DSP) + + /* Run the below code for Cortex-M4 and Cortex-M3 */ + uint32_t blkCnt; /* loop counters */ + q31_t inA1, inA2; /* Temporary variables to hold input data */ + q31_t inB1; /* Temporary variables to hold input data */ + q15_t out1, out2, out3, out4; /* Temporary variables to hold output data */ + q31_t mul1, mul2, mul3, mul4; /* Temporary variables to hold intermediate data */ + + /* loop Unrolling */ + blkCnt = numSamples >> 2U; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while (blkCnt > 0U) + { + /* C[2 * i] = A[2 * i] * B[i]. */ + /* C[2 * i + 1] = A[2 * i + 1] * B[i]. 
*/ + /* read complex number both real and imaginary from complex input buffer */ + inA1 = *__SIMD32(pSrcCmplx)++; + /* read two real values at a time from real input buffer */ + inB1 = *__SIMD32(pSrcReal)++; + /* read complex number both real and imaginary from complex input buffer */ + inA2 = *__SIMD32(pSrcCmplx)++; + + /* multiply complex number with real numbers */ +#ifndef ARM_MATH_BIG_ENDIAN + + mul1 = (q31_t) ((q15_t) (inA1) * (q15_t) (inB1)); + mul2 = (q31_t) ((q15_t) (inA1 >> 16) * (q15_t) (inB1)); + mul3 = (q31_t) ((q15_t) (inA2) * (q15_t) (inB1 >> 16)); + mul4 = (q31_t) ((q15_t) (inA2 >> 16) * (q15_t) (inB1 >> 16)); + +#else + + mul2 = (q31_t) ((q15_t) (inA1 >> 16) * (q15_t) (inB1 >> 16)); + mul1 = (q31_t) ((q15_t) inA1 * (q15_t) (inB1 >> 16)); + mul4 = (q31_t) ((q15_t) (inA2 >> 16) * (q15_t) inB1); + mul3 = (q31_t) ((q15_t) inA2 * (q15_t) inB1); + +#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ + + /* saturate the result */ + out1 = (q15_t) __SSAT(mul1 >> 15U, 16); + out2 = (q15_t) __SSAT(mul2 >> 15U, 16); + out3 = (q15_t) __SSAT(mul3 >> 15U, 16); + out4 = (q15_t) __SSAT(mul4 >> 15U, 16); + + /* pack real and imaginary outputs and store them to destination */ + *__SIMD32(pCmplxDst)++ = __PKHBT(out1, out2, 16); + *__SIMD32(pCmplxDst)++ = __PKHBT(out3, out4, 16); + + inA1 = *__SIMD32(pSrcCmplx)++; + inB1 = *__SIMD32(pSrcReal)++; + inA2 = *__SIMD32(pSrcCmplx)++; + +#ifndef ARM_MATH_BIG_ENDIAN + + mul1 = (q31_t) ((q15_t) (inA1) * (q15_t) (inB1)); + mul2 = (q31_t) ((q15_t) (inA1 >> 16) * (q15_t) (inB1)); + mul3 = (q31_t) ((q15_t) (inA2) * (q15_t) (inB1 >> 16)); + mul4 = (q31_t) ((q15_t) (inA2 >> 16) * (q15_t) (inB1 >> 16)); + +#else + + mul2 = (q31_t) ((q15_t) (inA1 >> 16) * (q15_t) (inB1 >> 16)); + mul1 = (q31_t) ((q15_t) inA1 * (q15_t) (inB1 >> 16)); + mul4 = (q31_t) ((q15_t) (inA2 >> 16) * (q15_t) inB1); + mul3 = (q31_t) ((q15_t) inA2 * (q15_t) inB1); + +#endif /* #ifndef ARM_MATH_BIG_ENDIAN */ + + out1 = (q15_t) __SSAT(mul1 >> 15U, 16); + out2 = (q15_t) 
__SSAT(mul2 >> 15U, 16); + out3 = (q15_t) __SSAT(mul3 >> 15U, 16); + out4 = (q15_t) __SSAT(mul4 >> 15U, 16); + + *__SIMD32(pCmplxDst)++ = __PKHBT(out1, out2, 16); + *__SIMD32(pCmplxDst)++ = __PKHBT(out3, out4, 16); + + /* Decrement the numSamples loop counter */ + blkCnt--; + } + + /* If the numSamples is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = numSamples % 0x4U; + + while (blkCnt > 0U) + { + /* C[2 * i] = A[2 * i] * B[i]. */ + /* C[2 * i + 1] = A[2 * i + 1] * B[i]. */ + in = *pSrcReal++; + /* store the result in the destination buffer. */ + *pCmplxDst++ = + (q15_t) __SSAT((((q31_t) (*pSrcCmplx++) * (in)) >> 15), 16); + *pCmplxDst++ = + (q15_t) __SSAT((((q31_t) (*pSrcCmplx++) * (in)) >> 15), 16); + + /* Decrement the numSamples loop counter */ + blkCnt--; + } + +#else + + /* Run the below code for Cortex-M0 */ + + while (numSamples > 0U) + { + /* realOut = realA * realB. */ + /* imagOut = imagA * realB. */ + in = *pSrcReal++; + /* store the result in the destination buffer. */ + *pCmplxDst++ = + (q15_t) __SSAT((((q31_t) (*pSrcCmplx++) * (in)) >> 15), 16); + *pCmplxDst++ = + (q15_t) __SSAT((((q31_t) (*pSrcCmplx++) * (in)) >> 15), 16); + + /* Decrement the numSamples loop counter */ + numSamples--; + } + +#endif /* #if defined (ARM_MATH_DSP) */ + +} + +/** + * @} end of CmplxByRealMult group + */ diff --git a/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_real_q31.c b/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_real_q31.c new file mode 100644 index 0000000..19fc55b --- /dev/null +++ b/DSP/Source/ComplexMathFunctions/arm_cmplx_mult_real_q31.c @@ -0,0 +1,211 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_cmplx_mult_real_q31.c + * Description: Q31 complex by real multiplication + * + * $Date: 27. 
January 2017 + * $Revision: V.1.5.1 + * + * Target Processor: Cortex-M cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arm_math.h" + +/** + * @ingroup groupCmplxMath + */ + +/** + * @addtogroup CmplxByRealMult + * @{ + */ + + +/** + * @brief Q31 complex-by-real multiplication + * @param[in] *pSrcCmplx points to the complex input vector + * @param[in] *pSrcReal points to the real input vector + * @param[out] *pCmplxDst points to the complex output vector + * @param[in] numSamples number of samples in each vector + * @return none. + * + * <b>Scaling and Overflow Behavior:</b> + * \par + * The function uses saturating arithmetic. + * Results outside of the allowable Q31 range[0x80000000 0x7FFFFFFF] will be saturated. 
+ */ + +void arm_cmplx_mult_real_q31( + q31_t * pSrcCmplx, + q31_t * pSrcReal, + q31_t * pCmplxDst, + uint32_t numSamples) +{ + q31_t inA1; /* Temporary variable to store input value */ + +#if defined (ARM_MATH_DSP) + + /* Run the below code for Cortex-M4 and Cortex-M3 */ + uint32_t blkCnt; /* loop counters */ + q31_t inA2, inA3, inA4; /* Temporary variables to hold input data */ + q31_t inB1, inB2; /* Temporary variabels to hold input data */ + q31_t out1, out2, out3, out4; /* Temporary variables to hold output data */ + + /* loop Unrolling */ + blkCnt = numSamples >> 2U; + + /* First part of the processing with loop unrolling. Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while (blkCnt > 0U) + { + /* C[2 * i] = A[2 * i] * B[i]. */ + /* C[2 * i + 1] = A[2 * i + 1] * B[i]. */ + /* read real input from complex input buffer */ + inA1 = *pSrcCmplx++; + inA2 = *pSrcCmplx++; + /* read input from real input bufer */ + inB1 = *pSrcReal++; + inB2 = *pSrcReal++; + /* read imaginary input from complex input buffer */ + inA3 = *pSrcCmplx++; + inA4 = *pSrcCmplx++; + + /* multiply complex input with real input */ + out1 = ((q63_t) inA1 * inB1) >> 32; + out2 = ((q63_t) inA2 * inB1) >> 32; + out3 = ((q63_t) inA3 * inB2) >> 32; + out4 = ((q63_t) inA4 * inB2) >> 32; + + /* sature the result */ + out1 = __SSAT(out1, 31); + out2 = __SSAT(out2, 31); + out3 = __SSAT(out3, 31); + out4 = __SSAT(out4, 31); + + /* get result in 1.31 format */ + out1 = out1 << 1; + out2 = out2 << 1; + out3 = out3 << 1; + out4 = out4 << 1; + + /* store the result to destination buffer */ + *pCmplxDst++ = out1; + *pCmplxDst++ = out2; + *pCmplxDst++ = out3; + *pCmplxDst++ = out4; + + /* read real input from complex input buffer */ + inA1 = *pSrcCmplx++; + inA2 = *pSrcCmplx++; + /* read input from real input bufer */ + inB1 = *pSrcReal++; + inB2 = *pSrcReal++; + /* read imaginary input from complex input buffer */ + inA3 = *pSrcCmplx++; + inA4 = *pSrcCmplx++; 
+ + /* multiply complex input with real input */ + out1 = ((q63_t) inA1 * inB1) >> 32; + out2 = ((q63_t) inA2 * inB1) >> 32; + out3 = ((q63_t) inA3 * inB2) >> 32; + out4 = ((q63_t) inA4 * inB2) >> 32; + + /* sature the result */ + out1 = __SSAT(out1, 31); + out2 = __SSAT(out2, 31); + out3 = __SSAT(out3, 31); + out4 = __SSAT(out4, 31); + + /* get result in 1.31 format */ + out1 = out1 << 1; + out2 = out2 << 1; + out3 = out3 << 1; + out4 = out4 << 1; + + /* store the result to destination buffer */ + *pCmplxDst++ = out1; + *pCmplxDst++ = out2; + *pCmplxDst++ = out3; + *pCmplxDst++ = out4; + + /* Decrement the numSamples loop counter */ + blkCnt--; + } + + /* If the numSamples is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = numSamples % 0x4U; + + while (blkCnt > 0U) + { + /* C[2 * i] = A[2 * i] * B[i]. */ + /* C[2 * i + 1] = A[2 * i + 1] * B[i]. */ + /* read real input from complex input buffer */ + inA1 = *pSrcCmplx++; + inA2 = *pSrcCmplx++; + /* read input from real input bufer */ + inB1 = *pSrcReal++; + + /* multiply complex input with real input */ + out1 = ((q63_t) inA1 * inB1) >> 32; + out2 = ((q63_t) inA2 * inB1) >> 32; + + /* sature the result */ + out1 = __SSAT(out1, 31); + out2 = __SSAT(out2, 31); + + /* get result in 1.31 format */ + out1 = out1 << 1; + out2 = out2 << 1; + + /* store the result to destination buffer */ + *pCmplxDst++ = out1; + *pCmplxDst++ = out2; + + /* Decrement the numSamples loop counter */ + blkCnt--; + } + +#else + + /* Run the below code for Cortex-M0 */ + + while (numSamples > 0U) + { + /* realOut = realA * realB. */ + /* imagReal = imagA * realB. */ + inA1 = *pSrcReal++; + /* store the result in the destination buffer. 
*/ + *pCmplxDst++ = + (q31_t) clip_q63_to_q31(((q63_t) * pSrcCmplx++ * inA1) >> 31); + *pCmplxDst++ = + (q31_t) clip_q63_to_q31(((q63_t) * pSrcCmplx++ * inA1) >> 31); + + /* Decrement the numSamples loop counter */ + numSamples--; + } + +#endif /* #if defined (ARM_MATH_DSP) */ + +} + +/** + * @} end of CmplxByRealMult group + */ |