From 96d6da4e252b06dcfdc041e7df23e86161c33007 Mon Sep 17 00:00:00 2001 From: rihab kouki Date: Tue, 28 Jul 2020 11:24:49 +0100 Subject: Official ARM version: v5.6.0 --- DSP/Source/TransformFunctions/arm_dct4_q15.c | 133 +++++++++++++-------------- 1 file changed, 66 insertions(+), 67 deletions(-) (limited to 'DSP/Source/TransformFunctions/arm_dct4_q15.c') diff --git a/DSP/Source/TransformFunctions/arm_dct4_q15.c b/DSP/Source/TransformFunctions/arm_dct4_q15.c index 918f0bd..f926a1d 100644 --- a/DSP/Source/TransformFunctions/arm_dct4_q15.c +++ b/DSP/Source/TransformFunctions/arm_dct4_q15.c @@ -3,13 +3,13 @@ * Title: arm_dct4_q15.c * Description: Processing function of DCT4 & IDCT4 Q15 * - * $Date: 27. January 2017 - * $Revision: V.1.5.1 + * $Date: 18. March 2019 + * $Revision: V1.6.0 * * Target Processor: Cortex-M cores * -------------------------------------------------------------------- */ /* - * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved. + * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved. * * SPDX-License-Identifier: Apache-2.0 * @@ -29,35 +29,35 @@ #include "arm_math.h" /** - * @addtogroup DCT4_IDCT4 - * @{ + @addtogroup DCT4_IDCT4 + @{ */ /** - * @brief Processing function for the Q15 DCT4/IDCT4. - * @param[in] *S points to an instance of the Q15 DCT4 structure. - * @param[in] *pState points to state buffer. - * @param[in,out] *pInlineBuffer points to the in-place input and output buffer. - * @return none. - * - * \par Input an output formats: - * Internally inputs are downscaled in the RFFT process function to avoid overflows. - * Number of bits downscaled, depends on the size of the transform. - * The input and output formats for different DCT sizes and number of bits to upscale are mentioned in the table below: - * - * \image html dct4FormatsQ15Table.gif + @brief Processing function for the Q15 DCT4/IDCT4. + @param[in] S points to an instance of the Q15 DCT4 structure. + @param[in] pState points to state buffer. + @param[in,out] pInlineBuffer points to the in-place input and output buffer. + @return none + + @par Input an output formats + Internally inputs are downscaled in the RFFT process function to avoid overflows. + Number of bits downscaled, depends on the size of the transform. The input and output + formats for different DCT sizes and number of bits to upscale are mentioned in the table below: + + \image html dct4FormatsQ15Table.gif */ void arm_dct4_q15( const arm_dct4_instance_q15 * S, - q15_t * pState, - q15_t * pInlineBuffer) + q15_t * pState, + q15_t * pInlineBuffer) { - uint32_t i; /* Loop counter */ - q15_t *weights = S->pTwiddle; /* Pointer to the Weights table */ - q15_t *cosFact = S->pCosFactor; /* Pointer to the cos factors table */ - q15_t *pS1, *pS2, *pbuff; /* Temporary pointers for input buffer and pState buffer */ - q15_t in; /* Temporary variable */ + const q15_t *weights = S->pTwiddle; /* Pointer to the Weights table */ + const q15_t *cosFact = S->pCosFactor; /* Pointer to the cos factors table */ + q15_t *pS1, *pS2, *pbuff; /* Temporary pointers for input buffer and pState buffer */ + q15_t in; /* Temporary variable */ + uint32_t i; /* Loop counter */ /* DCT4 computation involves DCT2 (which is calculated using RFFT) @@ -79,10 +79,10 @@ void arm_dct4_q15( * (d) Multiplying the output with the normalizing factor sqrt(2/N). */ - /*-------- Pre-processing ------------*/ + /*-------- Pre-processing ------------*/ /* Multiplying input with cos factor i.e. r(n) = 2 * x(n) * cos(pi*(2*n+1)/(4*n)) */ - arm_mult_q15(pInlineBuffer, cosFact, pInlineBuffer, S->N); - arm_shift_q15(pInlineBuffer, 1, pInlineBuffer, S->N); + arm_mult_q15 (pInlineBuffer, cosFact, pInlineBuffer, S->N); + arm_shift_q15 (pInlineBuffer, 1, pInlineBuffer, S->N); /* ---------------------------------------------------------------- * Step1: Re-ordering of even and odd elements as @@ -100,12 +100,10 @@ void arm_dct4_q15( pbuff = pInlineBuffer; -#if defined (ARM_MATH_DSP) - - /* Run the below code for Cortex-M4 and Cortex-M3 */ +#if defined (ARM_MATH_LOOPUNROLL) /* Initializing the loop counter to N/2 >> 2 for loop unrolling by 4 */ - i = (uint32_t) S->Nby2 >> 2U; + i = S->Nby2 >> 2U; /* First part of the processing with loop unrolling. Compute 4 outputs at a time. ** a second loop below computes the remaining 1 to 3 samples. */ @@ -126,7 +124,7 @@ void arm_dct4_q15( *pS1++ = *pbuff++; *pS2-- = *pbuff++; - /* Decrement the loop counter */ + /* Decrement loop counter */ i--; } while (i > 0U); @@ -137,7 +135,7 @@ void arm_dct4_q15( pS1 = pState; /* Initializing the loop counter to N/4 instead of N for loop unrolling */ - i = (uint32_t) S->N >> 2U; + i = S->N >> 2U; /* Processing with loop unrolling 4 times as N is always multiple of 4. * Compute 4 outputs at a time */ @@ -158,16 +156,16 @@ void arm_dct4_q15( * Step2: Calculate RFFT for N-point input * ---------------------------------------------------------- */ /* pInlineBuffer is real input of length N , pState is the complex output of length 2N */ - arm_rfft_q15(S->pRfft, pInlineBuffer, pState); + arm_rfft_q15 (S->pRfft, pInlineBuffer, pState); - /*---------------------------------------------------------------------- - * Step3: Multiply the FFT output with the weights. - *----------------------------------------------------------------------*/ - arm_cmplx_mult_cmplx_q15(pState, weights, pState, S->N); + /*---------------------------------------------------------------------- + * Step3: Multiply the FFT output with the weights. + *----------------------------------------------------------------------*/ + arm_cmplx_mult_cmplx_q15 (pState, weights, pState, S->N); /* The output of complex multiplication is in 3.13 format. * Hence changing the format of N (i.e. 2*N elements) complex numbers to 1.15 format by shifting left by 2 bits. */ - arm_shift_q15(pState, 2, pState, S->N * 2); + arm_shift_q15 (pState, 2, pState, S->N * 2); /* ----------- Post-processing ---------- */ /* DCT-IV can be obtained from DCT-II by the equation, @@ -176,7 +174,7 @@ void arm_dct4_q15( /* Getting only real part from the output and Converting to DCT-IV */ /* Initializing the loop counter to N >> 2 for loop unrolling by 4 */ - i = ((uint32_t) S->N - 1U) >> 2U; + i = (S->N - 1U) >> 2U; /* pbuff initialized to input buffer. */ pbuff = pInlineBuffer; @@ -221,7 +219,7 @@ void arm_dct4_q15( /* If the blockSize is not a multiple of 4, compute any remaining output samples here. ** No loop unrolling is used. */ - i = ((uint32_t) S->N - 1U) % 0x4U; + i = (S->N - 1U) % 0x4U; while (i > 0U) { @@ -229,18 +227,19 @@ void arm_dct4_q15( /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */ in = *pS1++ - in; *pbuff++ = in; + /* points to the next real value */ pS1++; - /* Decrement the loop counter */ + /* Decrement loop counter */ i--; } - /*------------ Normalizing the output by multiplying with the normalizing factor ----------*/ + /*------------ Normalizing the output by multiplying with the normalizing factor ----------*/ /* Initializing the loop counter to N/4 instead of N for loop unrolling */ - i = (uint32_t) S->N >> 2U; + i = S->N >> 2U; /* pbuff initialized to the pInlineBuffer(now contains the output values) */ pbuff = pInlineBuffer; @@ -261,17 +260,15 @@ void arm_dct4_q15( in = *pbuff; *pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15)); - /* Decrement the loop counter */ + /* Decrement loop counter */ i--; } while (i > 0U); #else - /* Run the below code for Cortex-M0 */ - /* Initializing the loop counter to N/2 */ - i = (uint32_t) S->Nby2; + i = S->Nby2; do { @@ -292,7 +289,7 @@ void arm_dct4_q15( pS1 = pState; /* Initializing the loop counter */ - i = (uint32_t) S->N; + i = S->N; do { @@ -308,16 +305,16 @@ void arm_dct4_q15( * Step2: Calculate RFFT for N-point input * ---------------------------------------------------------- */ /* pInlineBuffer is real input of length N , pState is the complex output of length 2N */ - arm_rfft_q15(S->pRfft, pInlineBuffer, pState); + arm_rfft_q15 (S->pRfft, pInlineBuffer, pState); - /*---------------------------------------------------------------------- - * Step3: Multiply the FFT output with the weights. - *----------------------------------------------------------------------*/ - arm_cmplx_mult_cmplx_q15(pState, weights, pState, S->N); + /*---------------------------------------------------------------------- + * Step3: Multiply the FFT output with the weights. + *----------------------------------------------------------------------*/ + arm_cmplx_mult_cmplx_q15 (pState, weights, pState, S->N); /* The output of complex multiplication is in 3.13 format. * Hence changing the format of N (i.e. 2*N elements) complex numbers to 1.15 format by shifting left by 2 bits. */ - arm_shift_q15(pState, 2, pState, S->N * 2); + arm_shift_q15 (pState, 2, pState, S->N * 2); /* ----------- Post-processing ---------- */ /* DCT-IV can be obtained from DCT-II by the equation, @@ -325,9 +322,6 @@ void arm_dct4_q15( * Hence, Y4(0) = Y2(0)/2 */ /* Getting only real part from the output and Converting to DCT-IV */ - /* Initializing the loop counter */ - i = ((uint32_t) S->N - 1U); - /* pbuff initialized to input buffer. */ pbuff = pInlineBuffer; @@ -342,25 +336,29 @@ void arm_dct4_q15( /* pState pointer is incremented twice as the real values are located alternatively in the array */ pS1++; + /* Initializing the loop counter */ + i = (S->N - 1U); + do { /* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */ /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */ in = *pS1++ - in; *pbuff++ = in; + /* points to the next real value */ pS1++; - /* Decrement the loop counter */ + /* Decrement loop counter */ i--; } while (i > 0U); - /*------------ Normalizing the output by multiplying with the normalizing factor ----------*/ + /*------------ Normalizing the output by multiplying with the normalizing factor ----------*/ - /* Initializing the loop counter */ - i = (uint32_t) S->N; + /* Initializing loop counter */ + i = S->N; - /* pbuff initialized to the pInlineBuffer(now contains the output values) */ + /* pbuff initialized to the pInlineBuffer (now contains the output values) */ pbuff = pInlineBuffer; do @@ -369,14 +367,15 @@ void arm_dct4_q15( in = *pbuff; *pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15)); - /* Decrement the loop counter */ + /* Decrement loop counter */ i--; + } while (i > 0U); -#endif /* #if defined (ARM_MATH_DSP) */ +#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ } /** - * @} end of DCT4_IDCT4 group - */ + @} end of DCT4_IDCT4 group + */ -- cgit