From 96d6da4e252b06dcfdc041e7df23e86161c33007 Mon Sep 17 00:00:00 2001
From: rihab kouki <rihab.kouki@st.com>
Date: Tue, 28 Jul 2020 11:24:49 +0100
Subject: Official ARM version: v5.6.0

---
 DSP/Source/TransformFunctions/arm_dct4_q15.c | 133 +++++++++++++--------------
 1 file changed, 66 insertions(+), 67 deletions(-)

(limited to 'DSP/Source/TransformFunctions/arm_dct4_q15.c')

diff --git a/DSP/Source/TransformFunctions/arm_dct4_q15.c b/DSP/Source/TransformFunctions/arm_dct4_q15.c
index 918f0bd..f926a1d 100644
--- a/DSP/Source/TransformFunctions/arm_dct4_q15.c
+++ b/DSP/Source/TransformFunctions/arm_dct4_q15.c
@@ -3,13 +3,13 @@
  * Title:        arm_dct4_q15.c
  * Description:  Processing function of DCT4 & IDCT4 Q15
  *
- * $Date:        27. January 2017
- * $Revision:    V.1.5.1
+ * $Date:        18. March 2019
+ * $Revision:    V1.6.0
  *
  * Target Processor: Cortex-M cores
  * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -29,35 +29,35 @@
 #include "arm_math.h"
 
 /**
- * @addtogroup DCT4_IDCT4
- * @{
+  @addtogroup DCT4_IDCT4
+  @{
  */
 
 /**
- * @brief Processing function for the Q15 DCT4/IDCT4.
- * @param[in]       *S             points to an instance of the Q15 DCT4 structure.
- * @param[in]       *pState        points to state buffer.
- * @param[in,out]   *pInlineBuffer points to the in-place input and output buffer.
- * @return none.
- *
- * \par Input an output formats:
- * Internally inputs are downscaled in the RFFT process function to avoid overflows.
- * Number of bits downscaled, depends on the size of the transform.
- * The input and output formats for different DCT sizes and number of bits to upscale are mentioned in the table below:
- *
- * \image html dct4FormatsQ15Table.gif
+  @brief         Processing function for the Q15 DCT4/IDCT4.
+  @param[in]     S             points to an instance of the Q15 DCT4 structure.
+  @param[in]     pState        points to state buffer.
+  @param[in,out] pInlineBuffer points to the in-place input and output buffer.
+  @return        none
+ 
+  @par           Input and output formats
+                   Internally inputs are downscaled in the RFFT process function to avoid overflows.
+                   The number of bits downscaled depends on the size of the transform. The input and output
+                   formats for different DCT sizes and number of bits to upscale are mentioned in the table below:
+
+                   \image html dct4FormatsQ15Table.gif
  */
 
 void arm_dct4_q15(
   const arm_dct4_instance_q15 * S,
-  q15_t * pState,
-  q15_t * pInlineBuffer)
+        q15_t * pState,
+        q15_t * pInlineBuffer)
 {
-  uint32_t i;                                    /* Loop counter */
-  q15_t *weights = S->pTwiddle;                  /* Pointer to the Weights table */
-  q15_t *cosFact = S->pCosFactor;                /* Pointer to the cos factors table */
-  q15_t *pS1, *pS2, *pbuff;                      /* Temporary pointers for input buffer and pState buffer */
-  q15_t in;                                      /* Temporary variable */
+  const q15_t *weights = S->pTwiddle;                  /* Pointer to the Weights table */
+  const q15_t *cosFact = S->pCosFactor;                /* Pointer to the cos factors table */
+        q15_t *pS1, *pS2, *pbuff;                      /* Temporary pointers for input buffer and pState buffer */
+        q15_t in;                                      /* Temporary variable */
+        uint32_t i;                                    /* Loop counter */
 
 
   /* DCT4 computation involves DCT2 (which is calculated using RFFT)
@@ -79,10 +79,10 @@ void arm_dct4_q15(
    * (d) Multiplying the output with the normalizing factor sqrt(2/N).
    */
 
-        /*-------- Pre-processing ------------*/
+  /*-------- Pre-processing ------------*/
   /* Multiplying input with cos factor i.e. r(n) = 2 * x(n) * cos(pi*(2*n+1)/(4*n)) */
-  arm_mult_q15(pInlineBuffer, cosFact, pInlineBuffer, S->N);
-  arm_shift_q15(pInlineBuffer, 1, pInlineBuffer, S->N);
+  arm_mult_q15 (pInlineBuffer, cosFact, pInlineBuffer, S->N);
+  arm_shift_q15 (pInlineBuffer, 1, pInlineBuffer, S->N);
 
   /* ----------------------------------------------------------------
    * Step1: Re-ordering of even and odd elements as
@@ -100,12 +100,10 @@ void arm_dct4_q15(
   pbuff = pInlineBuffer;
 
 
-#if defined (ARM_MATH_DSP)
-
-  /* Run the below code for Cortex-M4 and Cortex-M3 */
+#if defined (ARM_MATH_LOOPUNROLL)
 
   /* Initializing the loop counter to N/2 >> 2 for loop unrolling by 4 */
-  i = (uint32_t) S->Nby2 >> 2U;
+  i = S->Nby2 >> 2U;
 
   /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
    ** a second loop below computes the remaining 1 to 3 samples. */
@@ -126,7 +124,7 @@ void arm_dct4_q15(
     *pS1++ = *pbuff++;
     *pS2-- = *pbuff++;
 
-    /* Decrement the loop counter */
+    /* Decrement loop counter */
     i--;
   } while (i > 0U);
 
@@ -137,7 +135,7 @@ void arm_dct4_q15(
   pS1 = pState;
 
   /* Initializing the loop counter to N/4 instead of N for loop unrolling */
-  i = (uint32_t) S->N >> 2U;
+  i = S->N >> 2U;
 
   /* Processing with loop unrolling 4 times as N is always multiple of 4.
    * Compute 4 outputs at a time */
@@ -158,16 +156,16 @@ void arm_dct4_q15(
    *     Step2: Calculate RFFT for N-point input
    * ---------------------------------------------------------- */
   /* pInlineBuffer is real input of length N , pState is the complex output of length 2N */
-  arm_rfft_q15(S->pRfft, pInlineBuffer, pState);
+  arm_rfft_q15 (S->pRfft, pInlineBuffer, pState);
 
- /*----------------------------------------------------------------------
-  *  Step3: Multiply the FFT output with the weights.
-  *----------------------------------------------------------------------*/
-  arm_cmplx_mult_cmplx_q15(pState, weights, pState, S->N);
+  /*----------------------------------------------------------------------
+   *  Step3: Multiply the FFT output with the weights.
+   *----------------------------------------------------------------------*/
+  arm_cmplx_mult_cmplx_q15 (pState, weights, pState, S->N);
 
   /* The output of complex multiplication is in 3.13 format.
    * Hence changing the format of N (i.e. 2*N elements) complex numbers to 1.15 format by shifting left by 2 bits. */
-  arm_shift_q15(pState, 2, pState, S->N * 2);
+  arm_shift_q15 (pState, 2, pState, S->N * 2);
 
   /* ----------- Post-processing ---------- */
   /* DCT-IV can be obtained from DCT-II by the equation,
@@ -176,7 +174,7 @@ void arm_dct4_q15(
   /* Getting only real part from the output and Converting to DCT-IV */
 
   /* Initializing the loop counter to N >> 2 for loop unrolling by 4 */
-  i = ((uint32_t) S->N - 1U) >> 2U;
+  i = (S->N - 1U) >> 2U;
 
   /* pbuff initialized to input buffer. */
   pbuff = pInlineBuffer;
@@ -221,7 +219,7 @@ void arm_dct4_q15(
 
   /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
    ** No loop unrolling is used. */
-  i = ((uint32_t) S->N - 1U) % 0x4U;
+  i = (S->N - 1U) % 0x4U;
 
   while (i > 0U)
   {
@@ -229,18 +227,19 @@ void arm_dct4_q15(
     /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
     in = *pS1++ - in;
     *pbuff++ = in;
+
     /* points to the next real value */
     pS1++;
 
-    /* Decrement the loop counter */
+    /* Decrement loop counter */
     i--;
   }
 
 
-   /*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
+  /*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
 
   /* Initializing the loop counter to N/4 instead of N for loop unrolling */
-  i = (uint32_t) S->N >> 2U;
+  i = S->N >> 2U;
 
   /* pbuff initialized to the pInlineBuffer(now contains the output values) */
   pbuff = pInlineBuffer;
@@ -261,17 +260,15 @@ void arm_dct4_q15(
     in = *pbuff;
     *pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15));
 
-    /* Decrement the loop counter */
+    /* Decrement loop counter */
     i--;
   } while (i > 0U);
 
 
 #else
 
-  /* Run the below code for Cortex-M0 */
-
   /* Initializing the loop counter to N/2 */
-  i = (uint32_t) S->Nby2;
+  i = S->Nby2;
 
   do
   {
@@ -292,7 +289,7 @@ void arm_dct4_q15(
   pS1 = pState;
 
   /* Initializing the loop counter */
-  i = (uint32_t) S->N;
+  i = S->N;
 
   do
   {
@@ -308,16 +305,16 @@ void arm_dct4_q15(
    *     Step2: Calculate RFFT for N-point input
    * ---------------------------------------------------------- */
   /* pInlineBuffer is real input of length N , pState is the complex output of length 2N */
-  arm_rfft_q15(S->pRfft, pInlineBuffer, pState);
+  arm_rfft_q15 (S->pRfft, pInlineBuffer, pState);
 
- /*----------------------------------------------------------------------
-  *  Step3: Multiply the FFT output with the weights.
-  *----------------------------------------------------------------------*/
-  arm_cmplx_mult_cmplx_q15(pState, weights, pState, S->N);
+  /*----------------------------------------------------------------------
+   *  Step3: Multiply the FFT output with the weights.
+   *----------------------------------------------------------------------*/
+  arm_cmplx_mult_cmplx_q15 (pState, weights, pState, S->N);
 
   /* The output of complex multiplication is in 3.13 format.
    * Hence changing the format of N (i.e. 2*N elements) complex numbers to 1.15 format by shifting left by 2 bits. */
-  arm_shift_q15(pState, 2, pState, S->N * 2);
+  arm_shift_q15 (pState, 2, pState, S->N * 2);
 
   /* ----------- Post-processing ---------- */
   /* DCT-IV can be obtained from DCT-II by the equation,
@@ -325,9 +322,6 @@ void arm_dct4_q15(
    *       Hence, Y4(0) = Y2(0)/2  */
   /* Getting only real part from the output and Converting to DCT-IV */
 
-  /* Initializing the loop counter */
-  i = ((uint32_t) S->N - 1U);
-
   /* pbuff initialized to input buffer. */
   pbuff = pInlineBuffer;
 
@@ -342,25 +336,29 @@ void arm_dct4_q15(
   /* pState pointer is incremented twice as the real values are located alternatively in the array */
   pS1++;
 
+  /* Initializing the loop counter */
+  i = (S->N - 1U);
+
   do
   {
     /* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
     /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
     in = *pS1++ - in;
     *pbuff++ = in;
+
     /* points to the next real value */
     pS1++;
 
-    /* Decrement the loop counter */
+    /* Decrement loop counter */
     i--;
   } while (i > 0U);
 
-   /*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
+  /*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
 
-  /* Initializing the loop counter */
-  i = (uint32_t) S->N;
+  /* Initializing loop counter */
+  i = S->N;
 
-  /* pbuff initialized to the pInlineBuffer(now contains the output values) */
+  /* pbuff initialized to the pInlineBuffer (now contains the output values) */
   pbuff = pInlineBuffer;
 
   do
@@ -369,14 +367,15 @@ void arm_dct4_q15(
     in = *pbuff;
     *pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15));
 
-    /* Decrement the loop counter */
+    /* Decrement loop counter */
     i--;
+
   } while (i > 0U);
 
-#endif /* #if defined (ARM_MATH_DSP) */
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
 
 }
 
 /**
-   * @} end of DCT4_IDCT4 group
-   */
+  @} end of DCT4_IDCT4 group
+ */
-- 
cgit